package io.prophecy.libs.data;

import io.prophecy.libs.data.DataMatcher;
import io.prophecy.libs.package$;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Predef$;
import scala.StringContext;
import scala.collection.SetLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Nil$;
import scala.collection.immutable.Set;
import scala.collection.mutable.Map;
import scala.collection.mutable.Map$;
import scala.collection.mutable.Set$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;
import scala.runtime.IntRef;

/* compiled from: DataMatcher.scala */
/* loaded from: input_file:io/prophecy/libs/data/DataMatcher$.class */
/**
 * Singleton holder for the data-matching helpers (decompiled from the Scala
 * object in {@code DataMatcher.scala}).
 *
 * <p>All methods are instance methods reached through {@link #MODULE$}. They
 * compare two Spark datasets and report, per column, whether the values agree.
 * The per-column logic lives in {@code DataMatcher$$anonfun$...} closure
 * classes that are not part of this file, so the comments below describe only
 * what the calls in this class show.
 */
public final class DataMatcher$ {
    /**
     * The single instance of this class.
     *
     * <p>Initialised at its declaration: a {@code static final} field cannot be
     * assigned from a constructor in Java, and declaring it as {@code null}
     * would leave every {@code DataMatcher$.MODULE$} call site dereferencing
     * {@code null}.
     */
    public static final DataMatcher$ MODULE$ = new DataMatcher$();

    /**
     * Prints a human-readable comparison of two datasets to standard output.
     *
     * <p>First prints both row counts and whether they are equal, then runs
     * {@link #dataMatch} and hands every result (paired with its index) to the
     * {@code prettyDataMatch$1} closure, and finally prints how many columns
     * matched and how many did not.
     *
     * @param dataset  the dataset reported as "New data"
     * @param dataset2 the dataset reported as "Original data"
     * @param list     column names forwarded unchanged to {@link #dataMatch}
     *                 (presumably the join keys; confirm against the closures)
     */
    public void prettyDataMatch(Dataset<Row> dataset, Dataset<Row> dataset2, List<String> list) {
        long newCount = dataset.count();
        long originalCount = dataset2.count();
        Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"New data count: ", "; Original data count: ", ""})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToLong(newCount), BoxesRunTime.boxToLong(originalCount)})));
        if (newCount == originalCount) {
            // Plain literal: an interpolator with no arguments yields the same text.
            Predef$.MODULE$.println("✅ Data counts are matching!");
        } else {
            Predef$.MODULE$.println("⛔️ Data counts are not matching!");
        }
        Predef$.MODULE$.println("\nMatching column by column:");

        // Mutable counters shared with the closure; their final values are
        // reported below as the matching and mismatching column totals.
        IntRef matchingColumns = IntRef.create(0);
        IntRef mismatchingColumns = IntRef.create(0);
        int originalColumnCount = dataset2.columns().length;
        List indexedResults = (List) dataMatch(dataset, dataset2, list).zipWithIndex(List$.MODULE$.canBuildFrom());
        indexedResults.foreach(new DataMatcher$$anonfun$prettyDataMatch$1(matchingColumns, mismatchingColumns, originalColumnCount));

        // The "\\n" part is intentional: StringContext.s processes escape
        // sequences at runtime, so this prints a real line break.
        Predef$.MODULE$.println(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"\\nData matching finished. Found ", " matching and ", " mismatching columns."})).s(Predef$.MODULE$.genericWrapArray(new Object[]{BoxesRunTime.boxToInteger(matchingColumns.elem), BoxesRunTime.boxToInteger(mismatchingColumns.elem)})));
    }

    /**
     * Compares two datasets and returns one result list covering the columns
     * of {@code dataset2}.
     *
     * <p>The column names of {@code dataset2} are transformed by closure
     * {@code $anonfun$1}, collected into a set and processed in groups of at
     * most 100 names. Each group is expanded by closure {@code $anonfun$2}
     * (which receives both datasets and {@code list}), and the combined output
     * is passed through closure {@code $anonfun$5}, which is given a fresh,
     * empty mutable set (presumably to drop duplicates; confirm in the closure).
     *
     * @param dataset  the first dataset to compare
     * @param dataset2 the second dataset; its columns drive the comparison
     * @param list     column names forwarded to the per-group closure
     * @return the collected results as an immutable Scala list
     */
    public List<DataMatcher.Result> dataMatch(Dataset<Row> dataset, Dataset<Row> dataset2, List<String> list) {
        return Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(dataset2.columns()).map(new DataMatcher$$anonfun$1(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class))))
                .toSet()
                .grouped(100)
                .flatMap(new DataMatcher$$anonfun$2(dataset, dataset2, list))
                .flatMap(new DataMatcher$$anonfun$5(Set$.MODULE$.apply(Nil$.MODULE$)))
                .toList();
    }

    /**
     * Joins the two datasets and builds one result per column of
     * {@code dataset2}.
     *
     * <p>The mangled name indicates a Scala-private method exposed for use by
     * the closure classes. Steps visible here:
     * <ol>
     *   <li>join {@code dataset} with a projection of {@code dataset2}, using a
     *       condition folded from {@code list} starting at literal {@code true};</li>
     *   <li>project the joined data into a comparison dataset;</li>
     *   <li>aggregate it inside a {@code measure} call labelled
     *       "counting matches and mismatches";</li>
     *   <li>filter the comparison dataset and read at most 100 rows inside a
     *       second {@code measure} call, filling a mutable map;</li>
     *   <li>map the per-column values, together with that map, to results.</li>
     * </ol>
     *
     * @param dataset  the left side of the join
     * @param dataset2 the right side of the join; its columns are compared
     * @param list     names folded into the join condition
     * @return one result per element of the intermediate per-column set
     */
    public List<DataMatcher.Result> io$prophecy$libs$data$DataMatcher$$dataMatchPartial(Dataset<Row> dataset, Dataset<Row> dataset2, List<String> list) {
        // Column names of dataset2 after the $anonfun$6 transformation.
        Set columnNames = Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(dataset2.columns()).map(new DataMatcher$$anonfun$6(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)))).toSet();

        // Right side of the join, then the join condition (same order as before).
        Dataset originalProjection = dataset2.select(((SetLike) columnNames.map(new DataMatcher$$anonfun$7(), scala.collection.immutable.Set$.MODULE$.canBuildFrom())).toSeq());
        Column joinCondition = (Column) list.foldLeft(functions$.MODULE$.lit(BoxesRunTime.boxToBoolean(true)), new DataMatcher$$anonfun$8());
        Dataset joined = dataset.join(originalProjection, joinCondition);

        // The projection is three lists concatenated by prepending, so the
        // final order is: leading ::: perColumn ::: trailing.
        List trailingColumns = Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(joined.columns()).filter(new DataMatcher$$anonfun$11())).map(new DataMatcher$$anonfun$12(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))).toList();
        List perColumnExpressions = ((TraversableOnce) columnNames.flatMap(new DataMatcher$$anonfun$10(), scala.collection.immutable.Set$.MODULE$.canBuildFrom())).toList();
        List leadingColumns = Predef$.MODULE$.refArrayOps((Object[]) Predef$.MODULE$.refArrayOps(joined.columns()).map(new DataMatcher$$anonfun$9(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))).toList();
        Dataset comparison = joined.select(trailingColumns.$colon$colon$colon(perColumnExpressions).$colon$colon$colon(leadingColumns));
        StructType schema = comparison.schema();

        // Aggregation pass. The starting row has one value per schema field,
        // produced by $anonfun$13 (presumably zeroed counters; confirm).
        String[] columnNameArray = (String[]) columnNames.toArray(ClassTag$.MODULE$.apply(String.class));
        GenericRowWithSchema initialRow = new GenericRowWithSchema((Object[]) Predef$.MODULE$.refArrayOps(schema.fields()).map(new DataMatcher$$anonfun$13(), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.Any())), schema);
        Row aggregatedRow = (Row) package$.MODULE$.measure(new DataMatcher$$anonfun$14(comparison, schema, columnNameArray, initialRow), "Data matching - counting matches and mismatches");
        Set perColumnStats = (Set) columnNames.map(new DataMatcher$$anonfun$16(aggregatedRow), scala.collection.immutable.Set$.MODULE$.canBuildFrom());

        // Subset selected by $anonfun$17 (presumably the mismatching columns).
        Set selectedColumns = (Set) ((SetLike) perColumnStats.filter(new DataMatcher$$anonfun$17())).map(new DataMatcher$$anonfun$18(), scala.collection.immutable.Set$.MODULE$.canBuildFrom());

        // NOTE(review): reduce on an empty set throws in Scala; this relies on
        // dataset2 having at least one column.
        Dataset exampleRows = comparison.filter((Column) ((TraversableOnce) columnNames.map(new DataMatcher$$anonfun$19(), scala.collection.immutable.Set$.MODULE$.canBuildFrom())).reduce(new DataMatcher$$anonfun$20()));

        // Filled by the closure below from at most 100 fetched rows.
        // NOTE(review): "mimsmatched" in the label looks like a typo; it is
        // left as-is because the label's use is not visible here.
        Map examplesByColumn = Map$.MODULE$.apply(Nil$.MODULE$);
        package$.MODULE$.measure(new DataMatcher$$anonfun$io$prophecy$libs$data$DataMatcher$$dataMatchPartial$1(selectedColumns, examplesByColumn, exampleRows.limit(100).toLocalIterator()), "Data matching - fetching mimsmatched examples");

        return ((TraversableOnce) perColumnStats.map(new DataMatcher$$anonfun$io$prophecy$libs$data$DataMatcher$$dataMatchPartial$2(examplesByColumn), scala.collection.immutable.Set$.MODULE$.canBuildFrom())).toList();
    }

    /**
     * Wraps a list of results in a {@code DataMatcher.Results} instance.
     *
     * @param list the results to wrap
     * @return a new {@code DataMatcher.Results} built from {@code list}
     */
    public DataMatcher.Results Results(List<DataMatcher.Result> list) {
        return new DataMatcher.Results(list);
    }

    /** Private: the only instance is {@link #MODULE$}. */
    private DataMatcher$() {
    }
}
