/*
 * Decompiled with CFR 0.152.
 */
package ai.starlake.job.infer;

import ai.starlake.config.Settings;
import ai.starlake.config.SparkEnv;
import ai.starlake.config.SparkEnv$;
import ai.starlake.schema.handlers.InferSchemaHandler$;
import ai.starlake.schema.model.Attribute;
import ai.starlake.schema.model.Domain;
import ai.starlake.schema.model.Format;
import ai.starlake.schema.model.Metadata;
import ai.starlake.schema.model.Metadata$;
import ai.starlake.schema.model.Mode;
import ai.starlake.schema.model.Partition;
import ai.starlake.schema.model.Schema;
import ai.starlake.schema.model.Sink;
import ai.starlake.schema.model.WriteMode;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders$;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.Tuple2;
import scala.collection.LinearSeqOptimized;
import scala.collection.Seq;
import scala.collection.TraversableLike;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.StringOps;
import scala.io.Codec$;
import scala.io.Source$;
import scala.math.Ordering;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.util.Try;
import scala.util.Try$;
import scala.util.matching.Regex;

@ScalaSignature(bytes="\u0006\u0001\u0005mb\u0001B\u0001\u0003\u0001-\u0011a\"\u00138gKJ\u001c6\r[3nC*{'M\u0003\u0002\u0004\t\u0005)\u0011N\u001c4fe*\u0011QAB\u0001\u0004U>\u0014'BA\u0004\t\u0003!\u0019H/\u0019:mC.,'\"A\u0005\u0002\u0005\u0005L7\u0001A\n\u0003\u00011\u0001\"!\u0004\t\u000e\u00039Q\u0011aD\u0001\u0006g\u000e\fG.Y\u0005\u0003#9\u0011a!\u00118z%\u00164\u0007\u0002C\n\u0001\u0005\u0003\u0005\u000b1\u0002\u000b\u0002\u0011M,G\u000f^5oON\u0004\"!\u0006\r\u000e\u0003YQ!a\u0006\u0004\u0002\r\r|gNZ5h\u0013\tIbC\u0001\u0005TKR$\u0018N\\4t\u0011\u0015Y\u0002\u0001\"\u0001\u001d\u0003\u0019a\u0014N\\5u}Q\tQ\u0004\u0006\u0002\u001fAA\u0011q\u0004A\u0007\u0002\u0005!)1C\u0007a\u0002)!)!\u0005\u0001C\u0001G\u0005!a.Y7f+\u0005!\u0003CA\u0013)\u001d\tia%\u0003\u0002(\u001d\u00051\u0001K]3eK\u001aL!!\u000b\u0016\u0003\rM#(/\u001b8h\u0015\t9c\u0002C\u0004-\u0001\t\u0007I\u0011B\u0017\u0002\u0011M\u0004\u0018M]6F]Z,\u0012A\f\t\u0003+=J!\u0001\r\f\u0003\u0011M\u0003\u0018M]6F]ZDaA\r\u0001!\u0002\u0013q\u0013!C:qCJ\\WI\u001c<!\u0011\u001d!\u0004A1A\u0005\nU\nqa]3tg&|g.F\u00017!\t9\u0004)D\u00019\u0015\tI$(A\u0002tc2T!a\u000f\u001f\u0002\u000bM\u0004\u0018M]6\u000b\u0005ur\u0014AB1qC\u000eDWMC\u0001@\u0003\ry'oZ\u0005\u0003\u0003b\u0012Ab\u00159be.\u001cVm]:j_:Daa\u0011\u0001!\u0002\u00131\u0014\u0001C:fgNLwN\u001c\u0011\t\u000b\u0015\u0003A\u0011\u0001$\u0002\u0011I,\u0017\r\u001a$jY\u0016$\"a\u0012&\u0011\u0007]BE%\u0003\u0002Jq\t9A)\u0019;bg\u0016$\b\"B&E\u0001\u0004a\u0015\u0001\u00029bi\"\u0004\"!\u0014*\u000e\u00039S!a\u0014)\u0002\u0005\u0019\u001c(BA)=\u0003\u0019A\u0017\rZ8pa&\u00111K\u0014\u0002\u0005!\u0006$\b\u000eC\u0003V\u0001\u0011\u0005a+A\u0007hKR4uN]7bi\u001aKG.\u001a\u000b\u0003I]CQ\u0001\u0017+A\u0002e\u000bQ\u0001\\5oKN\u00042A\u00172%\u001d\tY\u0006M\u0004\u0002]?6\tQL\u0003\u0002_\u0015\u00051AH]8pizJ\u0011aD\u0005\u0003C:\tq\u0001]1dW\u0006<W-\u0003\u0002dI\n!A*[:u\u0015\t\tg\u0002C\u0003g\u0001\u0011\u0005q-\u0001\u0007hKR\u001cV\r]1sCR|'\u000f\u0
006\u0002%Q\")\u0001,\u001aa\u00013\")!\u000e\u0001C\u0001W\u00061r-\u001a;E_6\f\u0017N\u001c#je\u0016\u001cGo\u001c:z\u001d\u0006lW\r\u0006\u0002%Y\")1*\u001ba\u0001\u0019\")a\u000e\u0001C\u0001_\u0006\u0001r-\u001a;TG\",W.\u0019)biR,'O\u001c\u000b\u0003IADQaS7A\u00021CQA\u001d\u0001\u0005\u0002M\f\u0011d\u0019:fCR,G)\u0019;b\rJ\fW.Z,ji\"4uN]7biR9A/!\u0002\u0002\b\u0005-\u0001CA;\u0000\u001d\t1hP\u0004\u0002x{:\u0011\u0001\u0010 \b\u0003snt!\u0001\u0018>\n\u0003}J!!\u0010 \n\u0005mb\u0014BA\u001d;\u0013\t\t\u0007(\u0003\u0003\u0002\u0002\u0005\r!!\u0003#bi\u00064%/Y7f\u0015\t\t\u0007\bC\u0003Yc\u0002\u0007\u0011\f\u0003\u0004\u0002\nE\u0004\r\u0001J\u0001\tI\u0006$\u0018\rU1uQ\"9\u0011QB9A\u0002\u0005=\u0011A\u00025fC\u0012,'\u000fE\u0002\u000e\u0003#I1!a\u0005\u000f\u0005\u001d\u0011un\u001c7fC:Daa\u0001\u0001\u0005\u0002\u0005]A\u0003DA\r\u0003W\ty#a\r\u00026\u0005e\u0002CBA\u000e\u0003C\t)#\u0004\u0002\u0002\u001e)\u0019\u0011q\u0004\b\u0002\tU$\u0018\u000e\\\u0005\u0005\u0003G\tiBA\u0002Uef\u00042!DA\u0014\u0013\r\tIC\u0004\u0002\u0005+:LG\u000fC\u0004\u0002.\u0005U\u0001\u0019\u0001\u0013\u0002\u0015\u0011|W.Y5o\u001d\u0006lW\rC\u0004\u00022\u0005U\u0001\u0019\u0001\u0013\u0002\u0015M\u001c\u0007.Z7b\u001d\u0006lW\rC\u0004\u0002\n\u0005U\u0001\u0019\u0001\u0013\t\u000f\u0005]\u0012Q\u0003a\u0001I\u0005A1/\u0019<f!\u0006$\b\u000e\u0003\u0005\u0002\u000e\u0005U\u0001\u0019AA\b\u0001")
public class InferSchemaJob {
    /**
     * Settings received by the constructor. Within this class they are handed to
     * {@code InferSchemaHandler} when attributes are created and when the YAML is generated.
     * NOTE(review): the public, name-mangled form presumably exists so that compiler-generated
     * closures can read it - confirm before renaming or narrowing visibility.
     */
    public final Settings ai$starlake$job$infer$InferSchemaJob$$settings;
    /** Spark environment built in the constructor from {@link #name()} and the settings. */
    private final SparkEnv sparkEnv;
    /** Spark session taken from {@code sparkEnv} in the constructor. */
    private final SparkSession session;

    /**
     * Returns the fixed name of this job; the constructor passes it to {@code SparkEnv}.
     *
     * @return the literal {@code "InferSchema"}
     */
    public String name() {
        final String jobName = "InferSchema";
        return jobName;
    }

    /**
     * Accessor for the Spark environment created in the constructor.
     *
     * @return the {@code SparkEnv} held by this job
     */
    private SparkEnv sparkEnv() {
        final SparkEnv env = this.sparkEnv;
        return env;
    }

    /**
     * Accessor for the Spark session obtained in the constructor.
     *
     * @return the {@code SparkSession} held by this job
     */
    private SparkSession session() {
        final SparkSession current = this.session;
        return current;
    }

    /**
     * Loads the file at {@code path} as a dataset of text lines using this job's Spark session.
     *
     * @param path location to read; its string form is passed to Spark's {@code textFile}
     * @return the dataset returned by {@code textFile}
     */
    public Dataset<String> readFile(Path path) {
        SparkSession spark = this.session();
        return spark.read().textFile(path.toString());
    }

    /**
     * Guesses the file format by looking only at the first and the last element of {@code lines}.
     *
     * <p>Each check requires the whole line to match its pattern, and the checks run in this
     * order (first hit wins):
     * <ol>
     *   <li>first line begins with an opening brace and last line ends with a closing brace:
     *       {@code "JSON"}</li>
     *   <li>first line begins with an opening square bracket and last line ends with a closing
     *       square bracket: {@code "ARRAY_JSON"}</li>
     *   <li>first line begins with {@code <} and last line ends with {@code >}: {@code "XML"}</li>
     *   <li>anything else: {@code "DSV"}</li>
     * </ol>
     *
     * @param lines lines of the file; {@code head()} and {@code last()} are called on it
     *              unconditionally, so callers are expected to pass a non-empty list
     * @return one of {@code "JSON"}, {@code "ARRAY_JSON"}, {@code "XML"} or {@code "DSV"}
     */
    public String getFormatFile(List<String> lines) {
        CharSequence head = (String)lines.head();
        CharSequence tail = (String)lines.last();

        Regex objectOpen = new StringOps(Predef$.MODULE$.augmentString("\\{.*")).r();
        Regex arrayOpen = new StringOps(Predef$.MODULE$.augmentString("\\[.*")).r();
        Regex objectClose = new StringOps(Predef$.MODULE$.augmentString(".*\\}")).r();
        Regex arrayClose = new StringOps(Predef$.MODULE$.augmentString(".*\\]")).r();
        Regex tagOpen = new StringOps(Predef$.MODULE$.augmentString("<.*")).r();
        Regex tagClose = new StringOps(Predef$.MODULE$.augmentString(".*>")).r();

        // None of the patterns has a capture group, so a defined result from unapplySeq
        // is all that is needed to know the line matched.
        if (!objectOpen.unapplySeq(head).isEmpty() && !objectClose.unapplySeq(tail).isEmpty()) {
            return "JSON";
        }
        if (!arrayOpen.unapplySeq(head).isEmpty() && !arrayClose.unapplySeq(tail).isEmpty()) {
            return "ARRAY_JSON";
        }
        if (!tagOpen.unapplySeq(head).isEmpty() && !tagClose.unapplySeq(tail).isEmpty()) {
            return "XML";
        }
        return "DSV";
    }

    /**
     * Guesses the field separator from the first element of {@code lines}.
     *
     * <p>Letters A-Z/a-z, digits, spaces, both quote characters, parentheses, {@code @},
     * {@code ?}, {@code !}, {@code +} and a handful of accented Latin letters are stripped from
     * the first line. The characters that remain are grouped by value and counted, and the
     * character with the highest count is returned as a one-character string.
     *
     * <p>NOTE(review): when several characters share the highest count, the winner depends on
     * the iteration order of the grouped map - confirm whether callers rely on a specific one.
     * NOTE(review): if nothing remains after stripping (or {@code lines} is empty) the
     * {@code maxBy}/{@code head} calls presumably throw - verify against callers.
     *
     * @param lines lines of the file; only the first element is inspected
     * @return the most frequent remaining character of the first line, as a string
     */
    public String getSeparator(List<String> lines) {
        String firstLine = (String)lines.head();
        // Pipeline: strip "ordinary" characters -> (char, 1) pairs -> group by char -> group size -> entry with the largest size.
        Tuple2 tuple2 = (Tuple2)Predef$.MODULE$.refArrayOps((Object[])Predef$.MODULE$.charArrayOps(firstLine.replaceAll("[A-Za-z0-9 \"'()@?!\u00e9\u00e8\u00ee\u00e0\u00c0\u00c9\u00c8\u00e7+]", "").toCharArray()).map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Tuple2<Object, Object> apply(char x$1) {
                return new Tuple2.mcCI.sp(x$1, 1);
            }
        }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class)))).groupBy((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final char apply(Tuple2<Object, Object> x$2) {
                return x$2._1$mcC$sp();
            }
        }).mapValues((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final int apply(Tuple2<Object, Object>[] x$3) {
                return x$3.length;
            }
        }).toList().maxBy((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final int apply(Tuple2<Object, Object> x0$1) {
                Tuple2<Object, Object> tuple2 = x0$1;
                if (tuple2 != null) {
                    int count;
                    int n = count = tuple2._2$mcI$sp();
                    return n;
                }
                throw new MatchError(tuple2);
            }
        }, (Ordering)Ordering.Int$.MODULE$);
        if (tuple2 != null) {
            // Destructures the winning (separator, count) pair; only the separator is used, the count locals are never read.
            Tuple2.mcCI.sp sp2;
            char separator = tuple2._1$mcC$sp();
            int count = tuple2._2$mcI$sp();
            Tuple2.mcCI.sp sp3 = sp2 = new Tuple2.mcCI.sp(separator, count);
            char separator2 = sp3._1$mcC$sp();
            int count2 = sp3._2$mcI$sp();
            return ((Object)BoxesRunTime.boxToCharacter((char)separator2)).toString();
        }
        throw new MatchError((Object)tuple2);
    }

    /**
     * Returns the string form of {@code path} with its final component (the file name) removed,
     * i.e. the directory part including the trailing separator.
     *
     * <p>Only the last occurrence of the file name is removed. Removing every occurrence, as a
     * plain {@code String.replace} does, corrupts the directory whenever the file name also
     * appears earlier in the path (for example {@code /sales/sales} would become {@code //}).
     *
     * @param path path of the data file
     * @return {@code path.toString()} without the last occurrence of {@code path.getName()};
     *         the unchanged string when the name is empty or cannot be found
     */
    public String getDomainDirectoryName(Path path) {
        String fullPath = path.toString();
        String fileName = path.getName();
        int nameStart = fullPath.lastIndexOf(fileName);
        if (fileName.isEmpty() || nameStart < 0) {
            return fullPath;
        }
        return fullPath.substring(0, nameStart) + fullPath.substring(nameStart + fileName.length());
    }

    /**
     * Returns the string that {@code infer} compiles into the schema's file pattern: the final
     * component of {@code path}.
     *
     * @param path path of the data file
     * @return {@code path.getName()}
     */
    public String getSchemaPattern(Path path) {
        String fileName = path.getName();
        return fileName;
    }

    /**
     * Loads {@code dataPath} into a DataFrame using the reader that matches the format reported
     * by {@link #getFormatFile(List)} for {@code lines}.
     *
     * <ul>
     *   <li>{@code "ARRAY_JSON"}: each file is read whole, and the file contents are parsed as
     *       JSON</li>
     *   <li>{@code "JSON"}: the {@code json} data source</li>
     *   <li>{@code "XML"}: the {@code com.databricks.spark.xml} data source</li>
     *   <li>{@code "DSV"}: the {@code com.databricks.spark.csv} data source, with the delimiter
     *       from {@link #getSeparator(List)}</li>
     * </ul>
     * Schema inference is requested in every case.
     *
     * @param lines    lines of the file, used for format and separator detection
     * @param dataPath location handed to Spark
     * @param header   value of the {@code header} option for the DSV reader; unused otherwise
     * @return the loaded DataFrame
     * @throws MatchError if the detected format is none of the four values above
     */
    public Dataset<Row> createDataFrameWithFormat(List<String> lines, String dataPath, boolean header) {
        String detectedFormat = this.getFormatFile(lines);

        if ("ARRAY_JSON".equals(detectedFormat)) {
            // Keep only the content half of each (path, content) pair produced by wholeTextFiles.
            RDD fileContents = this.session().sparkContext().wholeTextFiles(dataPath, this.session().sparkContext().wholeTextFiles$default$2()).map((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final String apply(Tuple2<String, String> pathAndContent) {
                    if (pathAndContent == null) {
                        throw new MatchError(pathAndContent);
                    }
                    return (String)pathAndContent._2();
                }
            }, ClassTag$.MODULE$.apply(String.class));
            return this.session().read().option("inferSchema", true).json(this.session().createDataset(fileContents, Encoders$.MODULE$.STRING()));
        }
        if ("JSON".equals(detectedFormat)) {
            return this.session().read().format("json").option("inferSchema", true).load(dataPath);
        }
        if ("XML".equals(detectedFormat)) {
            return this.session().read().format("com.databricks.spark.xml").option("inferSchema", true).load(dataPath);
        }
        if ("DSV".equals(detectedFormat)) {
            return this.session().read().format("com.databricks.spark.csv").option("header", header).option("inferSchema", true).option("delimiter", this.getSeparator(lines)).option("parserLib", "UNIVOCITY").load(dataPath);
        }
        throw new MatchError((Object)detectedFormat);
    }

    /**
     * Infers a domain/schema description from a data file and writes it out as YAML.
     *
     * <p>Inside a {@code Try}, the method:
     * <ol>
     *   <li>reads {@code dataPath} with {@code scala.io.Source.fromFile}, trims every line and
     *       drops the empty ones (the source is closed as soon as the lines are in memory);</li>
     *   <li>loads the same path into a DataFrame via
     *       {@link #createDataFrameWithFormat(List, String, boolean)};</li>
     *   <li>derives the format, the "array" flag (format is {@code "ARRAY_JSON"}) and the
     *       separator from the lines;</li>
     *   <li>asks {@code InferSchemaHandler} to build attributes, metadata, a schema whose
     *       pattern is the file name, and a domain whose metadata carries the directory of the
     *       file;</li>
     *   <li>asks {@code InferSchemaHandler} to generate the YAML at {@code savePath}.</li>
     * </ol>
     *
     * @param domainName name given to the generated domain
     * @param schemaName name given to the generated schema
     * @param dataPath   path of the data file to analyse
     * @param savePath   location passed to {@code generateYaml}
     * @param header     whether the file has a header line; forwarded to the DSV reader and
     *                   recorded in the metadata
     * @return a {@code Try} wrapping the whole computation; any exception raised by the steps
     *         above is captured by {@code Try.apply} rather than thrown
     */
    public Try<BoxedUnit> infer(String domainName, String schemaName, String dataPath, String savePath, boolean header) {
        return Try$.MODULE$.apply((Function0)new Serializable(this, domainName, schemaName, dataPath, savePath, header){
            public static final long serialVersionUID = 0L;
            private final /* synthetic */ InferSchemaJob $outer;
            private final String domainName$1;
            private final String schemaName$1;
            private final String dataPath$1;
            private final String savePath$1;
            private final boolean header$1;

            public final void apply() {
                this.apply$mcV$sp();
            }

            public void apply$mcV$sp() {
                Path path = new Path(this.dataPath$1);
                // Keep a handle on the source so the underlying file is released once the lines
                // have been materialised; toList/map/filter on a List are all strict, so nothing
                // reads from the source after the try block.
                scala.io.BufferedSource source = Source$.MODULE$.fromFile(path.toString(), Codec$.MODULE$.fallbackSystemCodec());
                List lines;
                try {
                    lines = (List)((TraversableLike)source.getLines().toList().map((Function1)new Serializable(this){
                        public static final long serialVersionUID = 0L;

                        public final String apply(String x$5) {
                            return x$5.trim();
                        }
                    }, List$.MODULE$.canBuildFrom())).filter((Function1)new Serializable(this){
                        public static final long serialVersionUID = 0L;

                        public final boolean apply(String x$6) {
                            return new StringOps(Predef$.MODULE$.augmentString(x$6)).nonEmpty();
                        }
                    });
                } finally {
                    source.close();
                }
                Dataset<Row> dataframeWithFormat = this.$outer.createDataFrameWithFormat((List<String>)lines, this.dataPath$1, this.header$1);
                String format = this.$outer.getFormatFile((List<String>)lines);
                boolean array = "ARRAY_JSON".equals(format);
                boolean withHeader = this.header$1;
                String separator = this.$outer.getSeparator((List<String>)lines);
                InferSchemaHandler$ inferSchema = InferSchemaHandler$.MODULE$;
                List<Attribute> attributes = inferSchema.createAttributes(dataframeWithFormat.schema(), this.$outer.ai$starlake$job$infer$InferSchemaJob$$settings);
                Metadata metadata = inferSchema.createMetaData(format, (Option<Object>)Option$.MODULE$.apply((Object)BoxesRunTime.boxToBoolean((boolean)array)), (Option<Object>)Option$.MODULE$.apply((Object)BoxesRunTime.boxToBoolean((boolean)withHeader)), (Option<String>)Option$.MODULE$.apply((Object)separator));
                Schema schema = inferSchema.createSchema(this.schemaName$1, Pattern.compile(this.$outer.getSchemaPattern(path)), attributes, (Option<Metadata>)new Some((Object)metadata));
                // Domain-level metadata: every constructor argument keeps its default except the
                // 16th, which receives the directory of the data file (x$7).
                Some x$7 = new Some((Object)this.$outer.getDomainDirectoryName(path));
                Option<Mode> x$8 = Metadata$.MODULE$.apply$default$1();
                Option<Format> x$9 = Metadata$.MODULE$.apply$default$2();
                Option<String> x$10 = Metadata$.MODULE$.apply$default$3();
                Option<Object> x$11 = Metadata$.MODULE$.apply$default$4();
                Option<Object> x$12 = Metadata$.MODULE$.apply$default$5();
                Option<Object> x$13 = Metadata$.MODULE$.apply$default$6();
                Option<String> x$14 = Metadata$.MODULE$.apply$default$7();
                Option<String> x$15 = Metadata$.MODULE$.apply$default$8();
                Option<String> x$16 = Metadata$.MODULE$.apply$default$9();
                Option<WriteMode> x$17 = Metadata$.MODULE$.apply$default$10();
                Option<Partition> x$18 = Metadata$.MODULE$.apply$default$11();
                Option<Sink> x$19 = Metadata$.MODULE$.apply$default$12();
                Option<String> x$20 = Metadata$.MODULE$.apply$default$13();
                Option<Seq<String>> x$21 = Metadata$.MODULE$.apply$default$14();
                Option<Map<String, String>> x$22 = Metadata$.MODULE$.apply$default$15();
                Option<List<String>> x$23 = Metadata$.MODULE$.apply$default$17();
                Option<String> x$24 = Metadata$.MODULE$.apply$default$18();
                Option<Map<String, String>> x$25 = Metadata$.MODULE$.apply$default$19();
                Option<String> x$26 = Metadata$.MODULE$.apply$default$20();
                Option<Map<String, String>> x$27 = Metadata$.MODULE$.apply$default$21();
                Domain domain2 = inferSchema.createDomain(this.domainName$1, (Option<Metadata>)new Some((Object)new Metadata(x$8, x$9, x$10, x$11, x$12, x$13, x$14, x$15, x$16, x$17, x$18, x$19, x$20, x$21, x$22, (Option<String>)x$7, x$23, x$24, x$25, x$26, x$27)), (List<Schema>)List$.MODULE$.apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Schema[]{schema})));
                inferSchema.generateYaml(domain2, this.savePath$1, this.$outer.ai$starlake$job$infer$InferSchemaJob$$settings);
            }
            {
                if ($outer == null) {
                    throw null;
                }
                this.$outer = $outer;
                this.domainName$1 = domainName$1;
                this.schemaName$1 = schemaName$1;
                this.dataPath$1 = dataPath$1;
                this.savePath$1 = savePath$1;
                this.header$1 = header$1;
            }
        });
    }

    public InferSchemaJob(Settings settings) {
        this.ai$starlake$job$infer$InferSchemaJob$$settings = settings;
        this.sparkEnv = new SparkEnv(this.name(), SparkEnv$.MODULE$.$lessinit$greater$default$2(), settings);
        this.session = this.sparkEnv().session();
    }
}

