/*
 * Decompiled with CFR 0.152.
 */
package io.projectglow.gff;

import io.projectglow.common.FeatureSchemas$;
import java.io.Serializable;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Option;
import scala.Predef;
import scala.Predef$;
import scala.Some;
import scala.Tuple2;
import scala.collection.GenSeqLike;
import scala.collection.GenTraversableOnce;
import scala.collection.Seq;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.immutable.Map;
import scala.collection.immutable.Seq$;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.math.Ordering;
import scala.math.Ordering$;
import scala.reflect.ClassTag$;
import scala.runtime.BoxesRunTime;

/**
 * Singleton holder (decompiled Scala {@code object}) for the constants and helper
 * routines used when reading GFF files through Spark's CSV reader.
 *
 * <p>The single instance is published through {@link #MODULE$} by the static initializer.
 */
public final class GffDataSource$ {
    /** The singleton instance; assigned at the very start of the private constructor. */
    public static GffDataSource$ MODULE$;
    private final String attributesMapColumnName;
    private final String COLUMN_DELIMITER;
    private final String ATTRIBUTES_DELIMITER;
    private final String GFF3_TAG_VALUE_DELIMITER;
    private final String GTF_TAG_VALUE_DELIMITER;
    private final String COMMENT_IDENTIFIER;
    private final String NULL_IDENTIFIER;
    private final String ARRAY_DELIMITER;
    private final Map<String, String> csvReadOptions;
    private final String columnPruningConf;

    static {
        new GffDataSource$();
    }

    /** @return name of the helper column holding the parsed attributes map ({@code "attributesMap"}). */
    public String attributesMapColumnName() {
        return this.attributesMapColumnName;
    }

    /** @return the column separator passed to the CSV reader (a tab). */
    public String COLUMN_DELIMITER() {
        return this.COLUMN_DELIMITER;
    }

    /** @return the separator between entries of the attributes column ({@code ";"}). */
    public String ATTRIBUTES_DELIMITER() {
        return this.ATTRIBUTES_DELIMITER;
    }

    /** @return the tag/value separator used when building the attributes map ({@code "="}). */
    public String GFF3_TAG_VALUE_DELIMITER() {
        return this.GFF3_TAG_VALUE_DELIMITER;
    }

    /** @return the GTF tag/value separator (a single space); not used elsewhere in this class. */
    public String GTF_TAG_VALUE_DELIMITER() {
        return this.GTF_TAG_VALUE_DELIMITER;
    }

    /** @return the comment marker passed to the CSV reader ({@code "#"}). */
    public String COMMENT_IDENTIFIER() {
        return this.COMMENT_IDENTIFIER;
    }

    /** @return the token the CSV reader treats as null ({@code "."}). */
    public String NULL_IDENTIFIER() {
        return this.NULL_IDENTIFIER;
    }

    /** @return the array delimiter constant ({@code ","}); not used elsewhere in this class. */
    public String ARRAY_DELIMITER() {
        return this.ARRAY_DELIMITER;
    }

    /**
     * @return the options handed to Spark's CSV reader: {@code sep}, {@code comment},
     *         {@code mode=DROPMALFORMED} and {@code nullValue}.
     */
    public Map<String, String> csvReadOptions() {
        return this.csvReadOptions;
    }

    /** @return the Spark conf key {@code spark.sql.csv.parser.columnPruning.enabled}. */
    public String columnPruningConf() {
        return this.columnPruningConf;
    }

    /**
     * Extracts the mandatory {@code "path"} entry from the data source options.
     *
     * @param options data source options
     * @return the value stored under {@code "path"}
     * @throws IllegalArgumentException if no {@code "path"} entry is present
     */
    public String checkAndGetPath(Map<String, String> options) {
        Option option = options.get((Object)"path");
        if (!(option instanceof Some)) {
            throw new IllegalArgumentException("Path is required");
        }
        return (String)((Some)option).value();
    }

    /**
     * Infers the schema of the GFF file(s) at {@code path}.
     *
     * <p>The file is read with the base GFF schema, the attributes column is parsed into a map,
     * and the distinct non-empty map keys are collected (one representative per
     * case-insensitive spelling). Each key becomes a field whose type is taken from the matching
     * official attribute field, or {@code StringType} when there is no match. Official fields
     * come first in their declared order, all remaining fields follow sorted by name. The result
     * is the base schema without its last field, followed by these attribute fields.
     *
     * <p>CSV column pruning is switched off on the session while the keys are collected and is
     * restored afterwards, also when the read fails.
     *
     * @param sqlContext context whose Spark session is used for the read
     * @param path       location handed to the CSV reader
     * @return the inferred schema
     */
    public StructType inferSchema(SQLContext sqlContext, String path) {
        SparkSession spark = sqlContext.sparkSession();
        boolean originalColumnPruning = new StringOps(Predef$.MODULE$.augmentString(spark.conf().get(this.columnPruningConf()))).toBoolean();
        if (originalColumnPruning) {
            spark.conf().set(this.columnPruningConf(), false);
        }
        scala.collection.immutable.Seq attributeTags;
        try {
            Dataset csvDf = spark.read().options(this.csvReadOptions()).schema(FeatureSchemas$.MODULE$.gffBaseSchema()).csv(path);
            // Explode the map keys, collect them into one set, drop empty tags and keep a single
            // spelling per lower-cased tag. Locale.ROOT keeps the grouping independent of the
            // JVM default locale.
            attributeTags = (scala.collection.immutable.Seq)((TraversableLike)((TraversableLike)((Row[])this.addAttributesMapColumn((Dataset<Row>)csvDf).withColumn(this.attributesMapColumnName(), functions$.MODULE$.explode(functions$.MODULE$.map_keys(functions$.MODULE$.col(this.attributesMapColumnName())))).agg(functions$.MODULE$.collect_set(this.attributesMapColumnName()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new Column[0])).collect())[0].getAs(0)).filter((Function1 & Serializable & scala.Serializable)x$3 -> BoxesRunTime.boxToBoolean((boolean)GffDataSource$.$anonfun$inferSchema$1(x$3)))).groupBy((Function1 & Serializable & scala.Serializable)x$4 -> x$4.toLowerCase(java.util.Locale.ROOT)).mapValues((Function1 & Serializable & scala.Serializable)x$5 -> (String)x$5.head()).values().to(Predef$.MODULE$.fallbackStringCanBuildFrom());
        } finally {
            // Always put the session setting back, even if reading or collecting threw;
            // otherwise a failed inference would leave column pruning disabled for the session.
            spark.conf().set(this.columnPruningConf(), originalColumnPruning);
        }
        scala.collection.immutable.Seq attributeFields = (scala.collection.immutable.Seq)((SeqLike)attributeTags.map((Function1 & Serializable & scala.Serializable)t -> new StructField(t, (DataType)FeatureSchemas$.MODULE$.gffOfficialAttributeFields().find((Function1 & Serializable & scala.Serializable)f -> BoxesRunTime.boxToBoolean((boolean)GffDataSource$.$anonfun$inferSchema$5(t, f))).map((Function1 & Serializable & scala.Serializable)x$6 -> x$6.dataType()).getOrElse((Function0 & Serializable & scala.Serializable)() -> StringType$.MODULE$), StructField$.MODULE$.apply$default$3(), StructField$.MODULE$.apply$default$4()), Seq$.MODULE$.canBuildFrom())).sortBy((Function1 & Serializable & scala.Serializable)f -> {
            Integer n;
            // Sort key: position among the official attribute fields, or one past the end
            // for unofficial tags so they all sort after the official ones (then by name).
            int n2 = ((GenSeqLike)FeatureSchemas$.MODULE$.gffOfficialAttributeFields().map((Function1 & Serializable & scala.Serializable)x$7 -> x$7.name(), scala.collection.Seq$.MODULE$.canBuildFrom())).indexOf((Object)MODULE$.normalizeString(f.name()));
            switch (n2) {
                case -1: {
                    n = BoxesRunTime.boxToInteger((int)(FeatureSchemas$.MODULE$.gffOfficialAttributeFields().length() + 1));
                    break;
                }
                default: {
                    n = BoxesRunTime.boxToInteger((int)n2);
                    break;
                }
            }
            return new Tuple2((Object)n, (Object)f.name());
        }, Ordering$.MODULE$.Tuple2((Ordering)Ordering.Int$.MODULE$, (Ordering)Ordering.String$.MODULE$));
        // Base schema minus its last field, followed by the inferred attribute fields.
        return new StructType((StructField[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])FeatureSchemas$.MODULE$.gffBaseSchema().fields())).dropRight(1))).$plus$plus((GenTraversableOnce)attributeFields, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(StructField.class))));
    }

    /**
     * Adds (or replaces) the {@link #attributesMapColumnName()} column, built with the SQL
     * {@code str_to_map} function from the attributes column, splitting entries on
     * {@link #ATTRIBUTES_DELIMITER()} and tag from value on {@link #GFF3_TAG_VALUE_DELIMITER()}.
     *
     * @param df input data frame containing the attributes column
     * @return {@code df} with the attributes map column
     */
    public Dataset<Row> addAttributesMapColumn(Dataset<Row> df) {
        return df.withColumn(this.attributesMapColumnName(), functions$.MODULE$.expr(new StringOps(Predef$.MODULE$.augmentString(new StringBuilder(94).append("str_to_map(\n           |       ").append(FeatureSchemas$.MODULE$.attributesField().name()).append(",\n           |       \"").append(this.ATTRIBUTES_DELIMITER()).append("\",\n           |       \"").append(this.GFF3_TAG_VALUE_DELIMITER()).append("\"\n           |   )").toString())).stripMargin()));
    }

    /**
     * Rebuilds the attributes map column with every key lower-cased and stripped of underscores
     * (the SQL counterpart of {@link #normalizeString(String)}); values are kept as they are.
     *
     * @param df data frame that already has the attributes map column
     * @return {@code df} with normalized map keys
     */
    public Dataset<Row> normalizeAttributesMapKeys(Dataset<Row> df) {
        return df.withColumn(this.attributesMapColumnName(), functions$.MODULE$.map_from_arrays(functions$.MODULE$.expr(new StringOps(Predef$.MODULE$.augmentString(new StringBuilder(119).append("transform(\n            |       map_keys(").append(this.attributesMapColumnName()).append("),\n            |       k -> regexp_replace(lower(k), \"_\", \"\")\n            |   )").toString())).stripMargin()), functions$.MODULE$.map_values(functions$.MODULE$.col(this.attributesMapColumnName()))));
    }

    /**
     * Keeps only the rows in which at least one base-schema column other than the first is
     * non-null. Judging by the method name this is meant to drop embedded FASTA lines, which
     * presumably parse into rows with a single populated column -- confirm against the reader.
     *
     * @param df data frame read with the base GFF schema
     * @return the filtered data frame
     */
    public Dataset<Row> filterFastaLines(Dataset<Row> df) {
        return df.where(functions$.MODULE$.isnull(functions$.MODULE$.coalesce((Seq)Predef$.MODULE$.wrapRefArray((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Object[])FeatureSchemas$.MODULE$.gffBaseSchema().fieldNames())).drop(1))).map((Function1 & Serializable & scala.Serializable)x$8 -> functions$.MODULE$.col(x$8), Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Column.class)))))).unary_$bang());
    }

    /**
     * Normalizes a tag name for comparison: lower-cases it and removes all underscores.
     *
     * <p>Lower-casing uses {@code Locale.ROOT} so the result does not depend on the JVM default
     * locale.
     *
     * @param s the tag name; must not be {@code null}
     * @return the normalized name
     */
    public String normalizeString(String s) {
        return s.toLowerCase(java.util.Locale.ROOT).replace("_", "");
    }

    /** Lambda body: true when the collected tag is not empty. */
    public static final /* synthetic */ boolean $anonfun$inferSchema$1(String x$3) {
        return !x$3.isEmpty();
    }

    /** Lambda body: true when the field's name equals the normalized form of tag {@code t$1}. */
    public static final /* synthetic */ boolean $anonfun$inferSchema$5(String t$1, StructField f) {
        String fieldName = f.name();
        String normalizedTag = MODULE$.normalizeString(t$1);
        // Null-safe equality, as emitted for Scala's `==`.
        return fieldName == null ? normalizedTag == null : fieldName.equals(normalizedTag);
    }

    private GffDataSource$() {
        MODULE$ = this;
        this.attributesMapColumnName = "attributesMap";
        this.COLUMN_DELIMITER = "\t";
        this.ATTRIBUTES_DELIMITER = ";";
        this.GFF3_TAG_VALUE_DELIMITER = "=";
        this.GTF_TAG_VALUE_DELIMITER = " ";
        this.COMMENT_IDENTIFIER = "#";
        this.NULL_IDENTIFIER = ".";
        this.ARRAY_DELIMITER = ",";
        // Built after the delimiter fields above because it reads them through their accessors.
        this.csvReadOptions = (Map)Predef$.MODULE$.Map().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Tuple2[]{Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"sep"), (Object)this.COLUMN_DELIMITER()), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"comment"), (Object)this.COMMENT_IDENTIFIER()), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"mode"), (Object)"DROPMALFORMED"), Predef.ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc((Object)"nullValue"), (Object)this.NULL_IDENTIFIER())}));
        this.columnPruningConf = "spark.sql.csv.parser.columnPruning.enabled";
    }
}

