package bio.ferlab.datalake.spark3.genomics.prepared;

import bio.ferlab.datalake.commons.config.Configuration;
import bio.ferlab.datalake.commons.config.DatasetConf;
import bio.ferlab.datalake.spark3.etl.ETLSingleDestination;
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits$;
import bio.ferlab.datalake.spark3.implicits.GenomicImplicits$;
import bio.ferlab.datalake.spark3.implicits.GenomicImplicits$columns$;
import java.time.LocalDateTime;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StructType;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: VariantCentric.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005-b\u0001B\b\u0011\u0001uA\u0011\u0002\n\u0001\u0003\u0002\u0003\u0006Y!J\u0017\t\u000bM\u0002A\u0011\u0001\u001b\t\u000fe\u0002!\u0019!C!u!1a\b\u0001Q\u0001\nmBqa\u0010\u0001C\u0002\u0013%!\b\u0003\u0004A\u0001\u0001\u0006Ia\u000f\u0005\b\u0003\u0002\u0011\r\u0011\"\u0003;\u0011\u0019\u0011\u0005\u0001)A\u0005w!)1\t\u0001C!\t\"9a\u0010AI\u0001\n\u0003y\b\u0002CA\u000b\u0001E\u0005I\u0011A@\t\u000f\u0005]\u0001\u0001\"\u0011\u0002\u001a!A\u0011q\u0005\u0001\u0012\u0002\u0013\u0005q\u0010\u0003\u0005\u0002*\u0001\t\n\u0011\"\u0001��\u000591\u0016M]5b]R\u001cUM\u001c;sS\u000eT!!\u0005\n\u0002\u0011A\u0014X\r]1sK\u0012T!a\u0005\u000b\u0002\u0011\u001d,gn\\7jGNT!!\u0006\f\u0002\rM\u0004\u0018M]64\u0015\t9\u0002$\u0001\u0005eCR\fG.Y6f\u0015\tI\"$\u0001\u0004gKJd\u0017M\u0019\u0006\u00027\u0005\u0019!-[8\u0004\u0001M\u0011\u0001A\b\t\u0003?\tj\u0011\u0001\t\u0006\u0003CQ\t1!\u001a;m\u0013\t\u0019\u0003E\u0001\u000bF)2\u001b\u0016N\\4mK\u0012+7\u000f^5oCRLwN\\\u0001\u000eG>tg-[4ve\u0006$\u0018n\u001c8\u0011\u0005\u0019ZS\"A\u0014\u000b\u0005!J\u0013AB2p]\u001aLwM\u0003\u0002+-\u000591m\\7n_:\u001c\u0018B\u0001\u0017(\u00055\u0019uN\u001c4jOV\u0014\u0018\r^5p]&\u0011afL\u0001\u0005G>tg-\u0003\u00021c\t\u0019Q\t\u0016'\u000b\u0005I\u0002\u0013A\u0001<3\u0003\u0019a\u0014N\\5u}Q\tQ\u0007\u0006\u00027qA\u0011q\u0007A\u0007\u0002!!)AE\u0001a\u0002K\u0005yQ.Y5o\t\u0016\u001cH/\u001b8bi&|g.F\u0001<!\t1C(\u0003\u0002>O\tYA)\u0019;bg\u0016$8i\u001c8g\u0003Ai\u0017-\u001b8EKN$\u0018N\\1uS>t\u0007%A\tf]JL7\r[3e?Z\f'/[1oiN\f!#\u001a8sS\u000eDW\rZ0wCJL\u0017M\u001c;tA\u0005)RM\u001c:jG\",GmX2p]N,\u0017/^3oG\u0016\u001c\u0018AF3oe&\u001c\u0007.\u001a3`G>t7/Z9vK:\u001cWm\u001d\u0011\u0002\u000f\u0015DHO]1diR\u0019QI\u001d?\u0015\u0005\u0019k\u0007\u0003B$Q'Zs!\u0001\u0013(\u0011\u0005%cU\"\u0001&\u000b\u0005-c\u0012A\u0002\u001fs_>$hHC\u0001N\u0003\u0015\u00198-\u00197b\u0013\tyE*\u0001\u0004Qe\u0016$WMZ\u0005\u0003#J\u00131!T1q\u0015\tyE\n\u0005
\u0002H)&\u0011QK\u0015\u0002\u0007'R\u0014\u0018N\\4\u0011\u0005]SgB\u0001-h\u001d\tIFM\u0004\u0002[C:\u00111L\u0018\b\u0003\u0013rK\u0011!X\u0001\u0004_J<\u0017BA0a\u0003\u0019\t\u0007/Y2iK*\tQ,\u0003\u0002cG\u0006)1\u000f]1sW*\u0011q\fY\u0005\u0003K\u001a\f1a]9m\u0015\t\u00117-\u0003\u0002iS\u00069\u0001/Y2lC\u001e,'BA3g\u0013\tYGNA\u0005ECR\fgI]1nK*\u0011\u0001.\u001b\u0005\u0006E&\u0001\u001dA\u001c\t\u0003_Bl\u0011![\u0005\u0003c&\u0014Ab\u00159be.\u001cVm]:j_:Dqa]\u0005\u0011\u0002\u0003\u0007A/A\bmCN$(+\u001e8ECR,G+[7f!\t)(0D\u0001w\u0015\t9\b0\u0001\u0003uS6,'\"A=\u0002\t)\fg/Y\u0005\u0003wZ\u0014Q\u0002T8dC2$\u0015\r^3US6,\u0007bB?\n!\u0003\u0005\r\u0001^\u0001\u0013GV\u0014(/\u001a8u%VtG)\u0019;f)&lW-A\tfqR\u0014\u0018m\u0019;%I\u00164\u0017-\u001e7uIE*\"!!\u0001+\u0007Q\f\u0019a\u000b\u0002\u0002\u0006A!\u0011qAA\t\u001b\t\tIA\u0003\u0003\u0002\f\u00055\u0011!C;oG\",7m[3e\u0015\r\ty\u0001T\u0001\u000bC:tw\u000e^1uS>t\u0017\u0002BA\n\u0003\u0013\u0011\u0011#\u001e8dQ\u0016\u001c7.\u001a3WCJL\u0017M\\2f\u0003E)\u0007\u0010\u001e:bGR$C-\u001a4bk2$HEM\u0001\u0010iJ\fgn\u001d4pe6\u001c\u0016N\\4mKRA\u00111DA\u0010\u0003G\t)\u0003F\u0002W\u0003;AQA\u0019\u0007A\u00049Da!!\t\r\u0001\u00041\u0015\u0001\u00023bi\u0006Dqa\u001d\u0007\u0011\u0002\u0003\u0007A\u000fC\u0004~\u0019A\u0005\t\u0019\u0001;\u00023Q\u0014\u0018M\\:g_Jl7+\u001b8hY\u0016$C-\u001a4bk2$HEM\u0001\u001aiJ\fgn\u001d4pe6\u001c\u0016N\\4mK\u0012\"WMZ1vYR$3\u0007")
/* loaded from: input_file:bio/ferlab/datalake/spark3/genomics/prepared/VariantCentric.class */
public class VariantCentric extends ETLSingleDestination {
    /*
     * Single-destination ETL that reads the "enriched_variants" and
     * "enriched_consequences" datasets and produces the dataset registered as
     * "es_index_variant_centric": one row per variant whose "genes" array
     * carries, for every gene, the consequences observed for that gene.
     *
     * This file is decompiled Scala; Scala-mangled identifiers such as
     * functions$.MODULE$ or $colon$colon are kept as they are.
     */

    /** Symbol assigned to consequences that have no gene symbol, and to the placeholder gene that hosts them. */
    private static final String NO_GENE_SYMBOL = "NO_GENE";

    private final DatasetConf mainDestination;
    private final DatasetConf enriched_variants;
    private final DatasetConf enriched_consequences;

    /**
     * Returns the destination dataset of this ETL.
     *
     * @return the configuration of the "es_index_variant_centric" dataset
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public DatasetConf mainDestination() {
        return this.mainDestination;
    }

    /** Returns the configuration of the "enriched_variants" source dataset. */
    private DatasetConf enriched_variants() {
        return this.enriched_variants;
    }

    /** Returns the configuration of the "enriched_consequences" source dataset. */
    private DatasetConf enriched_consequences() {
        return this.enriched_consequences;
    }

    /**
     * Reads the two source datasets and returns them keyed by their dataset id.
     *
     * @param localDateTime  not used by this implementation
     * @param localDateTime2 not used by this implementation
     * @param sparkSession   session handed to the dataset readers
     * @return a map with two entries: enriched variants and enriched consequences, each keyed by its dataset id
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public Map<String, Dataset<Row>> extract(LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        // new Tuple2<>(k, v) is what Scala's `k -> v` builds; the variants entry is still created (and read) first.
        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{
                new Tuple2<>(enriched_variants().id(), DatasetConfImplicits$.MODULE$.DatasetConfOperations(enriched_variants()).read(super.conf(), sparkSession)),
                new Tuple2<>(enriched_consequences().id(), DatasetConfImplicits$.MODULE$.DatasetConfOperations(enriched_consequences()).read(super.conf(), sparkSession))
        }));
    }

    /** Default for the first parameter of {@code extract}: the inherited {@code minDateTime()}. */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public LocalDateTime extract$default$1() {
        return minDateTime();
    }

    /** Default for the second parameter of {@code extract}: the current local date-time. */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public LocalDateTime extract$default$2() {
        return LocalDateTime.now();
    }

    /**
     * Builds the variant-centric dataset from the extracted sources.
     *
     * <p>Steps: (1) consequences are collapsed to one row per locus holding a
     * map from gene symbol to sorted consequences; (2) a placeholder
     * {@code NO_GENE} gene is added to every variant; (3) each gene receives its
     * consequences by symbol lookup; (4) a placeholder without consequences is
     * removed; (5) genes are ordered by their best consequence rank; (6) the
     * temporary sort keys are dropped and {@code max_impact_score} is computed.
     *
     * @param map            extracted datasets keyed by dataset id; must contain the enriched variants and
     *                       enriched consequences entries
     * @param localDateTime  not used by this implementation
     * @param localDateTime2 not used by this implementation
     * @param sparkSession   not used by this implementation
     * @return the variants without "updated_on"/"created_on", with the rebuilt "genes" array and a
     *         "max_impact_score" column
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public Dataset<Row> transformSingle(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        Dataset<Row> consequencesByLocus = consequencesByLocus(map.apply(enriched_consequences().id()));
        Dataset<Row> variants = map.apply(enriched_variants().id());

        // Struct type of one element of the "genes" array; the placeholder gene is cast to it below.
        StructType elementType = variants.select("genes", Predef$.MODULE$.wrapRefArray(new String[0])).schema().fields()[0].dataType().elementType();

        // Placeholder gene: symbol NO_GENE followed by the columns produced by VariantCentric$$anonfun$1.
        // NOTE(review): that partial function is compiler-generated and not visible here; presumably it maps
        // the remaining gene fields to null-valued columns - confirm against the Scala source.
        Column noGene = functions$.MODULE$.struct(((List) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(elementType.fields())).toList().collect(new VariantCentric$$anonfun$1(null), List$.MODULE$.canBuildFrom())).$colon$colon(functions$.MODULE$.lit(NO_GENE_SYMBOL).as("symbol"))).cast(elementType);

        Dataset<Row> variantsWithPlaceholder = variants
                .drop(Predef$.MODULE$.wrapRefArray(new String[]{"updated_on", "created_on"}))
                .withColumn("genes", functions$.MODULE$.array_union(functions$.MODULE$.col("genes"), functions$.MODULE$.array(Predef$.MODULE$.wrapRefArray(new Column[]{noGene}))));

        return GenomicImplicits$.MODULE$.GenomicOperations(variantsWithPlaceholder)
                .joinByLocus(consequencesByLocus, "left")
                // Attach to each gene the consequences stored under its symbol in the per-locus map.
                .withColumn("genes", functions$.MODULE$.transform(functions$.MODULE$.col("genes"),
                        gene -> gene.withField("consequences", functions$.MODULE$.col("consequences").apply(gene.apply("symbol")))))
                .drop("consequences")
                // Remove the placeholder gene when no consequence ended up under NO_GENE.
                .withColumn("genes", functions$.MODULE$.filter(functions$.MODULE$.col("genes"),
                        gene -> functions$.MODULE$.not(gene.apply("symbol").$eq$eq$eq(NO_GENE_SYMBOL).$amp$amp(gene.apply("consequences").isNull()))))
                // Wrap each gene with sort_gene (the sort_csq of its smallest consequence) as the leading
                // field, sort the wrappers, then unwrap.
                .withColumn("genes", functions$.MODULE$.transform(functions$.MODULE$.col("genes"),
                        gene -> functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.array_min(gene.apply("consequences")).apply("sort_csq").as("sort_gene"), gene.as("g")}))))
                .withColumn("genes", functions$.MODULE$.array_sort(functions$.MODULE$.col("genes")))
                .withColumn("genes", functions$.MODULE$.transform(functions$.MODULE$.col("genes"), wrapped -> wrapped.apply("g")))
                // Rebuild "consequences" without the temporary sort_csq key. The inner lambda parameter needs
                // its own name: Java rejects a lambda parameter that redeclares an enclosing one.
                .withColumn("genes", functions$.MODULE$.transform(functions$.MODULE$.col("genes"),
                        gene -> gene.dropFields(Predef$.MODULE$.wrapRefArray(new String[]{"sort_gene", "consequences"})).withField("consequences", functions$.MODULE$.transform(gene.apply("consequences"),
                                consequence -> consequence.dropFields(Predef$.MODULE$.wrapRefArray(new String[]{"sort_csq"}))))))
                // Highest impact_score across every consequence of every gene of the variant.
                .withColumn("csq", functions$.MODULE$.flatten(functions$.MODULE$.col("genes.consequences")))
                .withColumn("max_impact_score", functions$.MODULE$.array_max(functions$.MODULE$.col("csq.impact_score")))
                .drop("csq");
    }

    /**
     * Collapses the enriched consequences into a "consequences" column holding a
     * map from gene symbol to the sorted array of that gene's consequences.
     * Grouping is done by the project helper {@code groupByLocus}, which is not
     * visible here; it is presumably one row per locus - confirm.
     *
     * @param enrichedConsequences the enriched consequences dataset
     * @return the result of {@code selectLocus} with the "consequences" map column
     */
    private static Dataset<Row> consequencesByLocus(Dataset<Row> enrichedConsequences) {
        // Default missing symbols to NO_GENE and missing "picked" flags to false; drop columns not carried over.
        Dataset<Row> cleaned = enrichedConsequences
                .withColumn("symbol", functions$.MODULE$.coalesce(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("symbol"), functions$.MODULE$.lit(NO_GENE_SYMBOL)})))
                .drop(Predef$.MODULE$.wrapRefArray(new String[]{"biotype", "ensembl_gene_id", "updated_on", "created_on", "consequences_oid", "normalized_consequences_oid", "original_canonical"}))
                .withColumn("picked", functions$.MODULE$.coalesce(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("picked"), functions$.MODULE$.lit(BoxesRunTime.boxToBoolean(false))})));

        // Rank of a consequence: 0 when picked, 1 when canonical, 2 otherwise.
        Column sortKey = functions$.MODULE$.when(functions$.MODULE$.col("picked").$eq$eq$eq(BoxesRunTime.boxToBoolean(true)), BoxesRunTime.boxToInteger(0)).when(functions$.MODULE$.col("canonical").$eq$eq$eq(BoxesRunTime.boxToBoolean(true)), BoxesRunTime.boxToInteger(1)).otherwise(BoxesRunTime.boxToInteger(2)).as("sort_csq");

        // One struct per consequence. sort_csq stays the first field so that array_sort/array_min, which
        // compare structs field by field, use it as the primary key. "symbol" and the columns listed by
        // locusColumnNames() are removed from the struct because they become grouping keys.
        Column consequenceStruct = functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{sortKey, cleaned.apply("*")})).dropFields(GenomicImplicits$columns$.MODULE$.locusColumnNames().$colon$colon("symbol"));

        // Grouped with "symbol" as the extra key: sorted consequences paired with their symbol.
        Dataset<Row> perGene = GenomicImplicits$.MODULE$.GenomicOperations(cleaned.withColumn("consequences", consequenceStruct))
                .groupByLocus(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("symbol")}))
                .agg(functions$.MODULE$.array_sort(functions$.MODULE$.collect_list("consequences")).as("consequences"), Predef$.MODULE$.wrapRefArray(new Column[0]))
                .withColumn("consequences", functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("symbol"), functions$.MODULE$.col("consequences")})));

        // Grouped with no extra key: the (symbol, consequences) pairs become a map.
        Dataset<Row> perLocus = GenomicImplicits$.MODULE$.GenomicOperations(perGene)
                .groupByLocus(Nil$.MODULE$)
                .agg(functions$.MODULE$.collect_list("consequences").as("consequences"), Predef$.MODULE$.wrapRefArray(new Column[0]))
                .withColumn("consequences", functions$.MODULE$.map_from_entries(functions$.MODULE$.col("consequences")));

        return GenomicImplicits$.MODULE$.GenomicOperations(perLocus).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("consequences")}));
    }

    /** Default for the second parameter of {@code transformSingle}: the inherited {@code minDateTime()}. */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public LocalDateTime transformSingle$default$2() {
        return minDateTime();
    }

    /** Default for the third parameter of {@code transformSingle}: the current local date-time. */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public LocalDateTime transformSingle$default$3() {
        return LocalDateTime.now();
    }

    /**
     * Creates the ETL and resolves its destination and source datasets by name
     * from the configuration.
     *
     * @param configuration configuration handed to the parent ETL; the three datasets are looked up through {@code conf()}
     */
    public VariantCentric(Configuration configuration) {
        super(configuration);
        this.mainDestination = conf().getDataset("es_index_variant_centric");
        this.enriched_variants = conf().getDataset("enriched_variants");
        this.enriched_consequences = conf().getDataset("enriched_consequences");
    }
}
