package bio.ferlab.datalake.spark3.genomics.enriched;

import bio.ferlab.datalake.commons.config.Configuration;
import bio.ferlab.datalake.commons.config.DatasetConf;
import bio.ferlab.datalake.spark3.etl.ETLSingleDestination;
import bio.ferlab.datalake.spark3.genomics.Frequencies$;
import bio.ferlab.datalake.spark3.genomics.FrequencySplit;
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits$;
import bio.ferlab.datalake.spark3.implicits.GenomicImplicits$;
import bio.ferlab.datalake.spark3.implicits.GenomicImplicits$columns$;
import bio.ferlab.datalake.spark3.implicits.SparkUtils$;
import java.time.LocalDateTime;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import scala.MatchError;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.$colon;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxesRunTime;

/* compiled from: Variants.scala */
@ScalaSignature(bytes = "\u0006\u0001\tub\u0001\u0002\u001a4\u0001\u0001C\u0001b\u0012\u0001\u0003\u0002\u0003\u0006I\u0001\u0013\u0005\t)\u0002\u0011\t\u0011)A\u0005\u0011\"AQ\u000b\u0001B\u0001B\u0003%a\u000b\u0003\u0005d\u0001\t\u0005\t\u0015!\u0003e\u0011%\t\bA!A!\u0002\u0017\u0011(\u0010C\u0004\u0002\u0002\u0001!\t!a\u0001\t\u0013\u0005U\u0001A1A\u0005B\u0005]\u0001\u0002CA\u0010\u0001\u0001\u0006I!!\u0007\t\u0013\u0005\u0005\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u0012\u0001\u0001\u0006I!!\u0007\t\u0013\u0005\u0015\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u0014\u0001\u0001\u0006I!!\u0007\t\u0013\u0005%\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u0016\u0001\u0001\u0006I!!\u0007\t\u0013\u00055\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u0018\u0001\u0001\u0006I!!\u0007\t\u0013\u0005E\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u001a\u0001\u0001\u0006I!!\u0007\t\u0013\u0005U\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u001c\u0001\u0001\u0006I!!\u0007\t\u0013\u0005e\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA\u001e\u0001\u0001\u0006I!!\u0007\t\u0013\u0005u\u0002A1A\u0005\u0012\u0005]\u0001\u0002CA 
\u0001\u0001\u0006I!!\u0007\t\u0013\u0005\u0005\u0003A1A\u0005\u0012\u0005]\u0001\u0002CA\"\u0001\u0001\u0006I!!\u0007\t\u000f\u0005\u0015\u0003\u0001\"\u0011\u0002H!I\u0011Q\u0012\u0001\u0012\u0002\u0013\u0005\u0011q\u0012\u0005\n\u0003K\u0003\u0011\u0013!C\u0001\u0003\u001fCq!a*\u0001\t\u0003\nI\u000bC\u0005\u00028\u0002\t\n\u0011\"\u0001\u0002\u0010\"I\u0011\u0011\u0018\u0001\u0012\u0002\u0013\u0005\u0011qR\u0004\b\u0003w\u001b\u0004\u0012AA_\r\u0019\u00114\u0007#\u0001\u0002@\"9\u0011\u0011\u0001\u0012\u0005\u0002\u0005%gABAfE\u0005\ti\r\u0003\u0006\u0002P\u0012\u0012\t\u0011)A\u0005\u0003#Bq!!\u0001%\t\u0003\t\t\u000eC\u0004\u0002Z\u0012\"\t!a7\t\u000f\u0005}G\u0005\"\u0001\u0002b\"9\u0011Q\u001d\u0013\u0005\u0002\u0005\u001d\bb\u0002B\u0001I\u0011\u0005!1\u0001\u0005\b\u0005\u000f!C\u0011\u0001B\u0005\u0011\u001d\u0011\t\u0002\nC\u0001\u0005'AqAa\u0006%\t\u0003\u0011I\u0002C\u0004\u0003&\u0011\"\tAa\n\t\u0013\t=\"%!A\u0005\u0004\tE\u0002\"\u0003B\u001bEE\u0005I\u0011\u0001B\u001c\u0011%\u0011YDII\u0001\n\u0003\u00119D\u0001\u0005WCJL\u0017M\u001c;t\u0015\t!T'\u0001\u0005f]JL7\r[3e\u0015\t1t'\u0001\u0005hK:|W.[2t\u0015\tA\u0014(\u0001\u0004ta\u0006\u00148n\r\u0006\u0003um\n\u0001\u0002Z1uC2\f7.\u001a\u0006\u0003yu\naAZ3sY\u0006\u0014'\"\u0001 
\u0002\u0007\tLwn\u0001\u0001\u0014\u0005\u0001\t\u0005C\u0001\"F\u001b\u0005\u0019%B\u0001#8\u0003\r)G\u000f\\\u0005\u0003\r\u000e\u0013A#\u0012+M'&tw\r\\3EKN$\u0018N\\1uS>t\u0017!\u00049beRL7-\u001b9b]RLE\r\u0005\u0002J%6\t!J\u0003\u0002L\u0019\u0006\u00191/\u001d7\u000b\u00055s\u0015!B:qCJ\\'BA(Q\u0003\u0019\t\u0007/Y2iK*\t\u0011+A\u0002pe\u001eL!a\u0015&\u0003\r\r{G.^7o\u00039\tgMZ3di\u0016$7\u000b^1ukN\fAb\u001d8w\t\u0006$\u0018m]3u\u0013\u0012\u0004\"a\u00161\u000f\u0005as\u0006CA-]\u001b\u0005Q&BA.@\u0003\u0019a$o\\8u})\tQ,A\u0003tG\u0006d\u0017-\u0003\u0002`9\u00061\u0001K]3eK\u001aL!!\u00192\u0003\rM#(/\u001b8h\u0015\tyF,A\u0006ge\u0016\fX/\u001a8dS\u0016\u001c\bcA3k[:\u0011a\r\u001b\b\u00033\u001eL\u0011!X\u0005\u0003Sr\u000bq\u0001]1dW\u0006<W-\u0003\u0002lY\n\u00191+Z9\u000b\u0005%d\u0006C\u00018p\u001b\u0005)\u0014B\u000196\u000591%/Z9vK:\u001c\u0017p\u00159mSR\fQbY8oM&<WO]1uS>t\u0007CA:y\u001b\u0005!(BA;w\u0003\u0019\u0019wN\u001c4jO*\u0011q/O\u0001\bG>lWn\u001c8t\u0013\tIHOA\u0007D_:4\u0017nZ;sCRLwN\\\u0005\u0003wr\fAaY8oM&\u0011QP 
\u0002\u0004\u000bRc%BA@D\u0003\t1('\u0001\u0004=S:LGO\u0010\u000b\u000b\u0003\u000b\ti!a\u0004\u0002\u0012\u0005MA\u0003BA\u0004\u0003\u0017\u00012!!\u0003\u0001\u001b\u0005\u0019\u0004\"B9\u0007\u0001\b\u0011\bbB$\u0007!\u0003\u0005\r\u0001\u0013\u0005\b)\u001a\u0001\n\u00111\u0001I\u0011\u0015)f\u00011\u0001W\u0011\u0015\u0019g\u00011\u0001e\u0003=i\u0017-\u001b8EKN$\u0018N\\1uS>tWCAA\r!\r\u0019\u00181D\u0005\u0004\u0003;!(a\u0003#bi\u0006\u001cX\r^\"p]\u001a\f\u0001#\\1j]\u0012+7\u000f^5oCRLwN\u001c\u0011\u0002!QDw.^:b]\u0012|v-\u001a8p[\u0016\u001c\u0018!\u0005;i_V\u001c\u0018M\u001c3`O\u0016tw.\\3tA\u0005aAo\u001c9nK\u0012|&M]1w_\u0006iAo\u001c9nK\u0012|&M]1w_\u0002\n\u0011c\u001a8p[\u0006$wlZ3o_6,7o\u0018<3\u0003I9gn\\7bI~;WM\\8nKN|fO\r\u0011\u0002!\u001dtw.\\1e?\u0016Dx.\\3t?Z\u0014\u0014!E4o_6\fGmX3y_6,7o\u0018<3A\u0005\trM\\8nC\u0012|v-\u001a8p[\u0016\u001cxL^\u001a\u0002%\u001dtw.\\1e?\u001e,gn\\7fg~38\u0007I\u0001\u0006I\n\u001ch\u000e]\u0001\u0007I\n\u001ch\u000e\u001d\u0011\u0002\u000f\rd\u0017N\u001c<be\u0006A1\r\\5om\u0006\u0014\b%A\u0003hK:,7/\u0001\u0004hK:,7\u000fI\u0001\tgBd\u0017nY3bS\u0006I1\u000f\u001d7jG\u0016\f\u0017\u000eI\u0001\bKb$(/Y2u)\u0019\tI%!\u001e\u0002\nR!\u00111JA7!\u00199\u0016Q\n,\u0002R%\u0019\u0011q\n2\u0003\u00075\u000b\u0007\u000f\u0005\u0003\u0002T\u0005\u001dd\u0002BA+\u0003KrA!a\u0016\u0002d9!\u0011\u0011LA1\u001d\u0011\tY&a\u0018\u000f\u0007e\u000bi&C\u0001R\u0013\ty\u0005+\u0003\u0002N\u001d&\u00111\nT\u0005\u0003S*KA!!\u001b\u0002l\tIA)\u0019;b\rJ\fW.\u001a\u0006\u0003S*Ca!T\u000eA\u0004\u0005=\u0004cA%\u0002r%\u0019\u00111\u000f&\u0003\u0019M\u0003\u0018M]6TKN\u001c\u0018n\u001c8\t\u0013\u0005]4\u0004%AA\u0002\u0005e\u0014a\u00047bgR\u0014VO\u001c#bi\u0016$\u0016.\\3\u0011\t\u0005m\u0014QQ\u0007\u0003\u0003{RA!a 
\u0002\u0002\u0006!A/[7f\u0015\t\t\u0019)\u0001\u0003kCZ\f\u0017\u0002BAD\u0003{\u0012Q\u0002T8dC2$\u0015\r^3US6,\u0007\"CAF7A\u0005\t\u0019AA=\u0003I\u0019WO\u001d:f]R\u0014VO\u001c#bi\u0016$\u0016.\\3\u0002#\u0015DHO]1di\u0012\"WMZ1vYR$\u0013'\u0006\u0002\u0002\u0012*\"\u0011\u0011PAJW\t\t)\n\u0005\u0003\u0002\u0018\u0006\u0005VBAAM\u0015\u0011\tY*!(\u0002\u0013Ut7\r[3dW\u0016$'bAAP9\u0006Q\u0011M\u001c8pi\u0006$\u0018n\u001c8\n\t\u0005\r\u0016\u0011\u0014\u0002\u0012k:\u001c\u0007.Z2lK\u00124\u0016M]5b]\u000e,\u0017!E3yiJ\f7\r\u001e\u0013eK\u001a\fW\u000f\u001c;%e\u0005yAO]1og\u001a|'/\\*j]\u001edW\r\u0006\u0005\u0002,\u0006=\u00161WA[)\u0011\t\t&!,\t\r5s\u00029AA8\u0011\u001d\t\tL\ba\u0001\u0003\u0017\nA\u0001Z1uC\"I\u0011q\u000f\u0010\u0011\u0002\u0003\u0007\u0011\u0011\u0010\u0005\n\u0003\u0017s\u0002\u0013!a\u0001\u0003s\n\u0011\u0004\u001e:b]N4wN]7TS:<G.\u001a\u0013eK\u001a\fW\u000f\u001c;%e\u0005IBO]1og\u001a|'/\\*j]\u001edW\r\n3fM\u0006,H\u000e\u001e\u00134\u0003!1\u0016M]5b]R\u001c\bcAA\u0005EM\u0019!%!1\u0011\t\u0005\r\u0017QY\u0007\u00029&\u0019\u0011q\u0019/\u0003\r\u0005s\u0017PU3g)\t\tiL\u0001\u0007ECR\fgI]1nK>\u00038oE\u0002%\u0003\u0003\f!\u0001\u001a4\u0015\t\u0005M\u0017q\u001b\t\u0004\u0003+$S\"\u0001\u0012\t\u000f\u0005=g\u00051\u0001\u0002R\u0005Ir/\u001b;i\u000f\u0016tW-\u0012=uKJt\u0017\r\u001c*fM\u0016\u0014XM\\2f)\u0011\t\t&!8\t\r5;\u00039AA8\u0003q9\u0018\u000e\u001e5WCJL\u0017M\u001c;FqR,'O\\1m%\u00164WM]3oG\u0016$B!!\u0015\u0002d\"1Q\n\u000ba\u0002\u0003_\nqb^5uQB{\u0007/\u001e7bi&|gn\u001d\u000b\r\u0003S\fi/!=\u0002v\u0006e\u0018Q 
\u000b\u0005\u0003#\nY\u000f\u0003\u0004NS\u0001\u000f\u0011q\u000e\u0005\b\u0003_L\u0003\u0019AA)\u0003=!\bn\\;tC:$w)\u001a8p[\u0016\u001c\bbBAzS\u0001\u0007\u0011\u0011K\u0001\u0007i>\u0004X.\u001a3\t\u000f\u0005]\u0018\u00061\u0001\u0002R\u0005yqM\\8nC\u0012<UM\\8nKN4&\u0007C\u0004\u0002|&\u0002\r!!\u0015\u0002\u001d\u001dtw.\\1e\u000bb|W.Z:We!9\u0011q`\u0015A\u0002\u0005E\u0013aD4o_6\fGmR3o_6,7OV\u001a\u0002\u0013]LG\u000f\u001b#c':\u0003F\u0003BA)\u0005\u000bAq!!\u000e+\u0001\u0004\t\t&A\u0006xSRD7\t\\5om\u0006\u0014H\u0003\u0002B\u0006\u0005\u001f!B!!\u0015\u0003\u000e!1Qj\u000ba\u0002\u0003_Bq!!\u000f,\u0001\u0004\t\t&A\u0005xSRDw)\u001a8fgR!\u0011\u0011\u000bB\u000b\u0011\u001d\ti\u0004\fa\u0001\u0003#\nqb^5uQ\u001a\u0013X-];f]\u000eLWm\u001d\u000b\u000b\u0003#\u0012YB!\b\u0003 \t\r\u0002\"B$.\u0001\u0004A\u0005\"\u0002+.\u0001\u0004A\u0005b\u0002B\u0011[\u0001\u0007\u0011\u0011K\u0001\u0004g:4\b\"B2.\u0001\u0004!\u0017\u0001D<ji\"\u001c\u0006\u000f\\5dK\u0006KG\u0003\u0002B\u0015\u0005[!B!!\u0015\u0003,!1QJ\fa\u0002\u0003_Bq!!\u0011/\u0001\u0004\t\t&\u0001\u0007ECR\fgI]1nK>\u00038\u000f\u0006\u0003\u0002T\nM\u0002bBAh_\u0001\u0007\u0011\u0011K\u0001\u001cI1,7o]5oSR$sM]3bi\u0016\u0014H\u0005Z3gCVdG\u000fJ\u0019\u0016\u0005\te\"f\u0001%\u0002\u0014\u0006YB\u0005\\3tg&t\u0017\u000e\u001e\u0013he\u0016\fG/\u001a:%I\u00164\u0017-\u001e7uII\u0002")
/* loaded from: input_file:bio/ferlab/datalake/spark3/genomics/enriched/Variants.class */
public class Variants extends ETLSingleDestination {
    // Constructor argument; transformSingle() passes it as the first column argument of withFrequencies().
    private final Column participantId;
    // Constructor argument; transformSingle() passes it as the second column argument of withFrequencies().
    private final Column affectedStatus;
    // Id of the SNV dataset: read in extract() and used as the variant source in transformSingle().
    private final String snvDatasetId;
    // Frequency splits forwarded unchanged to withFrequencies().
    private final Seq<FrequencySplit> frequencies;
    // Dataset configurations resolved once in the constructor through conf().getDataset(<id>);
    // the id used for each field is noted beside it.
    private final DatasetConf mainDestination; // "enriched_variants"
    private final DatasetConf thousand_genomes; // "normalized_1000_genomes"
    private final DatasetConf topmed_bravo; // "normalized_topmed_bravo"
    private final DatasetConf gnomad_genomes_v2; // "normalized_gnomad_genomes_v2_1_1"
    private final DatasetConf gnomad_exomes_v2; // "normalized_gnomad_exomes_v2_1_1"
    private final DatasetConf gnomad_genomes_v3; // "normalized_gnomad_genomes_v3"
    private final DatasetConf dbsnp; // "normalized_dbsnp"
    private final DatasetConf clinvar; // "normalized_clinvar"
    private final DatasetConf genes; // "enriched_genes"
    private final DatasetConf spliceai; // "enriched_spliceai"

    /* compiled from: Variants.scala */
    /* loaded from: input_file:bio/ferlab/datalake/spark3/genomics/enriched/Variants$DataFrameOps.class */
    public static class DataFrameOps {
        // DataFrame wrapped by this helper; each with* method below uses it as its starting point.
        private final Dataset<Row> df;

        /**
         * Adds the array column {@code gene_external_reference}, listing the external gene
         * sources for which at least one entry of {@code genes} carries data.
         *
         * <p>The column starts as {@code ["HPO"]} when some element of {@code genes} has a
         * non-null {@code hpo} field of size greater than 0, and as an empty array otherwise.
         * {@code "Orphanet"} and then {@code "OMIM"} are merged in with {@code array_union}
         * when the same test holds for the {@code orphanet} and {@code omim} fields.
         *
         * @param sparkSession session whose implicits are used to build the {@code genes} column
         * @return the wrapped DataFrame with {@code gene_external_reference} added
         * @throws MatchError if an element of the (condition, label) list is {@code null}
         */
        public Dataset<Row> withGeneExternalReference(SparkSession sparkSession) {
            String outputColumn = "gene_external_reference";
            // (condition, label) pairs, created in the order in which the labels are appended.
            Tuple2 orphanet = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(functions$.MODULE$.exists(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"genes"}))).$(Nil$.MODULE$), gene -> {
                return gene.apply("orphanet").isNotNull().and(functions$.MODULE$.size(gene.apply("orphanet")).$greater(BoxesRunTime.boxToInteger(0)));
            })), "Orphanet");
            Tuple2 omim = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(functions$.MODULE$.exists(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"genes"}))).$(Nil$.MODULE$), gene -> {
                return gene.apply("omim").isNotNull().and(functions$.MODULE$.size(gene.apply("omim")).$greater(BoxesRunTime.boxToInteger(0)));
            })), "OMIM");
            // The fold seed is the wrapped DataFrame with the column initialised from the HPO test.
            return (Dataset) new $colon.colon(orphanet, new $colon.colon(omim, Nil$.MODULE$)).foldLeft(this.df.withColumn(outputColumn, functions$.MODULE$.when(functions$.MODULE$.exists(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"genes"}))).$(Nil$.MODULE$), gene -> {
                return gene.apply("hpo").isNotNull().and(functions$.MODULE$.size(gene.apply("hpo")).$greater(BoxesRunTime.boxToInteger(0)));
            }), functions$.MODULE$.array(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.lit("HPO")}))).otherwise(functions$.MODULE$.array(Nil$.MODULE$))), (accumulated, reference) -> {
                // Fresh local names: a lambda body may not redeclare its own parameters.
                Tuple2 conditionAndLabel = (Tuple2) reference;
                if (conditionAndLabel == null) {
                    // Same failure as the original pattern match: the error carries the (accumulator, element) pair.
                    throw new MatchError(new Tuple2(accumulated, reference));
                }
                Dataset current = (Dataset) accumulated;
                // Append the label only where the condition holds; otherwise keep the column as is.
                return current.withColumn(outputColumn, functions$.MODULE$.when((Column) conditionAndLabel._1(), functions$.MODULE$.array_union(functions$.MODULE$.col(outputColumn), functions$.MODULE$.array(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.lit((String) conditionAndLabel._2())})))).otherwise(functions$.MODULE$.col(outputColumn)));
            });
        }

        /**
         * Adds the array column {@code variant_external_reference}, listing the external variant
         * sources that have data for the row.
         *
         * <p>The column starts as {@code ["DBSNP"]} when {@code rsnumber} is not null and as an
         * empty array otherwise; {@code "Clinvar"} is then merged in with {@code array_union}
         * when the {@code clinvar} column is not null.
         *
         * @param sparkSession session whose implicits are used to build the column references
         * @return the wrapped DataFrame with {@code variant_external_reference} added
         * @throws MatchError if an element of the (condition, label) list is {@code null}
         */
        public Dataset<Row> withVariantExternalReference(SparkSession sparkSession) {
            String outputColumn = "variant_external_reference";
            // Single (condition, label) pair appended on top of the DBSNP seed.
            Tuple2 clinvar = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"clinvar"}))).$(Nil$.MODULE$).isNotNull()), "Clinvar");
            return (Dataset) new $colon.colon(clinvar, Nil$.MODULE$).foldLeft(this.df.withColumn(outputColumn, functions$.MODULE$.when(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"rsnumber"}))).$(Nil$.MODULE$).isNotNull(), functions$.MODULE$.array(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.lit("DBSNP")}))).otherwise(functions$.MODULE$.array(Nil$.MODULE$))), (accumulated, reference) -> {
                // Fresh local names: a lambda body may not redeclare its own parameters.
                Tuple2 conditionAndLabel = (Tuple2) reference;
                if (conditionAndLabel == null) {
                    // Same failure as the original pattern match: the error carries the (accumulator, element) pair.
                    throw new MatchError(new Tuple2(accumulated, reference));
                }
                Dataset current = (Dataset) accumulated;
                // Append the label only where the condition holds; otherwise keep the column as is.
                return current.withColumn(outputColumn, functions$.MODULE$.when((Column) conditionAndLabel._1(), functions$.MODULE$.array_union(functions$.MODULE$.col(outputColumn), functions$.MODULE$.array(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.lit((String) conditionAndLabel._2())})))).otherwise(functions$.MODULE$.col(outputColumn)));
            });
        }

        /**
         * Left-joins five population-frequency tables onto the wrapped DataFrame and groups them
         * under a single struct column named {@code external_frequencies}.
         *
         * <p>Each table is first reduced with {@code selectLocus} to the frequency columns listed
         * below, then merged with {@code joinAndMerge} under its own column name.
         *
         * @param dataset      merged as {@code thousand_genomes} ({@code ac}, {@code af}, {@code an})
         * @param dataset2     merged as {@code topmed_bravo} (adds {@code homozygotes} as {@code hom}
         *                     and {@code heterozygotes} as {@code het})
         * @param dataset3     merged as {@code gnomad_genomes_2_1_1} (adds {@code hom})
         * @param dataset4     merged as {@code gnomad_exomes_2_1_1} (adds {@code hom})
         * @param dataset5     merged as {@code gnomad_genomes_3} (adds {@code nhomalt} as {@code hom})
         * @param sparkSession session whose implicits are used to build the column references
         * @return all columns of the wrapped DataFrame plus {@code external_frequencies}
         */
        public Dataset<Row> withPopulations(Dataset<Row> dataset, Dataset<Row> dataset2, Dataset<Row> dataset3, Dataset<Row> dataset4, Dataset<Row> dataset5, SparkSession sparkSession) {
            Dataset<Row> thousandGenomesFreqs = GenomicImplicits$.MODULE$.GenomicOperations(dataset).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"ac"}))).$(Nil$.MODULE$).cast("long"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"af"}))).$(Nil$.MODULE$),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"an"}))).$(Nil$.MODULE$).cast("long")
            }));
            Dataset<Row> topmedFreqs = GenomicImplicits$.MODULE$.GenomicOperations(dataset2).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"ac"}))).$(Nil$.MODULE$).cast("long"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"af"}))).$(Nil$.MODULE$),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"an"}))).$(Nil$.MODULE$).cast("long"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"homozygotes"}))).$(Nil$.MODULE$).cast("long").as("hom"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"heterozygotes"}))).$(Nil$.MODULE$).cast("long").as("het")
            }));
            Dataset<Row> gnomadGenomesV2Freqs = GenomicImplicits$.MODULE$.GenomicOperations(dataset3).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"ac"}))).$(Nil$.MODULE$).cast("long"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"af"}))).$(Nil$.MODULE$),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"an"}))).$(Nil$.MODULE$).cast("long"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"hom"}))).$(Nil$.MODULE$).cast("long")
            }));

            // Joins are applied one after another; the last two lookups are built only when
            // their join is reached, exactly as in the single-expression form.
            Dataset<Row> withThousandGenomes = GenomicImplicits$.MODULE$.GenomicOperations(this.df).joinAndMerge(thousandGenomesFreqs, "thousand_genomes", "left");
            Dataset<Row> withTopmed = GenomicImplicits$.MODULE$.GenomicOperations(withThousandGenomes).joinAndMerge(topmedFreqs, "topmed_bravo", "left");
            Dataset<Row> withGnomadGenomesV2 = GenomicImplicits$.MODULE$.GenomicOperations(withTopmed).joinAndMerge(gnomadGenomesV2Freqs, "gnomad_genomes_2_1_1", "left");
            Dataset<Row> withGnomadExomesV2 = GenomicImplicits$.MODULE$.GenomicOperations(withGnomadGenomesV2).joinAndMerge(
                    GenomicImplicits$.MODULE$.GenomicOperations(dataset4).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"ac"}))).$(Nil$.MODULE$).cast("long"),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"af"}))).$(Nil$.MODULE$),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"an"}))).$(Nil$.MODULE$).cast("long"),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"hom"}))).$(Nil$.MODULE$).cast("long")
                    })),
                    "gnomad_exomes_2_1_1",
                    "left");
            Dataset<Row> withGnomadGenomesV3 = GenomicImplicits$.MODULE$.GenomicOperations(withGnomadExomesV2).joinAndMerge(
                    GenomicImplicits$.MODULE$.GenomicOperations(dataset5).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"ac"}))).$(Nil$.MODULE$).cast("long"),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"af"}))).$(Nil$.MODULE$),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"an"}))).$(Nil$.MODULE$).cast("long"),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"nhomalt"}))).$(Nil$.MODULE$).cast("long").as("hom")
                    })),
                    "gnomad_genomes_3",
                    "left");

            // Keep the original columns and nest the five merged columns in one struct.
            return withGnomadGenomesV3.select(Predef$.MODULE$.wrapRefArray(new Column[]{
                    this.df.apply("*"),
                    functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{
                            functions$.MODULE$.col("thousand_genomes"),
                            functions$.MODULE$.col("topmed_bravo"),
                            functions$.MODULE$.col("gnomad_genomes_2_1_1"),
                            functions$.MODULE$.col("gnomad_exomes_2_1_1"),
                            functions$.MODULE$.col("gnomad_genomes_3")
                    })).as("external_frequencies")
            }));
        }

        /**
         * Left-joins the given table by locus and replaces the {@code name} column with
         * {@code rsnumber}: the first non-null of the wrapped DataFrame's {@code name} and the
         * joined table's {@code name}.
         *
         * @param dataset table providing a {@code name} column (dbSNP data at the call site)
         * @return the wrapped DataFrame's columns without {@code name}, plus {@code rsnumber}
         */
        public Dataset<Row> withDbSNP(Dataset<Row> dataset) {
            Dataset<Row> joined = GenomicImplicits$.MODULE$.GenomicOperations(this.df).joinByLocus(dataset, "left");
            Column everythingButName = this.df.drop("name").apply("*");
            Column rsnumber = functions$.MODULE$.coalesce(Predef$.MODULE$.wrapRefArray(new Column[]{this.df.apply("name"), dataset.apply("name")})).as("rsnumber");
            return joined.select(Predef$.MODULE$.wrapRefArray(new Column[]{everythingButName, rsnumber}));
        }

        /**
         * Left-joins selected ClinVar columns onto the wrapped DataFrame, merged under the
         * column name {@code clinvar}.
         *
         * <p>The lookup keeps {@code name} (renamed {@code clinvar_id}), {@code clin_sig},
         * {@code conditions}, {@code inheritance} and {@code interpretations}.
         *
         * @param dataset      table providing the columns listed above
         * @param sparkSession session whose implicits are used to build the column references
         * @return the result of {@code joinAndMerge} with the reduced lookup
         */
        public Dataset<Row> withClinvar(Dataset<Row> dataset, SparkSession sparkSession) {
            Dataset<Row> clinvarLookup = GenomicImplicits$.MODULE$.GenomicOperations(dataset).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"name"}))).$(Nil$.MODULE$).as("clinvar_id"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"clin_sig"}))).$(Nil$.MODULE$),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"conditions"}))).$(Nil$.MODULE$),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"inheritance"}))).$(Nil$.MODULE$),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"interpretations"}))).$(Nil$.MODULE$)
            }));
            return GenomicImplicits$.MODULE$.GenomicOperations(this.df).joinAndMerge(clinvarLookup, "clinvar", "left");
        }

        /**
         * Attaches matching gene rows to each variant as an array column named {@code genes}.
         *
         * <p>A gene row matches when its {@code chromosome} equals the variant's and its
         * {@code symbol} is contained in the variant's {@code genes_symbol} array. The left join
         * is then grouped by locus: the variant columns are taken from the first row of each
         * group and the gene columns (without {@code chromosome}) are collected into a list.
         *
         * @param dataset gene table providing {@code chromosome} and {@code symbol}
         * @return the variant columns plus {@code genes}
         */
        public Dataset<Row> withGenes(Dataset<Row> dataset) {
            Column sameChromosome = this.df.apply("chromosome").$eq$eq$eq(dataset.apply("chromosome"));
            Column symbolListed = functions$.MODULE$.array_contains(this.df.apply("genes_symbol"), dataset.apply("symbol"));
            // Drop the gene-side chromosome so that only the variant's copy remains after the join.
            Dataset<Row> variantsWithGeneRows = this.df.join(dataset, sameChromosome.$amp$amp(symbolListed), "left").drop(dataset.apply("chromosome"));
            Dataset<Row> grouped = GenomicImplicits$.MODULE$.GenomicOperations(variantsWithGeneRows)
                    .groupByLocus(Nil$.MODULE$)
                    .agg(
                            functions$.MODULE$.first(functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{this.df.apply("*")}))).as("variant"),
                            Predef$.MODULE$.wrapRefArray(new Column[]{
                                    functions$.MODULE$.collect_list(functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{dataset.drop("chromosome").apply("*")}))).as("genes")
                            }));
            return grouped.select("variant.*", Predef$.MODULE$.wrapRefArray(new String[]{"genes"}));
        }

        /**
         * Inner-joins, by locus, the frequencies computed from {@code dataset} onto the wrapped
         * DataFrame.
         *
         * @param column  first column argument passed to {@code freq}
         * @param column2 second column argument passed to {@code freq}
         * @param dataset table the frequencies are computed from
         * @param seq     frequency splits; when it equals the empty list the wrapped DataFrame
         *                is returned untouched and nothing is computed
         * @return the wrapped DataFrame, joined with the frequencies unless {@code seq} is empty
         */
        public Dataset<Row> withFrequencies(Column column, Column column2, Dataset<Row> dataset, Seq<FrequencySplit> seq) {
            // No split requested: nothing to compute or join.
            if (Nil$.MODULE$.equals(seq)) {
                return this.df;
            }
            return GenomicImplicits$.MODULE$.GenomicOperations(this.df).joinByLocus(Frequencies$.MODULE$.FrequencyOperations(dataset).freq(column, column2, seq), "inner");
        }

        /**
         * Adds a {@code spliceai} struct to every element of the {@code genes} array.
         *
         * <p>Steps, in order:
         * <ol>
         *   <li>explode {@code genes} (outer) into one {@code gene} per row and expose
         *       {@code gene.symbol} as {@code symbol};</li>
         *   <li>reduce {@code dataset} to {@code symbol} and {@code max_score} (renamed
         *       {@code spliceai}), then rebuild {@code spliceai} as a struct of {@code ds} and
         *       {@code type}, where {@code type} is null when {@code spliceai.ds} equals 0;</li>
         *   <li>left-join both on the locus column names plus {@code symbol};</li>
         *   <li>fold {@code spliceai} into the {@code gene} struct, group by locus and collect
         *       the {@code gene} values back into {@code genes}.</li>
         * </ol>
         *
         * @param dataset      SpliceAI table providing {@code symbol} and {@code max_score}
         * @param sparkSession session whose implicits are used to build the column references
         * @return the variant columns (without the old {@code genes}) plus the rebuilt {@code genes}
         */
        public Dataset<Row> withSpliceAi(Dataset<Row> dataset, SparkSession sparkSession) {
            Dataset<Row> onePerGene = this.df.select(Predef$.MODULE$.wrapRefArray(new Column[]{
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"*"}))).$(Nil$.MODULE$),
                    functions$.MODULE$.explode_outer(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"genes"}))).$(Nil$.MODULE$)).as("gene"),
                    sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"gene.symbol"}))).$(Nil$.MODULE$).as("symbol")
            }));

            // "type" is a temporary column: it is computed, packed into the struct, then dropped.
            Dataset<Row> scores = GenomicImplicits$.MODULE$.GenomicOperations(dataset)
                    .selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"symbol"}))).$(Nil$.MODULE$),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"max_score"}))).$(Nil$.MODULE$).as("spliceai")
                    }))
                    .withColumn("type", functions$.MODULE$.when(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"spliceai.ds"}))).$(Nil$.MODULE$).$eq$eq$eq(BoxesRunTime.boxToInteger(0)), (Object) null).otherwise(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"spliceai.type"}))).$(Nil$.MODULE$)))
                    .withColumn("spliceai", functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"spliceai.ds"}))).$(Nil$.MODULE$).as("ds"),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"type"}))).$(Nil$.MODULE$)
                    })))
                    .drop("type");

            Seq joinKeys = (Seq) GenomicImplicits$columns$.MODULE$.locusColumnNames().$colon$plus("symbol", List$.MODULE$.canBuildFrom());

            Dataset<Row> genesWithScore = onePerGene
                    .join(scores, joinKeys, "left")
                    .drop("symbol")
                    .withColumn("gene", functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"gene.*"}))).$(Nil$.MODULE$),
                            sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"spliceai"}))).$(Nil$.MODULE$)
                    })));

            Dataset<Row> regrouped = GenomicImplicits$.MODULE$.GenomicOperations(genesWithScore)
                    .groupByLocus(Nil$.MODULE$)
                    .agg(
                            functions$.MODULE$.first(functions$.MODULE$.struct(Predef$.MODULE$.wrapRefArray(new Column[]{this.df.drop("genes").apply("*")}))).as("variant"),
                            Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.collect_list("gene").as("genes")}));
            return regrouped.select("variant.*", Predef$.MODULE$.wrapRefArray(new String[]{"genes"}));
        }

        /**
         * Wraps a DataFrame so that the {@code with*} enrichment methods of this class can be
         * called on it.
         *
         * @param dataset the DataFrame to wrap; stored as is, without copying or validation
         */
        public DataFrameOps(Dataset<Row> dataset) {
            this.df = dataset;
        }
    }

    /**
     * Static forwarder to {@code Variants$.MODULE$.DataFrameOps}, which supplies the
     * {@link DataFrameOps} wrapper for the given DataFrame.
     *
     * @param dataset the DataFrame to wrap
     * @return whatever the companion object's {@code DataFrameOps} method returns
     */
    public static DataFrameOps DataFrameOps(Dataset<Row> dataset) {
        DataFrameOps wrapper = Variants$.MODULE$.DataFrameOps(dataset);
        return wrapper;
    }

    /**
     * Returns the destination dataset configuration, resolved in the constructor from the id
     * {@code "enriched_variants"}.
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public DatasetConf mainDestination() {
        return mainDestination;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_1000_genomes"}. */
    public DatasetConf thousand_genomes() {
        return thousand_genomes;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_topmed_bravo"}. */
    public DatasetConf topmed_bravo() {
        return topmed_bravo;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_gnomad_genomes_v2_1_1"}. */
    public DatasetConf gnomad_genomes_v2() {
        return gnomad_genomes_v2;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_gnomad_exomes_v2_1_1"}. */
    public DatasetConf gnomad_exomes_v2() {
        return gnomad_exomes_v2;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_gnomad_genomes_v3"}. */
    public DatasetConf gnomad_genomes_v3() {
        return gnomad_genomes_v3;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_dbsnp"}. */
    public DatasetConf dbsnp() {
        return dbsnp;
    }

    /** Returns the dataset configuration resolved from the id {@code "normalized_clinvar"}. */
    public DatasetConf clinvar() {
        return clinvar;
    }

    /** Returns the dataset configuration resolved from the id {@code "enriched_genes"}. */
    public DatasetConf genes() {
        return genes;
    }

    /** Returns the dataset configuration resolved from the id {@code "enriched_spliceai"}. */
    public DatasetConf spliceai() {
        return spliceai;
    }

    /**
     * Reads every source dataset and returns them keyed by dataset id.
     *
     * <p>The nine configured reference datasets are keyed by their {@code id()}; the SNV
     * dataset is looked up through {@code conf().getDataset(snvDatasetId)} and keyed by
     * {@code snvDatasetId} itself.
     *
     * @param localDateTime  not used by this implementation
     * @param localDateTime2 not used by this implementation
     * @param sparkSession   session passed to each {@code read} call
     * @return a map from dataset id to the DataFrame read for it
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public Map<String, Dataset<Row>> extract(LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        // One (id -> DataFrame) entry per source, read in the order listed.
        Tuple2[] sources = new Tuple2[]{
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(thousand_genomes().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(thousand_genomes()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(topmed_bravo().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(topmed_bravo()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(gnomad_genomes_v2().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(gnomad_genomes_v2()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(gnomad_exomes_v2().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(gnomad_exomes_v2()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(gnomad_genomes_v3().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(gnomad_genomes_v3()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(dbsnp().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(dbsnp()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(clinvar().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(clinvar()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(genes().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(genes()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(spliceai().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(spliceai()).read(super.conf(), sparkSession)),
                Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(this.snvDatasetId), DatasetConfImplicits$.MODULE$.DatasetConfOperations(conf().getDataset(this.snvDatasetId)).read(super.conf(), sparkSession))
        };
        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(sources));
    }

    /**
     * Returns {@code minDateTime()}.
     *
     * <p>NOTE(review): the {@code $default$1} suffix follows the Scala compiler's naming for
     * default-argument accessors, so this is presumably the default for the first parameter
     * of {@code extract} — confirm against Variants.scala.
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public LocalDateTime extract$default$1() {
        final LocalDateTime earliest = minDateTime();
        return earliest;
    }

    /**
     * Returns the current local date-time, evaluated on every call.
     *
     * <p>NOTE(review): the {@code $default$2} suffix follows the Scala compiler's naming for
     * default-argument accessors, so this is presumably the default for the second parameter
     * of {@code extract} — confirm against Variants.scala.
     */
    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public LocalDateTime extract$default$2() {
        final LocalDateTime now = LocalDateTime.now();
        return now;
    }

    /**
     * Builds the enriched variants DataFrame from the extracted sources.
     *
     * <p>Pipeline, in order:
     * <ol>
     *   <li>take {@code hgvsg}, {@code genes_symbol}, {@code name}, {@code end} and
     *       {@code variant_class} from the SNV dataset, group by locus and aggregate each
     *       column with {@code firstAs};</li>
     *   <li>add {@code dna_change} ({@code reference} and {@code alternate} joined with
     *       {@code ">"}) and {@code assembly_version} (literal {@code "GRCh38"});</li>
     *   <li>apply {@code withFrequencies}, {@code withPopulations}, {@code withDbSNP},
     *       {@code withClinvar}, {@code withGenes}, {@code withSpliceAi},
     *       {@code withGeneExternalReference} and {@code withVariantExternalReference};</li>
     *   <li>add {@code locus} (locus columns joined with {@code "-"}) and {@code hash}
     *       (SHA-1 of {@code locus}), then drop {@code genes_symbol}.</li>
     * </ol>
     *
     * @param map            extracted DataFrames keyed by dataset id
     * @param localDateTime  not used by this implementation
     * @param localDateTime2 not used by this implementation
     * @param sparkSession   session forwarded to the enrichment steps that need it
     * @return the enriched variants DataFrame
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public Dataset<Row> transformSingle(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        Dataset<Row> snvColumns = GenomicImplicits$.MODULE$.GenomicOperations((Dataset) map.apply(this.snvDatasetId)).selectLocus(Predef$.MODULE$.wrapRefArray(new Column[]{
                functions$.MODULE$.col("hgvsg"),
                functions$.MODULE$.col("genes_symbol"),
                functions$.MODULE$.col("name"),
                functions$.MODULE$.col("end"),
                functions$.MODULE$.col("variant_class")
        }));
        Dataset<Row> variants = GenomicImplicits$.MODULE$.GenomicOperations(snvColumns)
                .groupByLocus(Nil$.MODULE$)
                .agg(SparkUtils$.MODULE$.firstAs("hgvsg"), Predef$.MODULE$.wrapRefArray(new Column[]{
                        SparkUtils$.MODULE$.firstAs("genes_symbol"),
                        SparkUtils$.MODULE$.firstAs("name"),
                        SparkUtils$.MODULE$.firstAs("end"),
                        SparkUtils$.MODULE$.firstAs("variant_class")
                }))
                .withColumn("dna_change", functions$.MODULE$.concat_ws(">", Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.col("reference"), functions$.MODULE$.col("alternate")})))
                .withColumn("assembly_version", functions$.MODULE$.lit("GRCh38"));

        // Enrichment steps, applied strictly in sequence; each one wraps the previous result.
        Dataset<Row> withFrequencies = Variants$.MODULE$.DataFrameOps(variants).withFrequencies(this.participantId, this.affectedStatus, (Dataset) map.apply(this.snvDatasetId), this.frequencies);
        Dataset<Row> withPopulations = Variants$.MODULE$.DataFrameOps(withFrequencies).withPopulations(
                (Dataset) map.apply(thousand_genomes().id()),
                (Dataset) map.apply(topmed_bravo().id()),
                (Dataset) map.apply(gnomad_genomes_v2().id()),
                (Dataset) map.apply(gnomad_exomes_v2().id()),
                (Dataset) map.apply(gnomad_genomes_v3().id()),
                sparkSession);
        Dataset<Row> withDbSnp = Variants$.MODULE$.DataFrameOps(withPopulations).withDbSNP((Dataset) map.apply(dbsnp().id()));
        Dataset<Row> withClinvar = Variants$.MODULE$.DataFrameOps(withDbSnp).withClinvar((Dataset) map.apply(clinvar().id()), sparkSession);
        Dataset<Row> withGenes = Variants$.MODULE$.DataFrameOps(withClinvar).withGenes((Dataset) map.apply(genes().id()));
        Dataset<Row> withSpliceAi = Variants$.MODULE$.DataFrameOps(withGenes).withSpliceAi((Dataset) map.apply(spliceai().id()), sparkSession);
        Dataset<Row> withGeneReferences = Variants$.MODULE$.DataFrameOps(withSpliceAi).withGeneExternalReference(sparkSession);
        Dataset<Row> withVariantReferences = Variants$.MODULE$.DataFrameOps(withGeneReferences).withVariantExternalReference(sparkSession);

        return withVariantReferences
                .withColumn("locus", functions$.MODULE$.concat_ws("-", GenomicImplicits$columns$.MODULE$.locus()))
                .withColumn("hash", functions$.MODULE$.sha1(functions$.MODULE$.col("locus")))
                .drop("genes_symbol");
    }

    /**
     * Returns {@code minDateTime()}.
     *
     * <p>NOTE(review): the {@code $default$2} suffix follows the Scala compiler's naming for
     * default-argument accessors, so this is presumably the default for the second parameter
     * of {@code transformSingle} — confirm against Variants.scala.
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public LocalDateTime transformSingle$default$2() {
        final LocalDateTime earliest = minDateTime();
        return earliest;
    }

    /**
     * Returns the current local date-time, evaluated on every call.
     *
     * <p>NOTE(review): the {@code $default$3} suffix follows the Scala compiler's naming for
     * default-argument accessors, so this is presumably the default for the third parameter
     * of {@code transformSingle} — confirm against Variants.scala.
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public LocalDateTime transformSingle$default$3() {
        final LocalDateTime now = LocalDateTime.now();
        return now;
    }

    /* JADX WARN: 'super' call moved to the top of the method (can break code semantics) */
    /**
     * Creates the ETL and resolves every dataset configuration it needs.
     *
     * @param column        stored as {@code participantId}
     * @param column2       stored as {@code affectedStatus}
     * @param str           stored as {@code snvDatasetId}, the id of the SNV source dataset
     * @param seq           stored as {@code frequencies}
     * @param configuration passed to the superclass constructor
     */
    public Variants(Column column, Column column2, String str, Seq<FrequencySplit> seq, Configuration configuration) {
        // NOTE(review): per the decompiler warning above, the compiled class did not begin with
        // this call; statement order here may differ from the bytecode — do not reorder blindly.
        super(configuration);
        this.participantId = column;
        this.affectedStatus = column2;
        this.snvDatasetId = str;
        this.frequencies = seq;
        // Destination dataset.
        this.mainDestination = conf().getDataset("enriched_variants");
        // Source datasets, each looked up by its fixed id.
        this.thousand_genomes = conf().getDataset("normalized_1000_genomes");
        this.topmed_bravo = conf().getDataset("normalized_topmed_bravo");
        this.gnomad_genomes_v2 = conf().getDataset("normalized_gnomad_genomes_v2_1_1");
        this.gnomad_exomes_v2 = conf().getDataset("normalized_gnomad_exomes_v2_1_1");
        this.gnomad_genomes_v3 = conf().getDataset("normalized_gnomad_genomes_v3");
        this.dbsnp = conf().getDataset("normalized_dbsnp");
        this.clinvar = conf().getDataset("normalized_clinvar");
        this.genes = conf().getDataset("enriched_genes");
        this.spliceai = conf().getDataset("enriched_spliceai");
    }
}
