package bio.ferlab.datalake.spark3.p000public.enriched;

import bio.ferlab.datalake.commons.config.Configuration;
import bio.ferlab.datalake.commons.config.DatasetConf;
import bio.ferlab.datalake.spark3.etl.ETL;
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits$;
import bio.ferlab.datalake.spark3.implicits.SparkUtils$;
import java.time.LocalDateTime;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;

/* compiled from: Genes.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005Ed\u0001B\r\u001b\u0001\u001dB\u0011B\f\u0001\u0003\u0002\u0003\u0006YaL\u001c\t\u000ba\u0002A\u0011A\u001d\t\u000fy\u0002!\u0019!C\u0001\u007f!11\t\u0001Q\u0001\n\u0001Cq\u0001\u0012\u0001C\u0002\u0013\u0005q\b\u0003\u0004F\u0001\u0001\u0006I\u0001\u0011\u0005\b\r\u0002\u0011\r\u0011\"\u0001@\u0011\u00199\u0005\u0001)A\u0005\u0001\"9\u0001\n\u0001b\u0001\n\u0003y\u0004BB%\u0001A\u0003%\u0001\tC\u0004K\u0001\t\u0007I\u0011A \t\r-\u0003\u0001\u0015!\u0003A\u0011\u001da\u0005A1A\u0005\u0002}Ba!\u0014\u0001!\u0002\u0013\u0001\u0005b\u0002(\u0001\u0005\u0004%\ta\u0010\u0005\u0007\u001f\u0002\u0001\u000b\u0011\u0002!\t\u000bA\u0003A\u0011I)\t\u000f\u0005]\u0001\u0001\"\u0011\u0002\u001a\u00191\u0011q\u0005\u0001\u0002\u0003SA\u0011\"a\r\u0014\u0005\u0003\u0005\u000b\u0011B2\t\ra\u001aB\u0011AA\u001b\u0011\u001d\tid\u0005C\u0001\u0003\u007fA\u0011\"!\u0018\u0001\u0003\u0003%\u0019!a\u0018\t\u000f\u0005\r\u0004\u0001\"\u0011\u0002f\t)q)\u001a8fg*\u00111\u0004H\u0001\tK:\u0014\u0018n\u00195fI*\u0011QDH\u0001\u0007aV\u0014G.[2\u000b\u0005}\u0001\u0013AB:qCJ\\7G\u0003\u0002\"E\u0005AA-\u0019;bY\u0006\\WM\u0003\u0002$I\u00051a-\u001a:mC\nT\u0011!J\u0001\u0004E&|7\u0001A\n\u0003\u0001!\u0002\"!\u000b\u0017\u000e\u0003)R!a\u000b\u0010\u0002\u0007\u0015$H.\u0003\u0002.U\t\u0019Q\t\u0016'\u0002\t\r|gN\u001a\t\u0003aUj\u0011!\r\u0006\u0003eM\naaY8oM&<'B\u0001\u001b!\u0003\u001d\u0019w.\\7p]NL!AN\u0019\u0003\u001b\r{gNZ5hkJ\fG/[8o\u0013\tqC&\u0001\u0004=S:LGO\u0010\u000b\u0002uQ\u00111(\u0010\t\u0003y\u0001i\u0011A\u0007\u0005\u0006]\t\u0001\u001daL\u0001\fI\u0016\u001cH/\u001b8bi&|g.F\u0001A!\t\u0001\u0014)\u0003\u0002Cc\tYA)\u0019;bg\u0016$8i\u001c8g\u00031!Wm\u001d;j]\u0006$\u0018n\u001c8!\u00035yW.[7`O\u0016tWmX:fi\u0006qq.\\5n?\u001e,g.Z0tKR\u0004\u0013!E8sa\"\fg.\u001a;`O\u0016tWmX:fi\u0006\u0011rN\u001d9iC:,GoX4f]\u0016|6/\u001a;!\u00031A\u0007o\\0hK:,wl]3u\u00035A\u0007o\\0hK:,wl]3uA\u0005Y\u0001.^7b]~;WM\\3t\u00031AW/\\1o?\u001e,g.
Z:!\u00031!G\rZ0hK:,wl]3u\u00035!G\rZ0hK:,wl]3uA\u0005y1m\\:nS\u000e|v-\u001a8f?N,G/\u0001\td_Nl\u0017nY0hK:,wl]3uA\u00059Q\r\u001f;sC\u000e$H\u0003\u0002*��\u0003'!\"a\u0015>\u0011\tQk\u0006m\u0019\b\u0003+n\u0003\"AV-\u000e\u0003]S!\u0001\u0017\u0014\u0002\rq\u0012xn\u001c;?\u0015\u0005Q\u0016!B:dC2\f\u0017B\u0001/Z\u0003\u0019\u0001&/\u001a3fM&\u0011al\u0018\u0002\u0004\u001b\u0006\u0004(B\u0001/Z!\t!\u0016-\u0003\u0002c?\n11\u000b\u001e:j]\u001e\u0004\"\u0001Z<\u000f\u0005\u0015$hB\u00014r\u001d\t9gN\u0004\u0002iW:\u0011a+[\u0005\u0002U\u0006\u0019qN]4\n\u00051l\u0017AB1qC\u000eDWMC\u0001k\u0013\ty\u0007/A\u0003ta\u0006\u00148N\u0003\u0002m[&\u0011!o]\u0001\u0004gFd'BA8q\u0013\t)h/A\u0004qC\u000e\\\u0017mZ3\u000b\u0005I\u001c\u0018B\u0001=z\u0005%!\u0015\r^1Ge\u0006lWM\u0003\u0002vm\")q.\u0005a\u0002wB\u0011A0`\u0007\u0002m&\u0011aP\u001e\u0002\r'B\f'o[*fgNLwN\u001c\u0005\n\u0003\u0003\t\u0002\u0013!a\u0001\u0003\u0007\tq\u0002\\1tiJ+h\u000eR1uKRKW.\u001a\t\u0005\u0003\u000b\ty!\u0004\u0002\u0002\b)!\u0011\u0011BA\u0006\u0003\u0011!\u0018.\\3\u000b\u0005\u00055\u0011\u0001\u00026bm\u0006LA!!\u0005\u0002\b\tiAj\\2bY\u0012\u000bG/\u001a+j[\u0016D\u0011\"!\u0006\u0012!\u0003\u0005\r!a\u0001\u0002%\r,(O]3oiJ+h\u000eR1uKRKW.Z\u0001\niJ\fgn\u001d4pe6$\u0002\"a\u0007\u0002 
\u0005\r\u0012Q\u0005\u000b\u0004G\u0006u\u0001\"B8\u0013\u0001\bY\bBBA\u0011%\u0001\u00071+\u0001\u0003eCR\f\u0007\"CA\u0001%A\u0005\t\u0019AA\u0002\u0011%\t)B\u0005I\u0001\u0002\u0004\t\u0019A\u0001\u0007ECR\fgI]1nK>\u00038oE\u0002\u0014\u0003W\u0001B!!\f\u000205\t\u0011,C\u0002\u00022e\u0013a!\u00118z%\u00164\u0017A\u00013g)\u0011\t9$a\u000f\u0011\u0007\u0005e2#D\u0001\u0001\u0011\u0019\t\u0019$\u0006a\u0001G\u0006\u0001\"n\\5o\u0003:$W*\u001a:hK^KG\u000f\u001b\u000b\bG\u0006\u0005\u0013QIA-\u0011\u0019\t\u0019E\u0006a\u0001G\u0006Aq-\u001a8f?N,G\u000fC\u0004\u0002HY\u0001\r!!\u0013\u0002\r)|\u0017N\\(o!\u0015\tY%a\u0015a\u001d\u0011\ti%!\u0015\u000f\u0007Y\u000by%C\u0001[\u0013\t)\u0018,\u0003\u0003\u0002V\u0005]#aA*fc*\u0011Q/\u0017\u0005\u0007\u000372\u0002\u0019\u00011\u0002\u0019\u0005\u001c8i\u001c7v[:t\u0015-\\3\u0002\u0019\u0011\u000bG/\u0019$sC6,w\n]:\u0015\t\u0005]\u0012\u0011\r\u0005\u0007\u0003g9\u0002\u0019A2\u0002\t1|\u0017\r\u001a\u000b\t\u0003O\nY'!\u001c\u0002pQ\u00191-!\u001b\t\u000b=D\u00029A>\t\r\u0005\u0005\u0002\u00041\u0001d\u0011%\t\t\u0001\u0007I\u0001\u0002\u0004\t\u0019\u0001C\u0005\u0002\u0016a\u0001\n\u00111\u0001\u0002\u0004\u0001")
/* loaded from: input_file:bio/ferlab/datalake/spark3/public/enriched/Genes.class */
/*
 * ETL job whose destination is the "enriched_genes" dataset.
 *
 * The job starts from the "normalized_human_genes" dataset and successively
 * left-joins the Orphanet, HPO, OMIM, DDD and COSMIC gene sets onto it,
 * storing each gene set as a nested collection column on the gene row
 * (see DataFrameOps.joinAndMergeWith).
 */
public class Genes extends ETL {
    private final DatasetConf destination;
    private final DatasetConf omim_gene_set;
    private final DatasetConf orphanet_gene_set;
    private final DatasetConf hpo_gene_set;
    private final DatasetConf human_genes;
    private final DatasetConf ddd_gene_set;
    private final DatasetConf cosmic_gene_set;

    /* compiled from: Genes.scala */
    /* loaded from: input_file:bio/ferlab/datalake/spark3/public/enriched/Genes$DataFrameOps.class */
    /**
     * Wrapper around a data frame that adds the {@link #joinAndMergeWith}
     * operation. Instances are created through {@link Genes#DataFrameOps(Dataset)}.
     */
    public class DataFrameOps {
        private final Dataset<Row> df;
        public final /* synthetic */ Genes $outer;

        /**
         * Left-joins the wrapped data frame with {@code dataset} and folds the
         * matching rows of {@code dataset} into one collection column.
         *
         * @param dataset gene set to attach to the wrapped data frame
         * @param seq     names of the columns to join on
         * @param str     name of the resulting collection column
         * @return one row per {@code symbol}: the columns of the wrapped data
         *         frame followed by the column {@code str}
         */
        public Dataset<Row> joinAndMergeWith(Dataset<Row> dataset, Seq<String> seq, String str) {
            Dataset<Row> joined = this.df.join(dataset, seq, "left");

            // Per symbol: keep the first copy of the wrapped frame's columns as
            // the struct "hg", and gather the columns of `dataset` (join keys
            // dropped) into a list of structs named `str`.
            Dataset<Row> aggregated = joined
                    .groupBy("symbol", Predef$.MODULE$.wrapRefArray(new String[0]))
                    .agg(
                            functions$.MODULE$.first(
                                    functions$.MODULE$.struct(
                                            Predef$.MODULE$.wrapRefArray(new Column[]{this.df.apply("*")}))).as("hg"),
                            Predef$.MODULE$.wrapRefArray(new Column[]{
                                    functions$.MODULE$.collect_list(
                                            functions$.MODULE$.struct(
                                                    Predef$.MODULE$.wrapRefArray(new Column[]{dataset.drop(seq).apply("*")}))).as(str)
                            }));

            // Expand the "hg" struct back into top-level columns next to `str`.
            Dataset<Row> flattened = aggregated.select(
                    Predef$.MODULE$.wrapRefArray(new Column[]{
                            functions$.MODULE$.col("hg.*"),
                            functions$.MODULE$.col(str)
                    }));

            // NOTE(review): presumably strips the empty structs produced by
            // unmatched left-join rows - confirm against SparkUtils.
            Column cleaned = (Column) SparkUtils$.MODULE$.removeEmptyObjectsIn().apply(str);
            return flattened.withColumn(str, cleaned);
        }

        /** Synthetic accessor for the enclosing {@link Genes} instance. */
        public /* synthetic */ Genes bio$ferlab$datalake$spark3$public$enriched$Genes$DataFrameOps$$$outer() {
            return this.$outer;
        }

        /**
         * @param genes   enclosing instance; a {@code null} value makes the
         *                constructor throw (via {@code throw null})
         * @param dataset data frame to wrap
         */
        public DataFrameOps(Genes genes, Dataset<Row> dataset) {
            this.df = dataset;
            if (genes == null) {
                throw null;
            }
            this.$outer = genes;
        }
    }

    /** Configuration of the "enriched_genes" dataset. */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public DatasetConf destination() {
        return destination;
    }

    /** Configuration of the "normalized_omim_gene_set" dataset. */
    public DatasetConf omim_gene_set() {
        return omim_gene_set;
    }

    /** Configuration of the "normalized_orphanet_gene_set" dataset. */
    public DatasetConf orphanet_gene_set() {
        return orphanet_gene_set;
    }

    /** Configuration of the "normalized_hpo_gene_set" dataset. */
    public DatasetConf hpo_gene_set() {
        return hpo_gene_set;
    }

    /** Configuration of the "normalized_human_genes" dataset. */
    public DatasetConf human_genes() {
        return human_genes;
    }

    /** Configuration of the "normalized_ddd_gene_set" dataset. */
    public DatasetConf ddd_gene_set() {
        return ddd_gene_set;
    }

    /** Configuration of the "normalized_cosmic_gene_set" dataset. */
    public DatasetConf cosmic_gene_set() {
        return cosmic_gene_set;
    }

    /**
     * Calls {@code read} on each of the six source datasets and returns the
     * results keyed by dataset id. The two date-time arguments are not used here.
     *
     * @param localDateTime  unused
     * @param localDateTime2 unused
     * @param sparkSession   session passed to every {@code read} call
     * @return map from dataset id to the data frame read for it
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public Map<String, Dataset<Row>> extract(LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        // Each entry is the Java spelling of the Scala pair `id -> dataFrame`.
        Tuple2 omim = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(omim_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(omim_gene_set()).read(super.conf(), sparkSession));
        Tuple2 orphanet = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(orphanet_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(orphanet_gene_set()).read(super.conf(), sparkSession));
        Tuple2 hpo = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(hpo_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(hpo_gene_set()).read(super.conf(), sparkSession));
        Tuple2 humanGenes = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(human_genes().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(human_genes()).read(super.conf(), sparkSession));
        Tuple2 ddd = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(ddd_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(ddd_gene_set()).read(super.conf(), sparkSession));
        Tuple2 cosmic = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(cosmic_gene_set().id()),
                DatasetConfImplicits$.MODULE$.DatasetConfOperations(cosmic_gene_set()).read(super.conf(), sparkSession));

        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{omim, orphanet, hpo, humanGenes, ddd, cosmic}));
    }

    /**
     * Builds the enriched gene table from the frames produced by
     * {@link #extract}. The two date-time arguments are not used here.
     *
     * @param map            data frames keyed by dataset id
     * @param localDateTime  unused
     * @param localDateTime2 unused
     * @param sparkSession   session whose implicits are used to build columns
     * @return human genes with the nested columns "orphanet", "hpo", "omim",
     *         "ddd" and "cosmic" attached, in that order
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public Dataset<Row> transform(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        // Base table: a projection of the human genes with a few renames.
        Dataset<Row> humanGenes = ((Dataset) map.apply(human_genes().id())).select(Predef$.MODULE$.wrapRefArray(new Column[]{
                columnOf(sparkSession, "chromosome"),
                columnOf(sparkSession, "symbol"),
                columnOf(sparkSession, "entrez_gene_id"),
                columnOf(sparkSession, "omim_gene_id"),
                columnOf(sparkSession, "external_references.hgnc").as("hgnc"),
                columnOf(sparkSession, "ensembl_gene_id"),
                columnOf(sparkSession, "map_location").as("location"),
                columnOf(sparkSession, "description").as("name"),
                columnOf(sparkSession, "synonyms").as("alias"),
                functions$.MODULE$.regexp_replace(columnOf(sparkSession, "type_of_gene"), "-", "_").as("biotype")
        }));

        // Orphanet: keyed by gene symbol.
        Dataset<Row> orphanet = ((Dataset) map.apply(orphanet_gene_set().id())).select(Predef$.MODULE$.wrapRefArray(new Column[]{
                columnOf(sparkSession, "gene_symbol").as("symbol"),
                columnOf(sparkSession, "disorder_id"),
                columnOf(sparkSession, "name").as("panel"),
                columnOf(sparkSession, "type_of_inheritance").as("inheritance")
        }));

        // OMIM: only rows that carry a phenotype name, with the phenotype
        // struct flattened into top-level columns.
        Dataset<Row> omim = ((Dataset) map.apply(omim_gene_set().id()))
                .where(columnOf(sparkSession, "phenotype.name").isNotNull())
                .select(Predef$.MODULE$.wrapRefArray(new Column[]{
                        columnOf(sparkSession, "omim_gene_id"),
                        columnOf(sparkSession, "phenotype.name").as("name"),
                        columnOf(sparkSession, "phenotype.omim_id").as("omim_id"),
                        columnOf(sparkSession, "phenotype.inheritance").as("inheritance"),
                        columnOf(sparkSession, "phenotype.inheritance_code").as("inheritance_code")
                }));

        // HPO: distinct terms per gene plus a "<name> (<id>)" display label.
        Dataset<Row> hpo = ((Dataset) map.apply(hpo_gene_set().id()))
                .select(Predef$.MODULE$.wrapRefArray(new Column[]{
                        columnOf(sparkSession, "entrez_gene_id"),
                        columnOf(sparkSession, "hpo_term_id"),
                        columnOf(sparkSession, "hpo_term_name")
                }))
                .distinct()
                .withColumn("hpo_term_label", functions$.MODULE$.concat(Predef$.MODULE$.wrapRefArray(new Column[]{
                        columnOf(sparkSession, "hpo_term_name"),
                        functions$.MODULE$.lit(" ("),
                        columnOf(sparkSession, "hpo_term_id"),
                        functions$.MODULE$.lit(")")
                })));

        // Attach the gene sets one after the other.
        Dataset<Row> withOrphanet = DataFrameOps(humanGenes)
                .joinAndMergeWith(orphanet, (Seq) new $colon.colon("symbol", Nil$.MODULE$), "orphanet");
        Dataset<Row> withHpo = DataFrameOps(withOrphanet)
                .joinAndMergeWith(hpo, (Seq) new $colon.colon("entrez_gene_id", Nil$.MODULE$), "hpo");
        Dataset<Row> withOmim = DataFrameOps(withHpo)
                .joinAndMergeWith(omim, (Seq) new $colon.colon("omim_gene_id", Nil$.MODULE$), "omim");

        // The DDD and COSMIC projections stay inline so that they are still
        // evaluated after the preceding joins, exactly where they always were.
        Dataset<Row> withDdd = DataFrameOps(withOmim).joinAndMergeWith(
                ((Dataset) map.apply(ddd_gene_set().id())).select("disease_name", Predef$.MODULE$.wrapRefArray(new String[]{"symbol"})),
                (Seq) new $colon.colon("symbol", Nil$.MODULE$),
                "ddd");
        return DataFrameOps(withDdd).joinAndMergeWith(
                ((Dataset) map.apply(cosmic_gene_set().id())).select("symbol", Predef$.MODULE$.wrapRefArray(new String[]{"tumour_types_germline"})),
                (Seq) new $colon.colon("symbol", Nil$.MODULE$),
                "cosmic");
    }

    /**
     * Java spelling of the Scala column interpolator {@code $"name"}, using
     * the implicits of the given session.
     */
    private static Column columnOf(SparkSession sparkSession, String name) {
        return sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{name}))).$(Nil$.MODULE$);
    }

    /** Wraps {@code dataset} so that {@code joinAndMergeWith} can be called on it. */
    public DataFrameOps DataFrameOps(Dataset<Row> dataset) {
        return new DataFrameOps(this, dataset);
    }

    /**
     * Repartitions {@code dataset} to a single partition and hands it to the
     * parent {@code load}.
     *
     * @return whatever the parent {@code load} returns
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public Dataset<Row> load(Dataset<Row> dataset, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        Dataset<Row> singlePartition = dataset.repartition(1);
        return super.load(singlePartition, localDateTime, localDateTime2, sparkSession);
    }

    /**
     * Looks up the destination and the six source datasets in the given
     * configuration.
     *
     * @param configuration configuration passed on to the {@link ETL} parent
     */
    public Genes(Configuration configuration) {
        super(configuration);
        // Output dataset.
        this.destination = super.conf().getDataset("enriched_genes");
        // Input datasets.
        this.omim_gene_set = super.conf().getDataset("normalized_omim_gene_set");
        this.orphanet_gene_set = super.conf().getDataset("normalized_orphanet_gene_set");
        this.hpo_gene_set = super.conf().getDataset("normalized_hpo_gene_set");
        this.human_genes = super.conf().getDataset("normalized_human_genes");
        this.ddd_gene_set = super.conf().getDataset("normalized_ddd_gene_set");
        this.cosmic_gene_set = super.conf().getDataset("normalized_cosmic_gene_set");
    }
}
