package bio.ferlab.datalake.spark3.p000public.normalized;

import bio.ferlab.datalake.spark3.config.Configuration;
import bio.ferlab.datalake.spark3.config.DatasetConf;
import bio.ferlab.datalake.spark3.etl.ETLP;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.collection.immutable.Map;
import scala.reflect.ScalaSignature;

/* compiled from: OmimGeneSet.scala */
@ScalaSignature(bytes = "\u0006\u0001E4AAC\u0006\u00011!Iq\u0004\u0001B\u0001B\u0003-\u0001E\n\u0005\u0006S\u0001!\tA\u000b\u0005\b_\u0001\u0011\r\u0011\"\u00111\u0011\u0019!\u0004\u0001)A\u0005c!9Q\u0007\u0001b\u0001\n\u0003\u0001\u0004B\u0002\u001c\u0001A\u0003%\u0011\u0007C\u00038\u0001\u0011\u0005\u0003\bC\u0003g\u0001\u0011\u0005s\rC\u0003m\u0001\u0011\u0005SNA\u0006P[&lw)\u001a8f'\u0016$(B\u0001\u0007\u000e\u0003)qwN]7bY&TX\r\u001a\u0006\u0003\u001d=\ta\u0001];cY&\u001c'B\u0001\t\u0012\u0003\u0019\u0019\b/\u0019:lg)\u0011!cE\u0001\tI\u0006$\u0018\r\\1lK*\u0011A#F\u0001\u0007M\u0016\u0014H.\u00192\u000b\u0003Y\t1AY5p\u0007\u0001\u0019\"\u0001A\r\u0011\u0005iiR\"A\u000e\u000b\u0005qy\u0011aA3uY&\u0011ad\u0007\u0002\u0005\u000bRc\u0005+\u0001\u0003d_:4\u0007CA\u0011%\u001b\u0005\u0011#BA\u0012\u0010\u0003\u0019\u0019wN\u001c4jO&\u0011QE\t\u0002\u000e\u0007>tg-[4ve\u0006$\u0018n\u001c8\n\u0005}9\u0013B\u0001\u0015\u001c\u0005\r)E\u000bT\u0001\u0007y%t\u0017\u000e\u001e \u0015\u0003-\"\"\u0001\f\u0018\u0011\u00055\u0002Q\"A\u0006\t\u000b}\u0011\u00019\u0001\u0011\u0002\u0017\u0011,7\u000f^5oCRLwN\\\u000b\u0002cA\u0011\u0011EM\u0005\u0003g\t\u00121\u0002R1uCN,GoQ8oM\u0006aA-Z:uS:\fG/[8oA\u0005\u0001\"/Y<`_6LWnX4f]\u0016l\u0017\r]\u0001\u0012e\u0006<xl\\7j[~;WM\\3nCB\u0004\u0013aB3yiJ\f7\r\u001e\u000b\u0002sQ\u0011!(\u0019\t\u0005w\u0011;%J\u0004\u0002=\u0005B\u0011Q\bQ\u0007\u0002})\u0011qhF\u0001\u0007yI|w\u000e\u001e 
\u000b\u0003\u0005\u000bQa]2bY\u0006L!a\u0011!\u0002\rA\u0013X\rZ3g\u0013\t)eIA\u0002NCBT!a\u0011!\u0011\u0005mB\u0015BA%G\u0005\u0019\u0019FO]5oOB\u00111J\u0018\b\u0003\u0019ns!!\u0014-\u000f\u00059+fBA(S\u001d\ti\u0004+C\u0001R\u0003\ry'oZ\u0005\u0003'R\u000ba!\u00199bG\",'\"A)\n\u0005Y;\u0016!B:qCJ\\'BA*U\u0013\tI&,A\u0002tc2T!AV,\n\u0005qk\u0016a\u00029bG.\fw-\u001a\u0006\u00033jK!a\u00181\u0003\u0013\u0011\u000bG/\u0019$sC6,'B\u0001/^\u0011\u00151v\u0001q\u0001c!\t\u0019G-D\u0001^\u0013\t)WL\u0001\u0007Ta\u0006\u00148nU3tg&|g.A\u0005ue\u0006t7OZ8s[R\u0011\u0001N\u001b\u000b\u0003\u0015&DQA\u0016\u0005A\u0004\tDQa\u001b\u0005A\u0002i\nA\u0001Z1uC\u0006!An\\1e)\tq\u0007\u000f\u0006\u0002K_\")a+\u0003a\u0002E\")1.\u0003a\u0001\u0015\u0002")
/* loaded from: input_file:bio/ferlab/datalake/spark3/public/normalized/OmimGeneSet.class */
/**
 * ETL job that normalizes the dataset registered as {@code raw_omim_genemap} and
 * targets the dataset registered as {@code normalized_omim_gene_set}.
 *
 * <p>The transformation renames the positional columns {@code _c0}..{@code _c12},
 * splits the symbol and phenotype columns into arrays, and emits one row per
 * phenotype entry with a parsed {@code phenotype} column.
 */
public class OmimGeneSet extends ETLP {
    /**
     * Spark type string used for the null {@code phenotype} placeholder on rows
     * without phenotypes. Presumably mirrors the type returned by
     * {@code OmimPhenotype.parse_pheno} (required for the union to succeed) —
     * verify against that definition before changing it.
     */
    private static final String PHENOTYPE_STRUCT_TYPE =
            "struct<name:string,omim_id:string,inheritance:array<string>,inheritance_code:array<string>>";

    private final DatasetConf destination;
    private final DatasetConf raw_omim_genemap;

    /**
     * Resolves the source and destination dataset configurations by id.
     *
     * @param configuration configuration handed to the parent ETL and used for the lookups
     */
    public OmimGeneSet(Configuration configuration) {
        super(configuration);
        this.destination = super.conf().getDataset("normalized_omim_gene_set");
        this.raw_omim_genemap = super.conf().getDataset("raw_omim_genemap");
    }

    /** @return the dataset configuration looked up as {@code normalized_omim_gene_set} */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public DatasetConf destination() {
        return this.destination;
    }

    /** @return the dataset configuration looked up as {@code raw_omim_genemap} */
    public DatasetConf raw_omim_genemap() {
        return this.raw_omim_genemap;
    }

    /**
     * Reads the raw genemap dataset.
     *
     * @param sparkSession session passed to the dataset reader
     * @return a single-entry map from the raw dataset's id to the data frame that was read
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public Map<String, Dataset<Row>> extract(SparkSession sparkSession) {
        // Scala's `id -> dataFrame` pair, spelled out the way the compiler emits it.
        Tuple2 geneMapEntry = Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(
                Predef$.MODULE$.ArrowAssoc(raw_omim_genemap().id()),
                raw_omim_genemap().read(super.conf(), sparkSession));
        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{geneMapEntry}));
    }

    /**
     * Renames the raw positional columns and produces one row per phenotype.
     *
     * @param map          extracted data frames keyed by dataset id; must contain the raw genemap id
     * @param sparkSession unused by this implementation
     * @return rows with a parsed {@code phenotype} column, unioned by name with the rows whose
     *         phenotype list is null (those carry a null {@code phenotype})
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public Dataset<Row> transform(Map<String, Dataset<Row>> map, SparkSession sparkSession) {
        final functions$ fn = functions$.MODULE$;
        final Predef$ predef = Predef$.MODULE$;

        Dataset rawGeneMap = (Dataset) map.apply(raw_omim_genemap().id());

        Column[] projection = {
            fn.col("_c0").as("chromosome"),
            fn.col("_c1").as("start"),
            fn.col("_c2").as("end"),
            fn.col("_c3").as("cypto_location"),
            fn.col("_c4").as("computed_cypto_location"),
            fn.col("_c5").as("omim_gene_id"),
            fn.split(fn.col("_c6"), ", ").as("symbols"),
            fn.col("_c7").as("name"),
            fn.col("_c8").as("approved_symbol"),
            fn.col("_c9").as("entrez_gene_id"),
            fn.col("_c10").as("ensembl_gene_id"),
            fn.col("_c11").as("documentation"),
            fn.split(fn.col("_c12"), ";").as("phenotypes")
        };
        Dataset genes = rawGeneMap.select(predef.wrapRefArray(projection));

        // One row per phenotype entry, each entry run through OmimPhenotype.parse_pheno.
        Column parsedPhenotype = OmimPhenotype$.MODULE$.parse_pheno()
                .apply(predef.wrapRefArray(new Column[]{fn.col("raw_phenotype")}));
        Dataset genesWithPhenotype = genes
                .withColumn("raw_phenotype", fn.explode(fn.col("phenotypes")))
                .drop("phenotypes")
                .withColumn("phenotype", parsedPhenotype)
                .drop("raw_phenotype");

        // Spark's explode emits no rows for a null array, so genes without any
        // phenotype are re-added here with a typed null phenotype.
        Dataset genesWithoutPhenotype = genes
                .filter(fn.col("phenotypes").isNull())
                .drop("phenotypes")
                .withColumn("phenotype", fn.lit((Object) null).cast(PHENOTYPE_STRUCT_TYPE));

        return genesWithPhenotype.unionByName(genesWithoutPhenotype);
    }

    /**
     * Coalesces the data to a single partition and delegates to the parent loader.
     *
     * @param dataset      transformed data to persist
     * @param sparkSession session forwarded to the parent implementation
     * @return whatever the parent {@code load} returns
     */
    @Override // bio.ferlab.datalake.spark3.etl.ETL
    public Dataset<Row> load(Dataset<Row> dataset, SparkSession sparkSession) {
        Dataset<Row> singlePartition = dataset.coalesce(1);
        return super.load(singlePartition, sparkSession);
    }
}
