package bio.ferlab.datalake.spark3.publictables.normalized.refseq;

import bio.ferlab.datalake.commons.config.Configuration;
import bio.ferlab.datalake.commons.config.DatasetConf;
import bio.ferlab.datalake.spark3.etl.ETLP;
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits$;
import bio.ferlab.datalake.spark3.utils.Coalesce;
import bio.ferlab.datalake.spark3.utils.Coalesce$;
import java.io.Serializable;
import java.time.LocalDateTime;
import java.util.Locale;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.expressions.UserDefinedFunction;
import org.apache.spark.sql.functions$;
import scala.Array$;
import scala.Function1;
import scala.MatchError;
import scala.Option;
import scala.Option$;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.SeqLike;
import scala.collection.immutable.$colon;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayOps;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.reflect.api.Mirror;
import scala.reflect.api.TypeCreator;
import scala.reflect.api.TypeTags;
import scala.reflect.api.Types;
import scala.reflect.api.Universe;
import scala.reflect.runtime.package$;

/* compiled from: RefSeqHumanGenes.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005\u0005d\u0001B\n\u0015\u0001\rB\u0011B\u000b\u0001\u0003\u0002\u0003\u0006YaK\u001a\t\u000ba\u0002A\u0011A\u001d\t\u000fy\u0002!\u0019!C\u0005\u007f!11\t\u0001Q\u0001\n\u0001Cq\u0001\u0012\u0001C\u0002\u0013\u0005s\b\u0003\u0004F\u0001\u0001\u0006I\u0001\u0011\u0005\u0006\r\u0002!\te\u0012\u0005\n\u0003\u0007\u0001\u0011\u0013!C\u0001\u0003\u000bA\u0011\"a\u0007\u0001#\u0003%\t!!\u0002\t\u000f\u0005u\u0001\u0001\"\u0011\u0002 !I\u0011Q\u0006\u0001\u0012\u0002\u0013\u0005\u0011Q\u0001\u0005\n\u0003_\u0001\u0011\u0013!C\u0001\u0003\u000bA\u0011\"!\r\u0001\u0005\u0004%\t%a\r\t\u0011\u0005u\u0002\u0001)A\u0005\u0003kA\u0011\"a\u0010\u0001\u0005\u0004%\t!!\u0011\t\u0011\u00055\u0003\u0001)A\u0005\u0003\u0007B\u0011\"a\u0014\u0001\u0005\u0004%\t!!\u0015\t\u0011\u0005}\u0003\u0001)A\u0005\u0003'\u0012\u0001CU3g'\u0016\f\b*^7b]\u001e+g.Z:\u000b\u0005U1\u0012A\u0002:fMN,\u0017O\u0003\u0002\u00181\u0005Qan\u001c:nC2L'0\u001a3\u000b\u0005eQ\u0012\u0001\u00049vE2L7\r^1cY\u0016\u001c(BA\u000e\u001d\u0003\u0019\u0019\b/\u0019:lg)\u0011QDH\u0001\tI\u0006$\u0018\r\\1lK*\u0011q\u0004I\u0001\u0007M\u0016\u0014H.\u00192\u000b\u0003\u0005\n1AY5p\u0007\u0001\u0019\"\u0001\u0001\u0013\u0011\u0005\u0015BS\"\u0001\u0014\u000b\u0005\u001dR\u0012aA3uY&\u0011\u0011F\n\u0002\u0005\u000bRc\u0005+\u0001\u0003d_:4\u0007C\u0001\u00172\u001b\u0005i#B\u0001\u00180\u0003\u0019\u0019wN\u001c4jO*\u0011\u0001\u0007H\u0001\bG>lWn\u001c8t\u0013\t\u0011TFA\u0007D_:4\u0017nZ;sCRLwN\\\u0005\u0003UQJ!!\u000e\u001c\u0003\u0007\u0015#FJ\u0003\u00028M\u0005\u0011aOM\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\u0003i\"\"aO\u001f\u0011\u0005q\u0002Q\"\u0001\u000b\t\u000b)\u0012\u00019A\u0016\u0002-I\fwo\u0018:fMN,\u0017o\u00185v[\u0006twlZ3oKN,\u0012\u0001\u0011\t\u0003Y\u0005K!AQ\u0017\u0003\u0017\u0011\u000bG/Y:fi\u000e{gNZ\u0001\u0018e\u0006<xL]3gg\u0016\fx\f[;nC:|v-\u001a8fg\u0002\nq\"\\1j]\u0012+7\u000f^5oCRLwN\\\u0001\u0011[\u0006Lg\u000eR3ti&t\u0017\r^5p]\u0002\nq!\u001a=ue\u0006\u001cG\u000fF\u0002Ik~$\"!\u00139\u0011\t)\u001bf+\u0017\b\u0003\u0017F\u0003\"\u0001T(\u000e\u00035S!A\u0014\u0012\u0002\rq\u0012xn\u001c;?\u0015\u0005\u0001\u0016!B:dC2\f\u0017B\u0001*P\u0003\u0019\u0001&/\u001a3fM&\u0011A+\u0016\u0002\u0004\u001b\u0006\u0004(B\u0001*P!\tQu+\u0003\u0002Y+\n11\u000b\u001e:j]\u001e\u0004\"AW7\u000f\u0005mSgB\u0001/h\u001d\tiFM\u0004\u0002_C:\u0011AjX\u0005\u0002A\u0006\u0019qN]4\n\u0005\t\u001c\u0017AB1qC\u000eDWMC\u0001a\u0013\t)g-A\u0003ta\u0006\u00148N\u0003\u0002cG&\u0011\u0001.[\u0001\u0004gFd'BA3g\u0013\tYG.A\u0004qC\u000e\\\u0017mZ3\u000b\u0005!L\u0017B\u00018p\u0005%!\u0015\r^1Ge\u0006lWM\u0003\u0002lY\")Qm\u0002a\u0002cB\u0011!o]\u0007\u0002Y&\u0011A\u000f\u001c\u0002\r'B\f'o[*fgNLwN\u001c\u0005\bm\u001e\u0001\n\u00111\u0001x\u0003=a\u0017m\u001d;Sk:$\u0015\r^3US6,\u0007C\u0001=~\u001b\u0005I(B\u0001>|\u0003\u0011!\u0018.\\3\u000b\u0003q\fAA[1wC&\u0011a0\u001f\u0002\u000e\u0019>\u001c\u0017\r\u001c#bi\u0016$\u0016.\\3\t\u0011\u0005\u0005q\u0001%AA\u0002]\f!cY;se\u0016tGOU;o\t\u0006$X\rV5nK\u0006\tR\r\u001f;sC\u000e$H\u0005Z3gCVdG\u000fJ\u0019\u0016\u0005\u0005\u001d!fA<\u0002\n-\u0012\u00111\u0002\t\u0005\u0003\u001b\t9\"\u0004\u0002\u0002\u0010)!\u0011\u0011CA\n\u0003%)hn\u00195fG.,GMC\u0002\u0002\u0016=\u000b!\"\u00198o_R\fG/[8o\u0013\u0011\tI\"a\u0004\u0003#Ut7\r[3dW\u0016$g+\u0019:jC:\u001cW-A\tfqR\u0014\u0018m\u0019;%I\u00164\u0017-\u001e7uII\nq\u0002\u001e:b]N4wN]7TS:<G.\u001a\u000b\t\u0003C\t)#!\u000b\u0002,Q\u0019\u0011,a\t\t\u000b\u0015T\u00019A9\t\r\u0005\u001d\"\u00021\u0001J\u0003\u0011!\u0017\r^1\t\u000fYT\u0001\u0013!a\u0001o\"
A\u0011\u0011\u0001\u0006\u0011\u0002\u0003\u0007q/A\rue\u0006t7OZ8s[NKgn\u001a7fI\u0011,g-Y;mi\u0012\u0012\u0014!\u0007;sC:\u001chm\u001c:n'&tw\r\\3%I\u00164\u0017-\u001e7uIM\n!\u0003Z3gCVdGOU3qCJ$\u0018\u000e^5p]V\u0011\u0011Q\u0007\t\u0007\u0003o\tI$W-\u000e\u0003=K1!a\u000fP\u0005%1UO\\2uS>t\u0017'A\neK\u001a\fW\u000f\u001c;SKB\f'\u000f^5uS>t\u0007%\u0001\u0007ta2LG\u000fV8NCB4e.\u0006\u0002\u0002DA9\u0011qGA\u001d-\u0006\u0015\u0003CBA\u001c\u0003\u000f\nY%C\u0002\u0002J=\u0013aa\u00149uS>t\u0007\u0003\u0002&T-Z\u000bQb\u001d9mSR$v.T1q\r:\u0004\u0013AC:qY&$Hk\\'baV\u0011\u00111\u000b\t\u0005\u0003+\nY&\u0004\u0002\u0002X)\u0019\u0011\u0011\f7\u0002\u0017\u0015D\bO]3tg&|gn]\u0005\u0005\u0003;\n9FA\nVg\u0016\u0014H)\u001a4j]\u0016$g)\u001e8di&|g.A\u0006ta2LG\u000fV8NCB\u0004\u0003")
/* loaded from: input_file:bio/ferlab/datalake/spark3/publictables/normalized/refseq/RefSeqHumanGenes.class */
public class RefSeqHumanGenes extends ETLP {
    private final DatasetConf raw_refseq_human_genes;
    private final DatasetConf mainDestination;
    private final Function1<Dataset<Row>, Dataset<Row>> defaultRepartition;
    private final Function1<String, Option<Map<String, String>>> splitToMapFn;
    private final UserDefinedFunction splitToMap;

    private DatasetConf raw_refseq_human_genes() {
        return this.raw_refseq_human_genes;
    }

    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public DatasetConf mainDestination() {
        return this.mainDestination;
    }

    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public Map<String, Dataset<Row>> extract(LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        return Predef$.MODULE$.Map().apply(Predef$.MODULE$.wrapRefArray(new Tuple2[]{Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(raw_refseq_human_genes().id()), DatasetConfImplicits$.MODULE$.DatasetConfOperations(raw_refseq_human_genes()).read(super.conf(), sparkSession))}));
    }

    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public LocalDateTime extract$default$1() {
        return minDateTime();
    }

    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public LocalDateTime extract$default$2() {
        return LocalDateTime.now();
    }

    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public Dataset<Row> transformSingle(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        return ((Dataset) map.apply(raw_refseq_human_genes().id())).select(Predef$.MODULE$.wrapRefArray(new Column[]{sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"#tax_id"}))).$(Nil$.MODULE$).as("tax_id"), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"GeneID"}))).$(Nil$.MODULE$).as("entrez_gene_id"), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Symbol"}))).$(Nil$.MODULE$).as("symbol"), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"LocusTag"}))).$(Nil$.MODULE$).as("locus_tag"), functions$.MODULE$.split(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Synonyms"}))).$(Nil$.MODULE$), "\\|").as("synonyms"), splitToMap().apply(Predef$.MODULE$.wrapRefArray(new Column[]{sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"dbXrefs"}))).$(Nil$.MODULE$)})).as("external_references"), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"chromosome"}))).$(Nil$.MODULE$), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"map_location"}))).$(Nil$.MODULE$), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"description"}))).$(Nil$.MODULE$), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"type_of_gene"}))).$(Nil$.MODULE$), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Symbol_from_nomenclature_authority"}))).$(Nil$.MODULE$).as("symbol_from_nomenclature_authority"), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new 
String[]{"Full_name_from_nomenclature_authority"}))).$(Nil$.MODULE$).as("full_name_from_nomenclature_authority"), sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Nomenclature_status"}))).$(Nil$.MODULE$).as("nomenclature_status"), functions$.MODULE$.split(sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Other_designations"}))).$(Nil$.MODULE$), "\\|").as("other_designations"), splitToMap().apply(Predef$.MODULE$.wrapRefArray(new Column[]{sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"Feature_type"}))).$(Nil$.MODULE$)})).as("feature_types")})).withColumn("ensembl_gene_id", sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"external_references.ensembl"}))).$(Nil$.MODULE$)).withColumn("omim_gene_id", sparkSession.implicits().StringToColumn(new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"external_references.mim"}))).$(Nil$.MODULE$));
    }

    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public LocalDateTime transformSingle$default$2() {
        return minDateTime();
    }

    @Override // bio.ferlab.datalake.spark3.etl.ETLSingleDestination
    public LocalDateTime transformSingle$default$3() {
        return LocalDateTime.now();
    }

    @Override // bio.ferlab.datalake.spark3.etl.v2.ETL
    public Function1<Dataset<Row>, Dataset<Row>> defaultRepartition() {
        return this.defaultRepartition;
    }

    public Function1<String, Option<Map<String, String>>> splitToMapFn() {
        return this.splitToMapFn;
    }

    public UserDefinedFunction splitToMap() {
        return this.splitToMap;
    }

    public RefSeqHumanGenes(Configuration configuration) {
        super(configuration);
        this.raw_refseq_human_genes = super.conf().getDataset("raw_refseq_human_genes");
        this.mainDestination = super.conf().getDataset("normalized_human_genes");
        this.defaultRepartition = new Coalesce(Coalesce$.MODULE$.apply$default$1());
        this.splitToMapFn = str -> {
            return Option$.MODULE$.apply(str).map(str -> {
                return new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps((Tuple2[]) new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(str.split("\\|"))).map(str -> {
                    String[] split = str.split(":", 2);
                    Option unapplySeq = Array$.MODULE$.unapplySeq(split);
                    if (unapplySeq.isEmpty() || unapplySeq.get() == null || ((SeqLike) unapplySeq.get()).lengthCompare(2) != 0) {
                        throw new MatchError(split);
                    }
                    Tuple2 tuple2 = new Tuple2((String) ((SeqLike) unapplySeq.get()).apply(0), (String) ((SeqLike) unapplySeq.get()).apply(1));
                    String str = (String) tuple2._1();
                    return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str.toLowerCase().replaceAll("/", "_").replaceAll("-", "_")), (String) tuple2._2());
                }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(Tuple2.class))))).toMap(Predef$.MODULE$.$conforms());
            });
        };
        functions$ functions_ = functions$.MODULE$;
        Function1<String, Option<Map<String, String>>> splitToMapFn = splitToMapFn();
        TypeTags universe = package$.MODULE$.universe();
        final RefSeqHumanGenes refSeqHumanGenes = null;
        TypeTags.TypeTag apply = universe.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(RefSeqHumanGenes.class.getClassLoader()), new TypeCreator(refSeqHumanGenes) { // from class: bio.ferlab.datalake.spark3.publictables.normalized.refseq.RefSeqHumanGenes$$typecreator1$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe2 = mirror.universe();
                return universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().ThisType(mirror.staticPackage("scala").asModule().moduleClass()), mirror.staticClass("scala.Option"), new $colon.colon(universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe2.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "Map"), new $colon.colon(universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe2.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), new $colon.colon(universe2.internal().reificationSupport().TypeRef(universe2.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe2.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$), Nil$.MODULE$))), Nil$.MODULE$));
            }
        });
        TypeTags universe2 = package$.MODULE$.universe();
        final RefSeqHumanGenes refSeqHumanGenes2 = null;
        this.splitToMap = functions_.udf(splitToMapFn, apply, universe2.TypeTag().apply(package$.MODULE$.universe().runtimeMirror(RefSeqHumanGenes.class.getClassLoader()), new TypeCreator(refSeqHumanGenes2) { // from class: bio.ferlab.datalake.spark3.publictables.normalized.refseq.RefSeqHumanGenes$$typecreator2$1
            public <U extends Universe> Types.TypeApi apply(Mirror<U> mirror) {
                Universe universe3 = mirror.universe();
                return universe3.internal().reificationSupport().TypeRef(universe3.internal().reificationSupport().SingleType(mirror.staticPackage("scala").asModule().moduleClass().asType().toTypeConstructor(), mirror.staticModule("scala.Predef")), universe3.internal().reificationSupport().selectType(mirror.staticModule("scala.Predef").asModule().moduleClass(), "String"), Nil$.MODULE$);
            }
        }));
    }
}
