package bio.ferlab.datalake.spark3.etl.v2;

import bio.ferlab.datalake.commons.config.Configuration;
import bio.ferlab.datalake.commons.config.DatasetConf;
import bio.ferlab.datalake.commons.config.LoadType;
import bio.ferlab.datalake.commons.config.LoadType$Scd1$;
import bio.ferlab.datalake.commons.config.LoadType$Scd2$;
import bio.ferlab.datalake.commons.config.RunStep;
import bio.ferlab.datalake.commons.config.RunStep$;
import bio.ferlab.datalake.commons.config.RunStep$extract$;
import bio.ferlab.datalake.commons.config.RunStep$load$;
import bio.ferlab.datalake.commons.config.RunStep$publish$;
import bio.ferlab.datalake.commons.config.RunStep$reset$;
import bio.ferlab.datalake.commons.config.RunStep$sample$;
import bio.ferlab.datalake.commons.config.RunStep$transform$;
import bio.ferlab.datalake.commons.file.FileSystem;
import bio.ferlab.datalake.spark3.datastore.SqlBinderResolver$;
import bio.ferlab.datalake.spark3.etl.Runnable;
import bio.ferlab.datalake.spark3.file.FileSystemResolver$;
import bio.ferlab.datalake.spark3.implicits.DatasetConfImplicits$;
import bio.ferlab.datalake.spark3.loader.LoadResolver$;
import java.sql.Date;
import java.sql.Timestamp;
import java.time.LocalDateTime;
import org.apache.spark.sql.Column;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.None$;
import scala.Option;
import scala.PartialFunction;
import scala.Predef$;
import scala.Predef$ArrowAssoc$;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.TraversableOnce;
import scala.collection.immutable.$colon;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;
import scala.collection.immutable.Map;
import scala.collection.immutable.Map$;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.util.Try$;

/* compiled from: ETL.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005Ug!B\u000f\u001f\u0003\u0003Y\u0003\u0002\u0003\u001c\u0001\u0005\u000b\u0007I1A\u001c\t\u0011\u0001\u0003!\u0011!Q\u0001\naBQ!\u0011\u0001\u0005\u0002\tCqa\u0012\u0001C\u0002\u0013\u0005\u0001\n\u0003\u0004R\u0001\u0001\u0006I!\u0013\u0005\b%\u0002\u0011\r\u0011\"\u0001I\u0011\u0019\u0019\u0006\u0001)A\u0005\u0013\"9A\u000b\u0001b\u0001\u000e\u0003)\u0006bB-\u0001\u0005\u0004%\tA\u0017\u0005\u0007G\u0002\u0001\u000b\u0011B.\t\u000b\u0011\u0004a\u0011A3\t\u0013\u0005\u001d\u0002!%A\u0005\u0002\u0005%\u0002\"CA \u0001E\u0005I\u0011AA\u0015\u0011\u001d\t\t\u0005\u0001D\u0001\u0003\u0007B\u0011\"!\u0015\u0001#\u0003%\t!!\u000b\t\u0013\u0005M\u0003!%A\u0005\u0002\u0005%\u0002bBA+\u0001\u0011\u0005\u0011q\u000b\u0005\n\u0003G\u0002\u0011\u0013!C\u0001\u0003SA\u0011\"!\u001a\u0001#\u0003%\t!!\u000b\t\u000f\u0005\u001d\u0004\u0001\"\u0001\u0002j!9\u0011Q\u000f\u0001\u0005B\u0005]\u0004\"CAQ\u0001E\u0005I\u0011AAR\u0011%\t9\u000bAI\u0001\n\u0003\tI\u000bC\u0005\u0002.\u0002\t\n\u0011\"\u0001\u0002*\"9\u0011q\u0016\u0001\u0005\u0002\u0005E\u0006bBA^\u0001\u0011\u0005\u0011Q\u0018\u0005\b\u0003\u0007\u0004A\u0011AAc\u0011\u001d\t\u0019\u000e\u0001C\u0001\u0003\u000b\u00141!\u0012+M\u0015\ty\u0002%\u0001\u0002we)\u0011\u0011EI\u0001\u0004KRd'BA\u0012%\u0003\u0019\u0019\b/\u0019:lg)\u0011QEJ\u0001\tI\u0006$\u0018\r\\1lK*\u0011q\u0005K\u0001\u0007M\u0016\u0014H.\u00192\u000b\u0003%\n1AY5p\u0007\u0001\u00192\u0001\u0001\u00173!\ti\u0003'D\u0001/\u0015\u0005y\u0013!B:dC2\f\u0017BA\u0019/\u0005\u0019\te.\u001f*fMB\u00111\u0007N\u0007\u0002A%\u0011Q\u0007\t\u0002\t%Vtg.\u00192mK\u0006!1m\u001c8g+\u0005A\u0004CA\u001d?\u001b\u0005Q$BA\u001e=\u0003\u0019\u0019wN\u001c4jO*\u0011Q\bJ\u0001\bG>lWn\u001c8t\u0013\ty$HA\u0007D_:4\u0017nZ;sCRLwN\\\u0001\u0006G>tg\rI\u0001\u0007y%t\u0017\u000e\u001e 
\u0015\u0003\r#\"\u0001\u0012$\u0011\u0005\u0015\u0003Q\"\u0001\u0010\t\u000bY\u001a\u00019\u0001\u001d\u0002\u00175Lg\u000eR1uKRKW.Z\u000b\u0002\u0013B\u0011!jT\u0007\u0002\u0017*\u0011A*T\u0001\u0005i&lWMC\u0001O\u0003\u0011Q\u0017M^1\n\u0005A[%!\u0004'pG\u0006dG)\u0019;f)&lW-\u0001\u0007nS:$\u0015\r^3US6,\u0007%A\u0006nCb$\u0015\r^3US6,\u0017\u0001D7bq\u0012\u000bG/\u001a+j[\u0016\u0004\u0013aD7bS:$Um\u001d;j]\u0006$\u0018n\u001c8\u0016\u0003Y\u0003\"!O,\n\u0005aS$a\u0003#bi\u0006\u001cX\r^\"p]\u001a\f1\u0001\\8h+\u0005Y\u0006C\u0001/b\u001b\u0005i&B\u00010`\u0003\u0015\u0019HN\u001a\u001bk\u0015\u0005\u0001\u0017aA8sO&\u0011!-\u0018\u0002\u0007\u0019><w-\u001a:\u0002\t1|w\rI\u0001\bKb$(/Y2u)\u00151\u0017qDA\u0012)\r9\u0017Q\u0003\t\u0005Q>\u0014XO\u0004\u0002j[B\u0011!NL\u0007\u0002W*\u0011ANK\u0001\u0007yI|w\u000e\u001e \n\u00059t\u0013A\u0002)sK\u0012,g-\u0003\u0002qc\n\u0019Q*\u00199\u000b\u00059t\u0003C\u00015t\u0013\t!\u0018O\u0001\u0004TiJLgn\u001a\t\u0004m\u0006=abA<\u0002\n9\u0019\u00010a\u0001\u000f\u0005ethB\u0001>}\u001d\tQ70C\u0001a\u0013\tix,\u0001\u0004ba\u0006\u001c\u0007.Z\u0005\u0004\u007f\u0006\u0005\u0011!B:qCJ\\'BA?`\u0013\u0011\t)!a\u0002\u0002\u0007M\fHNC\u0002��\u0003\u0003IA!a\u0003\u0002\u000e\u00059\u0001/Y2lC\u001e,'\u0002BA\u0003\u0003\u000fIA!!\u0005\u0002\u0014\tIA)\u0019;b\rJ\fW.\u001a\u0006\u0005\u0003\u0017\ti\u0001\u0003\u0004��\u0017\u0001\u000f\u0011q\u0003\t\u0005\u00033\tY\"\u0004\u0002\u0002\u000e%!\u0011QDA\u0007\u00051\u0019\u0006/\u0019:l'\u0016\u001c8/[8o\u0011!\t\tc\u0003I\u0001\u0002\u0004I\u0015a\u00047bgR\u0014VO\u001c#bi\u0016$\u0016.\\3\t\u0011\u0005\u00152\u0002%AA\u0002%\u000b!cY;se\u0016tGOU;o\t\u0006$X\rV5nK\u0006\tR\r\u001f;sC\u000e$H\u0005Z3gCVdG\u000fJ\u0019\u0016\u0005\u0005-\"fA%\u0002.-\u0012\u0011q\u0006\t\u0005\u0003c\tY$\u0004\u0002\u00024)!\u0011QGA\u001c\u0003%)hn\u00195fG.,GMC\u0002\u0002:9\n!\"\u00198o_R\fG/[8o\u0013\u0011\ti$a\r\u0003#Ut7\r[3dW\u0016$g+\u0019:jC:\u001cW-A\tfqR\u0014\u0018m\u0019;
%I\u00164\u0017-\u001e7uII\n\u0011\u0002\u001e:b]N4wN]7\u0015\u0011\u0005\u0015\u0013\u0011JA'\u0003\u001f\"2aZA$\u0011\u0019yh\u0002q\u0001\u0002\u0018!1\u00111\n\bA\u0002\u001d\fA\u0001Z1uC\"A\u0011\u0011\u0005\b\u0011\u0002\u0003\u0007\u0011\n\u0003\u0005\u0002&9\u0001\n\u00111\u0001J\u0003M!(/\u00198tM>\u0014X\u000e\n3fM\u0006,H\u000e\u001e\u00133\u0003M!(/\u00198tM>\u0014X\u000e\n3fM\u0006,H\u000e\u001e\u00134\u0003\u0011aw.\u00193\u0015\u0011\u0005e\u0013QLA0\u0003C\"2aZA.\u0011\u0019y\u0018\u0003q\u0001\u0002\u0018!1\u00111J\tA\u0002\u001dD\u0001\"!\t\u0012!\u0003\u0005\r!\u0013\u0005\t\u0003K\t\u0002\u0013!a\u0001\u0013\u0006qAn\\1eI\u0011,g-Y;mi\u0012\u0012\u0014A\u00047pC\u0012$C-\u001a4bk2$HeM\u0001\baV\u0014G.[:i)\t\tY\u0007\u0006\u0003\u0002n\u0005M\u0004cA\u0017\u0002p%\u0019\u0011\u0011\u000f\u0018\u0003\tUs\u0017\u000e\u001e\u0005\u0007\u007fR\u0001\u001d!a\u0006\u0002\u0007I,h\u000e\u0006\u0005\u0002z\u0005u\u0014qSAP)\r9\u00171\u0010\u0005\u0007\u007fV\u0001\u001d!a\u0006\t\u0013\u0005}T\u0003%AA\u0002\u0005\u0005\u0015\u0001\u0003:v]N#X\r]:\u0011\r\u0005\r\u00151RAI\u001d\u0011\t))!#\u000f\u0007)\f9)C\u00010\u0013\r\tYAL\u0005\u0005\u0003\u001b\u000byIA\u0002TKFT1!a\u0003/!\rI\u00141S\u0005\u0004\u0003+S$a\u0002*v]N#X\r\u001d\u0005\n\u0003C)\u0002\u0013!a\u0001\u00033\u0003B!LAN\u0013&\u0019\u0011Q\u0014\u0018\u0003\r=\u0003H/[8o\u0011%\t)#\u0006I\u0001\u0002\u0004\tI*A\u0007sk:$C-\u001a4bk2$H%M\u000b\u0003\u0003KSC!!!\u0002.\u0005i!/\u001e8%I\u00164\u0017-\u001e7uII*\"!a++\t\u0005e\u0015QF\u0001\u000eeVtG\u0005Z3gCVdG\u000fJ\u001a\u0002#\u001d,G\u000fT1tiJ+h\u000eR1uK\u001a{'\u000f\u0006\u0003\u00024\u0006]FcA%\u00026\"1q0\u0007a\u0002\u0003/Aa!!/\u001a\u0001\u00041\u0016A\u00013t\u0003\u0015\u0011Xm]3u)\t\ty\f\u0006\u0003\u0002n\u0005\u0005\u0007BB@\u001b\u0001\b\t9\"\u0001\u0005tC6\u0004H.\u001b8h+\t\t9\r\u0005\u0004.\u0003\u0013\u0014\u0018QZ\u0005\u0004\u0003\u0017t#a\u0004)beRL\u0017\r\u001c$v]\u000e$\u0018n\u001c8\u0011\u000b5\ny-^;\n\u0007
\u0005EgFA\u0005Gk:\u001cG/[8oc\u0005yA-\u001a4bk2$8+Y7qY&tw\r")
/* loaded from: input_file:bio/ferlab/datalake/spark3/etl/v2/ETL.class */
/*
 * Base class for ETL jobs driven by a sequence of RunStep values.
 *
 * Subclasses supply mainDestination(), extract(...) and transform(...); this class supplies
 * a default load(...), a no-op publish(...), reset(...), sampling hooks and the run(...)
 * orchestration. The xxx$default$N() methods are the Scala default-argument accessors and
 * are part of the compiled interface.
 */
public abstract class ETL implements Runnable {
    private final Configuration conf;
    private final LocalDateTime minDateTime = LocalDateTime.of(1900, 1, 1, 0, 0, 0);
    // NOTE(review): the seconds component is 55, not 59 - confirm this is intentional.
    private final LocalDateTime maxDateTime = LocalDateTime.of(9999, 12, 31, 23, 59, 55);
    private final Logger log = LoggerFactory.getLogger(loggerNameFor(getClass()));

    /**
     * Picks the logger name for a concrete ETL class.
     *
     * {@link Class#getCanonicalName()} returns {@code null} for anonymous and local classes;
     * in that case the binary name is used so the logger always gets a non-null name.
     */
    private static String loggerNameFor(Class<?> cls) {
        String canonicalName = cls.getCanonicalName();
        return canonicalName != null ? canonicalName : cls.getName();
    }

    /** @return the configuration this ETL was constructed with. */
    public Configuration conf() {
        return this.conf;
    }

    /** @return the lower bound used as "last run" when no previous run can be determined (1900-01-01T00:00:00). */
    public LocalDateTime minDateTime() {
        return this.minDateTime;
    }

    /** @return the upper bound constant (9999-12-31T23:59:55). */
    public LocalDateTime maxDateTime() {
        return this.maxDateTime;
    }

    /** @return the dataset that {@link #run}, {@link #reset} and {@link #getLastRunDateFor} treat as this job's main output. */
    public abstract DatasetConf mainDestination();

    /** @return the logger of this ETL instance. */
    public Logger log() {
        return this.log;
    }

    /**
     * Reads the source data.
     *
     * @param localDateTime  last run date-time (lower bound of the run window)
     * @param localDateTime2 current run date-time (upper bound of the run window)
     * @param sparkSession   active Spark session
     * @return extracted data frames keyed by name
     */
    public abstract Map<String, Dataset<Row>> extract(LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession);

    /** Default for the first date argument of {@code extract}: {@link #minDateTime()}. */
    public LocalDateTime extract$default$1() {
        return minDateTime();
    }

    /** Default for the second date argument of {@code extract}: the current time. */
    public LocalDateTime extract$default$2() {
        return LocalDateTime.now();
    }

    /**
     * Transforms the extracted data.
     *
     * @param map            data frames produced by {@code extract} (possibly sampled), keyed by name
     * @param localDateTime  last run date-time
     * @param localDateTime2 current run date-time
     * @param sparkSession   active Spark session
     * @return data frames to load, keyed by name; the default {@link #load} resolves each key with {@code conf().getDataset(key)}
     */
    public abstract Map<String, Dataset<Row>> transform(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession);

    /** Default for the first date argument of {@code transform}: {@link #minDateTime()}. */
    public LocalDateTime transform$default$2() {
        return minDateTime();
    }

    /** Default for the second date argument of {@code transform}: the current time. */
    public LocalDateTime transform$default$3() {
        return LocalDateTime.now();
    }

    /**
     * Writes every entry of {@code map} to the dataset whose id is the entry key, then reads it back.
     *
     * For each entry, the database of the dataset's table (when a table is configured) is created
     * if missing, and the writer is resolved from the dataset's (format, loadtype) pair. A failed
     * write does not abort the load: it is logged at ERROR level with its cause and processing
     * continues. The subsequent read-back is not guarded and may still throw.
     *
     * @param map            data frames to write, keyed by dataset id
     * @param localDateTime  last run date-time (unused by this default implementation)
     * @param localDateTime2 current run date-time (unused by this default implementation)
     * @param sparkSession   active Spark session
     * @return for each key, the data frame read back from the corresponding dataset
     */
    public Map<String, Dataset<Row>> load(Map<String, Dataset<Row>> map, LocalDateTime localDateTime, LocalDateTime localDateTime2, SparkSession sparkSession) {
        return (Map) map.map(tuple2 -> {
            if (tuple2 == null) {
                throw new MatchError(tuple2);
            }
            String str = (String) tuple2._1();
            Dataset dataset = (Dataset) tuple2._2();
            DatasetConf dataset2 = this.conf().getDataset(str);
            Try$.MODULE$.apply(() -> {
                dataset2.table().foreach(tableConf -> {
                    return sparkSession.sql("CREATE DATABASE IF NOT EXISTS " + tableConf.database());
                });
                return (Dataset) ((Function2) LoadResolver$.MODULE$.write(sparkSession, this.conf()).apply(Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(dataset2.format()), dataset2.loadtype()))).apply(dataset2, dataset);
            }).fold(th -> {
                // Keep the load best-effort, but surface the cause instead of dropping it.
                this.log().error("Failed to load " + str, (Throwable) th);
                return BoxedUnit.UNIT;
            }, dataset3 -> {
                this.log().info("Succeeded to load " + str);
                return BoxedUnit.UNIT;
            });
            return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str), DatasetConfImplicits$.MODULE$.DatasetConfOperations(dataset2).read(this.conf(), sparkSession));
        }, Map$.MODULE$.canBuildFrom());
    }

    /** Default for the first date argument of {@code load}: {@link #minDateTime()}. */
    public LocalDateTime load$default$2() {
        return minDateTime();
    }

    /** Default for the second date argument of {@code load}: the current time. */
    public LocalDateTime load$default$3() {
        return LocalDateTime.now();
    }

    /** Publish hook executed by {@link #run} when the publish step is requested; does nothing by default. */
    public void publish(SparkSession sparkSession) {
    }

    /**
     * Executes the requested steps in a fixed order: reset, extract (+ optional sampling),
     * transform, load (or show), publish.
     *
     * The run window is resolved first: the last run date is {@code option} if defined, otherwise
     * {@link #minDateTime()} when a reset is requested, otherwise
     * {@code getLastRunDateFor(mainDestination())}; the current run date is {@code option2} if
     * defined, otherwise the current time. The same window is handed to extract, transform and load.
     *
     * Extraction only happens when the transform step is requested. Without the load step, each
     * transformed data frame is logged by name and shown instead of being written.
     *
     * @param seq          steps to execute
     * @param option       optional explicit last run date-time
     * @param option2      optional explicit current run date-time
     * @param sparkSession active Spark session
     * @return for every transformed key plus the main destination id, the data frame read from the
     *         corresponding dataset, or an empty data frame when that read fails
     */
    @Override // bio.ferlab.datalake.spark3.etl.Runnable
    public Map<String, Dataset<Row>> run(Seq<RunStep> seq, Option<LocalDateTime> option, Option<LocalDateTime> option2, SparkSession sparkSession) {
        LocalDateTime lastRunDateTime = (LocalDateTime) option.getOrElse(() -> {
            return seq.contains(RunStep$reset$.MODULE$) ? this.minDateTime() : this.getLastRunDateFor(this.mainDestination(), sparkSession);
        });
        LocalDateTime currentRunDateTime = (LocalDateTime) option2.getOrElse(() -> {
            return LocalDateTime.now();
        });
        if (seq.isEmpty()) {
            log().warn("WARNING ETL started with no runSteps. Nothing will be executed.");
        } else {
            log().info("RUN steps: \t\t " + seq.mkString(" -> "));
        }
        log().info("RUN lastRunDate: \t " + lastRunDateTime);
        log().info("RUN currentRunDate: \t " + currentRunDateTime);

        if (seq.contains(RunStep$reset$.MODULE$)) {
            reset(sparkSession);
        }

        Map<String, Dataset<Row>> output;
        if (seq.contains(RunStep$transform$.MODULE$)) {
            Map<String, Dataset<Row>> extracted;
            if (seq.contains(RunStep$extract$.MODULE$)) {
                Map<String, Dataset<Row>> raw = extract(lastRunDateTime, currentRunDateTime, sparkSession);
                if (seq.contains(RunStep$sample$.MODULE$)) {
                    // Apply the per-dataset sampling function selected by the entry key.
                    extracted = (Map) raw.map(tuple2 -> {
                        if (tuple2 == null) {
                            throw new MatchError(tuple2);
                        }
                        String str = (String) tuple2._1();
                        return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str), ((Function1) this.sampling().apply(str)).apply((Dataset) tuple2._2()));
                    }, Map$.MODULE$.canBuildFrom());
                } else {
                    extracted = raw;
                }
            } else {
                extracted = (Map) Predef$.MODULE$.Map().apply(Nil$.MODULE$);
            }
            output = transform(extracted, lastRunDateTime, currentRunDateTime, sparkSession);
        } else {
            output = (Map) Predef$.MODULE$.Map().apply(Nil$.MODULE$);
        }

        if (seq.contains(RunStep$load$.MODULE$)) {
            // Pass the resolved run window so overriding load implementations see the same
            // dates as extract and transform, rather than the minDateTime/now defaults.
            load(output, lastRunDateTime, currentRunDateTime, sparkSession);
        } else {
            output.foreach(tuple22 -> {
                $anonfun$run$4(this, tuple22);
                return BoxedUnit.UNIT;
            });
        }

        if (seq.contains(RunStep$publish$.MODULE$)) {
            publish(sparkSession);
        }

        return ((TraversableOnce) ((List) output.keys().toList().$plus$plus(new $colon.colon(mainDestination().id(), Nil$.MODULE$), List$.MODULE$.canBuildFrom())).map(str -> {
            return Predef$ArrowAssoc$.MODULE$.$minus$greater$extension(Predef$.MODULE$.ArrowAssoc(str), Try$.MODULE$.apply(() -> {
                return DatasetConfImplicits$.MODULE$.DatasetConfOperations(this.conf().getDataset(str)).read(this.conf(), sparkSession);
            }).getOrElse(() -> {
                return sparkSession.emptyDataFrame();
            }));
        }, List$.MODULE$.canBuildFrom())).toMap(Predef$.MODULE$.$conforms());
    }

    /** Default steps for {@code run}: {@code RunStep.default_load}. */
    @Override // bio.ferlab.datalake.spark3.etl.Runnable
    public Seq<RunStep> run$default$1() {
        return RunStep$.MODULE$.default_load();
    }

    /** Default last run date-time for {@code run}: none (resolved inside {@code run}). */
    @Override // bio.ferlab.datalake.spark3.etl.Runnable
    public Option<LocalDateTime> run$default$2() {
        return None$.MODULE$;
    }

    /** Default current run date-time for {@code run}: none (resolved inside {@code run}). */
    @Override // bio.ferlab.datalake.spark3.etl.Runnable
    public Option<LocalDateTime> run$default$3() {
        return None$.MODULE$;
    }

    /**
     * Derives the last run date-time from the data already stored in {@code datasetConf}.
     *
     * For load type Scd1 it is the maximum of the timestamp column named by write option
     * {@code updated_on_column}; for Scd2 it is the maximum of the date column named by write
     * option {@code valid_from_column}, at start of day. Any failure while computing the value
     * (for example an unreadable dataset, a missing option or an empty result) yields
     * {@link #minDateTime()}, as does any other load type.
     *
     * @param datasetConf  dataset to inspect
     * @param sparkSession active Spark session
     * @return the derived last run date-time, never {@code null}
     */
    public LocalDateTime getLastRunDateFor(DatasetConf datasetConf, SparkSession sparkSession) {
        LoadType loadtype = datasetConf.loadtype();
        if (LoadType$Scd1$.MODULE$.equals(loadtype)) {
            return (LocalDateTime) Try$.MODULE$.apply(() -> {
                return ((Timestamp) DatasetConfImplicits$.MODULE$.DatasetConfOperations(datasetConf).read(this.conf(), sparkSession).select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.max(functions$.MODULE$.col((String) datasetConf.writeoptions().apply("updated_on_column")))})).limit(1).as(sparkSession.implicits().newTimeStampEncoder()).head()).toLocalDateTime();
            }).getOrElse(() -> {
                return this.minDateTime();
            });
        }
        if (LoadType$Scd2$.MODULE$.equals(loadtype)) {
            return (LocalDateTime) Try$.MODULE$.apply(() -> {
                return ((Date) DatasetConfImplicits$.MODULE$.DatasetConfOperations(datasetConf).read(this.conf(), sparkSession).select(Predef$.MODULE$.wrapRefArray(new Column[]{functions$.MODULE$.max(functions$.MODULE$.col((String) datasetConf.writeoptions().apply("valid_from_column")))})).limit(1).as(sparkSession.implicits().newDateEncoder()).head()).toLocalDate().atStartOfDay();
            }).getOrElse(() -> {
                return this.minDateTime();
            });
        }
        return minDateTime();
    }

    /**
     * Removes the main destination: deletes its path through the file system resolved for its
     * storage, then applies the drop function resolved by {@code SqlBinderResolver} for its format.
     */
    public void reset(SparkSession sparkSession) {
        ((FileSystem) FileSystemResolver$.MODULE$.resolve().apply(conf().getStorage(mainDestination().storageid()).filesystem())).remove(mainDestination().path());
        ((Function1) SqlBinderResolver$.MODULE$.drop(sparkSession, conf()).apply(mainDestination().format())).apply(mainDestination());
    }

    /**
     * Sampling functions applied to extracted data frames, selected by data frame name, when the
     * sample step is requested. Defaults to {@link #defaultSampling()}; override to customise.
     */
    public PartialFunction<String, Function1<Dataset<Row>, Dataset<Row>>> sampling() {
        return defaultSampling();
    }

    /** @return the built-in sampling partial function (implemented by a compiler-generated class outside this file). */
    public PartialFunction<String, Function1<Dataset<Row>, Dataset<Row>>> defaultSampling() {
        return new ETL$$anonfun$defaultSampling$1(null);
    }

    /** Compiler-generated lambda body used by {@code run} when the load step is absent: logs the name and shows the data frame untruncated. */
    public static final /* synthetic */ void $anonfun$run$4(ETL etl, Tuple2 tuple2) {
        if (tuple2 == null) {
            throw new MatchError(tuple2);
        }
        String str = (String) tuple2._1();
        Dataset dataset = (Dataset) tuple2._2();
        etl.log().info(str + ":");
        dataset.show(false);
    }

    /**
     * @param configuration configuration used to resolve datasets and storages
     */
    public ETL(Configuration configuration) {
        this.conf = configuration;
    }
}
