package io.smartdatalake.workflow.dataobject;

import io.smartdatalake.config.SdlConfigObject;
import io.smartdatalake.util.hdfs.PartitionValues;
import io.smartdatalake.util.hdfs.PartitionValues$;
import io.smartdatalake.util.hdfs.SparkRepartitionDef;
import io.smartdatalake.util.misc.DataFrameUtil$;
import io.smartdatalake.workflow.ActionPipelineContext;
import org.apache.hadoop.fs.Path;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructType;
import scala.Option;
import scala.Predef$;
import scala.collection.GenSeq;
import scala.collection.IterableLike;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SetLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: SparkFileDataObject.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005Ee\u0001\u0003\t\u0012!\u0003\r\t!F\r\t\u000bM\u0002A\u0011A\u001b\t\u000be\u0002a\u0011\u0001\u001e\t\u000b\u0019\u0003A\u0011A$\t\u000b-\u0003a\u0011\u0001'\t\u000bA\u0003a\u0011A)\t\u000bm\u0003A\u0011\u0001/\t\u000bY\u0004A\u0011A<\t\u000be\u0004A\u0011\u0001>\t\u000f\u0005=\u0001\u0001\"\u0011\u0002\u0012!I\u0011q\t\u0001\u0012\u0002\u0013\u0005\u0011\u0011\n\u0005\b\u0003?\u0002A\u0011IA1\u0011\u001d\ti\u0007\u0001C!\u0003_Bq!!\u001f\u0001\t\u0003\nY\bC\u0005\u0002\n\u0002\t\n\u0011\"\u0001\u0002J!I\u00111\u0012\u0001\u0012\u0002\u0013\u0005\u0011Q\u0012\u0002\u0014'B\f'o\u001b$jY\u0016$\u0015\r^1PE*,7\r\u001e\u0006\u0003%M\t!\u0002Z1uC>\u0014'.Z2u\u0015\t!R#\u0001\u0005x_J\\g\r\\8x\u0015\t1r#A\u0007t[\u0006\u0014H\u000fZ1uC2\f7.\u001a\u0006\u00021\u0005\u0011\u0011n\\\n\t\u0001i\u0001Ce\n\u0016.aA\u00111DH\u0007\u00029)\tQ$A\u0003tG\u0006d\u0017-\u0003\u0002 9\t1\u0011I\\=SK\u001a\u0004\"!\t\u0012\u000e\u0003EI!aI\t\u0003)!\u000bGm\\8q\r&dW\rR1uC>\u0013'.Z2u!\t\tS%\u0003\u0002'#\t\u00112)\u00198De\u0016\fG/\u001a#bi\u00064%/Y7f!\t\t\u0003&\u0003\u0002*#\t\t2)\u00198Xe&$X\rR1uC\u001a\u0013\u0018-\\3\u0011\u0005\u0005Z\u0013B\u0001\u0017\u0012\u0005m\u0019\u0015M\\\"sK\u0006$Xm\u0015;sK\u0006l\u0017N\\4ECR\fgI]1nKB\u0011\u0011EL\u0005\u0003_E\u0011\u0011#V:fe\u0012+g-\u001b8fIN\u001b\u0007.Z7b!\t\t\u0013'\u0003\u00023#\t\u00012k\u00195f[\u00064\u0016\r\\5eCRLwN\\\u0001\u0007I%t\u0017\u000e\u001e\u0013\u0004\u0001Q\ta\u0007\u0005\u0002\u001co%\u0011\u0001\b\b\u0002\u0005+:LG/\u0001\u0004g_Jl\u0017\r^\u000b\u0002wA\u0011Ah\u0011\b\u0003{\u0005\u0003\"A\u0010\u000f\u000e\u0003}R!\u0001\u0011\u001b\u0002\rq\u0012xn\u001c;?\u0013\t\u0011E$\u0001\u0004Qe\u0016$WMZ\u0005\u0003\t\u0016\u0013aa\u0015;sS:<'B\u0001\"\u001d\u0003\u001dy\u0007\u000f^5p]N,\u0012\u0001\u0013\t\u0005y%[4(\u0003\u0002K\u000b\n\u0019Q*\u00199\u0002\u001d\u0019LG.\u001a8b[\u0016\u001cu\u000e\\;n]V\tQ\nE\u0002\u001c\u001dnJ!a\u0014\u000f\u0003\r=\u0003H/[
8o\u0003A\u0019\b/\u0019:l%\u0016\u0004\u0018M\u001d;ji&|g.F\u0001S!\rYbj\u0015\t\u0003)fk\u0011!\u0016\u0006\u0003-^\u000bA\u0001\u001b3gg*\u0011\u0001,F\u0001\u0005kRLG.\u0003\u0002[+\n\u00192\u000b]1sWJ+\u0007/\u0019:uSRLwN\u001c#fM\u0006Y!-\u001a4pe\u0016<&/\u001b;f)\tiF\u000f\u0005\u0002_c:\u0011qL\u001c\b\u0003A.t!!\u00195\u000f\u0005\t,gB\u0001 d\u0013\u0005!\u0017aA8sO&\u0011amZ\u0001\u0007CB\f7\r[3\u000b\u0003\u0011L!!\u001b6\u0002\u000bM\u0004\u0018M]6\u000b\u0005\u0019<\u0017B\u00017n\u0003\r\u0019\u0018\u000f\u001c\u0006\u0003S*L!a\u001c9\u0002\u000fA\f7m[1hK*\u0011A.\\\u0005\u0003eN\u0014\u0011\u0002R1uC\u001a\u0013\u0018-\\3\u000b\u0005=\u0004\b\"B;\u0007\u0001\u0004i\u0016A\u00013g\u0003%\tg\r^3s%\u0016\fG\r\u0006\u0002^q\")Qo\u0002a\u0001;\u0006Q!/Z1e'\u000eDW-\\1\u0015\u0007m\f)\u0001E\u0002\u001c\u001dr\u00042!`A\u0001\u001b\u0005q(BA@q\u0003\u0015!\u0018\u0010]3t\u0013\r\t\u0019A \u0002\u000b'R\u0014Xo\u0019;UsB,\u0007bBA\u0004\u0011\u0001\u0007\u0011\u0011B\u0001\rg>,(oY3Fq&\u001cHo\u001d\t\u00047\u0005-\u0011bAA\u00079\t9!i\\8mK\u0006t\u0017\u0001D4fi\u0012\u000bG/\u0019$sC6,G\u0003BA\n\u0003[!R!XA\u000b\u0003CAq!a\u0006\n\u0001\b\tI\"A\u0004tKN\u001c\u0018n\u001c8\u0011\t\u0005m\u0011QD\u0007\u0002a&\u0019\u0011q\u00049\u0003\u0019M\u0003\u0018M]6TKN\u001c\u0018n\u001c8\t\u000f\u0005\r\u0012\u0002q\u0001\u0002&\u000591m\u001c8uKb$\b\u0003BA\u0014\u0003Si\u0011aE\u0005\u0004\u0003W\u0019\"!F!di&|g\u000eU5qK2Lg.Z\"p]R,\u0007\u0010\u001e\u0005\n\u0003_I\u0001\u0013!a\u0001\u0003c\tq\u0002]1si&$\u0018n\u001c8WC2,Xm\u001d\t\u0007\u0003g\tY$!\u0011\u000f\t\u0005U\u0012\u0011\b\b\u0004}\u0005]\u0012\"A\u000f\n\u0005=d\u0012\u0002BA\u001f\u0003\u007f\u00111aU3r\u0015\tyG\u0004E\u0002U\u0003\u0007J1!!\u0012V\u0005=\u0001\u0016M\u001d;ji&|gNV1mk\u0016\u001c\u0018AF4fi\u0012\u000bG/\u0019$sC6,G\u0005Z3gCVdG\u000fJ\u0019\u0016\u0005\u0005-#\u0006BA\u0019\u0003\u001bZ#!a\u0014\u0011\t\u0005E\u00131L\u0007\u0003\u0003'RA!!\u0016\u0002X\u0005IQO\\2iK\u000e\\W
\r\u001a\u0006\u0004\u00033b\u0012AC1o]>$\u0018\r^5p]&!\u0011QLA*\u0005E)hn\u00195fG.,GMV1sS\u0006t7-Z\u0001\u0016O\u0016$8\u000b\u001e:fC6Lgn\u001a#bi\u00064%/Y7f)\u0019\t\u0019'a\u001a\u0002jQ\u0019Q,!\u001a\t\u000f\u0005]1\u0002q\u0001\u0002\u001a!)ai\u0003a\u0001\u0011\"1\u00111N\u0006A\u0002m\fa\u0002]5qK2Lg.Z*dQ\u0016l\u0017-\u0001\tde\u0016\fG/\u001a*fC\u0012\u001c6\r[3nCR!\u0011\u0011OA;)\ra\u00181\u000f\u0005\b\u0003/a\u00019AA\r\u0011\u0019\t9\b\u0004a\u0001y\u0006YqO]5uKN\u001b\u0007.Z7b\u000399(/\u001b;f\t\u0006$\u0018M\u0012:b[\u0016$\u0002\"! \u0002\u0002\u0006\r\u0015Q\u0011\u000b\u0004m\u0005}\u0004bBA\f\u001b\u0001\u000f\u0011\u0011\u0004\u0005\u0006k6\u0001\r!\u0018\u0005\n\u0003_i\u0001\u0013!a\u0001\u0003cA\u0011\"a\"\u000e!\u0003\u0005\r!!\u0003\u0002!%\u001c(+Z2veNLg/Z%oaV$\u0018\u0001G<sSR,G)\u0019;b\rJ\fW.\u001a\u0013eK\u001a\fW\u000f\u001c;%e\u0005ArO]5uK\u0012\u000bG/\u0019$sC6,G\u0005Z3gCVdG\u000fJ\u001a\u0016\u0005\u0005=%\u0006BA\u0005\u0003\u001b\u0002")
/* loaded from: input_file:io/smartdatalake/workflow/dataobject/SparkFileDataObject.class */
/**
 * Contract for data objects whose files are read and written through Spark's
 * DataFrame reader/writer API (decompiled from SparkFileDataObject.scala).
 *
 * <p>Implementations supply the Spark {@link #format()} plus optional settings; the default
 * methods implement batch reading, streaming reading and writing on top of the
 * members inherited from the extended interfaces (e.g. {@code hadoopPath()},
 * {@code partitions()}, {@code schema()}, {@code saveMode()}).
 */
public interface SparkFileDataObject extends HadoopFileDataObject, CanCreateDataFrame, CanWriteDataFrame, CanCreateStreamingDataFrame, UserDefinedSchema, SchemaValidation {
    /**
     * @return the format name handed to {@code DataFrameReader.format}, {@code DataStreamReader.format}
     *         and {@code DataFrameWriter.format}
     */
    String format();

    /**
     * Options handed to the batch reader and to the writer.
     *
     * @return an empty immutable map unless overridden
     */
    default Map<String, String> options() {
        return Predef$.MODULE$.Map().apply(Nil$.MODULE$);
    }

    /**
     * @return optional name of a column that {@link #getDataFrame} fills with
     *         {@code input_file_name()} and that {@link #createReadSchema} appends as a string field
     */
    Option<String> filenameColumn();

    /**
     * @return optional repartition definition applied to the DataFrame before writing; if it
     *         carries a filename, the written file is renamed to it after the write
     */
    Option<SparkRepartitionDef> sparkRepartition();

    /**
     * Hook invoked on the DataFrame right before it is written.
     *
     * @param dataset the DataFrame about to be written
     * @return the same DataFrame, after {@code validateSchemaMin} has been called on it
     */
    default Dataset<Row> beforeWrite(Dataset<Row> dataset) {
        validateSchemaMin(dataset);
        return dataset;
    }

    /**
     * Hook invoked on the DataFrame right after it has been read.
     *
     * @param dataset the DataFrame that was just created by a reader
     * @return the same DataFrame, after {@code validateSchemaMin} has been called on it
     */
    default Dataset<Row> afterRead(Dataset<Row> dataset) {
        validateSchemaMin(dataset);
        return dataset;
    }

    /**
     * Schema to pass to the batch reader.
     *
     * @param z whether files already exist (this is what {@link #getDataFrame} passes);
     *          ignored by this default implementation
     * @return the user-defined {@code schema()}
     */
    default Option<StructType> readSchema(boolean z) {
        return schema();
    }

    /**
     * Reads the data object as a batch DataFrame.
     *
     * <p>If no files exist yet, a schema must be defined and the directory {@code hadoopPath()}
     * is created. Without partitions or partition values the whole path is loaded; otherwise one
     * DataFrame per partition value is loaded (with {@code basePath} set to {@code hadoopPath()})
     * and the results are unioned. The result is passed through {@code withOptionalColumn} for
     * {@link #filenameColumn()} and then through {@link #afterRead}.
     *
     * @param seq                   partition values to read; empty means read everything
     * @param sparkSession          session used for reading and for filesystem access
     * @param actionPipelineContext not used by this default implementation
     * @return the DataFrame returned by {@link #afterRead}
     */
    default Dataset<Row> getDataFrame(Seq<PartitionValues> seq, SparkSession sparkSession, ActionPipelineContext actionPipelineContext) {
        Dataset<Row> load;
        Seq<String> checkWrongPartitionValues = PartitionValues$.MODULE$.checkWrongPartitionValues(seq, partitions());
        // Decompiler artifact: this is a call to Scala's Predef.assert; `assert` is a reserved word in Java.
        Predef$.MODULE$.assert(checkWrongPartitionValues.isEmpty(), () -> {
            return new StringBuilder(87).append("getDataFrame got request with PartitionValues keys ").append(checkWrongPartitionValues.mkString(",")).append(" not included in ").append(new SdlConfigObject.DataObjectId(this.id())).append(" partition columns ").append(this.partitions().mkString(", ")).toString();
        });
        boolean checkFilesExisting = checkFilesExisting(sparkSession);
        if (!checkFilesExisting) {
            // Nothing to infer a schema from, so it has to be configured explicitly.
            Predef$.MODULE$.require(schema().isDefined(), () -> {
                return new StringBuilder(91).append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") DataObject schema is undefined. A schema must be defined if there are no existing files.").toString();
            });
            // Create the directory before loading from it; the boolean result of mkdirs is not checked.
            filesystem(sparkSession).mkdirs(hadoopPath());
        }
        if (partitions().isEmpty() || seq.isEmpty()) {
            load = DataFrameUtil$.MODULE$.DataFrameReaderUtils(sparkSession.read().format(format()).options(options())).optionalSchema(readSchema(checkFilesExisting)).load(hadoopPath().toString());
        } else {
            // "basePath" is set to the root path while the individual partition directories are loaded.
            DataFrameReader option = DataFrameUtil$.MODULE$.DataFrameReaderUtils(sparkSession.read().format(format()).options(options())).optionalSchema(readSchema(checkFilesExisting)).option("basePath", hadoopPath().toString());
            // seq is non-empty in this branch, so reduce always has at least one element.
            load = (Dataset) ((TraversableOnce) ((Seq) seq.map(partitionValues -> {
                return new Path(this.hadoopPath(), (String) this.getPartitionString(partitionValues, sparkSession).get()).toString();
            }, Seq$.MODULE$.canBuildFrom())).map(str -> {
                return option.load(str);
            }, Seq$.MODULE$.canBuildFrom())).reduce((dataset, dataset2) -> {
                return dataset.union(dataset2);
            });
        }
        return afterRead(DataFrameUtil$.MODULE$.DfSDL(load).withOptionalColumn(filenameColumn(), functions$.MODULE$.input_file_name()));
    }

    /**
     * Scala default for the first parameter of {@link #getDataFrame}.
     *
     * @return the empty sequence
     */
    default Seq<PartitionValues> getDataFrame$default$1() {
        return Nil$.MODULE$;
    }

    /**
     * Reads the data object as a streaming DataFrame.
     *
     * <p>A schema is mandatory: the user-defined {@code schema()} is used if present, otherwise
     * the given pipeline schema. Note that the stream reader is configured with the options
     * passed in {@code map}, not with {@link #options()}.
     *
     * @param map          options handed to the stream reader
     * @param option       fallback schema used when {@code schema()} is undefined
     * @param sparkSession session used for reading and for filesystem access
     * @return the streaming DataFrame returned by {@link #afterRead}
     */
    default Dataset<Row> getStreamingDataFrame(Map<String, String> map, Option<StructType> option, SparkSession sparkSession) {
        Predef$.MODULE$.require(schema().orElse(() -> {
            return option;
        }).isDefined(), () -> {
            return new StringBuilder(59).append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") Schema must be defined for streaming SparkFileDataObject").toString();
        });
        // NOTE(review): the existence check is on the parent directory while mkdirs targets
        // hadoopPath() itself - confirm this asymmetry is intended.
        if (!filesystem(sparkSession).exists(hadoopPath().getParent())) {
            filesystem(sparkSession).mkdirs(hadoopPath());
        }
        return afterRead(sparkSession.readStream().format(format()).options(map).schema((StructType) schema().orElse(() -> {
            return option;
        }).get()).load(hadoopPath().toString()));
    }

    /**
     * Derives the read schema from a write schema.
     *
     * @param structType   the write schema
     * @param sparkSession not used by this default implementation
     * @return {@code structType} with the {@link #filenameColumn()} added as a string field via
     *         {@code addFieldIfNotExisting} when a filename column is defined, otherwise
     *         {@code structType} unchanged
     */
    default StructType createReadSchema(StructType structType, SparkSession sparkSession) {
        return (StructType) filenameColumn().map(str -> {
            return this.addFieldIfNotExisting(structType, str, StringType$.MODULE$);
        }).getOrElse(() -> {
            return structType;
        });
    }

    /**
     * Writes a DataFrame to {@code hadoopPath()}.
     *
     * <p>Steps: run {@link #beforeWrite}, apply {@link #sparkRepartition()} if defined, delete the
     * affected existing partitions when overwriting a partitioned data object, write with
     * {@link #format()}, {@code saveMode()} and {@link #options()}, call
     * {@code createMissingPartitions}, and finally rename the written file if the repartition
     * definition carries a filename.
     *
     * @param dataset      the DataFrame to write
     * @param seq          partition values being written
     * @param z            whether this data object is also used as recursive input; must be {@code false}
     * @param sparkSession session used for filesystem access
     * @throws IllegalArgumentException (from Scala's {@code require}) if {@code z} is {@code true}
     */
    default void writeDataFrame(Dataset<Row> dataset, Seq<PartitionValues> seq, boolean z, SparkSession sparkSession) {
        // The message used to be the literal "($id) ..." (missing Scala string interpolation);
        // it now carries the data object id like every other message in this interface.
        Predef$.MODULE$.require(!z, () -> {
            return new StringBuilder().append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") SparkFileDataObject cannot write dataframe when dataobject is also used as recursive input ").toString();
        });
        // ObjectRef lets the lambdas below read the DataFrame that is reassigned afterwards.
        ObjectRef create = ObjectRef.create(beforeWrite(dataset));
        create.elem = (Dataset) sparkRepartition().map(sparkRepartitionDef -> {
            return sparkRepartitionDef.prepareDataFrame((Dataset) create.elem, this.partitions(), seq.size(), this.id());
        }).getOrElse(() -> {
            return (Dataset) create.elem;
        });
        if (SaveMode.Overwrite.equals(saveMode()) && partitions().nonEmpty()) {
            // Union of all partition keys that occur in the partition values to write.
            Seq seq2 = ((SetLike) ((TraversableOnce) seq.map(partitionValues -> {
                return partitionValues.keys();
            }, Seq$.MODULE$.canBuildFrom())).reduceOption((set, set2) -> {
                return set.$plus$plus(set2);
            }).getOrElse(() -> {
                return Predef$.MODULE$.Set().apply(Nil$.MODULE$);
            })).toSeq();
            // Delete those requested partition values that match an existing partition
            // once the existing partition is reduced to the keys collected above.
            deletePartitions((Seq) seq.intersect((GenSeq) listPartitions(sparkSession).map(partitionValues2 -> {
                return partitionValues2.filterKeys(seq2);
            }, Seq$.MODULE$.canBuildFrom())), sparkSession);
        }
        String path = hadoopPath().toString();
        logger().info(new StringBuilder(22).append("Writing data frame to ").append(path).toString());
        DataFrameUtil$.MODULE$.DataFrameWriterUtils(((Dataset) create.elem).write().format(format()).mode(saveMode()).options(options())).optionalPartitionBy(partitions()).save(path);
        createMissingPartitions(seq, sparkSession);
        // Rename the written file if a target filename is configured.
        sparkRepartition().flatMap(sparkRepartitionDef2 -> {
            return sparkRepartitionDef2.filename();
        }).foreach(str -> {
            $anonfun$writeDataFrame$9(this, sparkSession, str);
            return BoxedUnit.UNIT;
        });
    }

    /**
     * Scala default for the second parameter of {@link #writeDataFrame}.
     *
     * @return the empty sequence
     */
    default Seq<PartitionValues> writeDataFrame$default$2() {
        return Nil$.MODULE$;
    }

    /**
     * Scala default for the third parameter of {@link #writeDataFrame}.
     *
     * @return {@code false}
     */
    default boolean writeDataFrame$default$3() {
        return false;
    }

    /**
     * Compiler-generated lambda body: renames {@code path} to {@code str} within the same parent directory.
     *
     * @return the result of {@code FileSystem.rename}
     */
    static /* synthetic */ boolean $anonfun$writeDataFrame$13(SparkFileDataObject sparkFileDataObject, SparkSession sparkSession, String str, Path path) {
        return sparkFileDataObject.filesystem(sparkSession).rename(path, new Path(path.getParent(), str));
    }

    /**
     * Compiler-generated lambda body: renames the file written by {@link #writeDataFrame} to {@code str}.
     *
     * <p>Requires the data object to be unpartitioned and to contain at most one file.
     */
    static /* synthetic */ void $anonfun$writeDataFrame$9(SparkFileDataObject sparkFileDataObject, SparkSession sparkSession, String str) {
        Predef$.MODULE$.require(sparkFileDataObject.partitions().isEmpty(), () -> {
            return new StringBuilder(73).append("(").append(new SdlConfigObject.DataObjectId(sparkFileDataObject.id())).append(") Cannot rename file with SparkRepartitionDef for partitioned DataObject").toString();
        });
        Seq<FileRef> fileRefs = sparkFileDataObject.getFileRefs((Seq) Nil$.MODULE$, sparkSession);
        Predef$.MODULE$.require(fileRefs.size() <= 1, () -> {
            return new StringBuilder(140).append("(").append(new SdlConfigObject.DataObjectId(sparkFileDataObject.id())).append(") Number of files should not be greater than 1 because SparkRepartitionDef.numberOfTasksPerPartition should be set to 1 if filename is set!").toString();
        });
        // NOTE(review): the boolean returned by rename is discarded, so a failed rename goes unnoticed.
        ((IterableLike) fileRefs.map(fileRef -> {
            return new Path(fileRef.fullPath());
        }, Seq$.MODULE$.canBuildFrom())).foreach(path -> {
            return BoxesRunTime.boxToBoolean($anonfun$writeDataFrame$13(sparkFileDataObject, sparkSession, str, path));
        });
    }
}
