package io.smartdatalake.workflow.dataobject;

import io.smartdatalake.config.SdlConfigObject;
import io.smartdatalake.definitions.Environment$;
import io.smartdatalake.definitions.SDLSaveMode$;
import io.smartdatalake.util.hdfs.PartitionValues;
import io.smartdatalake.util.hdfs.PartitionValues$;
import io.smartdatalake.util.hdfs.SparkRepartitionDef;
import io.smartdatalake.util.misc.DataFrameUtil$;
import io.smartdatalake.workflow.ActionPipelineContext;
import io.smartdatalake.workflow.ProcessingLogicException;
import org.apache.spark.sql.DataFrameReader;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.functions$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructType;
import scala.Enumeration;
import scala.None$;
import scala.Option;
import scala.Predef$;
import scala.Some;
import scala.Unit$;
import scala.collection.GenSeq;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.SeqLike;
import scala.collection.TraversableLike;
import scala.collection.immutable.Map;
import scala.collection.immutable.Nil$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;
import scala.runtime.ObjectRef;

/* compiled from: SparkFileDataObject.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005]f\u0001\u0003\n\u0014!\u0003\r\taF\u000e\t\u000bU\u0002A\u0011A\u001c\t\u000bm\u0002a\u0011\u0001\u001f\t\u000b!\u0003A\u0011A%\t\u000b5\u0003a\u0011\u0001(\t\u000bI\u0003a\u0011A*\t\u000bu\u0003A\u0011\u00010\t\r}\u0004A\u0011AA\u0001\u0011\u001d\tI\u0001\u0001C\u0001\u0003\u0017Aq!!\n\u0001\t\u0003\n9\u0003C\u0005\u0002T\u0001\t\n\u0011\"\u0001\u0002V!9\u00111\u000e\u0001\u0005B\u00055\u0004bBA=\u0001\u0011\u0005\u00131\u0010\u0005\b\u0003\u000b\u0003A\u0011IAD\u0011\u001d\t\u0019\n\u0001C!\u0003+C\u0011\"!*\u0001#\u0003%\t!!\u0016\t\u0013\u0005\u001d\u0006!%A\u0005\u0002\u0005%\u0006bBAW\u0001\u0011\u0005\u0011q\u0016\u0002\u0014'B\f'o\u001b$jY\u0016$\u0015\r^1PE*,7\r\u001e\u0006\u0003)U\t!\u0002Z1uC>\u0014'.Z2u\u0015\t1r#\u0001\u0005x_J\\g\r\\8x\u0015\tA\u0012$A\u0007t[\u0006\u0014H\u000fZ1uC2\f7.\u001a\u0006\u00025\u0005\u0011\u0011n\\\n\t\u0001q\u0011c%\u000b\u00170eA\u0011Q\u0004I\u0007\u0002=)\tq$A\u0003tG\u0006d\u0017-\u0003\u0002\"=\t1\u0011I\\=SK\u001a\u0004\"a\t\u0013\u000e\u0003MI!!J\n\u0003)!\u000bGm\\8q\r&dW\rR1uC>\u0013'.Z2u!\t\u0019s%\u0003\u0002)'\t\u00112)\u00198De\u0016\fG/\u001a#bi\u00064%/Y7f!\t\u0019#&\u0003\u0002,'\t\t2)\u00198Xe&$X\rR1uC\u001a\u0013\u0018-\\3\u0011\u0005\rj\u0013B\u0001\u0018\u0014\u0005m\u0019\u0015M\\\"sK\u0006$Xm\u0015;sK\u0006l\u0017N\\4ECR\fgI]1nKB\u00111\u0005M\u0005\u0003cM\u0011\u0011#V:fe\u0012+g-\u001b8fIN\u001b\u0007.Z7b!\t\u00193'\u0003\u00025'\t\u00012k\u00195f[\u00064\u0016\r\\5eCRLwN\\\u0001\u0007I%t\u0017\u000e\u001e\u0013\u0004\u0001Q\t\u0001\b\u0005\u0002\u001es%\u0011!H\b\u0002\u0005+:LG/\u0001\u0004g_Jl\u0017\r^\u000b\u0002{A\u0011a(\u0012\b\u0003\u007f\r\u0003\"\u0001\u0011\u0010\u000e\u0003\u0005S!A\u0011\u001c\u0002\rq\u0012xn\u001c;?\u0013\t!e$\u0001\u0004Qe\u0016$WMZ\u0005\u0003\r\u001e\u0013aa\u0015;sS:<'B\u0001#\u001f\u0003\u001dy\u0007\u000f^5p]N,\u0012A\u0013\t\u0005}-kT(\u0003\u0002M\u000f\n\u0019Q*\u00199\u0002\u001d\u0019LG.\u001a8b[\u0016\u001cu\u00
0e\\;n]V\tq\nE\u0002\u001e!vJ!!\u0015\u0010\u0003\r=\u0003H/[8o\u0003A\u0019\b/\u0019:l%\u0016\u0004\u0018M\u001d;ji&|g.F\u0001U!\ri\u0002+\u0016\t\u0003-nk\u0011a\u0016\u0006\u00031f\u000bA\u0001\u001b3gg*\u0011!lF\u0001\u0005kRLG.\u0003\u0002]/\n\u00192\u000b]1sWJ+\u0007/\u0019:uSRLwN\u001c#fM\u0006Y!-\u001a4pe\u0016<&/\u001b;f)\tyV\u0010\u0006\u0002aoB\u0011\u0011\r\u001e\b\u0003EFt!a\u00198\u000f\u0005\u0011\\gBA3i\u001d\t\u0001e-C\u0001h\u0003\ry'oZ\u0005\u0003S*\fa!\u00199bG\",'\"A4\n\u00051l\u0017!B:qCJ\\'BA5k\u0013\ty\u0007/A\u0002tc2T!\u0001\\7\n\u0005I\u001c\u0018a\u00029bG.\fw-\u001a\u0006\u0003_BL!!\u001e<\u0003\u0013\u0011\u000bG/\u0019$sC6,'B\u0001:t\u0011\u0015Ah\u0001q\u0001z\u0003\u001d\u0019Xm]:j_:\u0004\"A_>\u000e\u0003ML!\u0001`:\u0003\u0019M\u0003\u0018M]6TKN\u001c\u0018n\u001c8\t\u000by4\u0001\u0019\u00011\u0002\u0005\u00114\u0017!C1gi\u0016\u0014(+Z1e)\u0011\t\u0019!a\u0002\u0015\u0007\u0001\f)\u0001C\u0003y\u000f\u0001\u000f\u0011\u0010C\u0003\u007f\u000f\u0001\u0007\u0001-A\u0005hKR\u001c6\r[3nCR!\u0011QBA\u000e!\u0011i\u0002+a\u0004\u0011\t\u0005E\u0011qC\u0007\u0003\u0003'Q1!!\u0006t\u0003\u0015!\u0018\u0010]3t\u0013\u0011\tI\"a\u0005\u0003\u0015M#(/^2u)f\u0004X\rC\u0004\u0002\u001e!\u0001\r!a\b\u0002\u0019M|WO]2f\u000bbL7\u000f^:\u0011\u0007u\t\t#C\u0002\u0002$y\u0011qAQ8pY\u0016\fg.\u0001\u0007hKR$\u0015\r^1Ge\u0006lW\r\u0006\u0003\u0002*\u0005eB#\u00021\u0002,\u00055\u0002\"\u0002=\n\u0001\bI\bbBA\u0018\u0013\u0001\u000f\u0011\u0011G\u0001\bG>tG/\u001a=u!\u0011\t\u0019$!\u000e\u000e\u0003UI1!a\u000e\u0016\u0005U\t5\r^5p]BK\u0007/\u001a7j]\u0016\u001cuN\u001c;fqRD\u0011\"a\u000f\n!\u0003\u0005\r!!\u0010\u0002\u001fA\f'\u000f^5uS>tg+\u00197vKN\u0004b!a\u0010\u0002H\u00055c\u0002BA!\u0003\u000br1\u0001QA\"\u0013\u0005y\u0012B\u0001:\u001f\u0013\u0011\tI%a\u0013\u0003\u0007M+\u0017O\u0003\u0002s=A\u0019a+a\u0014\n\u0007\u0005EsKA\bQCJ$\u0018\u000e^5p]Z\u000bG.^3t\u0003Y9W\r\u001e#bi\u00064%/Y7fI\u0011,g-Y;mi\u0012\nTCAA,U\u0011\ti$!\u0017,\
u0005\u0005m\u0003\u0003BA/\u0003Oj!!a\u0018\u000b\t\u0005\u0005\u00141M\u0001\nk:\u001c\u0007.Z2lK\u0012T1!!\u001a\u001f\u0003)\tgN\\8uCRLwN\\\u0005\u0005\u0003S\nyFA\tv]\u000eDWmY6fIZ\u000b'/[1oG\u0016\fQcZ3u'R\u0014X-Y7j]\u001e$\u0015\r^1Ge\u0006lW\r\u0006\u0004\u0002p\u0005M\u0014Q\u000f\u000b\u0004A\u0006E\u0004\"\u0002=\f\u0001\bI\b\"\u0002%\f\u0001\u0004Q\u0005bBA<\u0017\u0001\u0007\u0011QB\u0001\u000fa&\u0004X\r\\5oKN\u001b\u0007.Z7b\u0003A\u0019'/Z1uKJ+\u0017\rZ*dQ\u0016l\u0017\r\u0006\u0003\u0002~\u0005\u0005E\u0003BA\b\u0003\u007fBQ\u0001\u001f\u0007A\u0004eDq!a!\r\u0001\u0004\ty!A\u0006xe&$XmU2iK6\f\u0017\u0001B5oSR$b!!#\u0002\u0010\u0006EE#\u0002\u001d\u0002\f\u00065\u0005\"\u0002=\u000e\u0001\bI\bbBA\u0018\u001b\u0001\u000f\u0011\u0011\u0007\u0005\u0006}6\u0001\r\u0001\u0019\u0005\b\u0003wi\u0001\u0019AA\u001f\u000399(/\u001b;f\t\u0006$\u0018M\u0012:b[\u0016$\u0002\"a&\u0002\u001e\u0006}\u0015\u0011\u0015\u000b\u0006q\u0005e\u00151\u0014\u0005\u0006q:\u0001\u001d!\u001f\u0005\b\u0003_q\u00019AA\u0019\u0011\u0015qh\u00021\u0001a\u0011%\tYD\u0004I\u0001\u0002\u0004\ti\u0004C\u0005\u0002$:\u0001\n\u00111\u0001\u0002 \u0005\u0001\u0012n\u001d*fGV\u00148/\u001b<f\u0013:\u0004X\u000f^\u0001\u0019oJLG/\u001a#bi\u00064%/Y7fI\u0011,g-Y;mi\u0012\u0012\u0014\u0001G<sSR,G)\u0019;b\rJ\fW.\u001a\u0013eK\u001a\fW\u000f\u001c;%gU\u0011\u00111\u0016\u0016\u0005\u0003?\tI&\u0001\rgS2$XM\u001d)beRLG/[8og\u0016C\u0018n\u001d;j]\u001e$B!!-\u00026R!\u0011QHAZ\u0011\u0015A\u0018\u0003q\u0001z\u0011\u001d\tY$\u0005a\u0001\u0003{\u0001")
/* loaded from: input_file:io/smartdatalake/workflow/dataobject/SparkFileDataObject.class */
/*
 * Java view of the Scala trait SparkFileDataObject: a file based data object (HadoopFileDataObject)
 * that is read and written through Spark using the data source named by format().
 *
 * Implementations supply format(), filenameColumn() and sparkRepartition(); everything else has a
 * default implementation below. Members such as schema(), partitions(), hadoopPath(), filesystem(),
 * saveMode(), id() and logger() are inherited from the parent interfaces and are not declared here.
 */
public interface SparkFileDataObject extends HadoopFileDataObject, CanCreateDataFrame, CanWriteDataFrame, CanCreateStreamingDataFrame, UserDefinedSchema, SchemaValidation {
    /**
     * @return the Spark data source format name passed to {@code DataFrameReader.format} /
     *         {@code DataFrameWriter.format}
     */
    String format();

    /**
     * @return the options passed to the Spark reader and writer; an empty immutable map by default
     */
    default Map<String, String> options() {
        return Predef$.MODULE$.Map().apply(Nil$.MODULE$);
    }

    /**
     * @return optional name of a column that is added on read and filled with
     *         {@code input_file_name()}
     */
    Option<String> filenameColumn();

    /**
     * @return optional repartition definition applied to the DataFrame before writing and used to
     *         rename the written files afterwards
     */
    Option<SparkRepartitionDef> sparkRepartition();

    /**
     * Validation hook executed on the DataFrame right before it is written.
     *
     * @param dataset      the DataFrame about to be written
     * @param sparkSession the active Spark session (not used by the default implementation)
     * @return the unmodified {@code dataset}
     */
    default Dataset<Row> beforeWrite(Dataset<Row> dataset, SparkSession sparkSession) {
        validateSchemaMin(dataset, "write");
        validateSchemaHasPartitionCols(dataset, "write");
        // Only validate against the user defined schema if one is configured.
        schema().foreach(userSchema -> {
            this.validateSchema(dataset, userSchema, "write");
            return BoxedUnit.UNIT;
        });
        return dataset;
    }

    /**
     * Validation hook executed on the DataFrame right after it has been read.
     * The user defined schema, if any, is first converted with {@link #createReadSchema} so that it
     * includes the optional filename column.
     *
     * @param dataset      the DataFrame that was read
     * @param sparkSession the active Spark session
     * @return the unmodified {@code dataset}
     */
    default Dataset<Row> afterRead(Dataset<Row> dataset, SparkSession sparkSession) {
        validateSchemaMin(dataset, "read");
        validateSchemaHasPartitionCols(dataset, "read");
        schema().map(userSchema -> this.createReadSchema(userSchema, sparkSession)).foreach(readSchema -> {
            this.validateSchema(dataset, readSchema, "read");
            return BoxedUnit.UNIT;
        });
        return dataset;
    }

    /**
     * Returns the schema to use for reading.
     *
     * @param z whether source files exist (Scala parameter {@code sourceExists}); ignored by this
     *          default implementation
     * @return the user defined schema, if any
     */
    default Option<StructType> getSchema(boolean z) {
        return schema();
    }

    /**
     * Reads this data object into a DataFrame.
     *
     * <p>Steps performed:
     * <ol>
     *   <li>Assert that the keys of the requested partition values are partition columns of this
     *       data object.</li>
     *   <li>If no files exist, require a user defined schema and create the base directory.</li>
     *   <li>If the data object is unpartitioned or no partition values are requested, load the
     *       whole base path. Otherwise load only the concrete paths of the requested partition
     *       values, using the base path as Spark {@code basePath} option.</li>
     *   <li>If no concrete path was found, or the loaded data lacks partition columns while no
     *       read schema is defined, fall back to an empty DataFrame built from the schema.</li>
     *   <li>Add the optional filename column and run {@link #afterRead}.</li>
     * </ol>
     *
     * @param seq                   partition values to read; empty means read everything
     * @param sparkSession          the active Spark session
     * @param actionPipelineContext pipeline context (not used by the default implementation)
     * @return the validated DataFrame
     */
    default Dataset<Row> getDataFrame(Seq<PartitionValues> seq, SparkSession sparkSession, ActionPipelineContext actionPipelineContext) {
        Seq<String> unknownKeys = PartitionValues$.MODULE$.checkWrongPartitionValues(seq, partitions());
        Predef$.MODULE$.assert(unknownKeys.isEmpty(), () -> {
            return new StringBuilder(87).append("getDataFrame got request with PartitionValues keys ").append(unknownKeys.mkString(",")).append(" not included in ").append(new SdlConfigObject.DataObjectId(this.id())).append(" partition columns ").append(this.partitions().mkString(", ")).toString();
        });

        boolean filesExist = checkFilesExisting(sparkSession);
        if (!filesExist) {
            // Without files Spark cannot infer a schema, so one has to be configured.
            Predef$.MODULE$.require(schema().isDefined(), () -> {
                return new StringBuilder(91).append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") DataObject schema is undefined. A schema must be defined if there are no existing files.").toString();
            });
            // The directory is created so that the load below has a path to read from;
            // the boolean result of mkdirs is intentionally ignored, as before.
            filesystem(sparkSession).mkdirs(hadoopPath());
        }

        Option<StructType> readSchema = getSchema(filesExist);
        Dataset<Row> load;
        if (partitions().isEmpty() || seq.isEmpty()) {
            load = DataFrameUtil$.MODULE$.DataFrameReaderUtils(sparkSession.read().format(format()).options(options())).optionalSchema(readSchema).load(hadoopPath().toString());
        } else {
            DataFrameReader reader = DataFrameUtil$.MODULE$.DataFrameReaderUtils(sparkSession.read().format(format()).options(options())).optionalSchema(readSchema).option("basePath", hadoopPath().toString());
            // Resolve every requested partition value to the concrete paths it matches.
            Seq concretePaths = (Seq) ((TraversableLike) seq.flatMap(partitionValues -> {
                return this.getConcretePaths(partitionValues, sparkSession);
            }, Seq$.MODULE$.canBuildFrom())).map(path -> {
                return path.toString();
            }, Seq$.MODULE$.canBuildFrom());
            load = (Dataset) (concretePaths.nonEmpty() ? new Some(reader.load(concretePaths)) : None$.MODULE$).filter(dataset -> {
                // Keep the loaded data only if a schema is defined or all partition columns are present.
                return BoxesRunTime.boxToBoolean($anonfun$getDataFrame$5(this, readSchema, dataset));
            }).getOrElse(() -> {
                Predef$.MODULE$.require(this.schema().isDefined(), () -> {
                    return new StringBuilder(113).append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") DataObject schema is undefined. A schema must be defined as there are no existing files for partition values ").append(seq.mkString(", ")).append(".").toString();
                });
                return DataFrameUtil$.MODULE$.getEmptyDataFrame((StructType) readSchema.get(), sparkSession);
            });
        }
        return afterRead(DataFrameUtil$.MODULE$.DfSDL(load).withOptionalColumn(filenameColumn(), functions$.MODULE$.input_file_name()), sparkSession);
    }

    /**
     * @return default for the first parameter of {@code getDataFrame}: no partition values
     */
    default Seq<PartitionValues> getDataFrame$default$1() {
        return Nil$.MODULE$;
    }

    /**
     * Creates a streaming DataFrame reading from the base path of this data object.
     * The schema is the user defined schema or, if that is undefined, the given pipeline schema;
     * one of the two must be defined.
     *
     * @param map          options passed to the streaming reader (note: these replace, rather than
     *                     extend, {@link #options()})
     * @param option       schema provided by the pipeline, used if no user defined schema exists
     * @param sparkSession the active Spark session
     * @return the validated streaming DataFrame
     */
    default Dataset<Row> getStreamingDataFrame(Map<String, String> map, Option<StructType> option, SparkSession sparkSession) {
        Option<StructType> effectiveSchema = schema().orElse(() -> option);
        Predef$.MODULE$.require(effectiveSchema.isDefined(), () -> {
            return new StringBuilder(59).append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") Schema must be defined for streaming SparkFileDataObject").toString();
        });
        // NOTE(review): existence is tested on the parent directory, but hadoopPath() itself is
        // created. If the parent exists and hadoopPath() does not, nothing is created here.
        // Behavior is kept unchanged; confirm whether the check should target hadoopPath().
        if (!filesystem(sparkSession).exists(hadoopPath().getParent())) {
            filesystem(sparkSession).mkdirs(hadoopPath());
        }
        return afterRead(sparkSession.readStream().format(format()).options(map).schema(effectiveSchema.get()).load(hadoopPath().toString()), sparkSession);
    }

    /**
     * Derives the schema expected on read from the schema used on write.
     *
     * @param structType   the write schema
     * @param sparkSession the active Spark session (not used by the default implementation)
     * @return {@code structType} extended by a string field named after {@link #filenameColumn()}
     *         if that option is defined, otherwise {@code structType} itself
     */
    default StructType createReadSchema(StructType structType, SparkSession sparkSession) {
        return (StructType) filenameColumn().map(columnName -> {
            return this.addFieldIfNotExisting(structType, columnName, StringType$.MODULE$);
        }).getOrElse(() -> {
            return structType;
        });
    }

    /**
     * Validates the DataFrame that will later be written, without writing anything.
     * Unlike {@link #beforeWrite} this does not check for partition columns.
     *
     * @param dataset               the DataFrame to validate
     * @param seq                   partition values (not used by the default implementation)
     * @param sparkSession          the active Spark session (not used by the default implementation)
     * @param actionPipelineContext pipeline context (not used by the default implementation)
     */
    default void init(Dataset<Row> dataset, Seq<PartitionValues> seq, SparkSession sparkSession, ActionPipelineContext actionPipelineContext) {
        validateSchemaMin(dataset, "write");
        schema().foreach(userSchema -> {
            this.validateSchema(dataset, userSchema, "write");
            return BoxedUnit.UNIT;
        });
    }

    /**
     * Writes a DataFrame to the base path of this data object.
     *
     * <p>Before Spark writes, existing data is removed depending on {@code saveMode()}:
     * <ul>
     *   <li>{@code Overwrite}: existing partitions among {@code seq} are deleted; without
     *       partition values everything is deleted only if
     *       {@code Environment.enableOverwriteUnpartitionedSparkFileDataObjectAdls} is set.</li>
     *   <li>{@code OverwriteOptimized}: existing partitions among {@code seq} are deleted; without
     *       partition values everything is deleted, which on a partitioned data object is only
     *       allowed if its id is listed in
     *       {@code allowOverwriteAllPartitionsWithoutPartitionValues}.</li>
     *   <li>{@code OverwritePreserveDirectories}: same rules as {@code OverwriteOptimized}, but
     *       only files are deleted ({@code deletePartitionsFiles} / {@code deleteAllFiles}).</li>
     *   <li>any other mode: nothing is deleted here.</li>
     * </ul>
     * After writing, missing partitions are created and, if {@link #sparkRepartition()} is defined,
     * the written files are renamed by it.
     *
     * @param dataset               the DataFrame to write
     * @param seq                   partition values being written; may be empty
     * @param z                     whether this data object is also used as recursive input
     *                              (Scala parameter {@code isRecursiveInput}); must be {@code false}
     * @param sparkSession          the active Spark session
     * @param actionPipelineContext pipeline context (not used by the default implementation)
     * @throws IllegalArgumentException if {@code z} is {@code true} (raised by Scala's {@code require})
     * @throws ProcessingLogicException if all data of a partitioned data object would be
     *                                  overwritten without this being explicitly allowed
     */
    default void writeDataFrame(Dataset<Row> dataset, Seq<PartitionValues> seq, boolean z, SparkSession sparkSession, ActionPipelineContext actionPipelineContext) {
        Predef$.MODULE$.require(!z, () -> {
            return new StringBuilder().append("(").append(new SdlConfigObject.DataObjectId(this.id())).append(") SparkFileDataObject cannot write dataframe when dataobject is also used as recursive input ").toString();
        });

        Dataset<Row> validated = beforeWrite(dataset, sparkSession);
        // Apply the optional repartitioning; without a definition the validated DataFrame is written as is.
        Dataset<Row> prepared = (Dataset<Row>) sparkRepartition().map(repartitionDef -> {
            return repartitionDef.prepareDataFrame(validated, this.partitions(), seq, this.id());
        }).getOrElse(() -> {
            return validated;
        });

        Enumeration.Value mode = saveMode();
        if (java.util.Objects.equals(SDLSaveMode$.MODULE$.Overwrite(), mode)) {
            if (seq.nonEmpty()) {
                deletePartitions(filterPartitionsExisting(seq, sparkSession), sparkSession);
            } else if (Environment$.MODULE$.enableOverwriteUnpartitionedSparkFileDataObjectAdls()) {
                deleteAll(sparkSession);
            }
        } else if (java.util.Objects.equals(SDLSaveMode$.MODULE$.OverwriteOptimized(), mode)) {
            if (seq.nonEmpty()) {
                deletePartitions(filterPartitionsExisting(seq, sparkSession), sparkSession);
            } else {
                if (!partitions().isEmpty() && !Environment$.MODULE$.globalConfig().allowOverwriteAllPartitionsWithoutPartitionValues().contains(new SdlConfigObject.DataObjectId(id()))) {
                    throw new ProcessingLogicException(new StringBuilder(161).append("(").append(new SdlConfigObject.DataObjectId(id())).append(") OverwriteOptimized without partition values is not allowed on a partitioned DataObject. This is a protection from unintentionally deleting all partition data.").toString());
                }
                deleteAll(sparkSession);
            }
        } else if (java.util.Objects.equals(SDLSaveMode$.MODULE$.OverwritePreserveDirectories(), mode)) {
            if (seq.nonEmpty()) {
                deletePartitionsFiles(filterPartitionsExisting(seq, sparkSession), sparkSession);
            } else {
                if (!partitions().isEmpty() && !Environment$.MODULE$.globalConfig().allowOverwriteAllPartitionsWithoutPartitionValues().contains(new SdlConfigObject.DataObjectId(id()))) {
                    throw new ProcessingLogicException(new StringBuilder(171).append("(").append(new SdlConfigObject.DataObjectId(id())).append(") OverwritePreserveDirectories without partition values is not allowed on a partitioned DataObject. This is a protection from unintentionally deleting all partition data.").toString());
                }
                deleteAllFiles(hadoopPath(), sparkSession);
            }
        }
        // Any other save mode: nothing is deleted up front.

        String path = hadoopPath().toString();
        logger().info(new StringBuilder(22).append("Writing data frame to ").append(path).toString());
        DataFrameUtil$.MODULE$.DataFrameWriterUtils(prepared.write().format(format()).mode(SDLSaveMode$.MODULE$.value2SparkSaveMode(saveMode()).asSparkSaveMode()).options(options())).optionalPartitionBy(partitions()).save(path);
        createMissingPartitions(seq, sparkSession);
        sparkRepartition().foreach(repartitionDef -> {
            $anonfun$writeDataFrame$4(this, seq, sparkSession, repartitionDef);
            return BoxedUnit.UNIT;
        });
    }

    /**
     * @return default for the second parameter of {@code writeDataFrame}: no partition values
     */
    default Seq<PartitionValues> writeDataFrame$default$2() {
        return Nil$.MODULE$;
    }

    /**
     * @return default for the third parameter of {@code writeDataFrame}: not a recursive input
     */
    default boolean writeDataFrame$default$3() {
        return false;
    }

    /**
     * Reduces the given partition values to those that currently exist.
     * Existing partitions (from {@code listPartitions}) are first projected onto the keys used in
     * {@code seq} so that both sides are comparable, then intersected with {@code seq}.
     *
     * @param seq          partition values to check
     * @param sparkSession the active Spark session
     * @return the elements of {@code seq} that match an existing partition
     */
    default Seq<PartitionValues> filterPartitionsExisting(Seq<PartitionValues> seq, SparkSession sparkSession) {
        Seq requestedKeys = PartitionValues$.MODULE$.getPartitionValuesKeys(seq).toSeq();
        return (Seq) seq.intersect((GenSeq) listPartitions(sparkSession).map(partitionValues -> {
            return partitionValues.filterKeys(requestedKeys);
        }, Seq$.MODULE$.canBuildFrom()));
    }

    /*
     * Compiler generated lambda body used by getDataFrame: true if a read schema is defined or
     * every partition column of the data object is among the columns of the loaded dataset.
     */
    static /* synthetic */ boolean $anonfun$getDataFrame$5(SparkFileDataObject sparkFileDataObject, Option option, Dataset dataset) {
        return option.isDefined() || ((SeqLike) sparkFileDataObject.partitions().diff(Predef$.MODULE$.wrapRefArray(dataset.columns()))).isEmpty();
    }

    /*
     * Compiler generated lambda body used by writeDataFrame: lets the repartition definition
     * rename the files written for the given partition values.
     */
    static /* synthetic */ void $anonfun$writeDataFrame$4(SparkFileDataObject sparkFileDataObject, Seq seq, SparkSession sparkSession, SparkRepartitionDef sparkRepartitionDef) {
        sparkRepartitionDef.renameFiles(sparkFileDataObject.getFileRefs(seq, sparkSession), sparkFileDataObject.filesystem(sparkSession));
    }

    /*
     * Scala trait initializer; the trait has no fields to initialize, so this is empty.
     */
    static void $init$(SparkFileDataObject sparkFileDataObject) {
    }
}
