package gorsat.spark;

import gorsat.process.SparkRowUtilities;
import java.io.IOException;
import java.io.Serializable;
import java.util.zip.DataFormatException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.parquet.hadoop.codec.CodecConfig;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.execution.datasources.OutputWriter;
import org.apache.spark.sql.execution.datasources.OutputWriterFactory;
import org.apache.spark.sql.execution.datasources.PartitionedFile;
import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat;
import org.apache.spark.sql.internal.SQLConf;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StructType;
import org.gorpipe.gor.model.DriverBackedFileReader;
import org.gorpipe.spark.SparkGOR;
import scala.Function1;
import scala.Option;
import scala.collection.Iterator;
import scala.collection.JavaConverters;
import scala.collection.Seq;
import scala.collection.immutable.Map;

/* loaded from: input_file:gorsat/spark/GorFileFormat.class */
/**
 * Spark file format registered under the short name {@code "gor"}.
 *
 * <p>Builds on {@link CSVFileFormat}: schema inference is delegated to
 * {@link SparkRowUtilities}, writing goes through {@code GorOutputWriter}, and reading wraps or
 * replaces the CSV reader depending on whether the {@code path} option ends in {@code .gorz} or
 * {@code .gord}. Every other operation defers to the superclass.
 */
public class GorFileFormat extends CSVFileFormat implements Serializable {
    /**
     * Infers the schema of the file named by the {@code path} option.
     *
     * <p>When the {@code hadoop} option is present and parses as {@code true}, the file is opened
     * through the Hadoop file system API; otherwise it is opened through a
     * {@link DriverBackedFileReader}. The stream is always closed before this method returns.
     *
     * @param sparkSession the active session (unused here)
     * @param map          data source options; must contain {@code path}, may contain {@code hadoop}
     * @param seq          the files to infer from (unused; only the {@code path} option is read)
     * @return the inferred schema, or an empty {@code Option} when reading or decoding fails
     */
    public Option<StructType> inferSchema(SparkSession sparkSession, Map<String, String> map, Seq<FileStatus> seq) {
        String location = map.get("path").get();
        Option<String> hadoopOption = map.get("hadoop");
        boolean useHadoop = hadoopOption.isDefined() && Boolean.parseBoolean(hadoopOption.get());

        StructType inferred = null;
        // try-with-resources: the original never closed this stream, leaking a handle per call.
        try (FSDataInputStream inputStream = openForSchemaInference(location, useHadoop)) {
            inferred = SparkRowUtilities.inferSchema(inputStream, location, false, location.endsWith(".gorz"));
        } catch (IOException | DataFormatException e) {
            // Best effort, as before: report the failure and fall through so that
            // Option.apply(null) yields an empty Option instead of propagating.
            System.err.println("Unable to infer schema for " + location);
            e.printStackTrace();
        }
        return Option.apply(inferred);
    }

    /**
     * Opens the stream that schema inference reads from.
     *
     * <p>NOTE(review): the declared stream type is kept from the original code; confirm it matches
     * what {@code DriverBackedFileReader#getInputStream} actually returns.
     *
     * @param location  the value of the {@code path} option
     * @param useHadoop whether to open {@code location} through the Hadoop file system API
     * @return an open stream that the caller must close
     * @throws IOException if the file cannot be opened
     */
    private static FSDataInputStream openForSchemaInference(String location, boolean useHadoop) throws IOException {
        if (!useHadoop) {
            return new DriverBackedFileReader("", "/", new Object[0]).getInputStream(location);
        }
        Path path = new Path(location);
        // Fixed s3a settings used only for this schema probe (anonymous credentials,
        // path-style access, SSL disabled).
        Configuration configuration = new Configuration();
        configuration.set("fs.s3a.connection.ssl.enabled", "false");
        configuration.set("fs.s3a.path.style.access", "true");
        configuration.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem");
        configuration.set("fs.s3a.change.detection.mode", "warn");
        configuration.set("com.amazonaws.services.s3.enableV4", "true");
        configuration.set("fs.s3a.committer.name", "partitioned");
        configuration.set("fs.s3a.committer.staging.conflict-mode", "replace");
        configuration.set("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore");
        configuration.set("fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider");
        return path.getFileSystem(configuration).open(path);
    }

    /**
     * Creates the factory that produces one {@code GorOutputWriter} per output file.
     *
     * @param sparkSession the active session (unused here)
     * @param job          the Hadoop job being configured (unused here)
     * @param map          data source options; {@code path} is read when a writer is created
     * @param structType   the schema being written (unused here; each writer receives its own)
     * @return a factory whose files carry the codec extension followed by {@code .gorz}
     */
    public OutputWriterFactory prepareWrite(SparkSession sparkSession, Job job, final Map<String, String> map, StructType structType) {
        return new OutputWriterFactory() {
            public OutputWriter newInstance(String str, StructType structType2, TaskAttemptContext taskAttemptContext) {
                try {
                    return new GorOutputWriter(str, structType2, map.get("path").get());
                } catch (IOException e) {
                    // The overridden signature declares no checked exception; wrap and keep the cause.
                    throw new RuntimeException(e);
                }
            }

            public String getFileExtension(TaskAttemptContext taskAttemptContext) {
                return CodecConfig.from(taskAttemptContext).getCodec().getExtension() + ".gorz";
            }
        };
    }

    /** Defers to {@link CSVFileFormat}. */
    public boolean supportBatch(SparkSession sparkSession, StructType structType) {
        return super.supportBatch(sparkSession, structType);
    }

    /** Defers to {@link CSVFileFormat}. */
    public Option<Seq<String>> vectorTypes(StructType structType, StructType structType2, SQLConf sQLConf) {
        return super.vectorTypes(structType, structType2, sQLConf);
    }

    /** Defers to {@link CSVFileFormat}. */
    public boolean isSplitable(SparkSession sparkSession, Map<String, String> map, Path path) {
        return super.isSplitable(sparkSession, map, path);
    }

    /**
     * Builds the per-file reader, chosen by the suffix of the {@code path} option.
     *
     * <ul>
     *   <li>{@code .gord}: a {@code GordFunction} built from {@code structType3}; the CSV reader
     *       is not created.</li>
     *   <li>{@code .gorz}: the CSV reader (built with options passed through
     *       {@code SparkGOR.me}) wrapped in a {@code GorzFunction}.</li>
     *   <li>anything else: the CSV reader itself.</li>
     * </ul>
     *
     * <p>The three schema arguments are forwarded to the superclass in the order received; in
     * Spark's {@code FileFormat} contract they are presumably the data, partition and required
     * schemas, in that order (confirm against the Spark version in use).
     *
     * @param seq pushed-down filters, forwarded to the CSV reader and to {@code GorzFunction}
     * @param map data source options; must contain {@code path}
     * @return a function that maps a partitioned file to an iterator of rows
     */
    public Function1<PartitionedFile, Iterator<InternalRow>> buildReader(SparkSession sparkSession, StructType structType, StructType structType2, StructType structType3, Seq<Filter> seq, Map<String, String> map, Configuration configuration) {
        String location = map.get("path").get();
        if (location.endsWith(".gord")) {
            return new GordFunction(structType3);
        }
        Function1<PartitionedFile, Iterator<InternalRow>> csvReader =
                super.buildReader(sparkSession, structType, structType2, structType3, seq, SparkGOR.me(map), configuration);
        if (location.endsWith(".gorz")) {
            return new GorzFunction(csvReader, structType3, JavaConverters.asJavaCollection(seq));
        }
        return csvReader;
    }

    /** Defers to {@link CSVFileFormat}. */
    public Function1<PartitionedFile, Iterator<InternalRow>> buildReaderWithPartitionValues(SparkSession sparkSession, StructType structType, StructType structType2, StructType structType3, Seq<Filter> seq, Map<String, String> map, Configuration configuration) {
        return super.buildReaderWithPartitionValues(sparkSession, structType, structType2, structType3, seq, map, configuration);
    }

    /** Accepts every data type. */
    public boolean supportDataType(DataType dataType) {
        return true;
    }

    /** Returns the short name of this format, {@code "gor"}. */
    public String shortName() {
        return "gor";
    }
}
