package io.openlineage.spark.agent.lifecycle.plan;

import io.openlineage.client.OpenLineage;
import io.openlineage.client.OpenLineage.Dataset;
import io.openlineage.client.utils.DatasetIdentifier;
import io.openlineage.spark.agent.lifecycle.plan.handlers.ExtensionLineageRelationHandler;
import io.openlineage.spark.agent.lifecycle.plan.handlers.JdbcRelationHandler;
import io.openlineage.spark.agent.util.PathUtils;
import io.openlineage.spark.agent.util.PlanUtils;
import io.openlineage.spark.agent.util.ScalaConversionUtils;
import io.openlineage.spark.api.AbstractQueryPlanDatasetBuilder;
import io.openlineage.spark.api.DatasetFactory;
import io.openlineage.spark.api.OpenLineageContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.spark.scheduler.SparkListenerEvent;
import org.apache.spark.sql.catalyst.catalog.CatalogTable;
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan;
import org.apache.spark.sql.execution.datasources.HadoopFsRelation;
import org.apache.spark.sql.execution.datasources.LogicalRelation;
import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:io/openlineage/spark/agent/lifecycle/plan/LogicalRelationDatasetBuilder.class */
/**
 * Builds OpenLineage datasets from {@link LogicalRelation} nodes of a Spark logical plan.
 *
 * <p>A relation is handled when it is backed by a {@link HadoopFsRelation}, a
 * {@link JDBCRelation}, a relation recognised by the Spark extension visitor wrapper, or when it
 * carries a catalog table. Subclasses may override {@link #getDatasetVersion(LogicalRelation)} to
 * attach a dataset version facet.
 *
 * @param <D> the kind of OpenLineage dataset produced by the {@link DatasetFactory}
 */
public class LogicalRelationDatasetBuilder<D extends OpenLineage.Dataset> extends AbstractQueryPlanDatasetBuilder<SparkListenerEvent, LogicalRelation, D> {
    private static final Logger log = LoggerFactory.getLogger(LogicalRelationDatasetBuilder.class);
    private final DatasetFactory<D> datasetFactory;

    /**
     * Creates a builder.
     *
     * @param openLineageContext context forwarded to the parent builder
     * @param datasetFactory factory used to create every dataset returned by this builder
     * @param z flag forwarded unchanged to the parent constructor
     *     (NOTE(review): presumably the "search dependencies" switch - confirm against the parent)
     */
    public LogicalRelationDatasetBuilder(OpenLineageContext openLineageContext, DatasetFactory<D> datasetFactory, boolean z) {
        super(openLineageContext, z);
        this.datasetFactory = datasetFactory;
    }

    /**
     * Tells whether this builder can handle the given plan node.
     *
     * @param logicalPlan plan node to inspect
     * @return {@code true} when the node is a {@link LogicalRelation} of a supported kind, unless
     *     it is the whole optimized plan on its own and dependency search is disabled
     */
    @Override // io.openlineage.spark.api.AbstractQueryPlanDatasetBuilder
    public boolean isDefinedAtLogicalPlan(LogicalPlan logicalPlan) {
        if (!(logicalPlan instanceof LogicalRelation)) {
            return false;
        }
        // A relation that is the entire optimized plan is only handled when searching dependencies.
        if (isSingleNodeLogicalPlan(logicalPlan) && !this.searchDependencies) {
            return false;
        }
        LogicalRelation logicalRelation = (LogicalRelation) logicalPlan;
        return logicalRelation.relation() instanceof HadoopFsRelation
                || logicalRelation.relation() instanceof JDBCRelation
                || this.context.getSparkExtensionVisitorWrapper().isDefinedAt(logicalRelation.relation())
                || hasCatalogTable(logicalRelation);
    }

    /** Returns whether the relation carries a defined catalog table (null-safe). */
    private boolean hasCatalogTable(LogicalRelation logicalRelation) {
        return logicalRelation.catalogTable() != null && logicalRelation.catalogTable().isDefined();
    }

    /**
     * Returns whether the given plan equals the optimized plan of the current query execution and
     * has no children.
     */
    private boolean isSingleNodeLogicalPlan(LogicalPlan logicalPlan) {
        boolean isOptimizedPlan = this.context.getQueryExecution()
                .map(queryExecution -> queryExecution.optimizedPlan())
                .filter(optimizedPlan -> optimizedPlan.equals(logicalPlan))
                .isPresent();
        if (!isOptimizedPlan) {
            return false;
        }
        return logicalPlan.children() == null || logicalPlan.children().isEmpty();
    }

    /**
     * Not supported; use {@link #apply(SparkListenerEvent, LogicalRelation)} instead.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // io.openlineage.spark.api.AbstractQueryPlanDatasetBuilder
    public List<D> apply(LogicalRelation logicalRelation) {
        throw new UnsupportedOperationException("apply(LogicalPlan) is not implemented");
    }

    /**
     * Extracts datasets from the relation, trying the handlers in this order: extension visitor,
     * catalog table, Hadoop file-system relation, JDBC relation.
     *
     * @param sparkListenerEvent event being processed; only passed to the extension handler
     * @param logicalRelation relation to extract datasets from
     * @return the datasets found, possibly empty
     * @throws IllegalArgumentException when the relation matches none of the supported kinds
     */
    @Override // io.openlineage.spark.api.AbstractQueryPlanDatasetBuilder
    public List<D> apply(SparkListenerEvent sparkListenerEvent, LogicalRelation logicalRelation) {
        if (this.context.getSparkExtensionVisitorWrapper().isDefinedAt(logicalRelation.relation())) {
            return new ExtensionLineageRelationHandler(this.context, this.datasetFactory).handleRelation(sparkListenerEvent, logicalRelation);
        }
        if (hasCatalogTable(logicalRelation)) {
            return handleCatalogTable(logicalRelation);
        }
        if (logicalRelation.relation() instanceof HadoopFsRelation) {
            return handleHadoopFsRelation(logicalRelation);
        }
        if (logicalRelation.relation() instanceof JDBCRelation) {
            return new JdbcRelationHandler(this.datasetFactory).handleRelation(logicalRelation);
        }
        throw new IllegalArgumentException("Expected logical plan to be either HadoopFsRelation, JDBCRelation, or CatalogTable but was " + logicalRelation);
    }

    /**
     * Builds a single dataset identified by the relation's catalog table, with schema, data source
     * and (optional) version facets. Returns an empty list when no Spark session is available.
     */
    private List<D> handleCatalogTable(LogicalRelation logicalRelation) {
        if (!this.context.getSparkSession().isPresent()) {
            return Collections.emptyList();
        }
        DatasetIdentifier identifier = PathUtils.fromCatalogTable(logicalRelation.catalogTable().get(), this.context.getSparkSession().get());
        OpenLineage.DatasetFacetsBuilder facetsBuilder = this.context.getOpenLineage().newDatasetFacetsBuilder();
        facetsBuilder.schema(PlanUtils.schemaFacet(this.context.getOpenLineage(), logicalRelation.schema()));
        facetsBuilder.dataSource(PlanUtils.datasourceFacet(this.context.getOpenLineage(), identifier.getNamespace()));
        applyDatasetVersion(logicalRelation, facetsBuilder);
        return Collections.singletonList(this.datasetFactory.getDataset(identifier, facetsBuilder));
    }

    /**
     * Builds datasets from the root paths of a {@link HadoopFsRelation}.
     *
     * <p>A single root path that is a file yields one dataset for that file; otherwise every root
     * path is mapped to its directory path and one dataset is produced per distinct directory. If
     * anything fails, one dataset per raw root path is returned instead, without extra facets.
     * Returns an empty list when no Spark session is available.
     */
    private List<D> handleHadoopFsRelation(LogicalRelation logicalRelation) {
        // Callers only reach this method after an instanceof check on the relation.
        HadoopFsRelation relation = (HadoopFsRelation) logicalRelation.relation();
        try {
            return this.context.getSparkSession().map(sparkSession -> {
                Configuration hadoopConf = sparkSession.sessionState().newHadoopConfWithOptions(relation.options());
                OpenLineage.DatasetFacetsBuilder facetsBuilder = this.context.getOpenLineage().newDatasetFacetsBuilder();
                applyDatasetVersion(logicalRelation, facetsBuilder);
                List<Path> rootPaths = ScalaConversionUtils.fromSeq(relation.location().rootPaths());
                List<D> datasets;
                if (isSingleFileRelation(rootPaths, hadoopConf)) {
                    datasets = Collections.singletonList(
                            this.datasetFactory.getDataset(rootPaths.get(0).toUri(), relation.schema(), facetsBuilder));
                } else {
                    datasets = rootPaths.stream()
                            .map(path -> PlanUtils.getDirectoryPath(path, hadoopConf))
                            .distinct()
                            .map(directory -> this.datasetFactory.getDataset(directory.toUri(), relation.schema(), facetsBuilder))
                            .collect(Collectors.toList());
                }
                return datasets;
            }).orElse(Collections.emptyList());
        } catch (Exception e) {
            // Deliberate best-effort: keep reporting lineage from the raw root paths, but do not
            // hide the reason the primary path failed.
            log.warn("Failed to build datasets for HadoopFsRelation; falling back to its raw root paths", e);
            List<Path> rootPaths = ScalaConversionUtils.fromSeq(relation.location().rootPaths());
            List<D> fallback = new ArrayList<>(rootPaths.size());
            for (Path rootPath : rootPaths) {
                fallback.add(this.datasetFactory.getDataset(rootPath.toUri(), relation.schema()));
            }
            return fallback.isEmpty() ? Collections.emptyList() : fallback;
        }
    }

    /** Adds a version facet to the builder when {@link #getDatasetVersion} supplies a version. */
    private void applyDatasetVersion(LogicalRelation logicalRelation, OpenLineage.DatasetFacetsBuilder facetsBuilder) {
        getDatasetVersion(logicalRelation).ifPresent(
                version -> facetsBuilder.version(this.context.getOpenLineage().newDatasetVersionDatasetFacet(version)));
    }

    /**
     * Returns whether the collection holds exactly one path and that path is a file according to
     * its file system. An I/O failure while checking is treated as "not a single file".
     */
    private boolean isSingleFileRelation(Collection<Path> collection, Configuration configuration) {
        if (collection.size() != 1) {
            return false;
        }
        Path path = collection.iterator().next();
        try {
            return path.getFileSystem(configuration).isFile(path);
        } catch (IOException e) {
            log.debug("Unable to check whether {} is a file; treating it as not a single file", path, e);
            return false;
        }
    }

    /**
     * Hook for subclasses to provide a dataset version for the relation.
     *
     * @param logicalRelation relation whose dataset version is requested
     * @return always {@link Optional#empty()} in this base implementation
     */
    protected Optional<String> getDatasetVersion(LogicalRelation logicalRelation) {
        return Optional.empty();
    }
}
