package com.google.cloud.spark.bigquery.v2.context;

import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.TableDefinition;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.connector.common.BigQueryClient;
import com.google.cloud.bigquery.connector.common.BigQueryClientFactory;
import com.google.cloud.bigquery.connector.common.BigQueryTracerFactory;
import com.google.cloud.bigquery.connector.common.BigQueryUtil;
import com.google.cloud.bigquery.connector.common.LazyInitializationSupplier;
import com.google.cloud.bigquery.connector.common.ReadSessionCreator;
import com.google.cloud.bigquery.connector.common.ReadSessionCreatorConfig;
import com.google.cloud.bigquery.connector.common.ReadSessionResponse;
import com.google.cloud.bigquery.storage.v1.DataFormat;
import com.google.cloud.bigquery.storage.v1.ReadSession;
import com.google.cloud.spark.bigquery.ReadRowsResponseToInternalRowIteratorConverter;
import com.google.cloud.spark.bigquery.SchemaConverters;
import com.google.cloud.spark.bigquery.SchemaConvertersConfiguration;
import com.google.cloud.spark.bigquery.SparkBigQueryConfig;
import com.google.cloud.spark.bigquery.SparkBigQueryUtil;
import com.google.cloud.spark.bigquery.SparkFilterUtils;
import com.google.cloud.spark.bigquery.direct.BigQueryRDDFactory;
import com.google.cloud.spark.bigquery.metrics.DataOrigin;
import com.google.cloud.spark.bigquery.metrics.SparkBigQueryReadSessionMetrics;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Streams;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalLong;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.vectorized.ColumnarBatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/google/cloud/spark/bigquery/v2/context/BigQueryDataSourceReaderContext.class */
public class BigQueryDataSourceReaderContext {
    private static final Logger logger = LoggerFactory.getLogger(BigQueryDataSourceReaderContext.class);

    /** Shared statistics instance that reports neither a size nor a row count. */
    private static final StatisticsContext UNKNOWN_STATISTICS = new StatisticsContext() {
        @Override
        public OptionalLong sizeInBytes() {
            return OptionalLong.empty();
        }

        @Override
        public OptionalLong numRows() {
            return OptionalLong.empty();
        }
    };

    // Table being read and its identifier (taken from the table info in the constructor).
    private final TableInfo table;
    private final TableId tableId;

    // Collaborators and configuration supplied through the constructor.
    private final ReadSessionCreatorConfig readSessionCreatorConfig;
    private final BigQueryClient bigQueryClient;
    private final BigQueryClientFactory bigQueryReadClientFactory;
    private final BigQueryTracerFactory bigQueryTracerFactory;
    private final ReadSessionCreator readSessionCreator;
    private final SparkBigQueryConfig options;
    private final SQLContext sqlContext;
    private final SparkSession sparkSession;
    private final BigQueryRDDFactory bigQueryRDDFactory;
    private final Optional<String> globalFilter;
    private final String applicationId;

    // Current read schema; narrowed by pruneColumns().
    private Optional<StructType> schema;
    // Schema explicitly supplied by the caller, empty when none was given.
    private Optional<StructType> userProvidedSchema;
    // Fields of the table's Spark schema keyed by name, in schema order.
    private Map<String, StructField> fields;
    // Column names requested from the read session; assigned in createReadSession().
    private ImmutableList<String> selectedFields;
    // Result of the most recent planBatchInputPartitionContexts() call; null until then.
    private List<ArrowInputPartitionContext> plannedInputPartitionContexts;
    // Lazily created read session; replaced by resetReadSessionResponse().
    private LazyInitializationSupplier<ReadSessionResponse> readSessionResponse;
    private final BigQueryClient.ReadTableOptions readTableOptions;
    // Filters that are compiled into the read session's row restriction.
    private final Set<Filter> pushedFilters = new HashSet<>();
    // The last filter array handed to pushFilters(), handled or not.
    private Filter[] allFilters = new Filter[0];
    // Executor used by build() to start read session creation in the background.
    private final ExecutorService asyncReadSessionExecutor = Executors.newSingleThreadExecutor();
    // Set by build(); estimateStatistics() consults the read session once this is true.
    private boolean isBuilt = false;

    /**
     * Creates a reader context for {@code tableInfo}.
     *
     * <p>The table's BigQuery schema (including pseudo columns) is converted to a Spark schema. That
     * schema always populates the name-to-field map; it also becomes the read schema unless
     * {@code optional2} carries a user-provided schema, in which case the user schema is used.
     *
     * @param tableInfo the table to read
     * @param bigQueryClient client used for table-level calls
     * @param bigQueryClientFactory factory handed to the read session creator and the partitions
     * @param bigQueryTracerFactory tracer factory handed to the partitions and the RDD factory
     * @param readSessionCreatorConfig read session configuration
     * @param optional global filter, if any
     * @param optional2 user-provided schema, if any
     * @param str application id, used in log output
     * @param sparkBigQueryConfig connector options
     * @param sQLContext Spark SQL context
     * @param sparkSession Spark session
     * @param readTableOptions read options; consulted by {@link #getDataOrigin()}
     */
    public BigQueryDataSourceReaderContext(TableInfo tableInfo, BigQueryClient bigQueryClient, BigQueryClientFactory bigQueryClientFactory, BigQueryTracerFactory bigQueryTracerFactory, ReadSessionCreatorConfig readSessionCreatorConfig, Optional<String> optional, Optional<StructType> optional2, String str, SparkBigQueryConfig sparkBigQueryConfig, SQLContext sQLContext, SparkSession sparkSession, BigQueryClient.ReadTableOptions readTableOptions) {
        this.table = tableInfo;
        this.tableId = tableInfo.getTableId();
        this.readSessionCreatorConfig = readSessionCreatorConfig;
        this.bigQueryClient = bigQueryClient;
        this.bigQueryReadClientFactory = bigQueryClientFactory;
        this.bigQueryTracerFactory = bigQueryTracerFactory;
        this.sparkSession = sparkSession;
        this.readTableOptions = readTableOptions;
        this.readSessionCreator = new ReadSessionCreator(readSessionCreatorConfig, bigQueryClient, bigQueryClientFactory);
        this.globalFilter = optional;

        SchemaConverters converters = SchemaConverters.from(SchemaConvertersConfiguration.from(sparkBigQueryConfig));
        StructType tableSchema = converters.toSpark(converters.getSchemaWithPseudoColumns(tableInfo));

        // A user-provided schema wins over the schema derived from the table.
        boolean hasUserSchema = optional2.isPresent();
        this.schema = hasUserSchema ? optional2 : Optional.of(tableSchema);
        this.userProvidedSchema = hasUserSchema ? optional2 : Optional.empty();

        // The field map is always built from the table schema, preserving column order.
        Map<String, StructField> fieldsByName = new LinkedHashMap<>();
        Arrays.stream(tableSchema.fields()).forEach(field -> fieldsByName.put(field.name(), field));
        this.fields = fieldsByName;

        this.applicationId = str;
        this.options = sparkBigQueryConfig;
        this.sqlContext = sQLContext;
        this.bigQueryRDDFactory = new BigQueryRDDFactory(bigQueryClient, bigQueryClientFactory, this.bigQueryTracerFactory, sparkBigQueryConfig, sQLContext);
        resetReadSessionResponse();
    }

    /** Replaces the read session holder with a fresh one that calls {@link #createReadSession()} on demand. */
    private void resetReadSessionResponse() {
        this.readSessionResponse = new LazyInitializationSupplier<>(() -> createReadSession());
    }

    /**
     * Returns the current read schema, or the Spark schema derived from the table (with pseudo
     * columns) when no schema is set.
     *
     * @return the schema to read with
     */
    public StructType readSchema() {
        if (this.schema.isPresent()) {
            return this.schema.get();
        }
        SchemaConverters converters = SchemaConverters.from(SchemaConvertersConfiguration.from(this.options));
        return converters.toSpark(converters.getSchemaWithPseudoColumns(this.table));
    }

    /**
     * Tells whether columnar batch reads can be used.
     *
     * @return {@code true} when the configured read format is Arrow and the read schema is not empty
     */
    public boolean enableBatchRead() {
        if (this.readSessionCreatorConfig.getReadDataFormat() != DataFormat.ARROW) {
            return false;
        }
        return !isEmptySchema();
    }

    /**
     * Plans row-based input partitions.
     *
     * <p>With an empty read schema the empty-projection partitions are returned. Otherwise one
     * partition is produced per stream of the read session.
     *
     * @return a stream of row-producing partition contexts
     */
    public Stream<InputPartitionContext<InternalRow>> planInputPartitionContexts() {
        if (isEmptySchema()) {
            return createEmptyProjectionPartitions();
        }
        return this.readSessionResponse.get().getReadSession().getStreamsList().stream()
                .map(readStream -> new BigQueryInputPartitionContext(
                        this.bigQueryReadClientFactory,
                        readStream.getName(),
                        this.readSessionCreatorConfig.toReadRowsHelperOptions(),
                        createConverter(this.selectedFields, this.readSessionResponse.get(), this.userProvidedSchema)));
    }

    /**
     * Compiles the global filter together with the pushed filters into a single filter string.
     *
     * @return the result of {@code SparkFilterUtils.getCompiledFilter}, passed through
     *     {@code BigQueryUtil.emptyIfNeeded}
     */
    public Optional<String> getCombinedFilter() {
        Filter[] pushed = this.pushedFilters.toArray(new Filter[0]);
        return BigQueryUtil.emptyIfNeeded(
                SparkFilterUtils.getCompiledFilter(
                        this.readSessionCreatorConfig.getPushAllFilters(),
                        this.readSessionCreatorConfig.getReadDataFormat(),
                        this.globalFilter,
                        pushed));
    }

    /**
     * Classifies where the data being read comes from.
     *
     * @return {@code QUERY} when the read options carry a query; otherwise {@code BIGLAKE} for
     *     BigLake managed or external tables, {@code VIEW} for materialized views, and
     *     {@code TABLE} for everything else
     */
    public DataOrigin getDataOrigin() {
        if (this.readTableOptions.query().isPresent()) {
            return DataOrigin.QUERY;
        }
        if (BigQueryUtil.isBigLakeManagedTable(this.table)
                || this.table.getDefinition().getType() == TableDefinition.Type.EXTERNAL) {
            return DataOrigin.BIGLAKE;
        }
        if (this.table.getDefinition().getType() == TableDefinition.Type.MATERIALIZED_VIEW) {
            return DataOrigin.VIEW;
        }
        return DataOrigin.TABLE;
    }

    /**
     * Plans columnar (Arrow) input partitions and remembers them in
     * {@code plannedInputPartitionContexts}.
     *
     * <p>A read session metrics listener is registered with the Spark context on every call. The
     * session's streams are grouped into partitions of {@code streamsPerPartition()} streams each.
     *
     * @return a stream over the planned partition contexts
     * @throws IllegalStateException if batch reads are not enabled
     */
    public Stream<InputPartitionContext<ColumnarBatch>> planBatchInputPartitionContexts() {
        if (!enableBatchRead()) {
            throw new IllegalStateException("Batch reads should not be enabled");
        }
        ReadSessionResponse response = this.readSessionResponse.get();
        ReadSession readSession = response.getReadSession();
        SparkBigQueryReadSessionMetrics sessionMetrics = SparkBigQueryReadSessionMetrics.from(
                this.sparkSession,
                readSession,
                System.currentTimeMillis(),
                this.readSessionCreatorConfig.getReadDataFormat(),
                getDataOrigin(),
                readSession.getStreamsCount());
        this.sparkSession.sparkContext().addSparkListener(sessionMetrics);

        // Fall back to every column of the table that was actually read when nothing was selected.
        ImmutableList<String> projected = this.selectedFields;
        if (projected.isEmpty()) {
            Schema readTableSchema = SchemaConverters.from(SchemaConvertersConfiguration.from(this.options))
                    .getSchemaWithPseudoColumns(response.getReadTableInfo());
            ImmutableList.Builder<String> names = ImmutableList.builder();
            readTableSchema.getFields().forEach(field -> names.add(field.getName()));
            projected = names.build();
        }
        final ImmutableList<String> columnNames = projected;
        final Optional<StructType> partitionSchema = Optional.of(this.userProvidedSchema.orElse(readSchema()));

        this.plannedInputPartitionContexts =
                Streams.stream(Iterables.partition(readSession.getStreamsList(), this.readSessionCreatorConfig.streamsPerPartition()))
                        .map(streamGroup -> {
                            List<String> streamNames = new ArrayList<>();
                            streamGroup.forEach(readStream -> streamNames.add(readStream.getName()));
                            return new ArrowInputPartitionContext(
                                    this.bigQueryReadClientFactory,
                                    this.bigQueryTracerFactory,
                                    streamNames,
                                    this.readSessionCreatorConfig.toReadRowsHelperOptions(),
                                    columnNames,
                                    response,
                                    partitionSchema,
                                    sessionMetrics);
                        })
                        .collect(Collectors.toList());

        // The identity map only widens the element type to the interface.
        return this.plannedInputPartitionContexts.stream().map(context -> context);
    }

    /** Returns {@code true} only when a read schema is set and that schema is empty. */
    private boolean isEmptySchema() {
        return this.schema.isPresent() && this.schema.get().isEmpty();
    }

    /**
     * Builds the Avro row converter for a read session.
     *
     * <p>With no selected columns, every column of the read table's schema (with pseudo columns) is
     * used. Otherwise the BigQuery schema is narrowed to the selected names.
     *
     * @param immutableList selected column names; may be empty
     * @param readSessionResponse the read session whose table info and Avro schema are used
     * @param optional user-provided schema forwarded to the converter
     * @return the Avro converter
     * @throws IllegalArgumentException if the configured read format is not Avro
     */
    private ReadRowsResponseToInternalRowIteratorConverter createConverter(ImmutableList<String> immutableList, ReadSessionResponse readSessionResponse, Optional<StructType> optional) {
        DataFormat format = this.readSessionCreatorConfig.getReadDataFormat();
        if (format != DataFormat.AVRO) {
            throw new IllegalArgumentException("No known converted for " + format);
        }
        Schema tableSchema = SchemaConverters.from(SchemaConvertersConfiguration.from(this.options))
                .getSchemaWithPseudoColumns(readSessionResponse.getReadTableInfo());

        final Schema converterSchema;
        final ImmutableList<String> columnNames;
        if (immutableList.isEmpty()) {
            converterSchema = tableSchema;
            columnNames = tableSchema.getFields().stream()
                    .map(field -> field.getName())
                    .collect(ImmutableList.toImmutableList());
        } else {
            ImmutableSet<String> wanted = ImmutableSet.copyOf(immutableList);
            converterSchema = Schema.of(
                    tableSchema.getFields().stream()
                            .filter(field -> wanted.contains(field.getName()))
                            .collect(Collectors.toList()));
            columnNames = immutableList;
        }
        return ReadRowsResponseToInternalRowIteratorConverter.avro(
                converterSchema,
                columnNames,
                readSessionResponse.getReadSession().getAvroSchema().getSchema(),
                optional,
                Optional.empty(),
                SchemaConvertersConfiguration.from(this.options));
    }

    /**
     * Creates the read session for the table.
     *
     * <p>Sets {@code selectedFields} to the names in the current read schema, or to every known
     * field name when no schema is set, then asks the read session creator for a session restricted
     * by the combined filter.
     *
     * @return the response from the read session creator
     */
    private ReadSessionResponse createReadSession() {
        ImmutableList<String> allFieldNames = ImmutableList.copyOf(this.fields.keySet());
        this.selectedFields = this.schema.isPresent()
                ? ImmutableList.copyOf(this.schema.get().fieldNames())
                : allFieldNames;
        ReadSessionResponse response = this.readSessionCreator.create(this.tableId, this.selectedFields, getCombinedFilter());
        logger.info(
                "Got read session for {}: {} for application id: {}",
                this.tableId.toString(),
                response.getReadSession().getName(),
                this.applicationId);
        return response;
    }

    /**
     * Plans partitions for an empty projection, where only the number of rows matters.
     *
     * <p>The row count comes from {@code BigQueryClient.calculateTableSize} with the combined filter.
     * It is split evenly across {@code getDefaultParallelism()} partitions, and the first partition
     * also takes the remainder so the sizes add up to the row count.
     *
     * <p>NOTE(review): partition sizes are {@code int}, so the per-partition share of a very large
     * table is narrowed by the cast, exactly as before; confirm whether that can occur in practice.
     *
     * @return one partition context per unit of default parallelism
     * @throws IllegalStateException if the configured default parallelism is not positive
     */
    Stream<InputPartitionContext<InternalRow>> createEmptyProjectionPartitions() {
        long rowCount = this.bigQueryClient.calculateTableSize(this.tableId, getCombinedFilter());
        logger.info("Used optimized BQ count(*) path. Count: " + rowCount);
        int partitionCount = this.readSessionCreatorConfig.getDefaultParallelism();
        if (partitionCount <= 0) {
            // Fail with a clear message instead of a bare division-by-zero or index error.
            throw new IllegalStateException("Default parallelism must be positive, but was " + partitionCount);
        }
        int rowsPerPartition = (int) (rowCount / partitionCount);
        int firstPartitionRows = rowsPerPartition + (int) (rowCount % partitionCount);

        // A typed list replaces the array cast and Stream.of((Object[]) ...), which produced Stream<Object>.
        List<InputPartitionContext<InternalRow>> partitions = new ArrayList<>(partitionCount);
        partitions.add(new EmptyProjectionInputPartitionContext(firstPartitionRows));
        for (int index = 1; index < partitionCount; index++) {
            partitions.add(new EmptyProjectionInputPartitionContext(rowsPerPartition));
        }
        return partitions.stream();
    }

    /**
     * Splits the given filters into those that are pushed down and those that are not.
     *
     * <p>A filter is pushed when {@code SparkFilterUtils.isTopLevelFieldHandled} accepts it. Pushed
     * filters are added to the pushed set, and the full input array is remembered as the latest set
     * of filters.
     *
     * @param filterArr the candidate filters
     * @return the filters that were not pushed, in input order
     */
    public Filter[] pushFilters(Filter[] filterArr) {
        List<Filter> handled = new ArrayList<>();
        List<Filter> unhandled = new ArrayList<>();
        for (Filter candidate : filterArr) {
            boolean canPush = SparkFilterUtils.isTopLevelFieldHandled(
                    this.readSessionCreatorConfig.getPushAllFilters(),
                    candidate,
                    this.readSessionCreatorConfig.getReadDataFormat(),
                    this.fields);
            (canPush ? handled : unhandled).add(candidate);
        }
        this.allFilters = filterArr;
        this.pushedFilters.addAll(handled);
        return unhandled.toArray(new Filter[0]);
    }

    /**
     * Returns the filters currently in the pushed set.
     *
     * @return a new array holding the pushed filters
     */
    public Filter[] pushedFilters() {
        Filter[] snapshot = new Filter[0];
        return this.pushedFilters.toArray(snapshot);
    }

    /**
     * Returns the array most recently passed to {@link #pushFilters(Filter[])}, or an empty array
     * if it has not been called.
     *
     * @return the stored filter array itself, not a copy
     */
    public Filter[] getAllFilters() {
        return allFilters;
    }

    /**
     * Applies Dynamic Partition Pruning runtime filters by re-planning the batch partitions.
     *
     * <p>Only the filters selected by {@code SparkBigQueryUtil.extractPartitionAndClusteringFilters}
     * are used. They are added to the pushed filters, the read session is reset, and the batch
     * partitions are planned again. If the resulting combined filter is too long, the filters added
     * by this call are removed again so the pushed set matches the read session still in use.
     *
     * @param filterArr the runtime filters
     * @return the re-planned partitions, or empty when nothing was planned before, when no relevant
     *     filter was found, or when the combined filter is too long
     */
    public Optional<List<ArrowInputPartitionContext>> filter(Filter[] filterArr) {
        // Arrays.toString logs every filter. Passing the array straight to String.format spreads it
        // as varargs, which logs only the first element and throws on an empty array.
        logger.info("Use Dynamic Partition Pruning runtime filters: {}", Arrays.toString(filterArr));
        if (this.plannedInputPartitionContexts == null) {
            logger.error("Should have planned partitions.");
            return Optional.empty();
        }
        Collection<? extends Filter> partitionFilters = SparkBigQueryUtil.extractPartitionAndClusteringFilters(this.table, ImmutableList.copyOf(filterArr));
        if (partitionFilters.isEmpty()) {
            logger.info("Could not find filters for partition of clustering field for table {}, aborting DPP filter", BigQueryUtil.friendlyTableName(this.tableId));
            return Optional.empty();
        }

        // Track what this call actually adds so it can be undone without touching earlier filters.
        List<Filter> newlyPushed = new ArrayList<>();
        for (Filter candidate : partitionFilters) {
            if (this.pushedFilters.add(candidate)) {
                newlyPushed.add(candidate);
            }
        }
        if (!BigQueryUtil.filterLengthInLimit(getCombinedFilter())) {
            this.pushedFilters.removeAll(newlyPushed);
            logger.warn("New filter for Dynamic Partition Pruning is too large, skipping partition pruning");
            return Optional.empty();
        }

        List<ArrowInputPartitionContext> previouslyPlanned = this.plannedInputPartitionContexts;
        resetReadSessionResponse();
        planBatchInputPartitionContexts();
        if (this.plannedInputPartitionContexts.size() > previouslyPlanned.size()) {
            logger.warn(String.format("New partitions should not be more than originally planned. Previously had %d streams, now has %d.", previouslyPlanned.size(), this.plannedInputPartitionContexts.size()));
            return Optional.of(this.plannedInputPartitionContexts);
        }
        logger.info(String.format("Use Dynamic Partition Pruning, originally planned %d, adjust to %d partitions", previouslyPlanned.size(), this.plannedInputPartitionContexts.size()));
        return Optional.of(this.plannedInputPartitionContexts);
    }

    /**
     * Narrows the current read schema to the columns named in {@code structType}.
     *
     * <p>Fields are taken from the current schema, in its order, so their existing definitions are
     * kept; {@code structType} contributes only the set of names to retain. Nothing happens when no
     * schema is set.
     *
     * @param structType the schema whose field names should be kept
     */
    public void pruneColumns(StructType structType) {
        this.schema = this.schema.map(currentSchema -> {
            ImmutableSet<String> requiredNames = ImmutableSet.copyOf(structType.fieldNames());
            // Separate names for the current and pruned schema: the loop must read the current
            // schema while the result is built up in a distinct variable.
            StructType prunedSchema = new StructType();
            for (StructField structField : currentSchema.fields()) {
                if (requiredNames.contains(structField.name())) {
                    prunedSchema = prunedSchema.add(structField);
                }
            }
            return prunedSchema;
        });
    }

    /**
     * Estimates the size and row count of the data to be read.
     *
     * <ul>
     *   <li>Native BigQuery tables: the read session's estimates once {@link #build()} has run,
     *       otherwise the byte and row counts stored on the table.
     *   <li>External or BigLake managed tables: the session's estimated physical file size scaled by
     *       the ratio of the read schema's row size to the full row size. A zero result is reported
     *       as unknown. A row count is reported only for BigLake managed tables.
     *   <li>Anything else: unknown statistics.
     * </ul>
     *
     * @return the statistics for this read
     */
    public StatisticsContext estimateStatistics() {
        final boolean bigLakeManaged = BigQueryUtil.isBigLakeManagedTable(this.table);

        if (BigQueryUtil.isBigQueryNativeTable(this.table)) {
            final long nativeBytes;
            final long nativeRows;
            if (this.isBuilt) {
                nativeBytes = this.readSessionResponse.get().getReadSession().getEstimatedTotalBytesScanned();
                nativeRows = this.readSessionResponse.get().getReadSession().getEstimatedRowCount();
            } else {
                nativeBytes = this.table.getNumBytes().longValue();
                nativeRows = this.table.getNumRows().longValue();
            }
            return new StatisticsContext() {
                @Override
                public OptionalLong sizeInBytes() {
                    return OptionalLong.of(nativeBytes);
                }

                @Override
                public OptionalLong numRows() {
                    return OptionalLong.of(nativeRows);
                }
            };
        }

        boolean external = this.table.getDefinition().getType() == TableDefinition.Type.EXTERNAL;
        if (!external && !bigLakeManaged) {
            return UNKNOWN_STATISTICS;
        }

        ReadSession session = this.readSessionResponse.get().getReadSession();
        long physicalFileSize = session.getEstimatedTotalPhysicalFileSize();
        final long sessionRowCount = session.getEstimatedRowCount();

        // Scale the physical size by how much of each row the read schema covers.
        long fullRowSize = getRowSize(this.fields.values());
        long projectedRowSize = this.schema.isPresent()
                ? getRowSize(Arrays.asList(this.schema.get().fields()))
                : fullRowSize;
        long scaledBytes = (physicalFileSize * projectedRowSize) / fullRowSize;

        final OptionalLong estimatedSize;
        if (scaledBytes == 0) {
            estimatedSize = OptionalLong.empty();
        } else {
            estimatedSize = OptionalLong.of(scaledBytes);
        }
        return new StatisticsContext() {
            @Override
            public OptionalLong sizeInBytes() {
                return estimatedSize;
            }

            @Override
            public OptionalLong numRows() {
                if (bigLakeManaged) {
                    return OptionalLong.of(sessionRowCount);
                }
                return OptionalLong.empty();
            }
        };
    }

    /**
     * Returns the table part of the table id.
     *
     * @return the value of {@code tableId.getTable()}
     */
    public String getTableName() {
        TableId id = this.tableId;
        return id.getTable();
    }

    /**
     * Returns the table name as formatted by {@code BigQueryUtil.friendlyTableName}.
     *
     * @return the friendly table name
     */
    public String getFullTableName() {
        TableId id = this.tableId;
        return BigQueryUtil.friendlyTableName(id);
    }

    /**
     * Returns the id of the table being read.
     *
     * @return the table id
     */
    public TableId getTableId() {
        return tableId;
    }

    /**
     * Returns the RDD factory created in the constructor.
     *
     * @return the RDD factory
     */
    public BigQueryRDDFactory getBigQueryRddFactory() {
        return bigQueryRDDFactory;
    }

    /**
     * Returns the table info this context was created with.
     *
     * @return the table info
     */
    public TableInfo getTableInfo() {
        return table;
    }

    /**
     * Starts read session creation on the single-thread executor and marks this context as built.
     *
     * <p>The executor is shut down right after the task is submitted, so it accepts no further
     * tasks.
     */
    public void build() {
        ExecutorService executor = this.asyncReadSessionExecutor;
        executor.submit(() -> this.readSessionResponse.get());
        executor.shutdown();
        this.isBuilt = true;
    }

    /**
     * Sums the default sizes of the given fields' data types and adds a fixed 8.
     *
     * @param collection the fields making up a row
     * @return 8 plus the sum of {@code dataType().defaultSize()} over all fields
     */
    private long getRowSize(Collection<StructField> collection) {
        long fieldBytes = collection.stream()
                .mapToLong(field -> field.dataType().defaultSize())
                .sum();
        return 8 + fieldBytes;
    }

    /**
     * Returns the name of the read session without triggering its creation.
     *
     * @return the read session name, or {@code "N/A"} when the session has not been initialized
     */
    public String getReadSessionId() {
        if (!this.readSessionResponse.isInitialized()) {
            return "N/A";
        }
        return this.readSessionResponse.get().getReadSession().getName();
    }
}
