package org.broadinstitute.hellbender.tools.genomicsdb;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.CloseableTribbleIterator;
import htsjdk.tribble.FeatureReader;
import htsjdk.tribble.TribbleException;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFUtils;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import org.broadinstitute.barclay.argparser.Advanced;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.programgroups.ShortVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.GATKTool;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.genomicsdb.GenomicsDBUtils;
import org.genomicsdb.importer.GenomicsDBImporter;
import org.genomicsdb.model.BatchCompletionCallbackFunctionArgument;
import org.genomicsdb.model.Coordinates;
import org.genomicsdb.model.GenomicsDBImportConfiguration;
import org.genomicsdb.model.GenomicsDBVidMapProto;
import org.genomicsdb.model.ImportConfig;

@DocumentedFeature
@CommandLineProgramProperties(summary = "Import VCFs to GenomicsDB", oneLineSummary = "Import VCFs to GenomicsDB", programGroup = ShortVariantDiscoveryProgramGroup.class)
/* loaded from: input_file:org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport.class */
public final class GenomicsDBImport extends GATKTool {
    // Initial value of the vcfBufferSizePerSample argument field.
    private static final long DEFAULT_VCF_BUFFER_SIZE_PER_SAMPLE = 16384;
    // Initial value of the segmentSize argument field.
    private static final long DEFAULT_SEGMENT_SIZE = 1048576;
    // NOTE(review): presumably the default for batchSize (0 = no batching); the field initializer in this
    // file uses a literal 0 rather than this constant.
    private static final int DEFAULT_ZERO_BATCH_SIZE = 0;

    // Long names of the tool's command-line arguments.
    public static final String WORKSPACE_ARG_LONG_NAME = "genomicsdb-workspace-path";
    public static final String INCREMENTAL_WORKSPACE_ARG_LONG_NAME = "genomicsdb-update-workspace-path";
    public static final String SEGMENT_SIZE_ARG_LONG_NAME = "genomicsdb-segment-size";
    public static final String OVERWRITE_WORKSPACE_LONG_NAME = "overwrite-existing-genomicsdb-workspace";
    public static final String INTERVAL_LIST_LONG_NAME = "output-interval-list-to-file";
    public static final String VCF_BUFFER_SIZE_ARG_NAME = "genomicsdb-vcf-buffer-size";
    public static final String BATCHSIZE_ARG_LONG_NAME = "batch-size";
    public static final String CONSOLIDATE_ARG_NAME = "consolidate";
    public static final String SAMPLE_NAME_MAP_LONG_NAME = "sample-name-map";
    public static final String VALIDATE_SAMPLE_MAP_LONG_NAME = "validate-sample-name-map";
    public static final String MERGE_INPUT_INTERVALS_LONG_NAME = "merge-input-intervals";
    public static final String VCF_INITIALIZER_THREADS_LONG_NAME = "reader-threads";
    public static final String MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL = "max-num-intervals-to-import-in-parallel";
    public static final String MERGE_CONTIGS_INTO_NUM_PARTITIONS = "merge-contigs-into-num-partitions";
    public static final String BYPASS_FEATURE_READER = "bypass-feature-reader";
    public static final String VCF_HEADER_OVERRIDE = "header";
    // NOTE(review): not referenced in the visible part of the file; by its name, an interval count above
    // which the tool warns the user — confirm at the use site.
    public static final int INTERVAL_LIST_SIZE_WARNING_THRESHOLD = 100;
    // NOTE(review): presumably the indices of the first and last column in the long[] returned by
    // GenomicsDBUtils.getArrayColumnBounds (the visible callers index that array with literal 0 and 1).
    public static final int ARRAY_COLUMN_BOUNDS_START = 0;
    public static final int ARRAY_COLUMN_BOUNDS_END = 1;
    public static final String SHARED_POSIXFS_OPTIMIZATIONS = "genomicsdb-shared-posixfs-optimizations";
    public static final String USE_GCS_HDFS_CONNECTOR = "genomicsdb-use-gcs-hdfs-connector";
    public static final String AVOID_NIO = "avoid-nio";

    @Argument(fullName = WORKSPACE_ARG_LONG_NAME, doc = "Workspace for GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. Use this argument when creating a new GenomicsDB workspace. Either this or genomicsdb-update-workspace-path must be specified. Must be an empty or non-existent directory.", mutex = {INCREMENTAL_WORKSPACE_ARG_LONG_NAME, INTERVAL_LIST_LONG_NAME})
    private String workspace;

    @Argument(fullName = INCREMENTAL_WORKSPACE_ARG_LONG_NAME, doc = "Workspace when updating GenomicsDB. Can be a POSIX file system absolute or relative path or a HDFS/GCS URL. Use this argument when adding new samples to an existing GenomicsDB workspace or when using the output-interval-list-to-file option. Either this or genomicsdb-workspace-path must be specified. Must point to an existing workspace.", mutex = {WORKSPACE_ARG_LONG_NAME, VCF_HEADER_OVERRIDE})
    private String incrementalImportWorkspace;

    @Argument(fullName = StandardArgumentDefinitions.VARIANT_LONG_NAME, shortName = StandardArgumentDefinitions.VARIANT_SHORT_NAME, doc = "GVCF files to be imported to GenomicsDB. Each file must contain data for only a single sample. Either this or sample-name-map must be specified.", optional = true, mutex = {SAMPLE_NAME_MAP_LONG_NAME, AVOID_NIO})
    private List<String> variantPaths;

    @Advanced
    @Argument(fullName = SAMPLE_NAME_MAP_LONG_NAME, doc = "Path to file containing a mapping of sample name to file uri in tab delimited format.  If this is specified then the header from the first sample will be treated as the merged header rather than merging the headers, and the sample names will be taken from this file.  This may be used to rename input samples. This is a performance optimization that relaxes the normal checks for consistent headers.  Using vcfs with incompatible headers may result in silent data corruption.", optional = true, mutex = {StandardArgumentDefinitions.VARIANT_LONG_NAME})
    private String sampleNameMapFile;

    @Argument(fullName = INTERVAL_LIST_LONG_NAME, doc = "Path to output file where intervals from existing workspace should be written.If this option is specified, the tools outputs the interval_list of the workspace pointed to by genomicsdb-update-workspace-path at the path specified here in a Picard-style interval_list with a sequence dictionary header", optional = true, mutex = {WORKSPACE_ARG_LONG_NAME})
    private String intervalListOutputPathString;
    // Thread pool used to open feature readers in parallel; assigned in initializeInputPreloadExecutorService()
    // and left null when readers are opened serially.
    private ExecutorService inputPreloadExecutorService;
    // Intervals the tool operates on. NOTE(review): presumably populated by initializeIntervals(), which is
    // outside the visible part of this file.
    private List<SimpleInterval> intervals;
    // Sample name to VCF mapping built in initializeHeaderAndSampleMappings(); remains null when neither
    // variant paths nor a sample-name-map file were given.
    private SampleNameMap sampleNameMap;
    // Sequence dictionary obtained from the merged/override/workspace header in initializeHeaderAndSampleMappings().
    private SAMSequenceDictionary mergedHeaderSequenceDictionary;
    // Paths of the vid map, callset map and VCF header files inside the workspace; computed in onTraversalStart().
    private String vidMapJSONFile;
    private String callsetMapJSONFile;
    private String vcfHeaderFile;

    @Argument(fullName = SEGMENT_SIZE_ARG_LONG_NAME, doc = "Buffer size in bytes allocated for GenomicsDB attributes during import. Should be large enough to hold data from one site. ", optional = true)
    private long segmentSize = DEFAULT_SEGMENT_SIZE;

    @Argument(fullName = VCF_BUFFER_SIZE_ARG_NAME, doc = "Buffer size in bytes to store variant contexts. Larger values are better as smaller values cause frequent disk writes. Defaults to 16384 which was empirically determined to work well for many inputs.", optional = true, minValue = 1024.0d, minRecommendedValue = 10240.0d)
    private long vcfBufferSizePerSample = DEFAULT_VCF_BUFFER_SIZE_PER_SAMPLE;

    @Argument(fullName = OVERWRITE_WORKSPACE_LONG_NAME, doc = "Will overwrite given workspace if it exists. Otherwise a new workspace is created. Cannot be set to true if genomicsdb-update-workspace-path is also set. Defaults to false", optional = true)
    private Boolean overwriteExistingWorkspace = false;

    @Argument(fullName = BATCHSIZE_ARG_LONG_NAME, doc = "Batch size controls the number of samples for which readers are open at once and therefore provides a way to minimize memory consumption. However, it can take longer to complete. Use the consolidate flag if more than a hundred batches were used. This will improve feature read time. batchSize=0 means no batching (i.e. readers for all samples will be opened at once) Defaults to 0", optional = true)
    private int batchSize = 0;

    @Argument(fullName = CONSOLIDATE_ARG_NAME, doc = "Boolean flag to enable consolidation. If importing data in batches, a new fragment is created for each batch. In case thousands of fragments are created, GenomicsDB feature readers will try to open ~20x as many files. Also, internally GenomicsDB would consume more memory to maintain bookkeeping data from all fragments. Use this flag to merge all fragments into one. Merging can potentially improve read performance, however overall benefit might not be noticeable as the top Java layers have significantly higher overheads. This flag has no effect if only one batch is used. Defaults to false", optional = true)
    private Boolean doConsolidation = false;

    @Argument(fullName = VALIDATE_SAMPLE_MAP_LONG_NAME, doc = "Boolean flag to enable checks on the sampleNameMap file. If true, tool checks whetherfeature readers are valid and shows a warning if sample names do not match with the headers. Defaults to false", optional = true)
    private Boolean validateSampleToReaderMap = false;

    @Argument(fullName = MERGE_INPUT_INTERVALS_LONG_NAME, doc = "Boolean flag to import all data in between intervals.  Improves performance using large lists of intervals, as in exome sequencing, especially if GVCF data only exists for specified intervals.")
    private boolean mergeInputIntervals = false;

    @Advanced
    @Argument(fullName = "reader-threads", doc = "How many simultaneous threads to use when opening VCFs in batches; higher values may improve performance when network latency is an issue. Multiple reader threads are not supported when running with multiple intervals.", optional = true, minValue = 1.0d)
    private int vcfInitializerThreads = 1;

    @Advanced
    @Argument(fullName = MAX_NUM_INTERVALS_TO_IMPORT_IN_PARALLEL, doc = "Max number of intervals to import in parallel; higher values may improve performance, but require more memory and a higher number of file descriptors open at the same time", optional = true, minValue = 1.0d)
    private int maxNumIntervalsToImportInParallel = 1;

    @Advanced
    @Argument(fullName = MERGE_CONTIGS_INTO_NUM_PARTITIONS, shortName = MERGE_CONTIGS_INTO_NUM_PARTITIONS, doc = "Number of GenomicsDB arrays to merge input intervals into. Defaults to 0, which disables this merging. This option can only be used if entire contigs are specified as intervals. The tool will not split up a contig into multiple arrays, which means the actual number of partitions may be less than what is specified for this argument. This can improve performance in the case where the user is trying to import a very large number of contigs - larger than 100", optional = true, minValue = 0.0d)
    private int mergeContigsIntoNumPartitions = 0;

    @Argument(fullName = "genomicsdb-shared-posixfs-optimizations", doc = "Allow for optimizations to improve the usability and performance for shared Posix Filesystems(e.g. NFS, Lustre). If set, file level locking is disabled and file system writes are minimized by keeping a higher number of file descriptors open for longer periods of time. Use with batch-size option if keeping a large number of file descriptors open is an issue", optional = true)
    private boolean sharedPosixFSOptimizations = false;

    @Argument(fullName = VCF_HEADER_OVERRIDE, doc = "Specify a vcf file to use instead of reading and combining headers from the input vcfs", optional = true, mutex = {INCREMENTAL_WORKSPACE_ARG_LONG_NAME})
    private FeatureInput<VariantContext> headerOverride = null;

    @Argument(fullName = BYPASS_FEATURE_READER, doc = "Use htslib to read input VCFs instead of GATK's FeatureReader. This will reduce memory usage and potentially speed up the import. Lower memory requirements may also enable parallelism through max-num-intervals-to-import-in-parallel. To enable this option, VCFs must be normalized, block-compressed and indexed.", optional = true)
    private boolean bypassFeatureReader = false;

    @Argument(fullName = AVOID_NIO, doc = "Do not attempt to open the input vcf file paths in java.  This can only be used with bypass-feature-reader.  It allows operating on file systems which GenomicsDB understands how to open but GATK does not.  This will disable many of the sanity checks.", mutex = {StandardArgumentDefinitions.VARIANT_LONG_NAME})
    @Advanced
    private boolean avoidNio = false;

    @Argument(fullName = "genomicsdb-use-gcs-hdfs-connector", doc = "Use the GCS HDFS Connector instead of the native GCS SDK client with gs:// URLs.", optional = true)
    public boolean useGcsHdfsConnector = false;
    // Header lines assembled in initializeHeaderAndSampleMappings(); stays null when only the workspace
    // header is consulted (interval-list output mode).
    private Set<VCFHeaderLine> mergedHeaderLines = null;
    // Updated by logMessageOnBatchCompletion() to (number of the batch just completed) + 1.
    private int batchCount = 1;
    // Set to true by initializeWorkspaceAndToolMode() when genomicsdb-update-workspace-path is non-empty.
    private Boolean doIncrementalImport = false;
    // Set to true by initializeWorkspaceAndToolMode() when, in addition, output-interval-list-to-file is non-empty.
    private Boolean getIntervalsFromExistingWorkspace = false;

    /* loaded from: input_file:org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport$InitializedQueryWrapper.class */
    private static final class InitializedQueryWrapper implements FeatureReader<VariantContext> {
        private final FeatureReader<VariantContext> reader;
        private final SimpleInterval interval;
        private CloseableTribbleIterator<VariantContext> query;

        private InitializedQueryWrapper(FeatureReader<VariantContext> featureReader, Locatable locatable) throws IOException {
            this.reader = featureReader;
            this.interval = new SimpleInterval(locatable);
            this.query = featureReader.query(locatable.getContig(), locatable.getStart(), locatable.getEnd());
        }

        public CloseableTribbleIterator<VariantContext> query(String str, int i, int i2) {
            SimpleInterval simpleInterval = new SimpleInterval(str, i, i2);
            if (!this.interval.equals(simpleInterval)) {
                throw new GATKException("Cannot call query with different interval, expected:" + this.interval + " queried with: " + simpleInterval);
            }
            if (this.query == null) {
                throw new GATKException("Cannot call query twice on this wrapper.");
            }
            CloseableTribbleIterator<VariantContext> closeableTribbleIterator = this.query;
            this.query = null;
            return closeableTribbleIterator;
        }

        public CloseableTribbleIterator<VariantContext> iterator() {
            throw new UnsupportedOperationException("iterator() not supported, this should not have been called and indicates an issue with GenomicsDB integration");
        }

        public void close() throws IOException {
            this.reader.close();
        }

        public List<String> getSequenceNames() {
            throw new UnsupportedOperationException("getSequenceNames() not supported, this should not have been called and indicates an issue with GenomicsDB integration");
        }

        public Object getHeader() {
            return this.reader.getHeader();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/broadinstitute/hellbender/tools/genomicsdb/GenomicsDBImport$UnableToCreateGenomicsDBWorkspace.class */
    /**
     * A {@link UserException} subtype carrying a plain message.
     * NOTE(review): by its name, raised when the GenomicsDB workspace cannot be created; the throw sites are
     * outside the visible part of this file.
     */
    public static class UnableToCreateGenomicsDBWorkspace extends UserException {
        private static final long serialVersionUID = 1;

        /**
         * @param str the message passed to the {@link UserException} constructor
         */
        UnableToCreateGenomicsDBWorkspace(String str) {
            super(str);
        }
    }

    /**
     * Optionally replaces the traversal intervals with their spanning intervals.
     *
     * @param list                  the traversal intervals
     * @param sAMSequenceDictionary the sequence dictionary passed through to
     *                              {@link IntervalUtils#getSpanningIntervals}
     * @return {@code list} itself when {@code merge-input-intervals} is off; otherwise the result of
     *         {@code IntervalUtils.getSpanningIntervals(list, sAMSequenceDictionary)}
     */
    @Override
    protected List<SimpleInterval> transformTraversalIntervals(List<SimpleInterval> list, SAMSequenceDictionary sAMSequenceDictionary) {
        if (!mergeInputIntervals) {
            return list;
        }
        return IntervalUtils.getSpanningIntervals(list, sAMSequenceDictionary);
    }

    /**
     * Always returns {@code true}.
     * NOTE(review): presumably this tells the engine not to run its standard progress meter for this tool —
     * confirm against GATKTool.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public boolean disableProgressMeter() {
        return true;
    }

    /**
     * Always returns {@code 0}.
     * NOTE(review): presumably a zero size means cloud prefetching is off by default for this tool —
     * confirm how GATKTool interprets the value.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public int getDefaultCloudPrefetchBufferSize() {
        return 0;
    }

    /**
     * Always returns {@code 0}.
     * NOTE(review): presumably a zero size means cloud index prefetching is off by default for this tool —
     * confirm how GATKTool interprets the value.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public int getDefaultCloudIndexPrefetchBufferSize() {
        return 0;
    }

    /**
     * Returns the fixed label {@code "batches"}.
     * NOTE(review): presumably the unit name the progress meter prints for this tool — confirm against GATKTool.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public String getProgressMeterRecordLabel() {
        return "batches";
    }

    /**
     * Validates the argument combination and loads header, sample and interval state, then delegates to the
     * superclass startup. The call order matters: later steps read flags set by earlier ones.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool, org.broadinstitute.hellbender.cmdline.CommandLineProgram
    public void onStartup() {
        // Sets doIncrementalImport / getIntervalsFromExistingWorkspace, which the assertions below read.
        initializeWorkspaceAndToolMode();
        assertVariantPathsOrSampleNameFileWasSpecified();
        assertOverwriteWorkspaceAndIncrementalImportMutuallyExclusive();
        assertAvoidNioConditionsAreValid();
        // Loads the merged header lines, sequence dictionary and sample name map.
        initializeHeaderAndSampleMappings();
        // NOTE(review): defined outside the visible part of this file; presumably fills in `intervals`.
        initializeIntervals();
        super.onStartup();
    }

    /**
     * Derives the tool's operating mode from the workspace-related arguments.
     * <p>
     * When an update-workspace path was given, incremental import is switched on and that path becomes the
     * working {@code workspace}; if an interval-list output path was given as well, the tool is additionally
     * flagged to take its intervals from the existing workspace. Independently of that, the GCS HDFS
     * connector is enabled in GenomicsDB when the corresponding argument is set.
     */
    private void initializeWorkspaceAndToolMode() {
        final boolean updateWorkspaceGiven =
                incrementalImportWorkspace != null && !incrementalImportWorkspace.isEmpty();
        if (updateWorkspaceGiven) {
            doIncrementalImport = true;
            workspace = incrementalImportWorkspace;

            final boolean intervalListOutputGiven =
                    intervalListOutputPathString != null && !intervalListOutputPathString.isEmpty();
            if (intervalListOutputGiven) {
                getIntervalsFromExistingWorkspace = true;
            }
        }

        if (useGcsHdfsConnector) {
            GenomicsDBUtils.useGcsHdfsConnector(true);
        }
    }

    /**
     * Rejects the combination of overwriting a workspace and incrementally importing into one.
     *
     * @throws CommandLineException if both the overwrite flag and incremental-import mode are active
     */
    private void assertOverwriteWorkspaceAndIncrementalImportMutuallyExclusive() {
        if (!overwriteExistingWorkspace) {
            return;
        }
        if (doIncrementalImport) {
            throw new CommandLineException("overwrite-existing-genomicsdb-workspace cannot be set to true when genomicsdb-update-workspace-path is set");
        }
    }

    /**
     * Ensures the tool has some source of input: variant paths, a sample-name-map file, or the
     * "read intervals from the existing workspace" mode.
     *
     * @throws CommandLineException.MissingArgument if none of the three is present
     */
    private void assertVariantPathsOrSampleNameFileWasSpecified() {
        final boolean variantPathsGiven = variantPaths != null && !variantPaths.isEmpty();
        if (variantPathsGiven || sampleNameMapFile != null || getIntervalsFromExistingWorkspace) {
            return;
        }
        throw new CommandLineException.MissingArgument(StandardArgumentDefinitions.VARIANT_LONG_NAME, "One of --variant or --sample-name-map must be specified");
    }

    /**
     * Checks the prerequisites of {@code --avoid-nio}: both {@code --bypass-feature-reader} and
     * {@code --header} must accompany it.
     *
     * @throws CommandLineException.MissingArgument naming every prerequisite that is absent
     */
    private void assertAvoidNioConditionsAreValid() {
        if (!avoidNio) {
            return;
        }

        // Collect every missing prerequisite so the error names all of them at once.
        final List<String> missingArguments = new ArrayList<>();
        if (!bypassFeatureReader) {
            missingArguments.add(BYPASS_FEATURE_READER);
        }
        if (headerOverride == null) {
            missingArguments.add(VCF_HEADER_OVERRIDE);
        }

        if (!missingArguments.isEmpty()) {
            throw new CommandLineException.MissingArgument(String.join(" and ", missingArguments), "If --avoid-nio is set then --bypass-feature-reader and --header must also be specified.");
        }
    }

    /**
     * Verifies that every input interval spans a whole contig as recorded in the importer's vid mapping.
     * An interval passes when it starts at 1 and ends at or beyond the contig length.
     *
     * @param genomicsDBImporter importer whose protobuf vid mapping supplies the contig names and lengths
     * @param list               the input intervals to check
     * @throws UserException if the vid mapping is unavailable, if an interval names a contig that is not
     *                       in the vid mapping, or if an interval does not cover its entire contig
     */
    private static void assertIntervalsCoverEntireContigs(GenomicsDBImporter genomicsDBImporter, List<SimpleInterval> list) {
        final GenomicsDBVidMapProto.VidMappingPB vidMapping = genomicsDBImporter.getProtobufVidMapping();
        if (vidMapping == null) {
            throw new UserException("Could not get protobuf vid mappping object from GenomicsDBImporter");
        }

        final Map<String, GenomicsDBVidMapProto.Chromosome> contigsByName = vidMapping.getContigsList().stream()
                .collect(Collectors.toMap(contig -> contig.getName(), contig -> contig));

        for (final SimpleInterval interval : list) {
            final GenomicsDBVidMapProto.Chromosome contig = contigsByName.get(interval.getContig());
            if (contig == null) {
                // Without this guard an unknown contig surfaces as a bare NullPointerException below.
                throw new UserException("--merge-contigs-into-num-partitions requires that entire contigs be specified for input intervals. Input interval contained: "
                        + String.format("Contig:%s, Start:%d, End:%d", interval.getContig(), Integer.valueOf(interval.getStart()), Integer.valueOf(interval.getEnd()))
                        + " but that contig was not found in the GenomicsDB vid mapping");
            }

            final long contigLength = contig.getLength();
            final boolean coversWholeContig = interval.getStart() == 1 && interval.getEnd() >= contigLength;
            if (!coversWholeContig) {
                throw new UserException("--merge-contigs-into-num-partitions requires that entire contigs be specified for input intervals. Input interval contained: "
                        + String.format("Contig:%s, Start:%d, End:%d", interval.getContig(), Integer.valueOf(interval.getStart()), Integer.valueOf(interval.getEnd()))
                        + " while reference contig was: "
                        + String.format("Contig:%s, Start:%d, End:%d", contig.getName(), 1, Long.valueOf(contigLength)));
            }
        }
    }

    /**
     * Loads the VCF header information and the sample name map from whichever input source was given.
     * <ul>
     *   <li>Variant paths: reads each input header (unless a header override is given), registers the
     *       single sample of each file, and merges the headers.</li>
     *   <li>Sample-name-map file: builds the map from the file and takes the header from the override, or
     *       else from the VCF of the first entry in the map.</li>
     *   <li>Neither (interval-list output mode only): reads the header stored in the workspace to obtain
     *       the sequence dictionary.</li>
     * </ul>
     *
     * @throws UserException if no input source applies, a variant path is not a valid URI, the workspace
     *                       header cannot be staged in a temp file, the resulting header has no sequence
     *                       dictionary, or explicit indices are combined with {@code --bypass-feature-reader}
     */
    private void initializeHeaderAndSampleMappings() {
        final boolean variantPathsGiven = variantPaths != null && !variantPaths.isEmpty();

        if (variantPathsGiven) {
            final List<VCFHeader> inputHeaders = new ArrayList<>(variantPaths.size());
            sampleNameMap = new SampleNameMap();

            if (headerOverride != null) {
                // NOTE(review): no samples are added to sampleNameMap on this path — confirm this is
                // intended when --variant is combined with --header.
                final VCFHeader overrideHeader = getHeaderFromPath(headerOverride.toPath());
                mergedHeaderLines = new LinkedHashSet<>(overrideHeader.getMetaDataInInputOrder());
                mergedHeaderSequenceDictionary = overrideHeader.getSequenceDictionary();
            } else {
                for (final String variantPathString : variantPaths) {
                    final Path variantPath = IOUtils.getPath(variantPathString);
                    if (bypassFeatureReader) {
                        GATKGenomicsDBUtils.assertVariantFileIsCompressedAndIndexed(variantPath);
                    }

                    final VCFHeader inputHeader = getHeaderFromPath(variantPath);
                    Utils.validate(inputHeader != null, "Null header was found in " + variantPath + ".");
                    assertGVCFHasOnlyOneSample(variantPathString, inputHeader);
                    inputHeaders.add(inputHeader);

                    try {
                        final String sampleName = inputHeader.getGenotypeSamples().get(0);
                        sampleNameMap.addSample(sampleName, new URI(variantPathString));
                    } catch (final URISyntaxException e) {
                        throw new UserException("Malformed URI " + e.getMessage(), e);
                    }
                }
                mergedHeaderLines = VCFUtils.smartMergeHeaders(inputHeaders, true);
                mergedHeaderSequenceDictionary = new VCFHeader(mergedHeaderLines).getSequenceDictionary();
            }
            mergedHeaderLines.addAll(getDefaultToolVCFHeaderLines());
        } else if (sampleNameMapFile != null) {
            sampleNameMap = new SampleNameMap(IOUtils.getPath(sampleNameMapFile), bypassFeatureReader && !avoidNio);

            final VCFHeader representativeHeader;
            if (headerOverride == null) {
                // No override: the header of the first sample in the map stands in for the merged header.
                final String firstSample = sampleNameMap.getSampleNameToVcfPath().entrySet().iterator().next().getKey();
                representativeHeader = getHeaderFromPath(sampleNameMap.getVCFForSampleAsPath(firstSample));
            } else {
                representativeHeader = getHeaderFromPath(headerOverride.toPath());
            }
            mergedHeaderLines = new LinkedHashSet<>(representativeHeader.getMetaDataInInputOrder());
            mergedHeaderSequenceDictionary = representativeHeader.getSequenceDictionary();
            mergedHeaderLines.addAll(getDefaultToolVCFHeaderLines());
        } else {
            if (!getIntervalsFromExistingWorkspace) {
                throw new UserException("variant or sample-name-map must be specified unless output-interval-list-to-file is specified");
            }

            // Only the sequence dictionary is needed here; it is read from the header stored in the workspace.
            final String workspaceHeaderPath = IOUtils.appendPathToDir(workspace, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);
            IOUtils.assertPathsAreReadable(workspaceHeaderPath);
            final String headerText = GenomicsDBUtils.readEntireFile(workspaceHeaderPath);
            try {
                // The header text is staged in a temp file because the dictionary is read via a File.
                final File tempHeaderFile = IOUtils.createTempFile("tempheader", ".vcf");
                Files.writeString(tempHeaderFile.toPath(), headerText);
                mergedHeaderSequenceDictionary = VCFFileReader.getSequenceDictionary(tempHeaderFile);
            } catch (final IOException e) {
                throw new UserException("Unable to create temporary header file to get sequence dictionary", e);
            }
        }

        if (mergedHeaderSequenceDictionary == null) {
            throw new UserException("The merged vcf header has no sequence dictionary. Please provide a header that contains a sequence dictionary.");
        }

        final boolean explicitIndicesGiven = sampleNameMap != null && sampleNameMap.indicesSpecified();
        if (explicitIndicesGiven && bypassFeatureReader) {
            throw new UserException("Indices were specified for some VCFs in the sample name map file, but --bypass-feature-reader was also specified. Specifying explicit indices is not supported when running with --bypass-feature-reader");
        }
    }

    /**
     * Opens the VCF at {@code path} without requiring an index, reads its header, and closes the reader.
     * <p>
     * The reader is managed by try-with-resources so that it is closed even when reading the header fails.
     *
     * @param path location of the VCF whose header should be read
     * @return the header reported by the feature reader, cast to {@link VCFHeader}
     * @throws UserException if an {@link IOException} occurs while reading the header or closing the reader
     */
    private VCFHeader getHeaderFromPath(Path path) {
        try (FeatureReader<VariantContext> featureReader = AbstractFeatureReader.getFeatureReader(
                path.toAbsolutePath().toUri().toString(),
                (String) null,
                new VCFCodec(),
                false,
                BucketUtils.getPrefetchingWrapper(this.cloudPrefetchBuffer),
                BucketUtils.getPrefetchingWrapper(this.cloudIndexPrefetchBuffer))) {
            return (VCFHeader) featureReader.getHeader();
        } catch (IOException e) {
            throw new UserException("Error while reading vcf header from " + path.toUri(), e);
        }
    }

    /**
     * Ensures a GVCF header declares exactly one genotype sample.
     *
     * @param str       the input path, used only in the error message
     * @param vCFHeader the header to inspect
     * @throws UserException if the header's genotype sample count is anything other than one
     */
    private static void assertGVCFHasOnlyOneSample(String str, VCFHeader vCFHeader) {
        final int sampleCount = vCFHeader.getNGenotypeSamples();
        if (sampleCount == 1) {
            return;
        }
        throw new UserException("Input GVCF: " + str + " was expected to contain a single sample but actually contained " + sampleCount + " samples.");
    }

    /**
     * Writes the current {@code intervals} as an {@link IntervalList}, built on the best available sequence
     * dictionary, to the path given by the interval-list output argument.
     */
    private void writeIntervalListToFile() {
        final IntervalList outputIntervals = new IntervalList(getBestAvailableSequenceDictionary());
        for (final SimpleInterval interval : intervals) {
            outputIntervals.add(new Interval(interval));
        }
        outputIntervals.write(IOUtils.getPath(intervalListOutputPathString));
    }

    /**
     * Prepares the workspace before traversal: resolves the workspace directory, computes the paths of the
     * vid map, callset map and VCF header files inside it, logs what is about to happen for the active
     * mode, and sets up the reader-initialization thread pool.
     */
    @Override
    public void onTraversalStart() {
        final String workspaceDir = overwriteCreateOrCheckWorkspace();
        vidMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
        callsetMapJSONFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
        vcfHeaderFile = GATKGenomicsDBUtils.genomicsDBApppendPaths(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);

        if (getIntervalsFromExistingWorkspace) {
            // Interval-list output mode: nothing is imported.
            logger.info("Interval file list will be written out to " + intervalListOutputPathString);
        } else if (doIncrementalImport) {
            logger.info("Callset Map JSON file will be re-written to " + callsetMapJSONFile);
            logger.info("Incrementally importing to workspace - " + workspaceDir);
        } else {
            logger.info("Vid Map JSON file will be written to " + vidMapJSONFile);
            logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile);
            logger.info("Complete VCF Header will be written to " + vcfHeaderFile);
            logger.info("Importing to workspace - " + workspaceDir);
        }

        initializeInputPreloadExecutorService();
    }

    /**
     * Creates the thread pool used to open VCF readers in parallel, or leaves it {@code null} for serial
     * initialization.
     * <p>
     * A pool is created only when more than one reader thread was requested and exactly one interval is
     * being imported. If more than one thread was requested but that interval condition does not hold, a
     * warning is logged and serial initialization is used.
     */
    private void initializeInputPreloadExecutorService() {
        if (vcfInitializerThreads <= 1) {
            inputPreloadExecutorService = null;
            return;
        }

        final boolean exactlyOneInterval = intervals != null && intervals.size() == 1;
        if (exactlyOneInterval) {
            inputPreloadExecutorService = Executors.newFixedThreadPool(
                    vcfInitializerThreads,
                    new ThreadFactoryBuilder().setNameFormat("readerInitializer-thread-%d").setDaemon(true).build());
        } else {
            logger.warn("GenomicsDBImport cannot use multiple VCF reader threads for initialization when the number of intervals is greater than 1. Falling back to serial VCF reader initialization.");
            inputPreloadExecutorService = null;
        }
    }

    /**
     * Opens feature readers for a batch of samples, in parallel when the reader-initialization thread pool
     * exists and serially otherwise.
     *
     * @param map sample name to VCF URI; cast to a {@link SortedMap} on the parallel path
     * @param i   passed through unchanged as the second argument of the reader-creation helper
     * @param i2  passed through unchanged as the third argument of the reader-creation helper
     * @return the sample name to reader map produced by the selected helper
     */
    private Map<String, FeatureReader<VariantContext>> createSampleToReaderMap(Map<String, URI> map, int i, int i2) {
        if (inputPreloadExecutorService == null) {
            return getFeatureReadersSerially(map, i, i2);
        }
        return getFeatureReadersInParallel((SortedMap<String, URI>) map, i, i2);
    }

    /**
     * Logs the completion of an import batch and, at debug level, the names of the samples in that batch.
     * Afterwards {@code batchCount} is set to the number of the next batch.
     *
     * @param batchCompletionCallbackFunctionArgument carries the number of the batch that just finished and
     *                                                the total number of batches
     * @return always {@code null}
     */
    private Void logMessageOnBatchCompletion(BatchCompletionCallbackFunctionArgument batchCompletionCallbackFunctionArgument) {
        final int finishedBatch = batchCompletionCallbackFunctionArgument.batchCount;
        logger.info("Done importing batch " + finishedBatch + "/" + batchCompletionCallbackFunctionArgument.totalBatchCount);
        logger.debug("List of samples imported in batch " + finishedBatch + ":");

        // A batch size of 0 means all samples were imported as one batch.
        final int samplesPerBatch = (batchSize == 0) ? sampleNameMap.getNumSamples() : batchSize;
        // Zero-based window [firstIndex, endIndex) of this batch within the sorted sample names.
        final int firstIndex = (finishedBatch - 1) * samplesPerBatch;
        final int endIndex = finishedBatch * samplesPerBatch;

        int index = 0;
        for (final String sampleName : sampleNameMap.getSampleNamesInSortedOrder()) {
            if (index >= firstIndex && index < endIndex) {
                logger.debug("\t" + sampleName);
            }
            index++;
        }

        batchCount = finishedBatch + 1;
        return null;
    }

    /**
     * Builds a partition of the current workspace bounded by the two given columns, with array-name
     * generation from the partition bounds switched on.
     *
     * @param genomicsDBColumn  the partition's begin column
     * @param genomicsDBColumn2 the partition's end column
     * @return the built partition
     */
    private GenomicsDBImportConfiguration.Partition createPartitionWithBeginAndEnd(Coordinates.GenomicsDBColumn genomicsDBColumn, Coordinates.GenomicsDBColumn genomicsDBColumn2) {
        return GenomicsDBImportConfiguration.Partition.newBuilder()
                .setBegin(genomicsDBColumn)
                .setEnd(genomicsDBColumn2)
                .setWorkspace(workspace)
                .setGenerateArrayNameFromPartitionBounds(true)
                .build();
    }

    /**
     * Builds one partition per array found in the workspace, in sorted array-name order, using the column
     * bounds GenomicsDB reports for each array.
     *
     * @return the partitions, ordered by array name
     */
    private List<GenomicsDBImportConfiguration.Partition> generatePartitionListFromWorkspace() {
        final String[] arrayNames = GenomicsDBUtils.listGenomicsDBArrays(workspace);
        Arrays.sort(arrayNames);

        final List<GenomicsDBImportConfiguration.Partition> partitions = new ArrayList<>();
        for (final String arrayName : arrayNames) {
            final long[] columnBounds = GenomicsDBUtils.getArrayColumnBounds(workspace, arrayName);
            final Coordinates.GenomicsDBColumn begin = Coordinates.GenomicsDBColumn.newBuilder()
                    .setTiledbColumn(columnBounds[ARRAY_COLUMN_BOUNDS_START])
                    .build();
            final Coordinates.GenomicsDBColumn end = Coordinates.GenomicsDBColumn.newBuilder()
                    .setTiledbColumn(columnBounds[ARRAY_COLUMN_BOUNDS_END])
                    .build();
            partitions.add(createPartitionWithBeginAndEnd(begin, end));
        }
        return partitions;
    }

    /**
     * Creates one partition per entry of {@code intervals}, in list order. A partition begins at
     * the interval's (contig, start) position and ends at its (contig, end) position.
     *
     * @return the partitions corresponding to the current intervals
     */
    private List<GenomicsDBImportConfiguration.Partition> generatePartitionListFromIntervals() {
        final List<GenomicsDBImportConfiguration.Partition> partitions = new ArrayList<>();
        for (final SimpleInterval interval : this.intervals) {
            // A single position builder is reused: only the position differs between begin and end.
            final Coordinates.ContigPosition.Builder position =
                    Coordinates.ContigPosition.newBuilder().setContig(interval.getContig());

            position.setPosition(interval.getStart());
            final Coordinates.GenomicsDBColumn begin =
                    Coordinates.GenomicsDBColumn.newBuilder().setContigPosition(position.build()).build();

            position.setPosition(interval.getEnd());
            final Coordinates.GenomicsDBColumn end =
                    Coordinates.GenomicsDBColumn.newBuilder().setContigPosition(position.build()).build();

            partitions.add(createPartitionWithBeginAndEnd(begin, end));
        }
        return partitions;
    }

    /**
     * Derives whole-contig intervals from the vid map JSON file. For every array in the workspace,
     * each contig of the vid map whose TileDB column offset lies within that array's column bounds
     * (both ends inclusive) yields an interval from 1 to the contig length.
     *
     * @return the intervals, grouped by array in the order the arrays are listed
     * @throws UserException       if the vid map file cannot be read
     * @throws ArithmeticException if a contig length does not fit in an {@code int}
     */
    private List<SimpleInterval> generateIntervalListFromVidMap() {
        try {
            final GenomicsDBVidMapProto.VidMappingPB vidMap = GATKGenomicsDBUtils.getProtobufVidMappingFromJsonFile(this.vidMapJSONFile);
            final String[] arrayNames = GenomicsDBUtils.listGenomicsDBArrays(this.workspace);
            return Arrays.stream(arrayNames)
                    .flatMap(arrayName -> {
                        final long[] bounds = GenomicsDBUtils.getArrayColumnBounds(this.workspace, arrayName);
                        final long lowest = bounds[0];
                        final long highest = bounds[1];
                        return vidMap.getContigsList().stream()
                                .filter(contig -> {
                                    final long offset = contig.getTiledbColumnOffset();
                                    return lowest <= offset && offset <= highest;
                                })
                                .map(contig -> new SimpleInterval(contig.getName(), 1, Math.toIntExact(contig.getLength())));
                    })
                    .collect(Collectors.toList());
        } catch (IOException e) {
            throw new UserException("Could not get vid map protobuf from file:" + this.vidMapJSONFile + ". Is the workspace corrupted?", e);
        }
    }

    /**
     * Reconstructs intervals from the names of the arrays in the workspace. A name that splits on
     * {@code '$'} into exactly three fields is read as contig, start and end; any other name is
     * skipped.
     *
     * @return the intervals parsed from the array names, in listing order
     * @throws NumberFormatException if the second or third field of a three-field name is not an integer
     */
    private List<SimpleInterval> generateIntervalListFromWorkspace() {
        final List<SimpleInterval> parsed = new ArrayList<>();
        for (final String arrayName : GenomicsDBUtils.listGenomicsDBArrays(this.workspace)) {
            final String[] fields = arrayName.split("\\$");
            if (fields.length == 3) {
                final int start = Integer.parseInt(fields[1]);
                final int end = Integer.parseInt(fields[2]);
                parsed.add(new SimpleInterval(fields[0], start, end));
            }
        }
        return parsed;
    }

    /**
     * Assembles the {@code ImportConfig} handed to the GenomicsDB importer.
     * <p>
     * Column partitions come from the existing workspace when no intervals are set, otherwise from
     * the intervals. The import is configured to fail on update unless an incremental import was
     * requested. The sample-to-reader factory is omitted (null) when the feature reader is bypassed.
     *
     * @param i batch size forwarded to the {@code ImportConfig} constructor
     * @return the populated import configuration
     */
    private ImportConfig createImportConfig(int i) {
        final List<GenomicsDBImportConfiguration.Partition> partitions;
        if (this.intervals == null || this.intervals.isEmpty()) {
            partitions = generatePartitionListFromWorkspace();
        } else {
            partitions = generatePartitionListFromIntervals();
        }

        final boolean incremental = this.doIncrementalImport.booleanValue();
        final GenomicsDBImportConfiguration.ImportConfiguration protoConfig = GenomicsDBImportConfiguration.ImportConfiguration.newBuilder()
                .addAllColumnPartitions(partitions)
                .setSizePerColumnPartition(this.vcfBufferSizePerSample)
                .setFailIfUpdating(!incremental)
                .setSegmentSize(this.segmentSize)
                .setConsolidateTiledbArrayAfterLoad(this.doConsolidation.booleanValue())
                .setEnableSharedPosixfsOptimizations(this.sharedPosixFSOptimizations)
                .build();

        final ImportConfig config = new ImportConfig(
                protoConfig,
                this.validateSampleToReaderMap.booleanValue(),
                true,
                i,
                this.mergedHeaderLines,
                this.sampleNameMap.getSampleNameToVcfPath(),
                this.bypassFeatureReader ? null : this::createSampleToReaderMap,
                incremental);
        config.setOutputCallsetmapJsonFile(this.callsetMapJSONFile);
        config.setOutputVidmapJsonFile(this.vidMapJSONFile);
        config.setOutputVcfHeaderFile(this.vcfHeaderFile);
        config.setUseSamplesInOrder(true);
        config.setFunctionToCallOnBatchCompletion(this::logMessageOnBatchCompletion);
        return config;
    }

    /**
     * Runs the tool.
     * <p>
     * When only the interval list of an existing workspace was requested, the intervals are taken
     * from the vid map (unless some are already set), written to file, and the method returns.
     * Otherwise a {@code GenomicsDBImporter} is created with the configured batch size (all samples
     * when {@code batchSize} is 0), optionally told to coalesce contigs into partitions, and the
     * import is executed.
     *
     * @throws UserException    if the importer cannot be initialized because of an I/O error
     * @throws GATKException    if the importer rejects its arguments with an {@link IllegalArgumentException}
     * @throws RuntimeException the underlying failure of an import task, unwrapped from the
     *                          {@link CompletionException} that carried it
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public void traverse() {
        if (this.getIntervalsFromExistingWorkspace.booleanValue()) {
            if (this.intervals == null || this.intervals.isEmpty()) {
                this.intervals = generateIntervalListFromVidMap();
            }
            writeIntervalListToFile();
            return;
        }
        this.progressMeter.setRecordsBetweenTimeChecks(1L);
        try {
            GenomicsDBImporter genomicsDBImporter = new GenomicsDBImporter(createImportConfig(this.batchSize == 0 ? this.sampleNameMap.getNumSamples() : this.batchSize));
            GATKGenomicsDBUtils.updateImportProtobufVidMapping(genomicsDBImporter);
            if (this.mergeContigsIntoNumPartitions != 0) {
                if (this.doIncrementalImport.booleanValue()) {
                    this.logger.warn("genomicsdb-update-workspace-path was set, so ignoring merge-contigs-into-num-partitions. When updating workspaces, GenomicsDBImport must use the same partition boundaries/intervals as the original import");
                } else {
                    assertIntervalsCoverEntireContigs(genomicsDBImporter, this.intervals);
                    genomicsDBImporter.coalesceContigsIntoNumPartitions(this.mergeContigsIntoNumPartitions);
                }
            }
            genomicsDBImporter.executeImport(this.maxNumIntervalsToImportInParallel);
        } catch (IOException e) {
            throw new UserException("Error initializing GenomicsDBImporter", e);
        } catch (IllegalArgumentException e2) {
            throw new GATKException("Null feature reader found in sampleNameMap file: " + this.sampleNameMapFile, e2);
        } catch (CompletionException e3) {
            // Surface the real failure instead of dropping it: a swallowed exception here would
            // let a failed import return normally and be reported as a success.
            final Throwable cause = e3.getCause();
            if (cause instanceof RuntimeException) {
                throw (RuntimeException) cause;
            }
            throw e3;
        }
    }

    /**
     * Logs a completion message that depends on the mode the tool ran in: interval-list
     * generation, an un-batched import ({@code batchSize == 0}), or a batched import.
     *
     * @return always {@code Boolean.TRUE}
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public Object onTraversalSuccess() {
        final String message;
        if (this.getIntervalsFromExistingWorkspace.booleanValue()) {
            message = "Interval list generated!";
        } else {
            message = this.batchSize == 0
                    ? "Import completed!"
                    : "Import of all batches to GenomicsDB completed!";
        }
        this.logger.info(message);
        return Boolean.TRUE;
    }

    /**
     * Opens the feature readers of one batch concurrently on the input-preload executor.
     * <p>
     * Samples are taken in the iteration order of {@code sortedMap}, starting at index {@code i2}
     * and covering at most {@code i} entries. Each reader is wrapped in an
     * {@code InitializedQueryWrapper} constructed with the first interval of {@code intervals}.
     *
     * @param sortedMap sample name to VCF URI
     * @param i         maximum number of samples in the batch
     * @param i2        index of the first sample of the batch
     * @return sample name to initialized reader for the batch, sorted by sample name
     * @throws UserException.CouldNotReadInputFile if a reader cannot be created, or if waiting for
     *                                             one is interrupted or fails
     */
    private SortedMap<String, FeatureReader<VariantContext>> getFeatureReadersInParallel(SortedMap<String, URI> sortedMap, int i, int i2) {
        final SortedMap<String, FeatureReader<VariantContext>> readers = new TreeMap<>();
        this.logger.info("Starting batch input file preload");
        // Insertion-ordered so that results are awaited in submission order.
        final Map<String, java.util.concurrent.Future<FeatureReader<VariantContext>>> pending = new LinkedHashMap<>();
        final List<String> sampleNames = new ArrayList<>(sortedMap.keySet());
        for (int index = i2; index < sortedMap.size() && index < i2 + i; index++) {
            final String sampleName = sampleNames.get(index);
            pending.put(sampleName, this.inputPreloadExecutorService.submit(() -> {
                final Path path = IOUtils.getPath(sortedMap.get(sampleName).toString());
                try {
                    return new InitializedQueryWrapper(getReaderFromPath(path, this.sampleNameMap.getVCFIndexForSampleAsPath(sampleName)), this.intervals.get(0));
                } catch (IOException e) {
                    throw new UserException.CouldNotReadInputFile("Couldn't read file: " + path.toUri(), e);
                }
            }));
        }
        for (final Map.Entry<String, java.util.concurrent.Future<FeatureReader<VariantContext>>> entry : pending.entrySet()) {
            try {
                readers.put(entry.getKey(), entry.getValue().get());
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can still observe it.
                Thread.currentThread().interrupt();
                throw new UserException.CouldNotReadInputFile("Failure while waiting for FeatureReader to initialize ", e);
            } catch (ExecutionException e) {
                throw new UserException.CouldNotReadInputFile("Failure while waiting for FeatureReader to initialize ", e);
            }
        }
        this.logger.info("Finished batch preload");
        this.logger.info("Importing batch " + this.batchCount + " with " + readers.size() + " samples");
        return readers;
    }

    /**
     * Opens the feature readers of one batch one after another on the calling thread.
     * <p>
     * Samples are taken in the key iteration order of {@code map}, starting at index {@code i2}
     * and covering at most {@code i} entries.
     *
     * @param map sample name to VCF URI
     * @param i   maximum number of samples in the batch
     * @param i2  index of the first sample of the batch
     * @return sample name to reader for the batch, sorted by sample name
     */
    private SortedMap<String, FeatureReader<VariantContext>> getFeatureReadersSerially(Map<String, URI> map, int i, int i2) {
        final SortedMap<String, FeatureReader<VariantContext>> readers = new TreeMap<>();
        final List<String> sampleNames = new ArrayList<>(map.keySet());
        int index = i2;
        while (index < map.size() && index < i2 + i) {
            final String sampleName = sampleNames.get(index);
            final Path vcfPath = IOUtils.getPath(map.get(sampleName).toString());
            final Path indexPath = this.sampleNameMap.getVCFIndexForSampleAsPath(sampleName);
            readers.put(sampleName, getReaderFromPath(vcfPath, indexPath));
            index++;
        }
        this.logger.info("Importing batch " + this.batchCount + " with " + readers.size() + " samples");
        return readers;
    }

    /**
     * Opens a VCF feature reader for the given file and wraps it so that every iterator it hands
     * out rejects records flagged by {@code GATKVariantContextUtils.isUnmixedMnpIgnoringNonRef}.
     *
     * @param path  location of the VCF
     * @param path2 location of the VCF's index, or {@code null} to pass no explicit index
     * @return a reader delegating to the underlying reader, with MNP-checking iterators
     * @throws UserException if the underlying reader cannot be created
     */
    private FeatureReader<VariantContext> getReaderFromPath(final Path path, Path path2) {
        final String uri = path.toAbsolutePath().toUri().toString();
        final String indexUri = path2 == null ? null : path2.toAbsolutePath().toUri().toString();
        try {
            final FeatureReader<VariantContext> featureReader = AbstractFeatureReader.getFeatureReader(uri, indexUri, new VCFCodec(), true, BucketUtils.getPrefetchingWrapper(this.cloudPrefetchBuffer), BucketUtils.getPrefetchingWrapper(this.cloudIndexPrefetchBuffer));
            // Anonymous reader that delegates everything to featureReader but wraps each returned
            // iterator in a NoMnpIterator.
            return new FeatureReader<VariantContext>() {

                /** Iterator that throws as soon as it encounters an MNP record. */
                class NoMnpIterator implements CloseableTribbleIterator<VariantContext> {
                    private final CloseableTribbleIterator<VariantContext> inner;

                    NoMnpIterator(CloseableTribbleIterator<VariantContext> closeableTribbleIterator) {
                        this.inner = closeableTribbleIterator;
                    }

                    @Override
                    public void close() {
                        this.inner.close();
                    }

                    @Override
                    public Iterator<VariantContext> iterator() {
                        return this;
                    }

                    @Override
                    public boolean hasNext() {
                        return this.inner.hasNext();
                    }

                    // Must be named next() to implement Iterator; under any other name the
                    // MNP check would never run for callers iterating through the interface.
                    @Override
                    public VariantContext next() {
                        if (!hasNext()) {
                            throw new NoSuchElementException();
                        }
                        final VariantContext variantContext = this.inner.next();
                        if (GATKVariantContextUtils.isUnmixedMnpIgnoringNonRef(variantContext)) {
                            throw new UserException.BadInput(String.format("GenomicsDBImport does not support GVCFs with MNPs. MNP found at %1s:%2d in VCF %3s", variantContext.getContig(), Integer.valueOf(variantContext.getStart()), path.toAbsolutePath()));
                        }
                        return variantContext;
                    }
                }

                @Override
                public void close() throws IOException {
                    featureReader.close();
                }

                @Override
                public List<String> getSequenceNames() {
                    return featureReader.getSequenceNames();
                }

                @Override
                public Object getHeader() {
                    return featureReader.getHeader();
                }

                @Override
                public boolean isQueryable() {
                    return featureReader.isQueryable();
                }

                @Override
                public CloseableTribbleIterator<VariantContext> query(Locatable locatable) throws IOException {
                    return new NoMnpIterator(featureReader.query(locatable));
                }

                @Override
                public CloseableTribbleIterator<VariantContext> query(String str, int i, int i2) throws IOException {
                    return new NoMnpIterator(featureReader.query(str, i, i2));
                }

                @Override
                public CloseableTribbleIterator<VariantContext> iterator() throws IOException {
                    return new NoMnpIterator(featureReader.iterator());
                }
            };
        } catch (TribbleException e) {
            throw new UserException("Failed to create reader from " + uri + " because of the following error:\n\t" + e.getMessage(), e);
        }
    }

    /**
     * Creates the workspace (optionally overwriting it) or validates an existing one, depending on
     * the overwrite and incremental-import flags.
     * <p>
     * Outcomes, keyed on the status returned by {@code GenomicsDBUtils.createTileDBWorkspace}:
     * <ul>
     *   <li>{@code -1}: fails, reporting that the path exists and is not a directory;</li>
     *   <li>any other negative value: fails with a generic creation error;</li>
     *   <li>{@code 1} without overwrite: accepted only for an incremental import, otherwise fails
     *       reporting that the workspace already exists;</li>
     *   <li>anything else: accepted unless an incremental import was requested, since that
     *       requires an existing workspace.</li>
     * </ul>
     *
     * @return the absolute path of the workspace
     * @throws UnableToCreateGenomicsDBWorkspace if the workspace cannot be created or already exists
     * @throws UserException                     if an incremental import has no existing workspace
     */
    private String overwriteCreateOrCheckWorkspace() {
        final String absolutePath = GATKGenomicsDBUtils.genomicsDBGetAbsolutePath(this.workspace);
        final int status = GenomicsDBUtils.createTileDBWorkspace(absolutePath, this.overwriteExistingWorkspace.booleanValue());
        if (status == -1) {
            throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + this.workspace + " already exists and is not a directory");
        }
        if (status < 0) {
            throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + this.workspace);
        }

        final boolean reusedExisting = !this.overwriteExistingWorkspace.booleanValue() && status == 1;
        final boolean incremental = this.doIncrementalImport.booleanValue();
        if (!reusedExisting && incremental) {
            throw new UserException("We require an existing valid workspace when incremental import is set");
        }
        if (reusedExisting && !incremental) {
            throw new UnableToCreateGenomicsDBWorkspace("Error creating GenomicsDB workspace: " + this.workspace + " already exists");
        }
        return absolutePath;
    }

    /**
     * Populates {@code intervals}.
     * <p>
     * When the tool operates on an existing workspace (interval-list generation or incremental
     * import), the intervals are always rebuilt from the workspace's array names, and any
     * intervals given on the command line are ignored with a warning. Otherwise the command-line
     * intervals are resolved against the best available sequence dictionary and, if requested,
     * replaced by their spanning intervals.
     *
     * @throws UserException if no intervals were given for a fresh import, or if intervals were
     *                       given but no sequence dictionary is available
     */
    void initializeIntervals() {
        if (!this.intervalArgumentCollection.intervalsSpecified()) {
            if (!this.getIntervalsFromExistingWorkspace.booleanValue() && !this.doIncrementalImport.booleanValue()) {
                throw new UserException("No intervals specified");
            }
            this.intervals = generateIntervalListFromWorkspace();
            return;
        }
        if (this.getIntervalsFromExistingWorkspace.booleanValue() || this.doIncrementalImport.booleanValue()) {
            this.logger.warn("genomicsdb-update-workspace-path was set, so ignoring specified intervals. The tool will use the intervals specified by the initial import");
            this.intervals = generateIntervalListFromWorkspace();
            return;
        }
        final SAMSequenceDictionary dictionary = getBestAvailableSequenceDictionary();
        if (dictionary == null) {
            throw new UserException("We require at least one input source that has a sequence dictionary (reference or reads) when intervals are specified");
        }
        final List<SimpleInterval> requested = this.intervalArgumentCollection.getIntervals(dictionary);
        // Above this many un-merged intervals the user is warned about import performance.
        final int warningThreshold = 100;
        if (!this.mergeInputIntervals && requested.size() > warningThreshold) {
            this.logger.warn(String.format("A large number of intervals were specified. Using more than %d intervals in a single import is not recommended and can cause performance to suffer. If GVCF data only exists within those intervals, performance can be improved by aggregating intervals with the merge-input-intervals argument.", warningThreshold));
        }
        this.intervals = this.mergeInputIntervals ? IntervalUtils.getSpanningIntervals(requested, dictionary) : requested;
    }

    /**
     * Stops the input-preload executor, if one was created, by calling {@code shutdownNow()} on it.
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool, org.broadinstitute.hellbender.cmdline.CommandLineProgram
    public void onShutdown() {
        if (this.inputPreloadExecutorService == null) {
            return;
        }
        this.inputPreloadExecutorService.shutdownNow();
    }

    /**
     * Returns the sequence dictionary stored in {@code mergedHeaderSequenceDictionary} when it is
     * set, and otherwise defers to the superclass.
     *
     * @return the merged-header dictionary if non-null, else the superclass's result
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public SAMSequenceDictionary getBestAvailableSequenceDictionary() {
        final SAMSequenceDictionary fromMergedHeaders = this.mergedHeaderSequenceDictionary;
        if (fromMergedHeaders != null) {
            return fromMergedHeaders;
        }
        return super.getBestAvailableSequenceDictionary();
    }
}
