package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import java.io.File;
import java.util.List;
import java.util.Random;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Triple;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.random.RandomGeneratorFactory;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData;
import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType;
import picard.cmdline.programgroups.VariantFilteringProgramGroup;

/**
 * Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files.
 *
 * <p>During the first traversal pass (pass index 0), every extracted record is split into data that carry
 * at least one resource label and data that carry none. Labeled data are accumulated in the inherited
 * {@code data} collection and written to the output VCF. If
 * {@value #MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME} is non-zero, unlabeled data are additionally
 * subsampled into a fixed-capacity reservoir and written to a separate {@code .unlabeled.annot.hdf5} file.</p>
 */
@CommandLineProgramProperties(summary = "Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files.", oneLineSummary = "Extracts site-level variant annotations, labels, and other metadata from a VCF file to HDF5 files", programGroup = VariantFilteringProgramGroup.class)
@DocumentedFeature
@BetaFeature
public final class ExtractVariantAnnotations extends LabeledVariantAnnotationsWalker {
    public static final String MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME = "maximum-number-of-unlabeled-variants";
    public static final String RESERVOIR_SAMPLING_RANDOM_SEED_LONG_NAME = "reservoir-sampling-random-seed";
    public static final String UNLABELED_TAG = ".unlabeled";

    /** Random source for reservoir sampling; seeded from {@link #reservoirSamplingRandomSeed} at traversal start. */
    private RandomGenerator rng;

    /** Reservoir of sampled unlabeled records, or {@code null} when unlabeled extraction is disabled. */
    private LabeledVariantAnnotationsData unlabeledDataReservoir;

    @Argument(fullName = MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME, doc = "Maximum number of unlabeled variants to extract. If greater than zero, reservoir sampling will be used to randomly sample this number of sites from input sites that are not present in the specified resources. Choice of this number should be guided by considerations for training the model in TrainVariantAnnotationsModel; users may wish to choose a number that is comparable to the expected size of the labeled training set or that is compatible with available memory resources. Note that in allele-specific mode, this argument limits the number of variant records, rather than the number of alleles.", minValue = 0.0d)
    private int maximumNumberOfUnlabeledVariants = 0;

    @Argument(fullName = RESERVOIR_SAMPLING_RANDOM_SEED_LONG_NAME, doc = "Random seed to use for reservoir sampling of unlabeled variants.")
    private int reservoirSamplingRandomSeed = 0;

    /** Number of unlabeled records offered to the reservoir so far, i.e. the 0-based index of the next one. */
    private int unlabeledIndex = 0;

    /**
     * Warns when no training or calibration resource label was supplied, then initializes the random
     * generator and, if requested, the unlabeled-data reservoir.
     */
    @Override
    public void afterOnTraversalStart() {
        if (!this.resourceLabels.contains(LabeledVariantAnnotationsData.TRAINING_LABEL)) {
            this.logger.warn("No training set found! If you are using the downstream TrainVariantAnnotationsModel and ScoreVariantAnnotations tools, provide sets of known polymorphic loci marked with the training=true feature input tag. For example, --resource:hapmap,training=true hapmap.vcf");
        }
        if (!this.resourceLabels.contains(LabeledVariantAnnotationsData.CALIBRATION_LABEL)) {
            this.logger.warn("No calibration set found! If you are using the downstream TrainVariantAnnotationsModel and ScoreVariantAnnotations tools and wish to convert scores to sensitivity with respect to a calibration set of variants, provide sets of known polymorphic loci marked with the calibration=true feature input tag. For example, --resource:hapmap,calibration=true hapmap.vcf");
        }
        this.rng = RandomGeneratorFactory.createRandomGenerator(new Random(this.reservoirSamplingRandomSeed));
        // A maximum of zero disables unlabeled extraction entirely; the null reservoir is the "disabled" marker
        // checked everywhere else in this class.
        this.unlabeledDataReservoir = this.maximumNumberOfUnlabeledVariants == 0
                ? null
                : new LabeledVariantAnnotationsData(this.annotationNames, this.resourceLabels, this.useASAnnotations, this.maximumNumberOfUnlabeledVariants);
    }

    /**
     * Processes one variant record. Only the first pass ({@code i == 0}) does any work.
     *
     * @param variantContext   the record being visited
     * @param readsContext     unused
     * @param referenceContext unused
     * @param featureContext   passed through to metadata extraction
     * @param i                0-based index of the current traversal pass
     */
    @Override
    protected void nthPassApply(VariantContext variantContext, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext, int i) {
        if (i != 0) {
            return;
        }
        final List<Triple<List<Allele>, VariantType, TreeSet<String>>> metadata =
                extractVariantMetadata(variantContext, featureContext, this.unlabeledDataReservoir != null);
        if (metadata.isEmpty()) {
            return;
        }

        // Data with a non-empty label set go to the labeled collection and the output VCF.
        final List<Triple<List<Allele>, VariantType, TreeSet<String>>> labeledMetadata = metadata.stream()
                .filter(datum -> !datum.getRight().isEmpty())
                .collect(Collectors.toList());
        if (!labeledMetadata.isEmpty()) {
            addExtractedVariantToData(this.data, variantContext, labeledMetadata);
            writeExtractedVariantToVCF(variantContext, labeledMetadata);
        }

        if (this.unlabeledDataReservoir == null) {
            return;
        }
        // Data with an empty label set are candidates for the unlabeled reservoir.
        final List<Triple<List<Allele>, VariantType, TreeSet<String>>> unlabeledMetadata = metadata.stream()
                .filter(datum -> datum.getRight().isEmpty())
                .collect(Collectors.toList());
        if (!unlabeledMetadata.isEmpty()) {
            offerToUnlabeledReservoir(variantContext, unlabeledMetadata);
        }
    }

    /**
     * Offers one unlabeled record to the reservoir using Algorithm R.
     *
     * <p>The first {@link #maximumNumberOfUnlabeledVariants} records fill the reservoir. After that, the record
     * with 0-based index {@code n} draws a slot uniformly from {@code [0, n]} and replaces that slot only if it
     * lies inside the reservoir, so it is kept with probability {@code k / (n + 1)}.</p>
     */
    private void offerToUnlabeledReservoir(VariantContext variantContext, List<Triple<List<Allele>, VariantType, TreeSet<String>>> unlabeledMetadata) {
        if (this.unlabeledIndex < this.maximumNumberOfUnlabeledVariants) {
            addExtractedVariantToData(this.unlabeledDataReservoir, variantContext, unlabeledMetadata);
        } else {
            // nextInt(bound) is exclusive of bound, so the bound must be unlabeledIndex + 1 to make the draw
            // cover [0, unlabeledIndex]. Using unlabeledIndex itself would always accept the first record
            // seen after the reservoir fills and bias the sample.
            final int slot = this.rng.nextInt(this.unlabeledIndex + 1);
            if (slot < this.maximumNumberOfUnlabeledVariants) {
                setExtractedVariantInData(this.unlabeledDataReservoir, variantContext, unlabeledMetadata, slot);
            }
        }
        this.unlabeledIndex++;
    }

    /**
     * After the first pass, writes the labeled annotations and, if enabled, the unlabeled reservoir to HDF5,
     * clears both collections, and closes the VCF writer if one is open.
     *
     * @param i 0-based index of the pass that just finished
     */
    @Override
    protected void afterNthPass(int i) {
        if (i != 0) {
            return;
        }
        writeAnnotationsToHDF5();
        this.data.clear();
        if (this.unlabeledDataReservoir != null) {
            writeUnlabeledAnnotationsToHDF5();
            this.unlabeledDataReservoir.clear();
        }
        if (this.vcfWriter != null) {
            this.vcfWriter.close();
        }
    }

    /**
     * Logs a completion message.
     *
     * @return always {@code null}
     */
    @Override
    public Object onTraversalSuccess() {
        this.logger.info(String.format("%s complete.", getClass().getSimpleName()));
        return null;
    }

    /**
     * Unpacks per-datum metadata triples into three parallel lists (left, middle, right components) and
     * passes them to {@code set} on the given data collection at position {@code i}.
     */
    private static void setExtractedVariantInData(LabeledVariantAnnotationsData labeledVariantAnnotationsData, VariantContext variantContext, List<Triple<List<Allele>, VariantType, TreeSet<String>>> list, int i) {
        labeledVariantAnnotationsData.set(i, variantContext,
                list.stream().map(Triple::getLeft).collect(Collectors.toList()),
                list.stream().map(Triple::getMiddle).collect(Collectors.toList()),
                list.stream().map(Triple::getRight).collect(Collectors.toList()));
    }

    /**
     * Logs per-type and total counts for the unlabeled reservoir and writes it to
     * {@code <outputPrefix>.unlabeled.annot.hdf5}.
     *
     * @throws GATKException if the reservoir is empty
     */
    private void writeUnlabeledAnnotationsToHDF5() {
        final File outputFile = new File(this.outputPrefix + UNLABELED_TAG + ".annot.hdf5");
        if (this.unlabeledDataReservoir.size() == 0) {
            throw new GATKException(String.format("No unlabeled variants were present in the input VCF. Consider setting the %s argument to 0.", MAXIMUM_NUMBER_OF_UNLABELED_VARIANTS_LONG_NAME));
        }
        for (final VariantType variantType : this.variantTypesToExtract) {
            final long numberOfType = this.unlabeledDataReservoir.getVariantTypeFlat().stream()
                    .filter(type -> type == variantType)
                    .count();
            this.logger.info(String.format("Extracted unlabeled annotations for %d variants of type %s.", numberOfType, variantType));
        }
        this.logger.info(String.format("Extracted unlabeled annotations for %s total records.", this.unlabeledDataReservoir.size()));
        this.logger.info(String.format("Extracted unlabeled annotations for %s total variants.", this.unlabeledDataReservoir.flatSize()));
        this.logger.info("Writing unlabeled annotations...");
        this.unlabeledDataReservoir.writeHDF5(outputFile, this.omitAllelesInHDF5);
        this.logger.info(String.format("Unlabeled annotations and metadata written to %s.", outputFile.getAbsolutePath()));
    }
}
