package org.broadinstitute.hellbender.tools.walkers.vqsr.scalable;

import com.google.common.collect.Sets;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.collections4.ListUtils;
import org.apache.commons.lang3.tuple.Triple;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.FeatureInput;
import org.broadinstitute.hellbender.engine.MultiplePassVariantWalker;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.LabeledVariantAnnotationsData;
import org.broadinstitute.hellbender.tools.walkers.vqsr.scalable.data.VariantType;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.hellbender.utils.variant.VcfUtils;
import org.broadinstitute.hellbender.utils.variant.writers.ShardingVCFWriter;

/* loaded from: input_file:org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker.class */
public abstract class LabeledVariantAnnotationsWalker extends MultiplePassVariantWalker {
    /** Long name of the argument that selects the variant types to extract. */
    public static final String MODE_LONG_NAME = "mode";
    /** Long name of the argument listing input-VCF filters to ignore. */
    public static final String IGNORE_FILTER_LONG_NAME = "ignore-filter";
    /** Long name of the flag that makes the walker ignore all input-VCF filters. */
    public static final String IGNORE_ALL_FILTERS_LONG_NAME = "ignore-all-filters";
    /** Long name of the flag that disables the assumption that unfiltered resource records are polymorphic. */
    public static final String DO_NOT_TRUST_ALL_POLYMORPHIC_LONG_NAME = "do-not-trust-all-polymorphic";
    /** Long name of the argument selecting the {@link ResourceMatchingStrategy}. */
    public static final String RESOURCE_MATCHING_STRATEGY_LONG_NAME = "resource-matching-strategy";
    /** Long name of the flag that omits alleles from the HDF5 output. */
    public static final String OMIT_ALLELES_IN_HDF5_LONG_NAME = "omit-alleles-in-hdf5";
    /** Long name of the flag that disables compression of the VCF output. */
    public static final String DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME = "do-not-gzip-vcf-output";
    /** Suffix of the annotations HDF5 file; the same text is appended to the output prefix in {@code onTraversalStart}. */
    public static final String ANNOTATIONS_HDF5_SUFFIX = ".annot.hdf5";
    /** Description template for the per-label INFO header lines; {@code %s} is replaced by the resource label. */
    public static final String RESOURCE_LABEL_INFO_HEADER_LINE_FORMAT_STRING = "This site was labeled as %s according to resources";

    /** Prefix shared by the annotations HDF5 file and the VCF written by this tool. */
    @Argument(fullName = "output", shortName = "O", doc = "Prefix for output filenames.")
    String outputPrefix;
    /** Variant types to extract; built from {@link #variantTypesToExtractList} in {@code onTraversalStart}. */
    Set<VariantType> variantTypesToExtract;
    /** Whether any requested annotation is allele-specific; set in {@code onTraversalStart}. */
    boolean useASAnnotations;
    /** Destination of the annotations HDF5 file; set in {@code onTraversalStart}. */
    File outputAnnotationsFile;
    /** Writer for the output VCF; created and given its header in {@code onTraversalStart}. */
    VariantContextWriter vcfWriter;
    /** Accumulator for extracted annotations and labels; created in {@code onTraversalStart}. */
    LabeledVariantAnnotationsData data;

    /** Resource tracks; tag attributes whose value is "true" are treated as labels. */
    @Argument(fullName = StandardArgumentDefinitions.RESOURCE_LONG_NAME, doc = "Resource VCFs used to label extracted variants.", optional = true)
    private List<FeatureInput<VariantContext>> resources = new ArrayList<>(10);

    /** Names of the annotations requested by the user. */
    @Argument(fullName = StandardArgumentDefinitions.ANNOTATION_LONG_NAME, shortName = StandardArgumentDefinitions.ANNOTATION_SHORT_NAME, doc = "Names of the annotations to extract. Note that a requested annotation may in fact not be present at any extraction site; NaN missing values will be generated for such annotations.", minElements = 1)
    List<String> annotationNames = new ArrayList<>();

    /** Raw argument value for the variant types; defaults to SNP and INDEL. */
    @Argument(fullName = MODE_LONG_NAME, doc = "Variant types to extract.", minElements = 1)
    private List<VariantType> variantTypesToExtractList = new ArrayList<>(Arrays.asList(VariantType.SNP, VariantType.INDEL));

    /** Raw argument value for the filters to ignore; copied into {@link #ignoreInputFilterSet} at start-up. */
    @Argument(fullName = IGNORE_FILTER_LONG_NAME, doc = "Ignore the specified filter(s) in the input VCF.", optional = true)
    private List<String> ignoreInputFilters = new ArrayList<>();

    @Argument(fullName = IGNORE_ALL_FILTERS_LONG_NAME, doc = "If true, ignore all filters in the input VCF.", optional = true)
    private boolean ignoreAllFilters = false;

    @Argument(fullName = DO_NOT_TRUST_ALL_POLYMORPHIC_LONG_NAME, doc = "If true, do not trust that unfiltered records in the resources contain only polymorphic sites. This may increase runtime if the resources are not sites-only VCFs.", optional = true)
    private boolean doNotTrustAllPolymorphic = false;

    /** Matching strategy; overridden to START_POSITION_AND_MINIMAL_REPRESENTATION in allele-specific mode. */
    @Argument(fullName = RESOURCE_MATCHING_STRATEGY_LONG_NAME, doc = "The strategy to use for determining whether an input variant is present in a resource in non-allele-specific mode. START_POSITION: Start positions of input and resource variants must match. START_POSITION_AND_GIVEN_REPRESENTATION: The intersection of the sets of input and resource alleles (in their given representations) must also be non-empty. START_POSITION_AND_MINIMAL_REPRESENTATION: The intersection of the sets of input and resource alleles (after converting alleles to their minimal representations) must also be non-empty. This argument has no effect in allele-specific mode, in which the minimal representations of the input and resource alleles must match.", optional = true)
    private ResourceMatchingStrategy resourceMatchingStrategy = ResourceMatchingStrategy.START_POSITION;

    @Argument(fullName = OMIT_ALLELES_IN_HDF5_LONG_NAME, doc = "If true, omit alleles in output HDF5 files in order to decrease file sizes.", optional = true)
    boolean omitAllelesInHDF5 = false;

    @Argument(fullName = DO_NOT_GZIP_VCF_OUTPUT_LONG_NAME, doc = "If true, VCF output will not be compressed.", optional = true)
    boolean doNotGZIPVCFOutput = false;
    /** Sorted copy of {@link #ignoreInputFilters}, used for the filter check in {@code extractVariantMetadata}. */
    private final Set<String> ignoreInputFilterSet = new TreeSet<>();
    /** Sorted union of the labels found on all resource tracks; filled in {@code onTraversalStart}. */
    TreeSet<String> resourceLabels = new TreeSet<>();

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/broadinstitute/hellbender/tools/walkers/vqsr/scalable/LabeledVariantAnnotationsWalker$ResourceMatchingStrategy.class */
    /**
     * Strategies for deciding whether an input variant is present in a resource.
     * Descriptions follow the documentation of the corresponding command-line argument.
     */
    public enum ResourceMatchingStrategy {
        /** Start positions of input and resource variants must match. */
        START_POSITION,
        /** Additionally, the input and resource alleles, in their given representations, must intersect. */
        START_POSITION_AND_GIVEN_REPRESENTATION,
        /** Additionally, the input and resource alleles, after conversion to minimal representations, must intersect. */
        START_POSITION_AND_MINIMAL_REPRESENTATION
    }

    /**
     * Prepares the walker before traversal: resolves arguments into their working
     * representations, validates the two output files, gathers the labels declared on
     * the resource tracks, decides whether allele-specific mode is in effect, creates the
     * annotation accumulator, opens the output VCF and writes its header, and finally
     * invokes {@link #afterOnTraversalStart()}.
     *
     * @throws UserException.BadInput if a resource track declares the reserved label "snp"
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public void onTraversalStart() {
        this.ignoreInputFilterSet.addAll(this.ignoreInputFilters);
        this.variantTypesToExtract = EnumSet.copyOf(this.variantTypesToExtractList);

        this.outputAnnotationsFile = new File(this.outputPrefix + ANNOTATIONS_HDF5_SUFFIX);
        final String vcfSuffix = this.doNotGZIPVCFOutput ? ".vcf" : ShardingVCFWriter.SHARD_INDEX_SUFFIX;
        final File outputVCFFile = new File(this.outputPrefix + vcfSuffix);
        CopyNumberArgumentValidationUtils.validateOutputFiles(this.outputAnnotationsFile, outputVCFFile);

        for (final FeatureInput<VariantContext> resource : this.resources) {
            // A tag attribute counts as a label only when its value is exactly "true".
            // Collecting into a TreeSet already yields sorted, de-duplicated labels.
            final TreeSet<String> trackLabels = resource.getTagAttributes().entrySet().stream()
                    .filter(tag -> tag.getValue().equals("true"))
                    .map(tag -> tag.getKey())
                    .collect(Collectors.toCollection(TreeSet::new));
            this.resourceLabels.addAll(trackLabels);
            this.logger.info(String.format("Found %s track: labels = %s", resource.getName(), trackLabels));
        }
        // NOTE(review): the values returned by intern() are discarded, so the set keeps its
        // original String instances; the call is retained only to leave behavior untouched.
        this.resourceLabels.forEach(String::intern);

        final String reservedLabel = "snp";
        if (this.resourceLabels.contains(reservedLabel)) {
            throw new UserException.BadInput(String.format("The resource label \"%s\" is reserved for labeling variant types.", reservedLabel));
        }

        this.useASAnnotations = isAlleleSpecificAnnotationRequested();
        if (this.useASAnnotations && this.resourceMatchingStrategy != ResourceMatchingStrategy.START_POSITION_AND_MINIMAL_REPRESENTATION) {
            this.logger.warn(String.format("The %s argument is ignored when allele-specific annotations are requested. The START_POSITION_AND_MINIMAL_REPRESENTATION strategy will be used.", RESOURCE_MATCHING_STRATEGY_LONG_NAME));
            this.resourceMatchingStrategy = ResourceMatchingStrategy.START_POSITION_AND_MINIMAL_REPRESENTATION;
        }

        this.data = new LabeledVariantAnnotationsData(this.annotationNames, this.resourceLabels, this.useASAnnotations);
        this.logger.info(String.format("Using %d annotations %s...", this.data.getSortedAnnotationNames().size(), this.data.getSortedAnnotationNames()));

        this.vcfWriter = createVCFWriter(outputVCFFile);
        this.vcfWriter.writeHeader(constructVCFHeader(this.data.getSortedLabels()));
        afterOnTraversalStart();
    }

    /**
     * Hook invoked as the final step of {@link #onTraversalStart()}, after the output VCF
     * header has been written. This implementation does nothing; it can be overridden to
     * perform additional start-up work.
     */
    public void afterOnTraversalStart() {
    }

    /**
     * Reports the number of traversal passes requested by this walker.
     *
     * @return always 1
     */
    @Override // org.broadinstitute.hellbender.engine.MultiplePassVariantWalker
    protected int numberOfPasses() {
        return 1;
    }

    /**
     * Called when traversal completes successfully. This implementation performs no work.
     *
     * @return always {@code null}
     */
    @Override // org.broadinstitute.hellbender.engine.GATKTool
    public Object onTraversalSuccess() {
        return null;
    }

    /**
     * Determines whether at least one requested annotation is declared in the input VCF
     * header with a per-alternate-allele count ({@link VCFHeaderLineCount#A}).
     *
     * <p>Annotations for which the header lookup returns no INFO line are treated as not
     * allele-specific rather than triggering a {@link NullPointerException}.
     *
     * @return {@code true} if any requested annotation has count type A in the input header
     */
    private boolean isAlleleSpecificAnnotationRequested() {
        final VCFHeader inputHeader = getHeaderForVariants();
        return this.annotationNames.stream()
                .map(inputHeader::getInfoHeaderLine)
                // The lookup yields null for IDs missing from the header; skip those.
                .filter(Objects::nonNull)
                .anyMatch(infoLine -> infoLine.getCountType() == VCFHeaderLineCount.A);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Splits the metadata triples into three parallel lists (alternate alleles, variant
     * type, labels) and adds them, together with the variant, to the given accumulator.
     *
     * @param labeledVariantAnnotationsData accumulator receiving the variant
     * @param variantContext                the extracted variant
     * @param list                          metadata triples, one per extracted datum
     */
    public static void addExtractedVariantToData(LabeledVariantAnnotationsData labeledVariantAnnotationsData, VariantContext variantContext, List<Triple<List<Allele>, VariantType, TreeSet<String>>> list) {
        final List<List<Allele>> altAllelesPerDatum = new ArrayList<>(list.size());
        final List<VariantType> typePerDatum = new ArrayList<>(list.size());
        final List<TreeSet<String>> labelsPerDatum = new ArrayList<>(list.size());
        for (final Triple<List<Allele>, VariantType, TreeSet<String>> datum : list) {
            altAllelesPerDatum.add(datum.getLeft());
            typePerDatum.add(datum.getMiddle());
            labelsPerDatum.add(datum.getRight());
        }
        labeledVariantAnnotationsData.add(variantContext, altAllelesPerDatum, typePerDatum, labelsPerDatum);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Flattens the metadata triples into a single list of alternate alleles and a single
     * set of labels, then delegates to the three-argument overload to write the record.
     *
     * @param variantContext the extracted variant
     * @param list           metadata triples for the variant
     */
    public void writeExtractedVariantToVCF(VariantContext variantContext, List<Triple<List<Allele>, VariantType, TreeSet<String>>> list) {
        final List<Allele> extractedAltAlleles = new ArrayList<>();
        for (final Triple<List<Allele>, VariantType, TreeSet<String>> datum : list) {
            extractedAltAlleles.addAll(datum.getLeft());
        }
        final Set<String> allLabels = list.stream()
                .map(Triple::getRight)
                .flatMap(Collection::stream)
                .collect(Collectors.toSet());
        writeExtractedVariantToVCF(variantContext, extractedAltAlleles, allLabels);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Logs per-type, per-label and total extraction counts and writes the accumulated
     * annotations to {@link #outputAnnotationsFile}. If nothing was extracted, only a
     * warning is logged and no file is written.
     */
    public void writeAnnotationsToHDF5() {
        if (this.data.size() == 0) {
            this.logger.warn("Found no input variants for extraction. This may be because the specified genomic region contains no input variants of the requested type(s) or, if extracting training labels, because none of the input variants were contained in the resource VCFs or no resource VCFs were provided. The annotations HDF5 file will not be generated.");
            return;
        }

        for (final VariantType requestedType : this.variantTypesToExtract) {
            final int numOfType = Collections.frequency(this.data.getVariantTypeFlat(), requestedType);
            this.logger.info(String.format("Extracted annotations for %d variants of type %s.", numOfType, requestedType));
        }

        for (final String label : this.data.getSortedLabels()) {
            int numWithLabel = 0;
            for (final boolean hasLabel : this.data.isLabelFlat(label)) {
                if (hasLabel) {
                    numWithLabel++;
                }
            }
            this.logger.info(String.format("Extracted annotations for %d variants labeled as %s.", numWithLabel, label));
        }

        final int numRecords = this.data.size();
        this.logger.info(String.format("Extracted annotations for %s total records.", numRecords));
        final int numVariants = this.data.flatSize();
        this.logger.info(String.format("Extracted annotations for %s total variants.", numVariants));

        this.logger.info("Writing annotations...");
        this.data.writeHDF5(this.outputAnnotationsFile, this.omitAllelesInHDF5);
        this.logger.info(String.format("Annotations and metadata written to %s.", this.outputAnnotationsFile.getAbsolutePath()));
    }

    /**
     * Writes a record to the output VCF that carries the variant's source, contig, start
     * and end, its reference allele followed by the given alternate alleles, and one
     * attribute set to {@code true} for every label.
     *
     * @param variantContext the extracted variant providing location and reference allele
     * @param list           alternate alleles to emit
     * @param set            labels to attach as attributes
     */
    void writeExtractedVariantToVCF(VariantContext variantContext, List<Allele> list, Set<String> set) {
        // Reference allele first, then the alternate alleles in their given order.
        final List<Allele> refThenAlts = new ArrayList<>(1 + list.size());
        refThenAlts.add(variantContext.getReference());
        refThenAlts.addAll(list);

        final VariantContextBuilder recordBuilder = new VariantContextBuilder(
                variantContext.getSource(),
                variantContext.getContig(),
                variantContext.getStart(),
                variantContext.getEnd(),
                refThenAlts);
        for (final String label : set) {
            recordBuilder.attribute(label, true);
        }
        this.vcfWriter.add(recordBuilder.make());
    }

    /**
     * Builds the header of the output VCF: one Flag INFO line per label, the PASS filter
     * line, contig lines when a sequence dictionary is available, and the default tool
     * header lines.
     *
     * @param list labels for which Flag INFO lines are declared
     * @return the assembled header
     */
    VCFHeader constructVCFHeader(List<String> list) {
        Set<VCFHeaderLine> headerLines = new TreeSet<>();
        for (final String label : list) {
            // Flag-typed INFO fields carry no value, so the VCF specification requires Number=0.
            headerLines.add(new VCFInfoHeaderLine(label, 0, VCFHeaderLineType.Flag, String.format(RESOURCE_LABEL_INFO_HEADER_LINE_FORMAT_STRING, label)));
        }
        headerLines.add(GATKVCFHeaderLines.getFilterLine("PASS"));

        final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
        if (sequenceDictionary != null) {
            headerLines = VcfUtils.updateHeaderContigLines(headerLines, this.referenceArguments.getReferencePath(), sequenceDictionary, true);
        }
        headerLines.addAll(getDefaultToolVCFHeaderLines());
        return new VCFHeader(headerLines);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Produces the metadata triples (alternate alleles, variant type, matching resource
     * labels) for a variant.
     *
     * <p>A null variant, or a filtered variant whose filters are not all ignored, yields
     * an empty list. In allele-specific mode one triple is produced per alternate allele
     * that is not a spanning deletion and whose type was requested; otherwise at most one
     * triple is produced for the whole site.
     *
     * @param variantContext the input variant, may be null
     * @param featureContext context used to look up overlapping resource variants
     * @param z              if {@code true}, entries without any matching label are kept;
     *                       if {@code false}, they are dropped
     * @return the metadata triples, possibly empty
     */
    public final List<Triple<List<Allele>, VariantType, TreeSet<String>>> extractVariantMetadata(VariantContext variantContext, FeatureContext featureContext, boolean z) {
        if (variantContext == null) {
            return Collections.emptyList();
        }
        final boolean passesFilterCheck = this.ignoreAllFilters
                || variantContext.isNotFiltered()
                || this.ignoreInputFilterSet.containsAll(variantContext.getFilters());
        if (!passesFilterCheck) {
            return Collections.emptyList();
        }

        if (!this.useASAnnotations) {
            // Site-level extraction: all alternate alleles share one type and one label set.
            final VariantType siteType = VariantType.getVariantType(variantContext);
            if (!this.variantTypesToExtract.contains(siteType)) {
                return Collections.emptyList();
            }
            final TreeSet<String> siteLabels = findMatchingResourceLabels(variantContext, null, featureContext);
            if (!z && siteLabels.isEmpty()) {
                return Collections.emptyList();
            }
            return Collections.singletonList(Triple.of(variantContext.getAlternateAlleles(), siteType, siteLabels));
        }

        // Allele-specific extraction: each alternate allele is typed and labeled on its own.
        final List<Triple<List<Allele>, VariantType, TreeSet<String>>> perAlleleMetadata = new ArrayList<>();
        for (final Allele altAllele : variantContext.getAlternateAlleles()) {
            if (GATKVCFConstants.isSpanningDeletion(altAllele)) {
                continue;
            }
            final VariantType altType = VariantType.getAlleleSpecificVariantType(variantContext, altAllele);
            if (!this.variantTypesToExtract.contains(altType)) {
                continue;
            }
            final TreeSet<String> altLabels = findMatchingResourceLabels(variantContext, altAllele, featureContext);
            if (z || !altLabels.isEmpty()) {
                perAlleleMetadata.add(Triple.of(Collections.singletonList(altAllele), altType, altLabels));
            }
        }
        return perAlleleMetadata;
    }

    /**
     * Collects the labels of every resource track that contains a variant matching the
     * input variant at the start of the current interval.
     *
     * <p>For each resource, all variants returned for that start position are tested with
     * {@code isMatchingVariant}; on each match, the track's tag attributes whose value is
     * "true" are added as labels.
     *
     * @param variantContext the input variant
     * @param allele         the alternate allele to match, or null to consider the whole site
     * @param featureContext context used to look up resource variants
     * @return the sorted set of matching labels, possibly empty
     */
    private TreeSet<String> findMatchingResourceLabels(VariantContext variantContext, Allele allele, FeatureContext featureContext) {
        final TreeSet<String> matchedLabels = new TreeSet<>();
        for (final FeatureInput<VariantContext> resource : this.resources) {
            // The interval is queried inside the loop so that it is never touched when no resources are configured.
            final List<VariantContext> resourceVariants = featureContext.getValues(resource, featureContext.getInterval().getStart());
            for (final VariantContext resourceVariant : resourceVariants) {
                if (isMatchingVariant(variantContext, resourceVariant, allele, !this.doNotTrustAllPolymorphic, this.resourceMatchingStrategy)) {
                    resource.getTagAttributes().entrySet().stream()
                            .filter(tag -> tag.getValue().equals("true"))
                            .map(tag -> tag.getKey())
                            .forEach(matchedLabels::add);
                }
            }
        }
        return matchedLabels;
    }

    /**
     * Decides whether a resource variant counts as a match for an input variant.
     *
     * <p>The resource variant must be non-null, unfiltered, variant, and pass
     * {@code VariantType.checkVariantType} against the input. Unless {@code z} is set, a
     * resource variant with genotypes must also be polymorphic in its samples. The
     * remaining check depends on the matching strategy.
     *
     * @param variantContext           the input variant
     * @param variantContext2          the candidate resource variant, may be null
     * @param allele                   the input alternate allele to match under the minimal
     *                                 representation strategy, or null to try all of them
     * @param z                        if {@code true}, the polymorphism check is skipped
     * @param resourceMatchingStrategy the strategy to apply
     * @return {@code true} if the resource variant matches
     * @throws IllegalStateException if the minimal-representation comparison fails with an
     *                               {@link IllegalStateException}; the original is attached as the cause
     */
    private static boolean isMatchingVariant(VariantContext variantContext, VariantContext variantContext2, Allele allele, boolean z, ResourceMatchingStrategy resourceMatchingStrategy) {
        if (variantContext2 == null) {
            return false;
        }
        final boolean isUsableResourceVariant = variantContext2.isNotFiltered()
                && variantContext2.isVariant()
                && VariantType.checkVariantType(variantContext, variantContext2);
        if (!isUsableResourceVariant) {
            return false;
        }
        final boolean mustVerifyPolymorphism = !z && variantContext2.hasGenotypes();
        if (mustVerifyPolymorphism && !variantContext2.isPolymorphicInSamples()) {
            return false;
        }

        switch (resourceMatchingStrategy) {
            case START_POSITION:
                return true;
            case START_POSITION_AND_GIVEN_REPRESENTATION: {
                final Set<Allele> inputAlts = Sets.newHashSet(variantContext.getAlternateAlleles());
                final Set<Allele> resourceAlts = Sets.newHashSet(variantContext2.getAlternateAlleles());
                final boolean noSharedAllele = Sets.intersection(inputAlts, resourceAlts).isEmpty();
                return !noSharedAllele;
            }
            case START_POSITION_AND_MINIMAL_REPRESENTATION:
                try {
                    if (allele != null) {
                        return GATKVariantContextUtils.isAlleleInList(variantContext.getReference(), allele, variantContext2.getReference(), variantContext2.getAlternateAlleles());
                    }
                    // No specific allele given: any input alternate allele may provide the match.
                    for (final Allele inputAlt : variantContext.getAlternateAlleles()) {
                        if (GATKVariantContextUtils.isAlleleInList(variantContext.getReference(), inputAlt, variantContext2.getReference(), variantContext2.getAlternateAlleles())) {
                            return true;
                        }
                    }
                    return false;
                } catch (IllegalStateException e) {
                    throw new IllegalStateException("Reference allele mismatch at position " + variantContext2.getContig() + ":" + variantContext2.getStart() + ": ", e);
                }
            default:
                throw new GATKException.ShouldNeverReachHereException("Unknown ResourceMatchingStrategy.");
        }
    }
}
