package org.broadinstitute.hellbender.tools.walkers.sv;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.annotation.Strand;
import htsjdk.tribble.bed.FullBEDFeature;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import java.io.File;
import java.util.Iterator;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.programgroups.StructuralVariantDiscoveryProgramGroup;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.FeatureDataSource;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.walkers.sv.SVAnnotateEngine;
import org.broadinstitute.hellbender.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SVIntervalTree;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfGeneFeature;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfTranscriptFeature;

/**
 * Variant walker that adds functional-consequence, gene-overlap and noncoding-element-overlap
 * annotations to a structural variant VCF.
 *
 * <p>At start-up the optional protein-coding GTF and noncoding BED inputs are loaded into in-memory
 * interval trees keyed by the contig indices of the input VCF's sequence dictionary. Each variant
 * is then handed to {@link SVAnnotateEngine} and the annotated record is written to the output VCF.
 */
@CommandLineProgramProperties(
        summary = "Adds predicted functional consequence, gene overlap, and noncoding element overlap annotations to SV VCF from GATK-SV pipeline. Input files are an SV VCF, a GTF file containing primary or canonical transcripts, and a BED file containing noncoding elements. Output file is an annotated SV VCF.",
        oneLineSummary = "Adds gene overlap and variant consequence annotations to SV VCF from GATK-SV pipeline",
        programGroup = StructuralVariantDiscoveryProgramGroup.class)
@DocumentedFeature
public final class SVAnnotate extends VariantWalker {
    public static final String PROTEIN_CODING_GTF_NAME = "protein-coding-gtf";
    public static final String PROMOTER_WINDOW_NAME = "promoter-window-length";
    public static final String NON_CODING_BED_NAME = "non-coding-bed";
    public static final String MAX_BND_LEN_NAME = "max-breakend-as-cnv-length";

    @Argument(fullName = PROTEIN_CODING_GTF_NAME, doc = "Protein-coding GTF file containing primary or canonical transcripts (1-2 transcripts per gene only)", optional = true)
    private File proteinCodingGTFFile;

    @Argument(fullName = NON_CODING_BED_NAME, doc = "BED file (with header) containing non-coding features. Columns: chrom, start, end, name, score (.), strand", optional = true)
    private File nonCodingBedFile;

    // Populated in onTraversalStart(); each tree stays null when its input file is not supplied.
    private SVIntervalTree<String> nonCodingIntervalTree;
    private SVAnnotateEngine.GTFIntervalTreesContainer gtfIntervalTrees;
    private SAMSequenceDictionary sequenceDictionary;
    private SVAnnotateEngine svAnnotateEngine;

    @Argument(fullName = "output", shortName = "O", doc = "Output file (if not provided, defaults to STDOUT)", common = false, optional = true)
    private GATKPath outputFile = null;

    @Argument(fullName = PROMOTER_WINDOW_NAME, doc = "Promoter window (bp) upstream of TSS. Promoters will be inferred as the {window} bases upstream of the TSS. Default: 1000", minValue = 0.0d, optional = true)
    private int promoterWindow = 1000;

    @Argument(fullName = MAX_BND_LEN_NAME, doc = "Length in bp. Provide to annotate BNDs smaller than this size as deletions or duplications if applicable. Recommended value: < 2000000", minValue = 0.0d, optional = true)
    private int maxBreakendLen = -1;

    private VariantContextWriter vcfWriter = null;

    /**
     * Loads the optional GTF and BED inputs into interval trees, opens the output writer, writes
     * the header extended with the annotation INFO lines, and creates the annotation engine.
     */
    @Override
    public void onTraversalStart() {
        final VCFHeader header = getHeaderForVariants();
        sequenceDictionary = header.getSequenceDictionary();

        // The trees store the transcript objects / element names themselves, so each data source
        // is closed as soon as its tree has been built instead of being left open.
        if (proteinCodingGTFFile != null) {
            try (FeatureDataSource<GencodeGtfGeneFeature> gtfSource = new FeatureDataSource<>(proteinCodingGTFFile)) {
                gtfIntervalTrees = buildIntervalTreesFromGTF(gtfSource, sequenceDictionary, promoterWindow);
            }
        }
        if (nonCodingBedFile != null) {
            try (FeatureDataSource<FullBEDFeature> bedSource = new FeatureDataSource<>(nonCodingBedFile)) {
                nonCodingIntervalTree = buildIntervalTreeFromBED(bedSource, sequenceDictionary);
            }
        }

        vcfWriter = createVCFWriter(outputFile);
        updateAndWriteHeader(header);
        svAnnotateEngine = new SVAnnotateEngine(gtfIntervalTrees, nonCodingIntervalTree, sequenceDictionary, maxBreakendLen);
    }

    /**
     * Reports whether a transcript is annotated on the negative strand.
     *
     * @param transcript transcript feature to inspect
     * @return true if the transcript's genomic strand equals the strand decoded from "-"
     */
    @VisibleForTesting
    protected static boolean isNegativeStrand(final GencodeGtfTranscriptFeature transcript) {
        return transcript.getGenomicStrand().equals(Strand.decode("-"));
    }

    /**
     * Returns the transcription start site (TSS) coordinate of a transcript.
     *
     * @param transcript transcript feature to inspect
     * @return the transcript end for negative-strand transcripts, otherwise the transcript start
     */
    @VisibleForTesting
    public static int getTranscriptionStartSite(final GencodeGtfTranscriptFeature transcript) {
        return isNegativeStrand(transcript) ? transcript.getEnd() : transcript.getStart();
    }

    /**
     * Computes the promoter interval of a transcript: the {@code promoterWindow} bases immediately
     * upstream of the TSS, excluding the TSS itself. On the forward strand the left edge is clamped
     * to position 1.
     *
     * <p>NOTE(review): when the window is 0, or a forward-strand TSS sits at position 1, the
     * computed start exceeds the computed end; SimpleInterval is expected to reject that with an
     * IllegalArgumentException. {@link #buildIntervalTreesFromGTF} avoids calling this method in
     * those cases.
     *
     * @param transcript     transcript whose promoter is wanted
     * @param promoterWindow promoter length in bp
     * @return promoter interval on the transcript's contig
     */
    @VisibleForTesting
    protected static SimpleInterval getPromoterInterval(final GencodeGtfTranscriptFeature transcript, final int promoterWindow) {
        final int tss = getTranscriptionStartSite(transcript);
        final boolean negativeStrand = isNegativeStrand(transcript);
        // Upstream means higher coordinates on the negative strand and lower ones on the forward strand.
        final int promoterLeft = negativeStrand ? tss + 1 : Math.max(tss - promoterWindow, 1);
        final int promoterRight = negativeStrand ? tss + promoterWindow : tss - 1;
        return new SimpleInterval(transcript.getContig(), promoterLeft, promoterRight);
    }

    /**
     * Tells whether {@link #getPromoterInterval} would yield an interval with start <= end.
     * That holds for any positive window, except for a forward-strand TSS at position 1 where no
     * upstream base exists.
     */
    private static boolean hasNonEmptyPromoter(final GencodeGtfTranscriptFeature transcript, final int promoterWindow) {
        if (promoterWindow <= 0) {
            return false;
        }
        return isNegativeStrand(transcript) || getTranscriptionStartSite(transcript) > 1;
    }

    /**
     * Builds the transcript, promoter and TSS interval trees from a GTF feature source.
     *
     * <p>Transcripts whose contig has a negative index in the sequence dictionary are skipped.
     * A transcript with an empty promoter (see {@link #hasNonEmptyPromoter}) is still added to the
     * transcript and TSS trees but contributes no promoter entry, so a promoter window of 0 no
     * longer aborts start-up.
     *
     * @param proteinCodingGTFSource source of gene features; not closed by this method
     * @param sequenceDictionary     dictionary used to translate contig names to indices
     * @param promoterWindow         promoter length in bp upstream of each TSS
     * @return container holding the three trees (transcripts, promoters, TSSs, in that order)
     */
    @VisibleForTesting
    protected static SVAnnotateEngine.GTFIntervalTreesContainer buildIntervalTreesFromGTF(
            final FeatureDataSource<GencodeGtfGeneFeature> proteinCodingGTFSource,
            final SAMSequenceDictionary sequenceDictionary,
            final int promoterWindow) {
        final SVIntervalTree<GencodeGtfTranscriptFeature> transcriptTree = new SVIntervalTree<>();
        final SVIntervalTree<String> promoterTree = new SVIntervalTree<>();
        final SVIntervalTree<String> tssTree = new SVIntervalTree<>();

        for (final GencodeGtfGeneFeature gene : proteinCodingGTFSource) {
            for (final GencodeGtfTranscriptFeature transcript : gene.getTranscripts()) {
                final int contigIndex = sequenceDictionary.getSequenceIndex(transcript.getContig());
                if (contigIndex < 0) {
                    continue; // contig not resolved by the dictionary: ignore this transcript
                }
                transcriptTree.put(SVUtils.locatableToSVInterval(transcript, sequenceDictionary), transcript);

                final String geneName = transcript.getGeneName();
                final int tss = getTranscriptionStartSite(transcript);
                // One-base entry for the TSS, expressed as [tss, tss + 1).
                tssTree.put(new SVInterval(contigIndex, tss, tss + 1), geneName);

                if (hasNonEmptyPromoter(transcript, promoterWindow)) {
                    final SimpleInterval promoter = getPromoterInterval(transcript, promoterWindow);
                    promoterTree.put(SVUtils.locatableToSVInterval(promoter, sequenceDictionary), geneName);
                }
            }
        }
        return new SVAnnotateEngine.GTFIntervalTreesContainer(transcriptTree, promoterTree, tssTree);
    }

    /**
     * Builds an interval tree mapping each BED feature's interval to its name.
     *
     * @param BEDSource          source of BED features; not closed by this method
     * @param sequenceDictionary dictionary used to translate contig names to indices
     * @return tree of noncoding element names; features that cannot be converted are omitted
     */
    @VisibleForTesting
    protected static SVIntervalTree<String> buildIntervalTreeFromBED(
            final FeatureDataSource<FullBEDFeature> BEDSource,
            final SAMSequenceDictionary sequenceDictionary) {
        final SVIntervalTree<String> bedIntervalTree = new SVIntervalTree<>();
        for (final FullBEDFeature feature : BEDSource) {
            try {
                bedIntervalTree.put(SVUtils.locatableToSVInterval(feature, sequenceDictionary), feature.getName());
            } catch (final IllegalArgumentException ignored) {
                // Deliberate best-effort: a feature that cannot be converted against this
                // dictionary (presumably because its contig is absent from the VCF header)
                // is skipped rather than failing the whole run.
            }
        }
        return bedIntervalTree;
    }

    /** Adds one INFO header line of type String with an unbounded value count. */
    private static void addUnboundedStringInfo(final VCFHeader header, final String key, final String description) {
        header.addMetaDataLine(new VCFInfoHeaderLine(key, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, description));
    }

    /**
     * Declares every INFO key this tool may emit on the given header.
     *
     * @param header header to extend in place
     */
    private void addAnnotationInfoKeysToHeader(final VCFHeader header) {
        addUnboundedStringInfo(header, GATKSVVCFConstants.LOF, "Gene(s) on which the SV is predicted to have a loss-of-function effect.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.INT_EXON_DUP, "Gene(s) on which the SV is predicted to result in intragenic exonic duplication without breaking any coding sequences.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.COPY_GAIN, "Gene(s) on which the SV is predicted to have a copy-gain effect.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.TSS_DUP, "Gene(s) for which the SV is predicted to duplicate the transcription start site.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.DUP_PARTIAL, "Gene(s) which are partially overlapped by an SV's duplication, but the transcription start site is not duplicated.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.INTRONIC, "Gene(s) where the SV was found to lie entirely within an intron.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.PARTIAL_EXON_DUP, "Gene(s) where the duplication SV has one breakpoint in the coding sequence.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.INV_SPAN, "Gene(s) which are entirely spanned by an SV's inversion.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.UTR, "Gene(s) for which the SV is predicted to disrupt a UTR.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.MSV_EXON_OVERLAP, "Gene(s) on which the multiallelic SV would be predicted to have a LOF, INTRAGENIC_EXON_DUP, COPY_GAIN, DUP_PARTIAL, TSS_DUP, or PARTIAL_EXON_DUP annotation if the SV were biallelic.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.PROMOTER, "Gene(s) for which the SV is predicted to overlap the promoter region.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.BREAKEND_EXON, "Gene(s) for which the SV breakend is predicted to fall in an exon.");
        // INTERGENIC is the only flag-typed key (zero values).
        header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.INTERGENIC, 0, VCFHeaderLineType.Flag, "SV does not overlap any protein-coding genes."));
        addUnboundedStringInfo(header, GATKSVVCFConstants.NONCODING_SPAN, "Class(es) of noncoding elements spanned by SV.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.NONCODING_BREAKPOINT, "Class(es) of noncoding elements disrupted by SV breakpoint.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.NEAREST_TSS, "Nearest transcription start site to an intergenic variant.");
        addUnboundedStringInfo(header, GATKSVVCFConstants.PARTIAL_DISPERSED_DUP, "Gene(s) overlapped partially by the duplicated interval involved in a dispersed duplication event in a complex SV.");
    }

    /**
     * Adds the annotation INFO lines to the header and writes it to the output VCF.
     *
     * @param header input VCF header; modified in place
     */
    private void updateAndWriteHeader(final VCFHeader header) {
        addAnnotationInfoKeysToHeader(header);
        vcfWriter.writeHeader(header);
    }

    /**
     * Annotates one variant via the engine and writes the result. The reads, reference and
     * feature contexts are not used.
     */
    @Override
    public void apply(final VariantContext variant, final ReadsContext readsContext,
                      final ReferenceContext referenceContext, final FeatureContext featureContext) {
        vcfWriter.add(svAnnotateEngine.createAnnotatedStructuralVariantContext(variant));
    }

    /** Closes the output writer if it was opened. */
    @Override
    public void closeTool() {
        if (vcfWriter != null) {
            vcfWriter.close();
        }
    }
}
