package org.broadinstitute.hellbender.tools;

import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadWalker;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.BaseUtils;
import org.broadinstitute.hellbender.utils.clipping.ClippingOp;
import org.broadinstitute.hellbender.utils.clipping.ClippingRepresentation;
import org.broadinstitute.hellbender.utils.clipping.ReadClipper;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMFileGATKReadWriter;
import picard.cmdline.programgroups.ReadDataManipulationProgramGroup;

@CommandLineProgramProperties(summary = "Read clipping based on quality, position or sequence matching. This tool provides simple, powerful read clipping capabilities that allow you to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences.", oneLineSummary = "Clip reads in a SAM/BAM/CRAM file", programGroup = ReadDataManipulationProgramGroup.class)
@DocumentedFeature
/* loaded from: input_file:org/broadinstitute/hellbender/tools/ClipReads.class */
public final class ClipReads extends ReadWalker {
    // ---- Command-line argument names (long / short form) ----

    /** Long name of the optional statistics-output argument. */
    public static final String OUTPUT_STATISTICS_LONG_NAME = "output-statistics";
    /** Short name of the optional statistics-output argument. */
    public static final String OUTPUT_STATISTICS_SHORT_NAME = "os";
    /** Long name of the quality-trimming threshold argument. */
    public static final String Q_TRIMMING_THRESHOLD_LONG_NAME = "q-trimming-threshold";
    /** Short name of the quality-trimming threshold argument. */
    public static final String Q_TRIMMING_THRESHOLD_SHORT_NAME = "QT";
    /** Long name of the machine-cycle range argument. */
    public static final String CYCLES_TO_TRIM_LONG_NAME = "cycles-to-trim";
    /** Short name of the machine-cycle range argument. */
    public static final String CYCLES_TO_TRIM_SHORT_NAME = "CT";
    /** Long name of the FASTA-of-sequences-to-clip argument. */
    public static final String CLIP_SEQUENCES_FILE_LONG_NAME = "clip-sequences-file";
    /** Short name of the FASTA-of-sequences-to-clip argument. */
    public static final String CLIP_SEQUENCES_FILE_SHORT_NAME = "XF";
    /** Long name of the literal sequence-to-clip argument. */
    public static final String CLIP_SEQUENCE_LONG_NAME = "clip-sequence";
    /** Short name of the literal sequence-to-clip argument. */
    public static final String CLIP_SEQUENCE_SHORT_NAME = "X";
    /** Long name of the clipping-representation argument. */
    public static final String CLIP_REPRESENTATION_LONG_NAME = "clip-representation";
    /** Short name of the clipping-representation argument. */
    public static final String CLIP_REPRESENTATION_SHORT_NAME = "CR";
    /** Long name of the single-read restriction argument. */
    public static final String READ_LONG_NAME = "read";
    /** Short name of the single-read restriction argument (same text as the long name). */
    public static final String READ_SHORT_NAME = "read";

    /** Destination file handed to the SAM writer that receives every processed read. */
    @Argument(doc = "BAM output file", shortName = "O", fullName = "output")
    File OUTPUT;

    /** Writer for the clipped reads; created in {@code onTraversalStart}, closed in {@code closeTool}. */
    private SAMFileGATKReadWriter outputBam;

    /** Tool-wide clipping totals, summed over every read passed to {@code accumulate}. */
    private ClippingData accumulator;

    /** Stream for the final statistics report, or {@code null} when no statistics file was requested. */
    private PrintStream outputStats;

    private final Logger logger = LogManager.getLogger(ClipReads.class);

    /** Optional file that receives the textual statistics report; {@code null} disables the report. */
    @Argument(fullName = OUTPUT_STATISTICS_LONG_NAME, shortName = OUTPUT_STATISTICS_SHORT_NAME, doc = "File to output statistics", optional = true)
    File STATSOUTPUT = null;

    /** Quality threshold for the Q-score clipper; the default of -1 means no threshold was supplied. */
    @Argument(fullName = Q_TRIMMING_THRESHOLD_LONG_NAME, shortName = Q_TRIMMING_THRESHOLD_SHORT_NAME, doc = "If provided, the Q-score clipper will be applied", optional = true)
    int qTrimmingThreshold = -1;

    /**
     * Raw cycle specification: comma-separated {@code start-end} ranges, 1-based and inclusive
     * (they are converted to 0-based offsets when parsed in {@code onTraversalStart}).
     */
    @Argument(fullName = CYCLES_TO_TRIM_LONG_NAME, shortName = CYCLES_TO_TRIM_SHORT_NAME, doc = "String indicating machine cycles to clip from the reads", optional = true)
    String cyclesToClipArg = null;

    /** Path of a FASTA file whose records are each registered as a sequence to clip. */
    @Argument(fullName = CLIP_SEQUENCES_FILE_LONG_NAME, shortName = CLIP_SEQUENCES_FILE_SHORT_NAME, doc = "Remove sequences within reads matching the sequences in this FASTA file", optional = true)
    String clipSequenceFile = null;

    /** Sequences given directly on the command line; each is registered under the name {@code CMDLINE-<n>}. */
    @Argument(fullName = CLIP_SEQUENCE_LONG_NAME, shortName = CLIP_SEQUENCE_SHORT_NAME, doc = "Remove sequences within reads matching this sequence", optional = true)
    List<String> clipSequencesArgs = null;

    /** How clipped bases are represented in the output reads. */
    @Argument(fullName = CLIP_REPRESENTATION_LONG_NAME, shortName = CLIP_REPRESENTATION_SHORT_NAME, doc = "How should we actually clip the bases?", optional = true)
    ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;

    /** When non-null, only reads whose name equals this value are processed; all others are skipped. */
    @Argument(fullName = READ_LONG_NAME, shortName = READ_SHORT_NAME, doc = "", optional = true)
    String onlyDoRead = null;

    /** Every sequence to search for, gathered from the command line and the FASTA file. */
    private final List<SeqToClip> sequencesToClip = new ArrayList<>();

    /** Parsed 0-based inclusive cycle ranges, or {@code null} when no cycle clipping was requested. */
    private List<Pair<Integer, Integer>> cyclesToClip = null;

    /* loaded from: input_file:org/broadinstitute/hellbender/tools/ClipReads$ClippingData.class */
    /**
     * Mutable tally of examined and clipped reads/bases, broken down by the kind of clipper
     * that removed them and, for sequence clipping, by the sequence that matched.
     *
     * <p>Instances are used both per read and as the tool-wide accumulator; per-read tallies
     * are folded into the accumulator with {@link #addData(ClippingData)}.
     */
    public static final class ClippingData {
        public long nTotalReads = 0;
        public long nTotalBases = 0;
        public long nClippedReads = 0;
        public long nClippedBases = 0;
        public long nQClippedBases = 0;
        public long nRangeClippedBases = 0;
        public long nSeqClippedBases = 0;

        /** Clipped-base total per clip sequence, keyed by the forward sequence text (sorted). */
        SortedMap<String, Long> seqClipCounts = new TreeMap<>();

        /**
         * Creates an empty tally with a zero entry for every known clip sequence, so that
         * sequences which never match still appear in the report.
         *
         * @param list the sequences that may be clipped; must not be {@code null}
         */
        public ClippingData(List<SeqToClip> list) {
            for (SeqToClip seqToClip : list) {
                this.seqClipCounts.put(seqToClip.seq, 0L);
            }
        }

        /**
         * Records bases removed by the quality-score clipper.
         *
         * @param i number of bases clipped
         */
        public void incNQClippedBases(int i) {
            this.nQClippedBases += i;
            this.nClippedBases += i;
        }

        /**
         * Records bases removed by the cycle-range clipper.
         *
         * @param i number of bases clipped
         */
        public void incNRangeClippedBases(int i) {
            this.nRangeClippedBases += i;
            this.nClippedBases += i;
        }

        /**
         * Records bases removed because they matched a clip sequence.
         *
         * @param str the forward text of the sequence that matched
         * @param i   number of bases clipped
         */
        public void incSeqClippedBases(String str, int i) {
            this.nSeqClippedBases += i;
            this.nClippedBases += i;
            // merge() also copes with a sequence that was not pre-registered by the constructor;
            // the previous get()+put() form threw a NullPointerException on unboxing in that case.
            this.seqClipCounts.merge(str, (long) i, Long::sum);
        }

        /**
         * Adds every counter of {@code clippingData} to this tally.
         *
         * @param clippingData the tally to fold in; it is not modified
         */
        public void addData(ClippingData clippingData) {
            this.nTotalReads += clippingData.nTotalReads;
            this.nTotalBases += clippingData.nTotalBases;
            this.nClippedReads += clippingData.nClippedReads;
            this.nClippedBases += clippingData.nClippedBases;
            this.nQClippedBases += clippingData.nQClippedBases;
            this.nRangeClippedBases += clippingData.nRangeClippedBases;
            this.nSeqClippedBases += clippingData.nSeqClippedBases;
            for (Map.Entry<String, Long> entry : clippingData.seqClipCounts.entrySet()) {
                this.seqClipCounts.merge(entry.getKey(), entry.getValue(), Long::sum);
            }
        }

        /**
         * Renders the human-readable statistics report, framed by two 80-character dashed rules.
         *
         * <p>The two percentage lines are computed with floating-point division, so they print
         * {@code NaN} when no reads (or no bases) were examined.
         *
         * @return the multi-line report
         */
        @Override
        public String toString() {
            final String rule = StringUtils.repeat('-', 80) + "\n";
            final double percentClippedReads = (100.0d * this.nClippedReads) / this.nTotalReads;
            final double percentClippedBases = (100.0d * this.nClippedBases) / this.nTotalBases;

            final StringBuilder sb = new StringBuilder();
            sb.append(rule);
            sb.append(String.format("Number of examined reads              %d%n", this.nTotalReads));
            sb.append(String.format("Number of clipped reads               %d%n", this.nClippedReads));
            sb.append(String.format("Percent of clipped reads              %.2f%n", percentClippedReads));
            sb.append(String.format("Number of examined bases              %d%n", this.nTotalBases));
            sb.append(String.format("Number of clipped bases               %d%n", this.nClippedBases));
            sb.append(String.format("Percent of clipped bases              %.2f%n", percentClippedBases));
            sb.append(String.format("Number of quality-score clipped bases %d%n", this.nQClippedBases));
            sb.append(String.format("Number of range clipped bases         %d%n", this.nRangeClippedBases));
            sb.append(String.format("Number of sequence clipped bases      %d%n", this.nSeqClippedBases));
            // NOTE(review): the label says "clip sites" but the value accumulated by
            // incSeqClippedBases is a base count; the text is left unchanged to keep the report stable.
            for (Map.Entry<String, Long> entry : this.seqClipCounts.entrySet()) {
                sb.append(String.format("  %8d clip sites matching %s%n", entry.getValue(), entry.getKey()));
            }
            sb.append(rule);
            return sb.toString();
        }
    }

    /* loaded from: input_file:org/broadinstitute/hellbender/tools/ClipReads$ReadClipperWithData.class */
    /**
     * A {@link ReadClipper} that additionally carries the {@link ClippingData} tally
     * describing what has been clipped from its read.
     */
    public static final class ReadClipperWithData extends ReadClipper {
        /** Tally associated with the read being clipped. */
        private ClippingData data;

        /**
         * Wraps a read and starts it off with an empty tally.
         *
         * @param gATKRead the read to clip
         * @param list     the clip sequences used to seed the tally's per-sequence counts
         */
        public ReadClipperWithData(GATKRead gATKRead, List<SeqToClip> list) {
            super(gATKRead);
            data = new ClippingData(list);
        }

        /** @return the tally currently attached to this clipper */
        public ClippingData getData() {
            return data;
        }

        /**
         * Replaces the attached tally.
         *
         * @param clippingData the tally to attach from now on
         */
        public void setData(ClippingData clippingData) {
            data = clippingData;
        }

        /**
         * Folds another tally into the attached one.
         *
         * @param clippingData the counts to add
         */
        public void addData(ClippingData clippingData) {
            data.addData(clippingData);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/broadinstitute/hellbender/tools/ClipReads$SeqToClip.class */
    /**
     * A named sequence to search for in reads, held in forward and reverse-complement
     * form together with a case-insensitive pattern for each orientation.
     *
     * <p>The sequence text is handed to {@link Pattern#compile(String, int)} as-is, so it is
     * interpreted as a regular expression rather than as a quoted literal.
     */
    public static final class SeqToClip {
        /** Label used when logging this sequence. */
        String name;
        /** Forward sequence text. */
        String seq;
        /** Reverse complement of {@link #seq}. */
        String revSeq;
        /** Case-insensitive pattern built from {@link #seq}. */
        Pattern fwdPat;
        /** Case-insensitive pattern built from {@link #revSeq}. */
        Pattern revPat;

        /**
         * @param str  label for the sequence
         * @param bArr the forward sequence bases
         */
        public SeqToClip(String str, byte[] bArr) {
            name = str;

            // Forward orientation.
            seq = new String(bArr);
            fwdPat = Pattern.compile(seq, Pattern.CASE_INSENSITIVE);

            // Reverse-complement orientation.
            final byte[] reverseComplement = BaseUtils.simpleReverseComplement(bArr);
            revSeq = new String(reverseComplement);
            revPat = Pattern.compile(revSeq, Pattern.CASE_INSENSITIVE);
        }
    }

    /**
     * Prepares the tool before the first read is visited: registers the sequences to clip
     * (command line first, then the FASTA file), parses the cycle ranges, opens the read
     * writer, creates the accumulator and opens the optional statistics stream.
     *
     * @throws RuntimeException if the cycles-to-trim argument is malformed
     * @throws UserException.CouldNotCreateOutputFile if the statistics file cannot be opened
     */
    @Override
    public void onTraversalStart() {
        if (this.qTrimmingThreshold >= 0) {
            this.logger.info(String.format("Creating Q-score clipper with threshold %d", this.qTrimmingThreshold));
        }

        loadClipSequences();

        if (this.cyclesToClipArg != null) {
            this.cyclesToClip = parseCyclesToClip(this.cyclesToClipArg);
        }

        // The flag passed to createSAMWriter is true only for the representations listed here
        // (WRITE_NS, WRITE_NS_Q0S, WRITE_Q0S).
        // NOTE(review): presumably this flag means "output is still sorted" - confirm against createSAMWriter.
        final boolean writerFlag = EnumSet.of(
                ClippingRepresentation.WRITE_NS,
                ClippingRepresentation.WRITE_NS_Q0S,
                ClippingRepresentation.WRITE_Q0S).contains(this.clippingRepresentation);
        this.outputBam = createSAMWriter(this.OUTPUT, writerFlag);

        this.accumulator = new ClippingData(this.sequencesToClip);

        try {
            this.outputStats = this.STATSOUTPUT == null ? null : new PrintStream(this.STATSOUTPUT);
        } catch (FileNotFoundException e) {
            throw new UserException.CouldNotCreateOutputFile(this.STATSOUTPUT, e);
        }
    }

    /** Registers every clip sequence given on the command line and in the clip-sequences FASTA file. */
    private void loadClipSequences() {
        if (this.clipSequencesArgs != null) {
            int index = 0;
            for (String sequence : this.clipSequencesArgs) {
                index++;
                addSeqToClip("CMDLINE-" + index, StringUtil.stringToBytes(sequence));
            }
        }
        if (this.clipSequenceFile != null) {
            // try-with-resources: the FASTA reader was previously never closed.
            try (ReferenceSequenceFile fasta = ReferenceSequenceFileFactory.getReferenceSequenceFile(new File(this.clipSequenceFile))) {
                for (ReferenceSequence record = fasta.nextSequence(); record != null; record = fasta.nextSequence()) {
                    addSeqToClip(record.getName(), record.getBases());
                }
            } catch (IOException e) {
                // Only close() can raise this; the sequences are already loaded, so warn and continue.
                this.logger.warn("Could not close clip sequences file " + this.clipSequenceFile, e);
            }
        }
    }

    /**
     * Parses a comma-separated list of 1-based inclusive {@code start-end} ranges into
     * 0-based inclusive pairs.
     *
     * @throws RuntimeException if any range is not two integers with 1 <= start <= end
     */
    private List<Pair<Integer, Integer>> parseCyclesToClip(String cyclesArg) {
        final List<Pair<Integer, Integer>> ranges = new ArrayList<>();
        for (String range : cyclesArg.split(",")) {
            try {
                ranges.add(parseCycleRange(range));
            } catch (IllegalArgumentException e) {
                // Same exception type and message as before, now carrying the precise cause.
                throw new RuntimeException("Badly formatted cyclesToClip argument: " + cyclesArg, e);
            }
        }
        return ranges;
    }

    /** Parses one 1-based {@code start-end} range into a 0-based inclusive pair and logs it. */
    private Pair<Integer, Integer> parseCycleRange(String range) {
        final String[] bounds = range.split("-");
        if (bounds.length < 2) {
            throw new IllegalArgumentException("Expected <start>-<end> but found '" + range + "'");
        }
        // NumberFormatException is an IllegalArgumentException, so the caller handles it too.
        final int start = Integer.parseInt(bounds[0]) - 1;
        final int stop = Integer.parseInt(bounds[1]) - 1;
        if (start < 0) {
            throw new IllegalArgumentException("Cycle start must be at least 1 in '" + range + "'");
        }
        if (stop < start) {
            throw new IllegalArgumentException("Cycle end precedes cycle start in '" + range + "'");
        }
        this.logger.info(String.format("Creating cycle clipper %d-%d", start, stop));
        return new MutablePair<>(start, stop);
    }

    /**
     * Processes one read: applies the quality, cycle and sequence clippers in that order,
     * then writes the result and updates the running statistics.
     *
     * <p>When a single read name was requested, reads with any other name are skipped
     * entirely (neither written nor counted).
     *
     * @param gATKRead         the read to clip
     * @param referenceContext unused
     * @param featureContext   unused
     */
    @Override
    public void apply(GATKRead gATKRead, ReferenceContext referenceContext, FeatureContext featureContext) {
        final boolean excludedByName = onlyDoRead != null && !gATKRead.getName().equals(onlyDoRead);
        if (excludedByName) {
            return;
        }

        // For these two representations the soft-clipped bases are reverted before clipping.
        final boolean revertFirst = clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES
                || clippingRepresentation == ClippingRepresentation.REVERT_SOFTCLIPPED_BASES;
        final GATKRead readToClip = revertFirst ? ReadClipper.revertSoftClippedBases(gATKRead) : gATKRead;

        final ReadClipperWithData clipper = new ReadClipperWithData(readToClip, sequencesToClip);
        clipBadQualityScores(clipper);
        clipCycles(clipper);
        clipSequences(clipper);
        accumulate(clipper);
    }

    /**
     * Writes the statistics report, if a statistics file was requested, and returns the totals.
     *
     * @return the tool-wide clipping statistics
     */
    @Override
    public ClippingData onTraversalSuccess() {
        if (this.outputStats != null) {
            // print(), not printf(): the report is finished text, and treating it as a format
            // string would fail or garble output if a clip sequence contains a '%'.
            this.outputStats.print(this.accumulator.toString());
        }
        return this.accumulator;
    }

    /**
     * Releases the tool's outputs: the statistics stream first, then the read writer.
     * Either may be absent, in which case it is skipped.
     */
    @Override
    public void closeTool() {
        final PrintStream stats = outputStats;
        if (stats != null) {
            stats.close();
        }

        final SAMFileGATKReadWriter writer = outputBam;
        if (writer != null) {
            writer.close();
        }
    }

    /**
     * Registers a sequence to clip and logs its forward and reverse-complement forms.
     *
     * @param str  label for the sequence
     * @param bArr the forward sequence bases
     */
    private void addSeqToClip(String str, byte[] bArr) {
        final SeqToClip entry = new SeqToClip(str, bArr);
        sequencesToClip.add(entry);

        final String message = String.format("Creating sequence clipper %s: %s/%s", entry.name, entry.seq, entry.revSeq);
        logger.info(message);
    }

    /**
     * Queues a clipping operation for every occurrence of every registered sequence in the
     * read's bases and records the clipped length against that sequence.
     *
     * <p>Reverse-strand reads are searched with the reverse-complement pattern, all others
     * with the forward pattern.
     *
     * @param readClipperWithData the clipper for the read being processed
     */
    private void clipSequences(ReadClipperWithData readClipperWithData) {
        if (sequencesToClip == null) {
            return;
        }

        final GATKRead read = readClipperWithData.getRead();
        final ClippingData stats = readClipperWithData.getData();

        for (final SeqToClip candidate : sequencesToClip) {
            final Pattern pattern;
            if (read.isReverseStrand()) {
                pattern = candidate.revPat;
            } else {
                pattern = candidate.fwdPat;
            }

            final Matcher hits = pattern.matcher(read.getBasesString());
            while (hits.find()) {
                // Matcher.end() is exclusive; the clipping op takes an inclusive stop.
                final ClippingOp op = new ClippingOp(hits.start(), hits.end() - 1);
                readClipperWithData.addOp(op);
                stats.incSeqClippedBases(candidate.seq, op.getLength());
            }
        }

        readClipperWithData.setData(stats);
    }

    /**
     * Converts an inclusive range into offsets within the read's stored bases.
     *
     * <p>For a reverse-strand read the range is mirrored about the read's length, so
     * {@code [i, i2]} becomes {@code [length - i2 - 1, length - i - 1]}; otherwise it is
     * returned unchanged.
     *
     * @param gATKRead the read the range applies to
     * @param i        inclusive start of the range
     * @param i2       inclusive end of the range
     * @return the (start, end) pair to clip
     */
    private Pair<Integer, Integer> strandAwarePositions(GATKRead gATKRead, int i, int i2) {
        if (!gATKRead.isReverseStrand()) {
            return new MutablePair<>(i, i2);
        }
        final int mirroredStart = (gATKRead.getLength() - i2) - 1;
        final int mirroredEnd = (gATKRead.getLength() - i) - 1;
        return new MutablePair<>(mirroredStart, mirroredEnd);
    }

    /**
     * Queues a clipping operation for each configured cycle range that starts inside the read.
     *
     * <p>A range that starts at or beyond the read's length is ignored; one that runs past
     * the end is truncated to the last base. Ranges are made strand-aware before clipping.
     *
     * @param readClipperWithData the clipper for the read being processed
     */
    private void clipCycles(ReadClipperWithData readClipperWithData) {
        if (cyclesToClip == null) {
            return;
        }

        final GATKRead read = readClipperWithData.getRead();
        final ClippingData stats = readClipperWithData.getData();

        for (final Pair<Integer, Integer> cycleRange : cyclesToClip) {
            final int firstCycle = cycleRange.getLeft();
            final int requestedLastCycle = cycleRange.getRight();
            if (firstCycle >= read.getLength()) {
                continue;
            }

            final int lastCycle = Math.min(requestedLastCycle, read.getLength() - 1);
            final Pair<Integer, Integer> span = strandAwarePositions(read, firstCycle, lastCycle);
            final ClippingOp op = new ClippingOp(span.getLeft(), span.getRight());
            readClipperWithData.addOp(op);
            stats.incNRangeClippedBases(op.getLength());
        }

        readClipperWithData.setData(stats);
    }

    /**
     * Queues a clipping operation that removes the low-quality tail of the read, when a
     * quality threshold was supplied.
     *
     * <p>The bases are walked starting from the last stored base for forward reads, or from
     * the first stored base for reverse-strand reads, keeping a running sum of
     * {@code threshold - quality}. The clip point is the position where that sum reaches its
     * largest non-negative value (ties go to the position reached later in the walk).
     * Everything from the starting end up to and including the clip point is clipped.
     * If the sum never becomes non-negative, nothing is clipped.
     *
     * @param readClipperWithData the clipper for the read being processed
     */
    private void clipBadQualityScores(ReadClipperWithData readClipperWithData) {
        if (this.qTrimmingThreshold < 0) {
            // No threshold supplied (default -1). With non-negative qualities the running sum
            // below can never reach zero, so skipping changes no result; it also avoids
            // indexing the quality array for reads when Q-clipping was not requested.
            // NOTE(review): presumably getBaseQualities() may be shorter than the read when no
            // qualities are stored - confirm against GATKRead.
            return;
        }

        final GATKRead read = readClipperWithData.getRead();
        final ClippingData data = readClipperWithData.getData();
        final int readLength = read.getLength();
        final byte[] baseQualities = read.getBaseQualities();
        final boolean reverseStrand = read.isReverseStrand();

        int runningSum = 0;
        int bestSum = -1;
        int clipPoint = -1; // -1 means "do not clip"
        for (int step = readLength - 1; step >= 0; step--) {
            final int baseIndex = reverseStrand ? (readLength - step) - 1 : step;
            runningSum += this.qTrimmingThreshold - baseQualities[baseIndex];
            if (runningSum >= 0 && runningSum >= bestSum) {
                bestSum = runningSum;
                clipPoint = baseIndex;
            }
        }

        if (clipPoint != -1) {
            final int clipStart = reverseStrand ? 0 : clipPoint;
            final int clipStop = reverseStrand ? clipPoint : readLength - 1;
            final ClippingOp clippingOp = new ClippingOp(clipStart, clipStop);
            readClipperWithData.addOp(clippingOp);
            data.incNQClippedBases(clippingOp.getLength());
        }
        readClipperWithData.setData(data);
    }

    /**
     * Applies the queued clipping operations, writes the resulting read and updates the
     * tool-wide totals. A {@code null} clipper is ignored.
     *
     * <p>Every read counts towards the examined totals; the per-read clipping tally is folded
     * in only when the clipper reports that the read was actually clipped.
     *
     * @param readClipperWithData the clipper holding the read and its queued operations
     */
    private void accumulate(ReadClipperWithData readClipperWithData) {
        if (readClipperWithData != null) {
            outputBam.addRead(readClipperWithData.clipRead(clippingRepresentation));

            accumulator.nTotalReads += 1;
            accumulator.nTotalBases += readClipperWithData.getRead().getLength();

            if (readClipperWithData.wasClipped()) {
                accumulator.nClippedReads += 1;
                accumulator.addData(readClipperWithData.getData());
            }
        }
    }
}
