package org.broadinstitute.hellbender.tools.copynumber;

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.stat.descriptive.rank.Percentile;
import org.apache.logging.log4j.Logger;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.ArgumentCollection;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.CommandLineProgram;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.cmdline.argumentcollections.IntervalArgumentCollection;
import org.broadinstitute.hellbender.cmdline.argumentcollections.OptionalIntervalArgumentCollection;
import org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberArgumentValidationUtils;
import org.broadinstitute.hellbender.tools.copynumber.arguments.CopyNumberStandardArgument;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.AnnotatedIntervalCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleCountCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.collections.SimpleIntervalCollection;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.LocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.metadata.SampleLocatableMetadata;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.AnnotatedInterval;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.AnnotationKey;
import org.broadinstitute.hellbender.tools.copynumber.formats.records.annotation.CopyNumberAnnotations;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.io.IOUtils;

@CommandLineProgramProperties(summary = "Filters intervals based on annotations and/or count statistics", oneLineSummary = "Filters intervals based on annotations and/or count statistics", programGroup = CopyNumberProgramGroup.class)
@DocumentedFeature
@BetaFeature
/* loaded from: input_file:org/broadinstitute/hellbender/tools/copynumber/FilterIntervals.class */
public final class FilterIntervals extends CommandLineProgram {
    // Long (full) command-line names of the filter arguments; each constant is used as the
    // fullName of the correspondingly named @Argument field declared in this class.

    // Annotation-based filter bounds.
    public static final String MINIMUM_GC_CONTENT_LONG_NAME = "minimum-gc-content";
    public static final String MAXIMUM_GC_CONTENT_LONG_NAME = "maximum-gc-content";
    public static final String MINIMUM_MAPPABILITY_LONG_NAME = "minimum-mappability";
    public static final String MAXIMUM_MAPPABILITY_LONG_NAME = "maximum-mappability";
    public static final String MINIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME = "minimum-segmental-duplication-content";
    public static final String MAXIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME = "maximum-segmental-duplication-content";
    // Count-based filter parameters (low-count filter, then extreme-count filter).
    public static final String LOW_COUNT_FILTER_COUNT_THRESHOLD_LONG_NAME = "low-count-filter-count-threshold";
    public static final String LOW_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME = "low-count-filter-percentage-of-samples";
    public static final String EXTREME_COUNT_FILTER_MINIMUM_PERCENTILE_LONG_NAME = "extreme-count-filter-minimum-percentile";
    public static final String EXTREME_COUNT_FILTER_MAXIMUM_PERCENTILE_LONG_NAME = "extreme-count-filter-maximum-percentile";
    public static final String EXTREME_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME = "extreme-count-filter-percentage-of-samples";

    /** Destination of the filtered intervals; written by {@code doWork()}. */
    @Argument(doc = "Output Picard interval-list file containing the filtered intervals.", fullName = "output", shortName = "O")
    private File outputFilteredIntervalsFile;

    /** Intervals to be filtered; assigned by {@code validateFilesAndResolveIntervals()}. */
    private SimpleIntervalCollection specifiedIntervals;

    /**
     * Annotated intervals used by the annotation-based filters; assigned by
     * {@code validateFilesAndResolveIntervals()} and left {@code null} when no annotated-intervals
     * file was provided.
     */
    private AnnotatedIntervalCollection annotatedIntervals;

    @Argument(doc = "Input file containing annotations for genomic intervals (output of AnnotateIntervals).  All intervals specified via -L must be contained.  Must be provided if no counts files are provided.", fullName = CopyNumberStandardArgument.ANNOTATED_INTERVALS_FILE_LONG_NAME, optional = true)
    private File inputAnnotatedIntervalsFile = null;

    // Diamond instead of the raw ArrayList type so the initializer is checked against List<File>.
    @Argument(doc = "Input TSV or HDF5 files containing integer read counts in genomic intervals (output of CollectReadCounts).  All intervals specified via -L must be contained.  Must be provided if no annotated-intervals file is provided.", fullName = StandardArgumentDefinitions.INPUT_LONG_NAME, shortName = StandardArgumentDefinitions.INPUT_SHORT_NAME, optional = true)
    private List<File> inputReadCountFiles = new ArrayList<>();

    @ArgumentCollection
    protected IntervalArgumentCollection intervalArgumentCollection = new OptionalIntervalArgumentCollection();

    // ---- Annotation-based filter bounds (both ends inclusive) ----

    @Argument(doc = "Minimum allowed value for GC-content annotation (inclusive).", fullName = MINIMUM_GC_CONTENT_LONG_NAME, optional = true)
    private double minimumGCContent = 0.1d;

    @Argument(doc = "Maximum allowed value for GC-content annotation (inclusive).", fullName = MAXIMUM_GC_CONTENT_LONG_NAME, optional = true)
    private double maximumGCContent = 0.9d;

    @Argument(doc = "Minimum allowed value for mappability annotation (inclusive).", fullName = MINIMUM_MAPPABILITY_LONG_NAME, optional = true)
    private double minimumMappability = 0.9d;

    @Argument(doc = "Maximum allowed value for mappability annotation (inclusive).", fullName = MAXIMUM_MAPPABILITY_LONG_NAME, optional = true)
    private double maximumMappability = 1.0d;

    @Argument(doc = "Minimum allowed value for segmental-duplication-content annotation (inclusive).", fullName = MINIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, optional = true)
    private double minimumSegmentalDuplicationContent = 0.0d;

    @Argument(doc = "Maximum allowed value for segmental-duplication-content annotation (inclusive).", fullName = MAXIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, optional = true)
    private double maximumSegmentalDuplicationContent = 0.5d;

    // ---- Count-based filter parameters ----

    @Argument(doc = "Count-threshold parameter for the low-count filter.  Intervals with a count strictly less than this threshold in a percentage of samples strictly greater than low-count-filter-percentage-of-samples will be filtered out.  (This is the first count-based filter applied.)", fullName = LOW_COUNT_FILTER_COUNT_THRESHOLD_LONG_NAME, minValue = 0.0d, optional = true)
    private int lowCountFilterCountThreshold = 5;

    @Argument(doc = "Percentage-of-samples parameter for the low-count filter.  Intervals with a count strictly less than low-count-filter-count-threshold in a percentage of samples strictly greater than this will be filtered out.  (This is the first count-based filter applied.)", fullName = LOW_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME, minValue = 0.0d, maxValue = 100.0d, optional = true)
    private double lowCountFilterPercentageOfSamples = 90.0d;

    @Argument(doc = "Minimum-percentile parameter for the extreme-count filter.  Intervals with a count that has a percentile strictly less than this in a percentage of samples strictly greater than extreme-count-filter-percentage-of-samples will be filtered out.  (This is the second count-based filter applied.)", fullName = EXTREME_COUNT_FILTER_MINIMUM_PERCENTILE_LONG_NAME, minValue = 0.0d, maxValue = 100.0d, optional = true)
    private double extremeCountFilterMinimumPercentile = 1.0d;

    @Argument(doc = "Maximum-percentile parameter for the extreme-count filter.  Intervals with a count that has a percentile strictly greater than this in a percentage of samples strictly greater than extreme-count-filter-percentage-of-samples will be filtered out.  (This is the second count-based filter applied.)", fullName = EXTREME_COUNT_FILTER_MAXIMUM_PERCENTILE_LONG_NAME, minValue = 0.0d, maxValue = 100.0d, optional = true)
    private double extremeCountFilterMaximumPercentile = 99.0d;

    @Argument(doc = "Percentage-of-samples parameter for the extreme-count filter.  Intervals with a count that has a percentile outside of [extreme-count-filter-minimum-percentile, extreme-count-filter-maximum-percentile] in a percentage of samples strictly greater than this will be filtered out.  (This is the second count-based filter applied.)", fullName = EXTREME_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME, minValue = 0.0d, maxValue = 100.0d, optional = true)
    private double extremeCountFilterPercentageOfSamples = 90.0d;

    /**
     * Entry point of the tool: validates the inputs, applies the filters, and writes the
     * intervals that pass to the output interval-list file.
     *
     * @return the literal string {@code "SUCCESS"} once the output has been written
     */
    @Override // org.broadinstitute.hellbender.cmdline.CommandLineProgram
    public Object doWork() {
        validateFilesAndResolveIntervals();
        final SimpleIntervalCollection filteredIntervals = filterIntervals();

        this.logger.info(String.format("Writing filtered intervals to %s...", this.outputFilteredIntervalsFile));

        // Convert each passing interval into an htsjdk Interval, reusing the sequence dictionary
        // carried by the filtered collection's metadata.
        final LocatableMetadata metadata = (LocatableMetadata) filteredIntervals.getMetadata();
        final IntervalList filteredIntervalList = new IntervalList(metadata.getSequenceDictionary());
        filteredIntervals.getIntervals().forEach(interval -> filteredIntervalList.add(new Interval(interval)));
        filteredIntervalList.write(this.outputFilteredIntervalsFile);

        return "SUCCESS";
    }

    /**
     * Validates the input arguments and files, then populates {@code specifiedIntervals} and,
     * when an annotated-intervals file was given, {@code annotatedIntervals}.
     *
     * <p>With read-count files, the intervals are resolved from the first read-count file and the
     * interval arguments. Without them, the intervals come from the interval arguments alone and
     * must all be present in the annotated-intervals file.
     *
     * @throws UserException if neither an annotated-intervals file nor any read-count file was given
     */
    private void validateFilesAndResolveIntervals() {
        CopyNumberArgumentValidationUtils.validateIntervalArgumentCollection(this.intervalArgumentCollection);

        final boolean hasAnnotatedIntervals = this.inputAnnotatedIntervalsFile != null;
        final boolean hasReadCounts = !this.inputReadCountFiles.isEmpty();
        if (!hasAnnotatedIntervals && !hasReadCounts) {
            throw new UserException("Must provide annotated intervals or counts.");
        }

        // Readability checks on every provided input file.
        if (hasAnnotatedIntervals) {
            IOUtils.canReadFile(this.inputAnnotatedIntervalsFile);
        }
        for (final File readCountFile : this.inputReadCountFiles) {
            IOUtils.canReadFile(readCountFile);
        }
        final int numDistinctReadCountFiles = new HashSet<>(this.inputReadCountFiles).size();
        Utils.validateArg(this.inputReadCountFiles.size() == numDistinctReadCountFiles, "List of input read-count files cannot contain duplicates.");

        if (hasReadCounts) {
            // Counts provided: resolve intervals against the first read-count file.
            this.specifiedIntervals = CopyNumberArgumentValidationUtils.resolveIntervals(this.inputReadCountFiles.get(0), this.intervalArgumentCollection, this.logger);
            if (hasAnnotatedIntervals) {
                this.annotatedIntervals = CopyNumberArgumentValidationUtils.validateAnnotatedIntervalsSubset(this.inputAnnotatedIntervalsFile, this.specifiedIntervals, this.logger);
            }
        } else {
            // Only annotated intervals provided (no counts).
            // NOTE(review): the full annotated collection is kept here rather than a subset matching
            // the specified intervals; confirm downstream index-based filtering tolerates a superset.
            this.annotatedIntervals = new AnnotatedIntervalCollection(this.inputAnnotatedIntervalsFile);
            final LocatableMetadata annotatedMetadata = (LocatableMetadata) this.annotatedIntervals.getMetadata();
            this.specifiedIntervals = new SimpleIntervalCollection(annotatedMetadata, this.intervalArgumentCollection.getIntervals(annotatedMetadata.getSequenceDictionary()));
            Utils.validateArg(this.specifiedIntervals.size() != 0, "At least one interval must be specified.");
            Utils.validateArg(new HashSet<>(this.annotatedIntervals.getIntervals()).containsAll(this.specifiedIntervals.getIntervals()), "Annotated intervals do not contain all specified intervals.");
        }
    }

    /**
     * Applies the annotation-based filters (if annotated intervals are available) followed by the
     * count-based filters (if read-count files were provided) and returns the intervals that pass.
     *
     * <p>A boolean mask with one entry per specified interval tracks the outcome; {@code true}
     * means the interval has been filtered out. Filters are applied cumulatively in this order:
     * GC content, mappability, segmental-duplication content, low-count, extreme-count.
     *
     * @return a new collection, sharing the metadata of the specified intervals, that contains
     *         only the intervals passing every applied filter
     * @throws UserException.BadInput (via {@code countNumberPassing}) if no interval remains
     *         after any filter step
     */
    private SimpleIntervalCollection filterIntervals() {
        final int numIntervals = this.specifiedIntervals.size();
        final boolean[] mask = new boolean[numIntervals]; // true = filtered out

        if (this.annotatedIntervals != null) {
            this.logger.info("Applying annotation-based filters...");
            // NOTE(review): the annotation filters index the mask by position in annotatedIntervals,
            // which presumes that collection is aligned one-to-one with specifiedIntervals -- confirm
            // this holds when only an annotated-intervals file (and no counts) is supplied.
            // Only the annotations present on the first record are used to decide which filters run.
            final List<AnnotationKey<?>> annotationKeys = ((AnnotatedInterval) this.annotatedIntervals.getRecords().get(0)).getAnnotationMap().getKeys();
            if (annotationKeys.contains(CopyNumberAnnotations.GC_CONTENT)) {
                updateMaskByAnnotationFilter(this.logger, this.annotatedIntervals, mask, CopyNumberAnnotations.GC_CONTENT, "GC-content", this.minimumGCContent, this.maximumGCContent);
            }
            if (annotationKeys.contains(CopyNumberAnnotations.MAPPABILITY)) {
                updateMaskByAnnotationFilter(this.logger, this.annotatedIntervals, mask, CopyNumberAnnotations.MAPPABILITY, "mappability", this.minimumMappability, this.maximumMappability);
            }
            if (annotationKeys.contains(CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT)) {
                updateMaskByAnnotationFilter(this.logger, this.annotatedIntervals, mask, CopyNumberAnnotations.SEGMENTAL_DUPLICATION_CONTENT, "segmental-duplication-content", this.minimumSegmentalDuplicationContent, this.maximumSegmentalDuplicationContent);
            }
        }

        if (!this.inputReadCountFiles.isEmpty()) {
            // Rows are samples (one per read-count file), columns are the specified intervals.
            final RealMatrix readCountMatrix = constructReadCountMatrix(this.logger, this.inputReadCountFiles, this.specifiedIntervals);
            final int numSamples = readCountMatrix.getRowDimension();
            this.logger.info("Applying count-based filters...");

            // Low-count filter: drop an interval when its count is below the threshold in more
            // than the configured percentage of samples.
            final double lowCountSampleLimit = (this.lowCountFilterPercentageOfSamples * numSamples) / 100.0d;
            for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) {
                if (mask[intervalIndex]) {
                    continue;
                }
                final long numLowCountSamples = Arrays.stream(readCountMatrix.getColumn(intervalIndex))
                        .filter(count -> count < this.lowCountFilterCountThreshold)
                        .count();
                if (numLowCountSamples > lowCountSampleLimit) {
                    mask[intervalIndex] = true;
                }
            }
            this.logger.info(String.format("After applying low-count filter (intervals with a count < %d in > %s%% of samples fail), %d / %d intervals remain...", this.lowCountFilterCountThreshold, this.lowCountFilterPercentageOfSamples, countNumberPassing(mask), numIntervals));

            // Extreme-count filter, step 1: per sample, flag every interval whose count lies outside
            // the [minimum, maximum] percentile counts computed over the still-unfiltered intervals.
            final boolean[][] percentileMask = new boolean[numSamples][numIntervals];
            for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) {
                final double[] counts = readCountMatrix.getRow(sampleIndex);
                final double[] unfilteredCounts = IntStream.range(0, numIntervals)
                        .filter(index -> !mask[index])
                        .mapToDouble(index -> counts[index])
                        .toArray();
                // A percentile argument of exactly 0 is mapped to a count of 0 instead of being
                // passed to Percentile (commons-math requires a percentile strictly greater than 0).
                final double minimumCount = this.extremeCountFilterMinimumPercentile == 0.0d
                        ? 0.0d
                        : new Percentile(this.extremeCountFilterMinimumPercentile).evaluate(unfilteredCounts);
                final double maximumCount = this.extremeCountFilterMaximumPercentile == 0.0d
                        ? 0.0d
                        : new Percentile(this.extremeCountFilterMaximumPercentile).evaluate(unfilteredCounts);
                for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) {
                    final double count = counts[intervalIndex];
                    if (count < minimumCount || count > maximumCount) {
                        percentileMask[sampleIndex][intervalIndex] = true;
                    }
                }
            }

            // Extreme-count filter, step 2: drop an interval flagged in more than the configured
            // percentage of samples. The sample index and the interval index are deliberately
            // distinct variables: the flag matrix is indexed [sample][interval].
            final double extremeCountSampleLimit = (this.extremeCountFilterPercentageOfSamples * numSamples) / 100.0d;
            for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) {
                if (mask[intervalIndex]) {
                    continue;
                }
                long numExtremeCountSamples = 0;
                for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) {
                    if (percentileMask[sampleIndex][intervalIndex]) {
                        numExtremeCountSamples++;
                    }
                }
                if (numExtremeCountSamples > extremeCountSampleLimit) {
                    mask[intervalIndex] = true;
                }
            }
            this.logger.info(String.format("After applying extreme-count filter (intervals with a count percentile outside of [%s, %s] in > %s%% of samples fail), %d / %d intervals remain...", this.extremeCountFilterMinimumPercentile, this.extremeCountFilterMaximumPercentile, this.extremeCountFilterPercentageOfSamples, countNumberPassing(mask), numIntervals));
        }

        this.logger.info(String.format("%d / %d intervals passed all filters...", countNumberPassing(mask), numIntervals));

        // Collect, in their original order, the intervals that were never masked.
        final List<SimpleInterval> passingIntervals = IntStream.range(0, numIntervals)
                .filter(index -> !mask[index])
                .mapToObj(index -> (SimpleInterval) this.specifiedIntervals.getRecords().get(index))
                .collect(Collectors.toList());
        return new SimpleIntervalCollection((LocatableMetadata) this.specifiedIntervals.getMetadata(), passingIntervals);
    }

    /**
     * Marks as filtered every not-yet-filtered interval whose annotation value lies outside the
     * closed range {@code [d, d2]}, then logs how many intervals remain.
     *
     * @param logger                      logger that receives the summary message
     * @param annotatedIntervalCollection annotated intervals, indexed in parallel with {@code zArr}
     * @param zArr                        filter mask, updated in place; {@code true} = filtered out
     * @param annotationKey               key of the annotation to test
     * @param str                         human-readable filter name used in the log message
     * @param d                           minimum allowed annotation value (inclusive)
     * @param d2                          maximum allowed annotation value (inclusive)
     */
    private static void updateMaskByAnnotationFilter(Logger logger, AnnotatedIntervalCollection annotatedIntervalCollection, boolean[] zArr, AnnotationKey<Double> annotationKey, String str, double d, double d2) {
        final int numAnnotatedIntervals = annotatedIntervalCollection.size();
        for (int index = 0; index < numAnnotatedIntervals; index++) {
            if (zArr[index]) {
                // Already removed by an earlier filter; its annotation is not looked up.
                continue;
            }
            final AnnotatedInterval annotatedInterval = (AnnotatedInterval) annotatedIntervalCollection.getRecords().get(index);
            final double annotationValue = ((Double) annotatedInterval.getAnnotationMap().getValue(annotationKey)).doubleValue();
            if (annotationValue < d || annotationValue > d2) {
                zArr[index] = true;
            }
        }
        logger.info(String.format("After applying %s filter (intervals with values outside of [%s, %s] fail), %d / %d intervals remain...", str, d, d2, countNumberPassing(zArr), numAnnotatedIntervals));
    }

    /**
     * Reads every read-count file and assembles the counts for the given intervals into a matrix
     * with one row per file and one column per interval.
     *
     * <p>For each file, only counts whose interval is among the given intervals are kept, in the
     * order they occur in the file. A sequence-dictionary mismatch is logged as a warning only.
     *
     * @param logger                   logger for progress and warning messages
     * @param list                     read-count files, one per sample
     * @param simpleIntervalCollection intervals whose counts are to be extracted
     * @return matrix of counts with dimensions (number of files) x (number of intervals)
     */
    private static RealMatrix constructReadCountMatrix(Logger logger, List<File> list, SimpleIntervalCollection simpleIntervalCollection) {
        logger.info("Validating and aggregating input read-counts files...");
        final int numSamples = list.size();
        final int numIntervals = simpleIntervalCollection.size();
        final HashSet<SimpleInterval> requestedIntervals = new HashSet<>(simpleIntervalCollection.getRecords());
        final RealMatrix readCountMatrix = new Array2DRowRealMatrix(numSamples, numIntervals);

        int sampleIndex = 0;
        for (final File readCountFile : list) {
            logger.info(String.format("Aggregating read-counts file %s (%d / %d)", readCountFile, sampleIndex + 1, numSamples));
            final SimpleCountCollection readCounts = SimpleCountCollection.read(readCountFile);

            final boolean dictionariesMatch = CopyNumberArgumentValidationUtils.isSameDictionary(
                    ((SampleLocatableMetadata) readCounts.getMetadata()).getSequenceDictionary(),
                    ((LocatableMetadata) simpleIntervalCollection.getMetadata()).getSequenceDictionary());
            if (!dictionariesMatch) {
                logger.warn(String.format("Sequence dictionary for read-counts file %s is inconsistent with those for other inputs.", readCountFile));
            }

            // Keep only the counts that belong to a requested interval.
            final double[] subsetCounts = readCounts.getRecords().stream()
                    .filter(count -> requestedIntervals.contains(count.getInterval()))
                    .mapToDouble(count -> count.getCount())
                    .toArray();
            Utils.validateArg(subsetCounts.length == requestedIntervals.size(), String.format("Intervals for read-count file %s do not contain all specified intervals.", readCountFile));

            readCountMatrix.setRow(sampleIndex, subsetCounts);
            sampleIndex++;
        }
        return readCountMatrix;
    }

    /**
     * Counts the intervals that have not been filtered out.
     *
     * @param zArr filter mask; {@code true} = filtered out
     * @return the number of {@code false} entries in {@code zArr}
     * @throws UserException.BadInput if every entry is filtered (including an empty mask)
     */
    private static int countNumberPassing(boolean[] zArr) {
        int numPassing = 0;
        for (final boolean isFilteredOut : zArr) {
            if (!isFilteredOut) {
                numPassing++;
            }
        }
        if (numPassing == 0) {
            throw new UserException.BadInput("Filtering removed all intervals.  Select less strict filtering criteria.");
        }
        return numPassing;
    }
}
