package org.broadinstitute.hellbender.tools.spark.pathseq;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.SequenceUtil;
import java.lang.invoke.SerializedLambda;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.hellbender.engine.filters.AmbiguousBaseReadFilter;
import org.broadinstitute.hellbender.engine.filters.ReadLengthReadFilter;
import org.broadinstitute.hellbender.tools.spark.pathseq.loggers.PSFilterLogger;
import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.spark.utils.ReadFilterSparkifier;
import org.broadinstitute.hellbender.tools.spark.utils.ReadTransformerSparkifier;
import org.broadinstitute.hellbender.transformers.AdapterTrimTransformer;
import org.broadinstitute.hellbender.transformers.BaseQualityClipReadTransformer;
import org.broadinstitute.hellbender.transformers.BaseQualityReadTransformer;
import org.broadinstitute.hellbender.transformers.DUSTReadTransformer;
import org.broadinstitute.hellbender.transformers.SimpleRepeatMaskTransformer;
import org.broadinstitute.hellbender.transformers.StripMateNumberTransformer;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.bwa.BwaMemIndexCache;
import org.broadinstitute.hellbender.utils.illumina.IlluminaAdapterPair;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.SAMRecordToGATKReadAdapter;
import scala.Tuple2;

/* loaded from: input_file:org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter.class */
public final class PSFilter implements AutoCloseable {
    /** Spark context; handed to the static cache-closing calls in {@link #close()}. */
    private final JavaSparkContext ctx;
    /** User-supplied filtering options consulted at each stage of {@code doFilter}. */
    private final PSFilterArgumentCollection filterArgs;
    /** Header given to the fresh SAM records that are built when read alignments are cleared. */
    private final SAMFileHeader header;
    /** 5' and 3' adapter sequences of the single-end, paired-end and indexed Illumina adapter pairs. */
    private static final List<String> ADAPTER_SEQUENCES = CollectionUtil.makeList(new String[]{IlluminaAdapterPair.SINGLE_END.get5PrimeAdapter(), IlluminaAdapterPair.SINGLE_END.get3PrimeAdapter(), IlluminaAdapterPair.PAIRED_END.get5PrimeAdapter(), IlluminaAdapterPair.PAIRED_END.get3PrimeAdapter(), IlluminaAdapterPair.INDEXED.get5PrimeAdapter(), IlluminaAdapterPair.INDEXED.get3PrimeAdapter()});
    // Parameters of the first (short-window) simple-repeat masking pass.
    // NOTE(review): presumably window length in bases and the maximum A/T and G/C counts tolerated
    // inside one window, as consumed by SimpleRepeatMaskTransformer — confirm against that class.
    private static final int REPEAT_WINDOW_SIZE_1 = 30;
    private static final int MAX_AT_CONTENT_1 = 29;
    private static final int MAX_GC_CONTENT_1 = 29;
    // Parameters of the second (long-window) simple-repeat masking pass; same assumed meaning as above.
    private static final int REPEAT_WINDOW_SIZE_2 = 100;
    private static final int MAX_AT_CONTENT_2 = 87;
    private static final int MAX_GC_CONTENT_2 = 89;

    /**
     * Builds a filter bound to a Spark context, a set of filter options and a SAM header.
     *
     * @param javaSparkContext Spark context; checked with {@code Utils.nonNull}
     * @param pSFilterArgumentCollection filter options; checked with {@code Utils.nonNull}
     * @param sAMFileHeader header used for records created while clearing alignments; stored without validation
     */
    public PSFilter(JavaSparkContext javaSparkContext, PSFilterArgumentCollection pSFilterArgumentCollection, SAMFileHeader sAMFileHeader) {
        // Both mandatory collaborators are validated before any field is written.
        Utils.nonNull(javaSparkContext, "JavaSparkContext cannot be null");
        Utils.nonNull(pSFilterArgumentCollection, "Filter arguments cannot be null");

        this.header = sAMFileHeader;
        this.filterArgs = pSFilterArgumentCollection;
        this.ctx = javaSparkContext;
    }

    /**
     * Re-partitions the reads by name and then, partition by partition, marks every read whose
     * name occurs only once in its partition as unpaired.
     *
     * @param javaRDD reads to process
     * @param i initial capacity hint forwarded to the per-partition pairing step
     * @return the reads of each partition, matched pairs first, followed by the now-unpaired reads
     */
    @VisibleForTesting
    static JavaRDD<GATKRead> setPairFlags(JavaRDD<GATKRead> javaRDD, int i) {
        final JavaRDD<GATKRead> groupedByName = repartitionReadsByName(javaRDD);
        return groupedByName.mapPartitions(partition -> setPartitionUnpairedFlags(partition, i));
    }

    /**
     * Replaces every read in the RDD with an unaligned copy produced by {@link #clearReadAlignment}.
     *
     * @param javaRDD reads whose alignment information should be dropped
     * @param sAMFileHeader header attached to each newly created record
     * @return RDD of the unaligned copies
     */
    private static JavaRDD<GATKRead> clearAllAlignments(JavaRDD<GATKRead> javaRDD, SAMFileHeader sAMFileHeader) {
        return javaRDD.map(alignedRead -> clearReadAlignment(alignedRead, sAMFileHeader));
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Builds a fresh, unmapped copy of a read that keeps only its name, bases, base qualities,
     * pairing flags (paired / first / second of pair) and read group.
     *
     * <p>If the source read is flagged as reverse strand, the bases are reverse-complemented and the
     * qualities reversed so that the copy is back in its original orientation. The transformation is
     * applied to private copies of the arrays <em>before</em> they are stored on the new read: the
     * previous code transformed whatever {@code getBases()} / {@code getBaseQualities()} returned
     * after the fact, which silently does nothing when those getters return defensive copies and
     * would alter the source read when they return the backing array.
     *
     * @param gATKRead read to copy; it is not modified
     * @param sAMFileHeader header for the new underlying {@link SAMRecord}
     * @return the new unmapped read
     */
    public static GATKRead clearReadAlignment(GATKRead gATKRead, SAMFileHeader sAMFileHeader) {
        SAMRecordToGATKReadAdapter unaligned = new SAMRecordToGATKReadAdapter(new SAMRecord(sAMFileHeader));
        unaligned.setName(gATKRead.getName());

        // Work on private copies so the result is right whether or not the getters already copy.
        final byte[] sourceBases = gATKRead.getBases();
        final byte[] sourceQuals = gATKRead.getBaseQualities();
        final byte[] bases = sourceBases == null ? null : sourceBases.clone();
        final byte[] quals = sourceQuals == null ? null : sourceQuals.clone();
        if (gATKRead.isReverseStrand()) {
            // The strand flag is not carried over, so put the sequence back in read orientation.
            SequenceUtil.reverseComplement(bases);
            SequenceUtil.reverseQualities(quals);
        }
        unaligned.setBases(bases);
        unaligned.setBaseQualities(quals);

        unaligned.setIsUnmapped();
        unaligned.setIsPaired(gATKRead.isPaired());
        if (gATKRead.isPaired()) {
            unaligned.setMateIsUnmapped();
            if (gATKRead.isFirstOfPair()) {
                unaligned.setIsFirstOfPair();
            } else if (gATKRead.isSecondOfPair()) {
                unaligned.setIsSecondOfPair();
            }
        }

        final String readGroup = gATKRead.getReadGroup();
        if (readGroup != null) {
            unaligned.setAttribute(SAMTag.RG.name(), readGroup);
        }
        return unaligned;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Splits the reads of one partition into matched pairs and leftovers, clears the paired flag
     * on every leftover read, and returns all reads again (pairs first, leftovers last).
     *
     * @param it reads of a single partition
     * @param i initial capacity hint passed on to {@link #getPairedAndUnpairedLists}
     * @return iterator over the paired reads followed by the reads now marked unpaired
     */
    public static Iterator<GATKRead> setPartitionUnpairedFlags(Iterator<GATKRead> it, int i) {
        final Tuple2<List<GATKRead>, List<GATKRead>> split = getPairedAndUnpairedLists(it, i);
        final List<GATKRead> withMate = split._1();
        final List<GATKRead> withoutMate = split._2();

        // A read whose mate is absent from the partition can no longer be treated as paired.
        for (final GATKRead orphan : withoutMate) {
            orphan.setIsPaired(false);
        }

        final List<GATKRead> combined = new ArrayList<>(withMate.size() + withoutMate.size());
        combined.addAll(withMate);
        combined.addAll(withoutMate);
        return combined.iterator();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Re-partitions reads by name while keeping the RDD's current number of partitions.
     *
     * @param javaRDD reads to shuffle
     * @return reads partitioned by the hash of their name
     */
    public static JavaRDD<GATKRead> repartitionReadsByName(JavaRDD<GATKRead> javaRDD) {
        final int currentPartitionCount = javaRDD.getNumPartitions();
        return repartitionReadsByName(javaRDD, currentPartitionCount);
    }

    /**
     * Shuffles reads so that all reads sharing a name end up in the same partition: each read is
     * keyed by its name, distributed with a {@link HashPartitioner}, and the key is dropped again.
     *
     * @param javaRDD reads to shuffle
     * @param i number of partitions given to the {@link HashPartitioner}
     * @return reads partitioned by the hash of their name
     */
    static JavaRDD<GATKRead> repartitionReadsByName(JavaRDD<GATKRead> javaRDD, int i) {
        return javaRDD
                .mapToPair(read -> new Tuple2<>(read.getName(), read))
                .partitionBy(new HashPartitioner(i))
                .map(Tuple2::_2);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Walks the reads once and separates those that have a same-named partner from those that do not.
     *
     * <p>A read is parked in a name-keyed map until another read with the same name shows up; the
     * two are then appended to the paired list (the later read first, then the parked one) and the
     * name is released, so a third read with that name starts waiting again.
     *
     * @param it reads to classify
     * @param i initial capacity used for both the paired list and the lookup map
     * @return tuple of (paired reads, reads left without a partner)
     */
    public static Tuple2<List<GATKRead>, List<GATKRead>> getPairedAndUnpairedLists(Iterator<GATKRead> it, int i) {
        final ArrayList<GATKRead> paired = new ArrayList<>(i);
        final HashMap<String, GATKRead> awaitingMate = new HashMap<>(i);

        while (it.hasNext()) {
            final GATKRead current = it.next();
            final String readName = current.getName();
            // Stored values are never null, so a null result means no read was waiting under this name.
            final GATKRead earlier = awaitingMate.remove(readName);
            if (earlier == null) {
                awaitingMate.put(readName, current);
            } else {
                paired.add(current);
                paired.add(earlier);
            }
        }

        final List<GATKRead> unpaired = new ArrayList<>(awaitingMate.values());
        paired.trimToSize();
        return new Tuple2<>(paired, unpaired);
    }

    /**
     * Keeps one read per group of reads that share a {@link #canonicalizeRead} key.
     *
     * <p>Within a group the first read that is not flagged as paired is kept; if every read in the
     * group is paired, the first read of the group is kept instead.
     *
     * @param javaRDD reads to de-duplicate
     * @return one representative read per key
     */
    @VisibleForTesting
    static JavaRDD<GATKRead> filterDuplicateSequences(JavaRDD<GATKRead> javaRDD) {
        return javaRDD
                .mapToPair(PSFilter::canonicalizeRead)
                .groupByKey()
                .values()
                .map(sameKeyReads -> {
                    for (final GATKRead candidate : sameKeyReads) {
                        if (!candidate.isPaired()) {
                            return candidate;
                        }
                    }
                    // Only paired reads in this group: fall back to the first one.
                    return sameKeyReads.iterator().next();
                });
    }

    /**
     * Pairs a read with a strand-independent key: the smaller of the 64-bit FNV hash of its bases
     * and the hash of the reverse complement of those bases.
     *
     * <p>NOTE(review): the array obtained from {@code getBases()} is reverse-complemented in place,
     * so this relies on {@code getBases()} handing out a copy rather than the read's backing
     * array — confirm.
     *
     * @param gATKRead read to key
     * @return tuple of (key, the same read instance)
     */
    @VisibleForTesting
    static Tuple2<Long, GATKRead> canonicalizeRead(GATKRead gATKRead) {
        final byte[] sequence = gATKRead.getBases();
        final long forwardHash = SVUtils.fnvByteArray64(sequence);

        SequenceUtil.reverseComplement(sequence);
        final long reverseHash = SVUtils.fnvByteArray64(sequence);

        final long key = Math.min(forwardHash, reverseHash);
        return new Tuple2<>(Long.valueOf(key), gATKRead);
    }

    /**
     * Applies a {@code ContainsKmerReadFilterSpark} built from the given arguments to the reads.
     *
     * @param javaRDD reads to filter
     * @param str first constructor argument of the filter ({@code doFilter} passes the k-mer file path)
     * @param i second constructor argument of the filter ({@code doFilter} passes the host k-mer threshold)
     * @return reads accepted by the filter
     */
    private static JavaRDD<GATKRead> doKmerFiltering(JavaRDD<GATKRead> javaRDD, String str, int i) {
        final ContainsKmerReadFilterSpark kmerFilter = new ContainsKmerReadFilterSpark(str, i);
        return javaRDD.filter(kmerFilter);
    }

    /**
     * Runs a {@code PSBwaFilter} over every partition of the reads; a new filter object is created
     * for each partition.
     *
     * @param javaRDD reads to filter
     * @param str first constructor argument of {@code PSBwaFilter} ({@code doFilter} passes the index image file)
     * @param i third constructor argument ({@code doFilter} passes the minimum seed length)
     * @param i2 fourth constructor argument ({@code doFilter} passes the literal 1)
     * @param i3 second constructor argument ({@code doFilter} passes the minimum identity)
     * @return reads emitted by the filter
     */
    @VisibleForTesting
    static JavaRDD<GATKRead> doBwaFilter(JavaRDD<GATKRead> javaRDD, String str, int i, int i2, int i3) {
        return javaRDD.mapPartitions(partition -> {
            final PSBwaFilter partitionFilter = new PSBwaFilter(str, i3, i, i2, false);
            return partitionFilter.apply(partition);
        });
    }

    /**
     * Runs the complete filtering pipeline and returns the surviving reads split into paired and
     * unpaired sets. The logger is notified after each major stage.
     *
     * <p>Stages, in order:
     * <ol>
     *   <li>keep primary reads only;</li>
     *   <li>if the input is flagged as aligned, apply {@code HostAlignmentReadFilter};</li>
     *   <li>clear all alignment information and strip mate numbers from read names;</li>
     *   <li>unless filters are skipped: adapter trimming, two simple-repeat masking passes, DUST
     *       masking, quality clipping, read-length filtering, base-quality transformation and
     *       ambiguous-base filtering;</li>
     *   <li>k-mer filtering when a k-mer file is configured;</li>
     *   <li>re-partitioning by read name unless disabled;</li>
     *   <li>BWA-based filtering when an index image is configured;</li>
     *   <li>optional removal of duplicate sequences;</li>
     *   <li>pair-flag repair, alignment clearing and the final paired/unpaired split.</li>
     * </ol>
     *
     * @param javaRDD input reads; checked with {@code Utils.nonNull}
     * @param pSFilterLogger receives the intermediate read sets for logging
     * @return tuple of (paired reads, unpaired reads)
     */
    public Tuple2<JavaRDD<GATKRead>, JavaRDD<GATKRead>> doFilter(JavaRDD<GATKRead> javaRDD, PSFilterLogger pSFilterLogger) {
        Utils.nonNull(javaRDD, "Input reads cannot be null");

        JavaRDD<GATKRead> reads = PSUtils.primaryReads(javaRDD);
        pSFilterLogger.logPrimaryReads(reads);

        if (this.filterArgs.alignedInput) {
            reads = reads.filter(new ReadFilterSparkifier(new HostAlignmentReadFilter(this.filterArgs.minIdentity)));
        }
        pSFilterLogger.logReadsAfterPrealignedHostFilter(reads);

        // From here on the reads are treated as unaligned.
        reads = clearAllAlignments(reads, this.header);
        reads = reads.map(new ReadTransformerSparkifier(new StripMateNumberTransformer()));

        if (!this.filterArgs.skipFilters) {
            reads = reads.map(new ReadTransformerSparkifier(new AdapterTrimTransformer(this.filterArgs.maxAdapterMismatches, this.filterArgs.minAdapterLength, ADAPTER_SEQUENCES)));
            // Two repeat-masking passes: a short window first, then a long one.
            reads = reads.map(new ReadTransformerSparkifier(new SimpleRepeatMaskTransformer(MAX_AT_CONTENT_1, MAX_GC_CONTENT_1, REPEAT_WINDOW_SIZE_1)));
            reads = reads.map(new ReadTransformerSparkifier(new SimpleRepeatMaskTransformer(MAX_AT_CONTENT_2, MAX_GC_CONTENT_2, REPEAT_WINDOW_SIZE_2)));
            reads = reads.map(new ReadTransformerSparkifier(new DUSTReadTransformer(this.filterArgs.dustMask, this.filterArgs.dustW, this.filterArgs.dustT)));
            reads = reads.map(new ReadTransformerSparkifier(new BaseQualityClipReadTransformer(this.filterArgs.readTrimThresh)));
            // NOTE(review): PartitionBounds.UNMAPPED serves as the upper length bound here; it looks
            // like a stand-in for "no maximum" (e.g. Integer.MAX_VALUE) — confirm its value.
            reads = reads.filter(new ReadFilterSparkifier(new ReadLengthReadFilter(this.filterArgs.minReadLength, ReadMetadata.PartitionBounds.UNMAPPED)));
            reads = reads.map(new ReadTransformerSparkifier(new BaseQualityReadTransformer(this.filterArgs.qualPhredThresh)));
            reads = reads.filter(new ReadFilterSparkifier(new AmbiguousBaseReadFilter(this.filterArgs.maxAmbiguousBases)));
        }
        pSFilterLogger.logReadsAfterQualityFilter(reads);

        if (this.filterArgs.kmerFilePath != null) {
            reads = doKmerFiltering(reads, this.filterArgs.kmerFilePath, this.filterArgs.hostKmerThresh);
        }

        if (!this.filterArgs.skipPreBwaRepartition) {
            reads = repartitionReadsByName(reads);
        }

        if (this.filterArgs.indexImageFile != null) {
            // NOTE(review): the literal 1 is doBwaFilter's fourth argument; its meaning is not
            // visible in this file (possibly a thread count) — confirm.
            reads = doBwaFilter(reads, this.filterArgs.indexImageFile, this.filterArgs.minSeedLength, 1, this.filterArgs.minIdentity);
        }
        pSFilterLogger.logReadsAfterHostFilter(reads);

        if (this.filterArgs.filterDuplicates) {
            // Pair flags are refreshed first because de-duplication prefers unpaired reads.
            reads = filterDuplicateSequences(setPairFlags(reads, this.filterArgs.filterReadsPerPartition));
        }
        pSFilterLogger.logReadsAfterDeduplication(reads);

        // Earlier stages may have removed one mate of a pair, so the flags are repaired once more
        // before the final split.
        final JavaRDD<GATKRead> flagged = setPairFlags(reads, this.filterArgs.filterReadsPerPartition);
        final JavaRDD<GATKRead> unaligned = clearAllAlignments(flagged, this.header);
        final PSPairedUnpairedSplitterSpark splitter = new PSPairedUnpairedSplitterSpark(unaligned, this.filterArgs.filterReadsPerPartition, false);
        final JavaRDD<GATKRead> pairedReads = splitter.getPairedReads();
        final JavaRDD<GATKRead> unpairedReads = splitter.getUnpairedReads();
        pSFilterLogger.logFinalPairedReads(pairedReads);
        return new Tuple2<>(pairedReads, unpairedReads);
    }

    /**
     * Asks {@code BwaMemIndexCache} and then {@code ContainsKmerReadFilterSpark} to close all of
     * their distributed instances for this filter's Spark context.
     */
    @Override // java.lang.AutoCloseable
    public void close() {
        final JavaSparkContext sparkContext = this.ctx;
        BwaMemIndexCache.closeAllDistributedInstances(sparkContext);
        ContainsKmerReadFilterSpark.closeAllDistributedInstances(sparkContext);
    }

    /*
     * Lambda deserialization hook. It switches on the implementation method name recorded in the
     * SerializedLambda, verifies the recorded method kind, functional interface, implementing class
     * and signatures, and returns a matching lambda rebuilt from the captured arguments. Anything
     * unrecognized ends in IllegalArgumentException.
     *
     * NOTE(review): the "synthetic" marker suggests this is compiler-generated code recovered by a
     * decompiler rather than hand-written source. As written, `boolean z = -1` and the repeated
     * `case true:` labels are not valid Java; presumably the selector was an int taking the values
     * 0..6. Confirm whether this method should be kept in source at all.
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // First switch: map the implementation method name (via its hash code) to a selector value.
        switch (implMethodName.hashCode()) {
            case -1034933902:
                if (implMethodName.equals("lambda$clearAllAlignments$afc2523d$1")) {
                    z = false;
                    break;
                }
                break;
            case -938537193:
                if (implMethodName.equals("lambda$doBwaFilter$9ded5e08$1")) {
                    z = 3;
                    break;
                }
                break;
            case 2995:
                if (implMethodName.equals("_2")) {
                    z = true;
                    break;
                }
                break;
            case 335923496:
                if (implMethodName.equals("lambda$repartitionReadsByName$c886c051$1")) {
                    z = 6;
                    break;
                }
                break;
            case 379758255:
                if (implMethodName.equals("lambda$setPairFlags$8befc806$1")) {
                    z = 5;
                    break;
                }
                break;
            case 561134998:
                if (implMethodName.equals("canonicalizeRead")) {
                    z = 2;
                    break;
                }
                break;
            case 926694471:
                if (implMethodName.equals("lambda$filterDuplicateSequences$66f9aadc$1")) {
                    z = 4;
                    break;
                }
                break;
        }
        // Second switch: validate the serialized form for the selected lambda and rebuild it.
        switch (z) {
            case false:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter") && serializedLambda.getImplMethodSignature().equals("(Lhtsjdk/samtools/SAMFileHeader;Lorg/broadinstitute/hellbender/utils/read/GATKRead;)Lorg/broadinstitute/hellbender/utils/read/GATKRead;")) {
                    SAMFileHeader sAMFileHeader = (SAMFileHeader) serializedLambda.getCapturedArg(0);
                    return gATKRead -> {
                        return clearReadAlignment(gATKRead, sAMFileHeader);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 5 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("scala/Tuple2") && serializedLambda.getImplMethodSignature().equals("()Ljava/lang/Object;")) {
                    return (v0) -> {
                        return v0._2();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter") && serializedLambda.getImplMethodSignature().equals("(Lorg/broadinstitute/hellbender/utils/read/GATKRead;)Lscala/Tuple2;")) {
                    return PSFilter::canonicalizeRead;
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/String;IIILjava/util/Iterator;)Ljava/util/Iterator;")) {
                    String str = (String) serializedLambda.getCapturedArg(0);
                    int intValue = ((Integer) serializedLambda.getCapturedArg(1)).intValue();
                    int intValue2 = ((Integer) serializedLambda.getCapturedArg(2)).intValue();
                    int intValue3 = ((Integer) serializedLambda.getCapturedArg(3)).intValue();
                    return it -> {
                        return new PSBwaFilter(str, intValue, intValue2, intValue3, false).apply(it);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/Function") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/lang/Object;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter") && serializedLambda.getImplMethodSignature().equals("(Ljava/lang/Iterable;)Lorg/broadinstitute/hellbender/utils/read/GATKRead;")) {
                    return iterable -> {
                        Iterator it2 = iterable.iterator();
                        while (it2.hasNext()) {
                            GATKRead gATKRead2 = (GATKRead) it2.next();
                            if (!gATKRead2.isPaired()) {
                                return gATKRead2;
                            }
                        }
                        return (GATKRead) iterable.iterator().next();
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter") && serializedLambda.getImplMethodSignature().equals("(ILjava/util/Iterator;)Ljava/util/Iterator;")) {
                    int intValue4 = ((Integer) serializedLambda.getCapturedArg(0)).intValue();
                    return it2 -> {
                        return setPartitionUnpairedFlags(it2, intValue4);
                    };
                }
                break;
            case true:
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/pathseq/PSFilter") && serializedLambda.getImplMethodSignature().equals("(Lorg/broadinstitute/hellbender/utils/read/GATKRead;)Lscala/Tuple2;")) {
                    return gATKRead2 -> {
                        return new Tuple2(gATKRead2.getName(), gATKRead2);
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
