package org.broadinstitute.hellbender.tools.spark.sv.evidence;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.lang.invoke.SerializedLambda;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.BetaFeature;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.spark.GATKSparkTool;
import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceMultiSparkSource;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.tools.spark.sv.utils.KmerAndCount;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVDUSTFilteredKmerizer;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVFileUtils;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVKmer;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVKmerLong;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVReferenceUtils;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVUtils;
import org.broadinstitute.hellbender.tools.spark.utils.HopscotchMapSpark;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import picard.cmdline.programgroups.ReferenceProgramGroup;
import scala.Tuple2;

/**
 * Spark tool that searches the reference for kmers (fixed-length substrings) occurring more than a
 * fixed number of times and writes them to an output file.
 *
 * <p>Per the tool summary below, the output is suitable as the {@code --kmers-to-ignore} input of
 * StructuralVariationDiscoveryPipelineSpark. Kmers from an optional "high copy" FASTA file can be
 * merged into the result.
 */
@DocumentedFeature
@CommandLineProgramProperties(oneLineSummary = "Identifies sequences that occur at high frequency in a reference", summary = "Search the reference for kmers (fixed-length substrings) that occur more than a specified number of times, and list them to an output file.  The resulting output file is appropriate for use as the --kmers-to-ignore input file by the StructuralVariationDiscoveryPipelineSpark tool, which will ignore these kmers when trying to produce candidate reads for local assemblies.", programGroup = ReferenceProgramGroup.class)
@BetaFeature
/* loaded from: input_file:org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBadGenomicKmersSpark.class */
public final class FindBadGenomicKmersSpark extends GATKSparkTool {
    private static final long serialVersionUID = 1;

    // Value handed to SVReferenceUtils.getReferenceBasesRDD when the reference-bases RDD is built.
    // Presumably the length, in bases, of each reference record -- confirm against SVReferenceUtils.
    public static final int REF_RECORD_LEN = 10000;

    // Handed to SVReferenceUtils.getReferenceBasesRDD alongside the record length.
    // Presumably the number of reference records grouped into one Spark partition -- confirm.
    public static final int REF_RECORDS_PER_PARTITION = 104;

    // Frequency threshold used by findBadGenomicKmers: only kmers counted MORE than this many
    // times in the reference are reported.
    public static final int MAX_KMER_FREQ = 3;

    // Destination handed to SVFileUtils.writeKmersFile.
    @Argument(doc = "file for ubiquitous kmer output", shortName = "O", fullName = "output")
    private String outputFile;

    // Length of the kmers to look for; defaults to 51.
    @Argument(doc = "kmer size", fullName = "k-size")
    private int kSize = 51;

    // Upper bound on the DUST score passed to SVDUSTFilteredKmerizer; defaults to 49.
    @Argument(doc = "maximum kmer DUST score", fullName = "kmer-max-dust-score")
    private int maxDUSTScore = 49;

    // Optional FASTA whose kmers are merged into the output; left null when not supplied.
    @Argument(doc = "additional high copy kmers (mitochondrion, e.g.) fasta file name", fullName = "high-copy-fasta", optional = true)
    private String highCopyFastaFilename;

    /**
     * Reports that this tool requires a reference to be supplied.
     *
     * @return always {@code true}
     */
    @Override
    public boolean requiresReference() {
        return true;
    }

    /**
     * Tool entry point: finds the over-represented kmers in the reference, optionally merges in
     * the kmers of the "high copy" FASTA file, and writes the result to {@code outputFile}.
     *
     * @param javaSparkContext the Spark context used to distribute the reference scan
     */
    @Override // org.broadinstitute.hellbender.engine.spark.GATKSparkTool
    protected void runTool(JavaSparkContext javaSparkContext) {
        // The reads header may be absent; in that case no reads dictionary is passed along.
        final SAMFileHeader readsHeader = getHeaderForReads();
        final SAMSequenceDictionary readsDictionary =
                readsHeader == null ? null : readsHeader.getSequenceDictionary();

        Collection<SVKmer> kmersToIgnore =
                findBadGenomicKmers(javaSparkContext, this.kSize, this.maxDUSTScore, getReference(), readsDictionary);

        if (this.highCopyFastaFilename != null) {
            // Combine the reference-derived kmers with those of the extra FASTA via SVUtils.uniquify.
            kmersToIgnore = SVUtils.uniquify(
                    kmersToIgnore,
                    processFasta(this.kSize, this.maxDUSTScore, this.highCopyFastaFilename));
        }

        SVFileUtils.writeKmersFile(this.outputFile, this.kSize, kmersToIgnore);
    }

    /**
     * Builds the reference-bases RDD and returns the kmers that occur more than
     * {@link #MAX_KMER_FREQ} times in it.
     *
     * @param javaSparkContext          Spark context used to create the reference-bases RDD
     * @param i                         kmer size
     * @param i2                        maximum DUST score a kmer may have
     * @param referenceMultiSparkSource the reference to scan
     * @param sAMSequenceDictionary     sequence dictionary of the reads, may be {@code null};
     *                                  passed to the reference source when resolving its dictionary
     * @return the kmers counted more than {@link #MAX_KMER_FREQ} times
     * @throws GATKException if no reference sequence dictionary can be obtained
     */
    @VisibleForTesting
    static List<SVKmer> findBadGenomicKmers(JavaSparkContext javaSparkContext, int i, int i2, ReferenceMultiSparkSource referenceMultiSparkSource, SAMSequenceDictionary sAMSequenceDictionary) {
        final SAMSequenceDictionary referenceDictionary =
                referenceMultiSparkSource.getReferenceSequenceDictionary(sAMSequenceDictionary);
        if (referenceDictionary == null) {
            throw new GATKException("No reference dictionary available");
        }

        // Named constants replace the bare literals 3 and 10000 so every tuning value of this
        // scan is defined in one place.
        return collectUbiquitousKmersInReference(
                i,
                i2,
                MAX_KMER_FREQ,
                SVReferenceUtils.getReferenceBasesRDD(
                        javaSparkContext,
                        i,
                        referenceMultiSparkSource,
                        referenceDictionary,
                        REF_RECORD_LEN,
                        REF_RECORDS_PER_PARTITION));
    }

    /**
     * Counts the kmers of every reference record in two stages and returns those whose total
     * count is strictly greater than {@code i3}.
     *
     * <ol>
     *   <li>Each input partition tallies the kmers produced by
     *       {@code SVDUSTFilteredKmerizer.canonicalStream} for its records.</li>
     *   <li>The (kmer, count) pairs are hash-partitioned by kmer so that all partial counts of a
     *       kmer land in the same partition, where they are summed and filtered.</li>
     * </ol>
     *
     * @param i       kmer size, must be positive
     * @param i2      maximum DUST score, must be positive
     * @param i3      frequency threshold, must be positive; only kmers counted more often are kept
     * @param javaRDD reference bases, one {@code byte[]} per reference record; must not be null
     * @return the kmers whose total count exceeds {@code i3}
     */
    @VisibleForTesting
    static List<SVKmer> collectUbiquitousKmersInReference(int i, int i2, int i3, JavaRDD<byte[]> javaRDD) {
        // Arguments are rejected through Utils.nonNull / Utils.validateArg before any Spark work starts.
        Utils.nonNull(javaRDD, "reference bases RDD is null");
        Utils.validateArg(i > 0, "provided kmer size is non positive");
        Utils.validateArg(i2 > 0, "provided DUST filter score is non positive");
        Utils.validateArg(i3 > 0, "provided kmer frequency is non positive");

        final int partitionCount = javaRDD.getNumPartitions();
        // Initial capacity of the per-partition count maps. The value equals
        // 2 * REF_RECORDS_PER_PARTITION; presumably it was derived from that constant -- confirm.
        final int mapCapacity = 208;

        return javaRDD
                // Stage 1: partial counts within each input partition.
                .mapPartitions(recordItr -> {
                    final HopscotchMapSpark<SVKmer, Integer, KmerAndCount> partialCounts =
                            new HopscotchMapSpark<>(mapCapacity);
                    while (recordItr.hasNext()) {
                        final byte[] bases = recordItr.next();
                        SVDUSTFilteredKmerizer.canonicalStream(bases, i, i2, new SVKmerLong())
                                .forEach(kmer -> {
                                    final KmerAndCount entry = partialCounts.find(kmer);
                                    if (entry == null) {
                                        partialCounts.add(new KmerAndCount((SVKmerLong) kmer));
                                    } else {
                                        entry.bumpCount();
                                    }
                                });
                    }
                    return partialCounts.iterator();
                })
                .mapToPair(entry -> new Tuple2<>(entry.getKey(), entry.getValue()))
                // Bring every partial count of a given kmer into the same partition.
                .partitionBy(new HashPartitioner(partitionCount))
                // Stage 2: sum the partial counts and keep only the over-represented kmers.
                .mapPartitions(pairItr -> {
                    final HopscotchMapSpark<SVKmer, Integer, KmerAndCount> totalCounts =
                            new HopscotchMapSpark<>(mapCapacity);
                    while (pairItr.hasNext()) {
                        final Tuple2<SVKmer, Integer> pair = pairItr.next();
                        final SVKmer kmer = pair._1();
                        final int partialCount = pair._2();
                        final KmerAndCount entry = totalCounts.find(kmer);
                        if (entry == null) {
                            totalCounts.add(new KmerAndCount((SVKmerLong) kmer, partialCount));
                        } else {
                            entry.bumpCount(partialCount);
                        }
                    }
                    return totalCounts.stream()
                            .filter(entry -> entry.grabCount() > i3)
                            .map(KmerAndCount::getKey)
                            .iterator();
                })
                .collect();
    }

    /**
     * Reads a FASTA file and returns the kmers that
     * {@code SVDUSTFilteredKmerizer.canonicalStream} produces for each of its sequences.
     *
     * <p>Lines starting with {@code '>'} are treated as record headers; all other non-empty lines
     * are concatenated into the current sequence. A sequence is kmerized when the next header is
     * seen or when the end of the file is reached. Empty lines are skipped.
     *
     * @param i   kmer size
     * @param i2  maximum DUST score a kmer may have
     * @param str name of the FASTA file, opened through {@code BucketUtils.openFile}
     * @return the kmers of all sequences in the file, in file order
     * @throws GATKException if the file cannot be read
     */
    @VisibleForTesting
    static List<SVKmer> processFasta(int i, int i2, String str) {
        // try-with-resources closes the reader on every exit path, including exceptions.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(BucketUtils.openFile(str)))) {
            // NOTE(review): the list is pre-sized to the file size narrowed to int; a file larger
            // than Integer.MAX_VALUE bytes would overflow this cast -- confirm inputs stay small.
            final List<SVKmer> kmers = new ArrayList<>((int) BucketUtils.fileSize(str));
            final StringBuilder sequence = new StringBuilder();
            final SVKmerLong kmerSeed = new SVKmerLong();

            String line;
            while ((line = reader.readLine()) != null) {
                if (line.isEmpty()) {
                    // A blank line has no first character to inspect and carries no bases.
                    continue;
                }
                if (line.charAt(0) != '>') {
                    sequence.append(line);
                } else if (sequence.length() > 0) {
                    // A new header ends the previous record: kmerize it and start afresh.
                    SVDUSTFilteredKmerizer.canonicalStream(sequence, i, i2, kmerSeed).forEach(kmers::add);
                    sequence.setLength(0);
                }
            }

            // The last record is not followed by a header, so flush it explicitly.
            if (sequence.length() > 0) {
                SVDUSTFilteredKmerizer.canonicalStream(sequence, i, i2, kmerSeed).forEach(kmers::add);
            }
            return kmers;
        } catch (IOException e) {
            throw new GATKException("Can't read high copy kmers fasta file " + str, e);
        }
    }

    /*
     * Lambda deserialization hook (marked synthetic in the decompiled output).
     *
     * Given a SerializedLambda, it selects one of the three lambdas of
     * collectUbiquitousKmersInReference by implementation-method name, checks that the method
     * kind, functional interface and signatures match, and returns a freshly built lambda with
     * the captured arguments restored. Anything that does not match ends in
     * IllegalArgumentException("Invalid lambda deserialization").
     *
     * NOTE(review): the lambda bodies below repeat the ones in collectUbiquitousKmersInReference;
     * if this method is kept in source form the two copies have to stay in step.
     * NOTE(review): `z` is declared boolean but is assigned -1 and 2, and the second switch has
     * the labels false/true/true. This looks like a decompiler rendering of an int selector
     * (0/1/2) -- confirm before attempting to compile this method.
     */
    private static /* synthetic */ Object $deserializeLambda$(SerializedLambda serializedLambda) {
        String implMethodName = serializedLambda.getImplMethodName();
        boolean z = -1;
        // First switch: map the implementation-method name (via its hash code) to a selector.
        switch (implMethodName.hashCode()) {
            case -1481171712:
                if (implMethodName.equals("lambda$collectUbiquitousKmersInReference$12d39e59$1")) {
                    z = true;
                    break;
                }
                break;
            case -973380132:
                if (implMethodName.equals("lambda$collectUbiquitousKmersInReference$7458bc61$1")) {
                    z = 2;
                    break;
                }
                break;
            case 669652986:
                if (implMethodName.equals("lambda$collectUbiquitousKmersInReference$8abddd2a$1")) {
                    z = false;
                    break;
                }
                break;
        }
        // Second switch: validate the serialized form and rebuild the selected lambda.
        // Method kind 6 is MethodHandleInfo.REF_invokeStatic.
        switch (z) {
            case false:
                // PairFunction turning a KmerAndCount into a (key, value) Tuple2; no captured arguments.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/PairFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Lscala/Tuple2;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBadGenomicKmersSpark") && serializedLambda.getImplMethodSignature().equals("(Lorg/broadinstitute/hellbender/tools/spark/sv/utils/KmerAndCount;)Lscala/Tuple2;")) {
                    return kmerAndCount -> {
                        return new Tuple2(kmerAndCount.getKey(), kmerAndCount.getValue());
                    };
                }
                break;
            case true:
                // FlatMapFunction that tallies kmers per partition; captures two ints, which are
                // passed to canonicalStream in the same positions as kmer size and max DUST score.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBadGenomicKmersSpark") && serializedLambda.getImplMethodSignature().equals("(IILjava/util/Iterator;)Ljava/util/Iterator;")) {
                    int intValue = ((Integer) serializedLambda.getCapturedArg(0)).intValue();
                    int intValue2 = ((Integer) serializedLambda.getCapturedArg(1)).intValue();
                    return it -> {
                        HopscotchMapSpark hopscotchMapSpark = new HopscotchMapSpark(208);
                        while (it.hasNext()) {
                            SVDUSTFilteredKmerizer.canonicalStream((byte[]) it.next(), intValue, intValue2, new SVKmerLong()).forEach(sVKmer -> {
                                KmerAndCount kmerAndCount2 = (KmerAndCount) hopscotchMapSpark.find(sVKmer);
                                if (kmerAndCount2 == null) {
                                    hopscotchMapSpark.add(new KmerAndCount((SVKmerLong) sVKmer));
                                } else {
                                    kmerAndCount2.bumpCount();
                                }
                            });
                        }
                        return hopscotchMapSpark.iterator();
                    };
                }
                break;
            case true:
                // FlatMapFunction that sums the (kmer, count) pairs and keeps kmers whose count
                // exceeds the single captured int.
                if (serializedLambda.getImplMethodKind() == 6 && serializedLambda.getFunctionalInterfaceClass().equals("org/apache/spark/api/java/function/FlatMapFunction") && serializedLambda.getFunctionalInterfaceMethodName().equals("call") && serializedLambda.getFunctionalInterfaceMethodSignature().equals("(Ljava/lang/Object;)Ljava/util/Iterator;") && serializedLambda.getImplClass().equals("org/broadinstitute/hellbender/tools/spark/sv/evidence/FindBadGenomicKmersSpark") && serializedLambda.getImplMethodSignature().equals("(ILjava/util/Iterator;)Ljava/util/Iterator;")) {
                    int intValue3 = ((Integer) serializedLambda.getCapturedArg(0)).intValue();
                    return it2 -> {
                        HopscotchMapSpark hopscotchMapSpark = new HopscotchMapSpark(208);
                        while (it2.hasNext()) {
                            Tuple2 tuple2 = (Tuple2) it2.next();
                            SVKmer sVKmer = (SVKmer) tuple2._1();
                            int intValue4 = ((Integer) tuple2._2()).intValue();
                            KmerAndCount kmerAndCount2 = (KmerAndCount) hopscotchMapSpark.find(sVKmer);
                            if (kmerAndCount2 == null) {
                                hopscotchMapSpark.add(new KmerAndCount((SVKmerLong) sVKmer, intValue4));
                            } else {
                                kmerAndCount2.bumpCount(intValue4);
                            }
                        }
                        return hopscotchMapSpark.stream().filter(kmerAndCount3 -> {
                            return kmerAndCount3.grabCount() > intValue3;
                        }).map((v0) -> {
                            return v0.getKey();
                        }).iterator();
                    };
                }
                break;
        }
        throw new IllegalArgumentException("Invalid lambda deserialization");
    }
}
