package org.broadinstitute.hellbender.tools;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.genomicsdb.GenomicsDBImport;
import org.broadinstitute.hellbender.tools.walkers.genotyper.StandardCallerArgumentCollection;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import picard.cmdline.programgroups.OtherProgramGroup;

@DocumentedFeature
@CommandLineProgramProperties(summary = "Fix the sample names in a vcf that ran through the GenomicsDBImport when the batch ordering bug was present, this will restore the correct sample names provided it is given the exact sample name mapping / vcf orderingand batch sized that was used in the initial import. See https://github.com/broadinstitute/gatk/issues/3682 for more information", oneLineSummary = "fix sample names in a shuffled callset", programGroup = OtherProgramGroup.class, omitFromCommandLine = true)
@ExperimentalFeature
/* loaded from: input_file:org/broadinstitute/hellbender/tools/FixCallSetSampleOrdering.class */
public final class FixCallSetSampleOrdering extends VariantWalker {
    /** Long name of the command-line flag that suppresses the interactive confirmation prompt. */
    public static final String SKIP_PROMPT_LONG_NAME = "skipPrompt";

    /**
     * Location of the sample name map file that was given to GenomicsDBImport.
     * Resolved to a path and parsed in {@code loadSampleNameMappings()}.
     */
    @Argument(fullName = GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME, doc = "the same sampleNameMap file which was used to import the callset using GenomicsDBImport", optional = false)
    public String sampleNameMapPath;

    /**
     * Batch size used during the original import; the sample list is partitioned into
     * chunks of this size when the corrected ordering is rebuilt.
     *
     * NOTE(review): minValue references StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION,
     * a constant that has nothing to do with batch sizes. This looks like a decompiler
     * constant-matching artifact; presumably a literal 0 was intended — confirm against
     * the original source.
     */
    @Argument(fullName = GenomicsDBImport.BATCHSIZE_ARG_LONG_NAME, doc = "the exact batch size that was used to import the callset using GenomicsDBImport", minValue = StandardCallerArgumentCollection.DEFAULT_CONTAMINATION_FRACTION, optional = false)
    public Integer batchSize;

    /**
     * Number of reader threads used during the original import. A value of 1 selects the
     * "sort by sample-name-map name" repair; a value above 1 selects the
     * "sort by GVCF header name" repair and makes {@link #gvcfToHeaderSampleMapFile} mandatory.
     */
    @Argument(fullName = GenomicsDBImport.VCF_INITIALIZER_THREADS_LONG_NAME, doc = "the value of the --reader-threads argument used when importing the callset via GenomicsDBImport", optional = false)
    public Integer readerThreads;

    /**
     * Optional file mapping each GVCF path to the sample name declared in that GVCF's header.
     * Must be supplied exactly when {@link #readerThreads} is greater than 1
     * (enforced in {@code onTraversalStart()}).
     */
    @Argument(fullName = "gvcf-to-header-sample-map-file", doc = "A mapping of GVCF to actual sample name in the GVCF header. You must provide this file if and only if GenomicsDBImport was run with --reader-threads > 1. Each line in this file should contain a GVCF path (matching the one from the sample name map file), followed by whitespace, followed by the actual sample name declared in the header for that GVCF.", optional = true)
    public String gvcfToHeaderSampleMapFile;

    /** Destination of the re-headered VCF. */
    @Argument(fullName = "output", shortName = "O", doc = "where to write a reheadered version of the input VCF with the sample names in the correct order", optional = false)
    public File output;

    /** When true, the interactive "are you sure?" prompt is skipped. */
    @Argument(fullName = SKIP_PROMPT_LONG_NAME, shortName = "Y", doc = "skip the commandline prompt as if you had entered Y", optional = true)
    public boolean skipCommandLinePrompt = false;
    /** Writer for the output VCF; stays null until onTraversalStart() has validated the inputs. */
    private VariantContextWriter writer;
    /** Sample name to GVCF path, as loaded from the sample name map file; iterated in its stored order when batching. */
    private LinkedHashMap<String, Path> sampleNameMapFromGenomicsDBImport;
    /** GVCF path to header-declared sample name; null when no gvcf-to-header map file was supplied. */
    private Map<Path, String> gvcfToHeaderSampleMap;

    /**
     * User-facing error raised whenever this tool determines that it cannot, or should not,
     * rewrite the sample names of the supplied callset (for example because the callset is
     * not affected by the bug, or because the supplied mapping files are inconsistent).
     */
    @VisibleForTesting
    public static class SampleNameFixingCannotProceedException extends UserException {
        private static final long serialVersionUID = 1L;

        /**
         * @param message explanation shown to the user of why the repair was refused
         */
        SampleNameFixingCannotProceedException(final String message) {
            super(message);
        }
    }

    /**
     * Validates the arguments, computes the corrected sample ordering and writes the new header.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>ask the user for confirmation (unless the prompt is skipped);</li>
     *   <li>reject runs that cannot have been affected by the bug (batch size 0, or a batch size
     *       that is at least the number of samples);</li>
     *   <li>enforce that the gvcf-to-header map file is supplied if and only if more than one
     *       reader thread was used;</li>
     *   <li>check that the sample names in the sample name map match those in the input VCF;</li>
     *   <li>open the output writer and emit a header carrying the re-ordered sample names.</li>
     * </ol>
     *
     * @throws SampleNameFixingCannotProceedException if any of the validation steps fails
     */
    @Override
    public void onTraversalStart() {
        assertThatTheyReallyWantToProceed();

        if (batchSize == 0) {
            throw new SampleNameFixingCannotProceedException("your callset is not affected by the bug if you ran with --batch-size 0");
        }

        // The header-sample map is required for multi-threaded imports and forbidden otherwise.
        final boolean headerSampleMapProvided = gvcfToHeaderSampleMapFile != null;
        if (readerThreads > 1) {
            if (!headerSampleMapProvided) {
                throw new SampleNameFixingCannotProceedException("You must provide a --gvcf-to-header-sample-map-file if GenomicsDBImport was run with --reader-threads > 1");
            }
        } else if (headerSampleMapProvided) {
            throw new SampleNameFixingCannotProceedException("You must NOT provide a --gvcf-to-header-sample-map-file if GenomicsDBImport was run with --reader-threads 1");
        }

        // Keep the input header lines in their original order, then append this tool's own lines.
        final VCFHeader inputHeader = getHeaderForVariants();
        final Set<VCFHeaderLine> headerLines = new LinkedHashSet<>(inputHeader.getMetaDataInInputOrder());
        headerLines.addAll(getDefaultToolVCFHeaderLines());

        loadSampleNameMappings();
        final List<String> sampleNamesFromMap = new ArrayList<>(sampleNameMapFromGenomicsDBImport.keySet());
        if (sampleNamesFromMap.size() <= batchSize) {
            throw new SampleNameFixingCannotProceedException("you are not affected by the sample name ordering bug if your batch size is >= the number of samples in your callset. \nbatch size: " + batchSize + "\nnumber of samples: " + sampleNamesFromMap.size());
        }
        assertSampleNamesMatchInputVCF(inputHeader.getSampleNamesInOrder(), sampleNamesFromMap);

        final VCFHeader correctedHeader = new VCFHeader(headerLines, getBatchSortedList());
        logger.info("Writing the new header with corrected sample names");
        writer = createVCFWriter(output);
        writer.writeHeader(correctedHeader);
        logger.info("Copying the rest of the VCF");
    }

    /**
     * Populates both lookup tables used by the repair.
     *
     * The sample name map must be loaded first: loading the gvcf-to-header map validates
     * its entries against the paths found in the sample name map.
     */
    private void loadSampleNameMappings() {
        final Path sampleNameMapLocation = IOUtils.getPath(sampleNameMapPath);
        sampleNameMapFromGenomicsDBImport = GenomicsDBImport.loadSampleNameMapFile(sampleNameMapLocation);
        gvcfToHeaderSampleMap = loadGvcfToHeaderSampleMap();
    }

    /**
     * Reads and validates the optional GVCF-path to header-sample-name mapping file.
     *
     * <p>Each line must consist of exactly two non-empty, whitespace-separated fields: a GVCF path
     * and the sample name declared in that GVCF's header. The file must have one line per entry
     * of the sample name map, every path must occur in the sample name map, and no path may be
     * listed twice.
     *
     * <p>Requires {@code sampleNameMapFromGenomicsDBImport} to be loaded already.
     *
     * @return the parsed mapping, or {@code null} when no mapping file was supplied
     * @throws SampleNameFixingCannotProceedException if the file content is malformed or
     *         inconsistent with the sample name map
     * @throws UserException.CouldNotReadInputFile if the file cannot be read
     */
    private Map<Path, String> loadGvcfToHeaderSampleMap() {
        if (gvcfToHeaderSampleMapFile == null) {
            return null;
        }

        // Only the read itself can raise IOException, so keep the try block that narrow.
        final List<String> lines;
        try {
            lines = Files.readAllLines(IOUtils.getPath(gvcfToHeaderSampleMapFile));
        } catch (final IOException e) {
            throw new UserException.CouldNotReadInputFile("Error loading " + gvcfToHeaderSampleMapFile, e);
        }

        if (lines.size() != sampleNameMapFromGenomicsDBImport.size()) {
            throw new SampleNameFixingCannotProceedException("Number of lines in the provided --gvcf-to-header-sample-map-file (" + lines.size() + ") does not match the number of lines in the original --" + GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME + " file (" + sampleNameMapFromGenomicsDBImport.size() + ")");
        }

        final Map<Path, String> gvcfToHeaderSample = new HashMap<>();
        // GVCF paths from the sample name map that have not yet been matched by a line of this file.
        final Set<Path> unmatchedGvcfs = new HashSet<>(sampleNameMapFromGenomicsDBImport.values());

        for (final String line : lines) {
            final String[] fields = line.split("\\s+", -1);
            if (fields.length != 2) {
                throw new SampleNameFixingCannotProceedException("Malformed line in " + gvcfToHeaderSampleMapFile + " does not have exactly two fields: " + line);
            }
            if (fields[0].isEmpty() || fields[1].isEmpty()) {
                throw new SampleNameFixingCannotProceedException("Malformed line in " + gvcfToHeaderSampleMapFile + " contains an empty key or value: " + line);
            }

            final Path gvcfPath = IOUtils.getPath(fields[0]);
            final String headerSampleName = fields[1];

            // Check for duplicates BEFORE consuming the path from the unmatched set; otherwise a
            // repeated path would be misreported as missing from the sample name map.
            if (gvcfToHeaderSample.containsKey(gvcfPath)) {
                throw new SampleNameFixingCannotProceedException("Duplicate GVCF path specified in the provided --gvcf-to-header-sample-map-file: " + fields[0]);
            }
            if (!unmatchedGvcfs.remove(gvcfPath)) {
                throw new SampleNameFixingCannotProceedException("GVCF path " + fields[0] + " from provided --gvcf-to-header-sample-map-file file is not present in the original --" + GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME + " file");
            }
            gvcfToHeaderSample.put(gvcfPath, headerSampleName);
        }

        if (!unmatchedGvcfs.isEmpty()) {
            throw new SampleNameFixingCannotProceedException("Not all GVCF paths from the --" + GenomicsDBImport.SAMPLE_NAME_MAP_LONG_NAME + " were found in the provided --gvcf-to-header-sample-map-file");
        }
        return gvcfToHeaderSample;
    }

    /**
     * Verifies that the input VCF and the sample name map describe the same samples.
     * The comparison uses {@code CollectionUtils.isEqualCollection}, so element order is ignored.
     *
     * @param sampleNamesFromVcf sample names taken from the input VCF header
     * @param sampleNamesFromMap sample names taken from the sample name map file
     * @throws SampleNameFixingCannotProceedException if the two collections differ
     */
    private static void assertSampleNamesMatchInputVCF(ArrayList<String> sampleNamesFromVcf, List<String> sampleNamesFromMap) {
        final boolean sameSamples = CollectionUtils.isEqualCollection(sampleNamesFromVcf, sampleNamesFromMap);
        if (sameSamples) {
            return;
        }
        throw new SampleNameFixingCannotProceedException("The sample names in the provided sample name map file do not match the sample names in the provided vcf.\nIt is important this tool is run with exactly the same sample name map file that was used to create the vcf.");
    }

    /**
     * Computes the sample order to write into the corrected header.
     *
     * <p>The sample names from the sample name map are split, in map order, into consecutive
     * batches of {@code batchSize}. Each batch is then sorted independently and the batches are
     * concatenated:
     * <ul>
     *   <li>with a single reader thread, a batch is sorted by the names from the sample name map;</li>
     *   <li>with several reader threads, a batch is sorted by the sample names declared in the
     *       GVCF headers, and the result is reported using the sample-name-map names.</li>
     * </ul>
     *
     * @return sample names (as spelled in the sample name map) in the corrected order
     */
    private List<String> getBatchSortedList() {
        final List<String> sampleNames = new ArrayList<>(sampleNameMapFromGenomicsDBImport.keySet());
        final List<List<String>> batches = Lists.partition(sampleNames, batchSize);

        if (readerThreads == 1) {
            return batches.stream()
                    .flatMap(batch -> batch.stream().sorted())
                    .collect(Collectors.toList());
        }

        final List<String> sortedNames = new ArrayList<>(sampleNames.size());
        for (final List<String> batch : batches) {
            sortedNames.addAll(sortBatchByHeaderSampleName(batch));
        }
        return sortedNames;
    }

    /**
     * Orders one batch of sample-name-map names by the sample names declared in the matching
     * GVCF headers.
     *
     * @param batch sample names, as spelled in the sample name map, belonging to a single batch
     * @return the same names, ordered by their header-declared counterparts
     * @throws SampleNameFixingCannotProceedException if two samples in the batch declare the same
     *         header sample name, which makes the original order unrecoverable
     * @throws GATKException if an internal lookup unexpectedly fails
     */
    private List<String> sortBatchByHeaderSampleName(final List<String> batch) {
        final Map<String, String> mapNameToHeaderName = new HashMap<>();
        final Map<String, String> headerNameToMapName = new HashMap<>();

        for (final String mapName : batch) {
            final Path gvcfPath = sampleNameMapFromGenomicsDBImport.get(mapName);
            if (gvcfPath == null) {
                throw new GATKException("Hash lookup failed for sample " + mapName);
            }
            final String headerName = gvcfToHeaderSampleMap.get(gvcfPath);
            if (headerName == null) {
                throw new GATKException("Hash lookup failed for path " + gvcfPath);
            }
            if (headerNameToMapName.containsKey(headerName)) {
                throw new SampleNameFixingCannotProceedException("Duplicate sample name from the VCF headers detected within the same batch! This tool is currently unable to repair callsets in this scenario. Sample name was: " + headerName + " (as declared in the vcf header)");
            }
            if (mapNameToHeaderName.containsKey(mapName)) {
                throw new GATKException("Duplicate sample name from the sample name map file (" + mapName + ") detected within a batch. This should never happen!");
            }
            mapNameToHeaderName.put(mapName, headerName);
            headerNameToMapName.put(headerName, mapName);
        }

        // Translate to header names, sort on those, then translate back to sample-name-map names.
        return batch.stream()
                .map(mapNameToHeaderName::get)
                .sorted()
                .map(headerNameToMapName::get)
                .collect(Collectors.toList());
    }

    /**
     * Passes each variant record straight to the output writer; the record itself is not
     * modified here. The contexts other than {@code variantContext} are unused.
     */
    @Override
    public void apply(final VariantContext variantContext, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
        writer.add(variantContext);
    }

    /**
     * Closes the output writer, if one was opened, and reports completion.
     *
     * The completion message is only logged when a writer actually exists, so a run that
     * failed before the output was opened no longer claims to have finished writing.
     */
    @Override
    public void closeTool() {
        if (writer == null) {
            // onTraversalStart() never got as far as opening the output; nothing was written.
            return;
        }
        writer.close();
        logger.info("Finished writing the new vcf with corrected sample names.");
    }

    /**
     * Prints a warning describing when this tool is applicable and asks the user to confirm
     * on standard input. Does nothing when the prompt has been disabled via the skip flag.
     *
     * <p>Any answer other than "y" (case-insensitive, surrounding whitespace ignored) prints
     * "Aborting" and terminates the JVM with exit status 1.
     *
     * @throws SampleNameFixingCannotProceedException if no answer can be read because standard
     *         input is exhausted (for example in a non-interactive run)
     */
    private void assertThatTheyReallyWantToProceed() {
        if (skipCommandLinePrompt) {
            return;
        }

        // Closing the scanner also closes System.in, as the previous implementation did.
        try (final Scanner scanner = new Scanner(System.in)) {
            System.out.println(
                    "\n\nYou are about to create a new VCF that has its header corrected for the sample swapping bug described in https://github.com/broadinstitute/gatk/issues/3682.\n"
                    + "You should be certain you want to do this before proceeding.\n"
                    + "If the following description does not apply to your VCF then the newly generated vcf will be \n\n"
                    + " \t\tHORRIBLY CORRUPTED: by having its sample names shuffled so that the genotypes don't correspond to the correct samples\n\n"
                    + "1: your vcf was generated using a GenomicsDBImport released before gatk version 4.beta.6\n"
                    + "2: you set --batchSize != 0 when running GenomicsDBImport\n"
                    + "3: your callset was imported in multiple batches, i.e. your number of samples > batchSize\n"
                    + "4: you supplied the exact same sampleNameMap file and batch size you used in the initial GenomicsDBImport\n"
                    + "or:\n"
                    + "1. you ran GenomicsDBImport with --readerThreads > 1, and at least one sample name as declared\n"
                    + "   in a GVCF header did not match the sample name specified for that file in the sample name map file\n"
                    + "   provided to GenomicsDBImport\n\n"
                    + "If you don't know if this applies to you, please contact GATK support at https://gatkforums.broadinstitute.org/gatk/ for assistance\n"
                    + "Would you like to proceed? You can rerun with --skipPrompt to skip this check (y/N)");

            // Without this guard nextLine() would fail with a bare NoSuchElementException at EOF.
            if (!scanner.hasNextLine()) {
                throw new SampleNameFixingCannotProceedException("No response could be read from standard input. Rerun with --" + SKIP_PROMPT_LONG_NAME + " to skip the confirmation prompt.");
            }

            final String answer = scanner.nextLine().trim();
            if (!answer.equalsIgnoreCase("y")) {
                System.out.println("Aborting");
                System.exit(1);
            }
        }
    }
}
