package com.google.cloud.genomics.dataflow.pipelines;

import com.google.api.client.util.Strings;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.Default;
import com.google.cloud.dataflow.sdk.options.Description;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.transforms.Filter;
import com.google.cloud.dataflow.sdk.transforms.ParDo;
import com.google.cloud.dataflow.sdk.transforms.SerializableFunction;
import com.google.cloud.dataflow.sdk.transforms.View;
import com.google.cloud.dataflow.sdk.transforms.join.CoGbkResult;
import com.google.cloud.dataflow.sdk.transforms.join.CoGroupByKey;
import com.google.cloud.dataflow.sdk.transforms.join.KeyedPCollectionTuple;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollectionView;
import com.google.cloud.dataflow.sdk.values.TupleTag;
import com.google.cloud.genomics.dataflow.coders.GenericJsonCoder;
import com.google.cloud.genomics.dataflow.functions.VariantFunctions;
import com.google.cloud.genomics.dataflow.functions.verifybamid.LikelihoodFn;
import com.google.cloud.genomics.dataflow.functions.verifybamid.ReadFunctions;
import com.google.cloud.genomics.dataflow.functions.verifybamid.Solver;
import com.google.cloud.genomics.dataflow.model.AlleleFreq;
import com.google.cloud.genomics.dataflow.model.ReadBaseQuality;
import com.google.cloud.genomics.dataflow.model.ReadBaseWithReference;
import com.google.cloud.genomics.dataflow.model.ReadCounts;
import com.google.cloud.genomics.dataflow.model.ReadQualityCount;
import com.google.cloud.genomics.dataflow.pipelines.CalculateCoverage;
import com.google.cloud.genomics.dataflow.readers.ReadGroupStreamer;
import com.google.cloud.genomics.dataflow.readers.VariantStreamer;
import com.google.cloud.genomics.dataflow.utils.CallSetNamesOptions;
import com.google.cloud.genomics.dataflow.utils.GCSOutputOptions;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;
import com.google.cloud.genomics.dataflow.utils.ShardOptions;
import com.google.cloud.genomics.utils.GenomicsUtils;
import com.google.cloud.genomics.utils.OfflineAuth;
import com.google.cloud.genomics.utils.ShardBoundary;
import com.google.cloud.genomics.utils.ShardUtils;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.Lists;
import com.google.common.collect.Multiset;
import com.google.common.collect.UnmodifiableIterator;
import com.google.genomics.v1.Position;
import com.google.genomics.v1.Read;
import com.google.genomics.v1.StreamVariantsRequest;
import com.google.genomics.v1.Variant;
import com.google.protobuf.ListValue;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.security.GeneralSecurityException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId.class */
public class VerifyBamId {
    // Pipeline instance; assigned in main() via Pipeline.create(pipelineOptions).
    private static Pipeline p;
    // Parsed and validated command-line options; assigned in main().
    private static Options pipelineOptions;
    // Genomics API credentials; assigned in main() from the pipeline options.
    private static OfflineAuth auth;
    // Empty string. Passed to combineReads() as the sampling prefix; this file also reuses it
    // wherever an empty-string constant is needed (option defaults, the seed element in main()).
    private static final String HASH_PREFIX = "";
    // Field mask handed to VariantStreamer in main() to select which variant fields are streamed.
    private static final String VARIANT_FIELDS = "variants(alternateBases,filter,info,quality,referenceBases,referenceName,start)";

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId$FilterFreq.class */
    public static class FilterFreq implements SerializableFunction<KV<Position, AlleleFreq>, Boolean> {
        private final double minFreq;

        public FilterFreq(double d) {
            this.minFreq = d;
        }

        public Boolean apply(KV<Position, AlleleFreq> kv) {
            double refFreq = ((AlleleFreq) kv.getValue()).getRefFreq();
            return refFreq >= this.minFreq && 1.0d - refFreq >= this.minFreq;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId$GetAlleleFreq.class */
    public static class GetAlleleFreq extends DoFn<Variant, KV<Position, AlleleFreq>> {
        GetAlleleFreq() {
        }

        public void processElement(DoFn<Variant, KV<Position, AlleleFreq>>.ProcessContext processContext) throws Exception {
            ListValue listValue = (ListValue) ((Variant) processContext.element()).getInfo().get("AF");
            if (listValue == null || listValue.getValuesCount() <= 0) {
                throw new IllegalArgumentException("Variant " + ((Variant) processContext.element()).getId() + " does not have allele frequency information stored in INFO field AF.");
            }
            Position build = Position.newBuilder().setPosition(((Variant) processContext.element()).getStart()).setReferenceName(((Variant) processContext.element()).getReferenceName()).build();
            AlleleFreq alleleFreq = new AlleleFreq();
            alleleFreq.setRefFreq(Double.parseDouble(listValue.getValues(0).getStringValue()));
            alleleFreq.setAltBases(((Variant) processContext.element()).getAlternateBasesList());
            alleleFreq.setRefBases(((Variant) processContext.element()).getReferenceBases());
            processContext.output(KV.of(build, alleleFreq));
        }
    }

    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId$Maximizer.class */
    static class Maximizer extends DoFn<Object, String> {
        private final PCollectionView<Map<Position, ReadCounts>> view;
        private static final double ABS_ERR = 1.0E-5d;
        private static final double REL_ERR = 1.0E-4d;
        private static final int MAX_EVAL = 1000;
        private static final int MAX_ITER = 1000;
        private static final double GRID_STEP = 0.001d;

        public Maximizer(PCollectionView<Map<Position, ReadCounts>> pCollectionView) {
            this.view = pCollectionView;
        }

        public void processElement(DoFn<Object, String>.ProcessContext processContext) throws Exception {
            for (float f : new float[]{0.1f, 0.05f, 0.01f, 0.005f, 0.001f}) {
                processContext.output(Float.toString(f) + ": " + Double.toString(Solver.maximize(new LikelihoodFn((Map) processContext.sideInput(this.view)), 0.0d, 0.5d, f, REL_ERR, ABS_ERR, 1000, 1000)));
            }
        }
    }

    /**
     * Command-line options for the VerifyBamId pipeline, in addition to those inherited from
     * {@link CallSetNamesOptions}, {@link ShardOptions} and {@link GCSOutputOptions}.
     */
    public interface Options extends CallSetNamesOptions, ShardOptions, GCSOutputOptions {
        /** Default value of the {@code variantSetId} option. */
        String DEFAULT_VARIANTSET = "10473108253681171589";

        /** Static helpers operating on {@link Options}. */
        class Methods {
            /**
             * Validates the options by delegating to the GCS output option validation.
             *
             * @param options the options to validate
             */
            public static void validateOptions(Options options) {
                GCSOutputOptions.Methods.validateOptions(options);
            }
        }

        /** Comma-delimited ReadGroupSet IDs; empty by default. */
        @Description("A comma delimited list of the IDs of the Google Genomics ReadGroupSets this pipeline is working with. Default (empty) indicates all ReadGroupSets in InputDatasetId.  This or InputDatasetId must be set.  InputDatasetId overrides ReadGroupSetIds (if InputDatasetId is set, this field will be ignored).")
        @Default.String("")
        String getReadGroupSetIds();

        void setReadGroupSetIds(String str);

        /** Dataset ID to read from; empty by default. */
        @Description("The ID of the Google Genomics Dataset that the pipeline will get its input reads from.  Default (empty) means to use ReadGroupSetIds and VariantSetIds instead.  This or ReadGroupSetIds and VariantSetIds must be set.  InputDatasetId overrides ReadGroupSetIds and VariantSetIds (if this field is set, ReadGroupSetIds and VariantSetIds will be ignored).")
        @Default.String("")
        String getInputDatasetId();

        void setInputDatasetId(String str);

        /** VariantSet ID; overrides the inherited option to supply a default. */
        @Override // com.google.cloud.genomics.dataflow.utils.CallSetNamesOptions
        @Description("The ID of the Google Genomics VariantSet this pipeline is working with.  It assumes the variant set has INFO field 'AF' from which it retrieves the allele frequency for the variant, such as 1,000 Genomes phase 1 or phase 3 variants.  Defaults to the 1,000 Genomes phase 1 VariantSet with id 10473108253681171589.")
        @Default.String(DEFAULT_VARIANTSET)
        String getVariantSetId();

        @Override // com.google.cloud.genomics.dataflow.utils.CallSetNamesOptions
        void setVariantSetId(String str);

        /** Minimum allele frequency threshold; defaults to 0.01. */
        @Default.Double(0.01d)
        @Description("The minimum allele frequency to use in analysis.  Defaults to 0.01.")
        double getMinFrequency();

        void setMinFrequency(double d);

        /** Fraction of positions to sample; defaults to 0.01. */
        @Default.Double(0.01d)
        @Description("The fraction of positions to check.  Defaults to 0.01.")
        double getSamplingFraction();

        void setSamplingFraction(double d);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId$PileupAndJoinReads.class */
    public static class PileupAndJoinReads extends DoFn<KV<Position, CoGbkResult>, KV<Position, ReadCounts>> {
        private final TupleTag<ReadBaseQuality> readCountsTag;
        private final TupleTag<AlleleFreq> refFreqTag;

        public PileupAndJoinReads(TupleTag<ReadBaseQuality> tupleTag, TupleTag<AlleleFreq> tupleTag2) {
            this.readCountsTag = tupleTag;
            this.refFreqTag = tupleTag2;
        }

        public void processElement(DoFn<KV<Position, CoGbkResult>, KV<Position, ReadCounts>>.ProcessContext processContext) throws Exception {
            AlleleFreq alleleFreq = (AlleleFreq) ((CoGbkResult) ((KV) processContext.element()).getValue()).getOnly(this.refFreqTag, (Object) null);
            if (alleleFreq == null || alleleFreq.getAltBases() == null) {
                return;
            }
            if (alleleFreq.getAltBases().size() != 1) {
                throw new IllegalArgumentException("Wrong number (" + alleleFreq.getAltBases().size() + ") of alternate bases for Position " + ((KV) processContext.element()).getKey());
            }
            Iterable<ReadBaseQuality> all = ((CoGbkResult) ((KV) processContext.element()).getValue()).getAll(this.readCountsTag);
            ImmutableMultiset.Builder builder = ImmutableMultiset.builder();
            for (ReadBaseQuality readBaseQuality : all) {
                ReadQualityCount.Base base = alleleFreq.getRefBases().equals(readBaseQuality.getBase()) ? ReadQualityCount.Base.REF : alleleFreq.getAltBases().get(0).equals(readBaseQuality.getBase()) ? ReadQualityCount.Base.NONREF : ReadQualityCount.Base.OTHER;
                ReadQualityCount readQualityCount = new ReadQualityCount();
                readQualityCount.setBase(base);
                readQualityCount.setQuality(readBaseQuality.getQuality());
                builder.add(readQualityCount);
            }
            ReadCounts readCounts = new ReadCounts();
            readCounts.setRefFreq(alleleFreq.getRefFreq());
            UnmodifiableIterator it = builder.build().entrySet().iterator();
            while (it.hasNext()) {
                ReadQualityCount readQualityCount2 = (ReadQualityCount) ((Multiset.Entry) it.next()).getElement();
                readQualityCount2.setCount(r0.getCount());
                readCounts.addReadQualityCount(readQualityCount2);
            }
            processContext.output(KV.of(((KV) processContext.element()).getKey(), readCounts));
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId$SampleReads.class */
    public static class SampleReads implements SerializableFunction<KV<Position, ReadBaseQuality>, Boolean> {
        private final double samplingFraction;
        private final String samplingPrefix;

        public SampleReads(double d, String str) {
            this.samplingFraction = d;
            this.samplingPrefix = str;
        }

        public Boolean apply(KV<Position, ReadBaseQuality> kv) {
            if (this.samplingFraction == 1.0d) {
                return true;
            }
            Position position = (Position) kv.getKey();
            try {
                try {
                    byte[] digest = MessageDigest.getInstance("MD5").digest((this.samplingPrefix + position.getReferenceName() + ":" + position.getPosition() + ":" + position.getReverseStrand()).getBytes("UTF-8"));
                    if (digest.length != 16) {
                        throw new AssertionError("MD5 should return 128 bits");
                    }
                    ByteBuffer allocate = ByteBuffer.allocate(64);
                    allocate.put(Arrays.copyOf(digest, 64));
                    return Boolean.valueOf(((((double) allocate.getLong(0)) / (-9.223372036854776E18d)) + 1.0d) * 0.5d < this.samplingFraction);
                } catch (NoSuchAlgorithmException e) {
                    throw new AssertionError("MD5 not available - should not happen");
                }
            } catch (UnsupportedEncodingException e2) {
                throw new AssertionError("UTF-8 not available - should not happen");
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/VerifyBamId$SplitReads.class */
    public static class SplitReads extends DoFn<Read, KV<Position, ReadBaseQuality>> {
        SplitReads() {
        }

        public void processElement(DoFn<Read, KV<Position, ReadBaseQuality>>.ProcessContext processContext) throws Exception {
            List<ReadBaseWithReference> extractReadBases = ReadFunctions.extractReadBases((Read) processContext.element());
            if (extractReadBases.isEmpty()) {
                return;
            }
            for (ReadBaseWithReference readBaseWithReference : extractReadBases) {
                processContext.output(KV.of(readBaseWithReference.getRefPosition(), readBaseWithReference.getRbq()));
            }
        }
    }

    /**
     * Entry point: builds and runs the pipeline.
     *
     * <p>Steps: parse and validate options; resolve the ReadGroupSet IDs (from the dataset if
     * one is given, otherwise from the comma-delimited option); stream reads and variants;
     * derive allele frequencies; join them with the sampled read bases per position; run the
     * maximizer over the resulting map and write its output lines to the configured location.
     *
     * @param strArr command-line arguments, parsed into {@link Options}
     * @throws IllegalArgumentException if neither input option is set, if no ReadGroupSet IDs
     *     are resolved, or if the first ReadGroupSet has no associated ReferenceSet
     * @throws GeneralSecurityException if obtaining credentials fails
     * @throws IOException if a Genomics API lookup fails
     */
    public static void main(String[] strArr) throws GeneralSecurityException, IOException {
        PipelineOptionsFactory.register(Options.class);
        pipelineOptions = PipelineOptionsFactory.fromArgs(strArr).withValidation().as(Options.class);
        Options.Methods.validateOptions(pipelineOptions);
        StreamVariantsRequest requestPrototype = CallSetNamesOptions.Methods.getRequestPrototype(pipelineOptions);
        auth = GenomicsOptions.Methods.getGenomicsAuth(pipelineOptions);
        p = Pipeline.create(pipelineOptions);
        p.getCoderRegistry().setFallbackCoderProvider(GenericJsonCoder.PROVIDER);

        String inputDatasetId = pipelineOptions.getInputDatasetId();
        String readGroupSetIdsOption = pipelineOptions.getReadGroupSetIds();
        if (inputDatasetId.isEmpty() && readGroupSetIdsOption.isEmpty()) {
            throw new IllegalArgumentException("InputDatasetId or ReadGroupSetIds must be specified");
        }

        // The dataset, when given, takes precedence over the explicit ID list.
        List<String> readGroupSetIds;
        if (inputDatasetId.isEmpty()) {
            readGroupSetIds = Lists.newArrayList(readGroupSetIdsOption.split(","));
        } else {
            readGroupSetIds = GenomicsUtils.getReadGroupSetIds(inputDatasetId, auth);
        }
        // Guard the get(0) below: an empty dataset, or an option such as ",", yields no IDs.
        if (readGroupSetIds.isEmpty()) {
            throw new IllegalArgumentException("No ReadGroupSetIds found for the given InputDatasetId or ReadGroupSetIds");
        }

        String firstReadGroupSetId = readGroupSetIds.get(0);
        String referenceSetId = GenomicsUtils.getReferenceSetId(firstReadGroupSetId, auth);
        if (Strings.isNullOrEmpty(referenceSetId)) {
            throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + firstReadGroupSetId + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
        }

        // Reads: check every ReadGroupSet against the first one's reference set, then stream.
        PCollection<Read> reads = p.begin()
                .apply(Create.of(readGroupSetIds))
                .apply(ParDo.of(new CalculateCoverage.CheckMatchingReferenceSet(referenceSetId, auth)))
                .apply(new ReadGroupStreamer(auth, ShardBoundary.Requirement.STRICT, null, ShardUtils.SexChromosomeFilter.INCLUDE_XY));

        // Variants: shard either all references or only the requested ones.
        List<StreamVariantsRequest> variantRequests = pipelineOptions.isAllReferences()
                ? ShardUtils.getVariantRequests(requestPrototype, ShardUtils.SexChromosomeFilter.INCLUDE_XY, pipelineOptions.getBasesPerShard(), auth)
                : ShardUtils.getVariantRequests(requestPrototype, pipelineOptions.getBasesPerShard(), pipelineOptions.getReferences());
        PCollection<Variant> variants = p
                .apply(Create.of(variantRequests))
                .apply(new VariantStreamer(auth, ShardBoundary.Requirement.STRICT, VARIANT_FIELDS));
        PCollection<KV<Position, AlleleFreq>> refFreq = getFreq(variants, pipelineOptions.getMinFrequency());

        PCollection<KV<Position, ReadCounts>> readCounts =
                combineReads(reads, pipelineOptions.getSamplingFraction(), HASH_PREFIX, refFreq);
        PCollectionView<Map<Position, ReadCounts>> view = readCounts.apply(View.<Position, ReadCounts>asMap());

        // A single dummy element triggers one Maximizer invocation over the side-input map.
        p.begin()
                .apply(Create.of(""))
                .apply(ParDo.of(new Maximizer(view)).withSideInputs(view))
                .apply(TextIO.Write.to(pipelineOptions.getOutput()).named("WriteOutput").withoutSharding());
        p.run();
    }

    /**
     * Filters the variants and converts the survivors into per-position allele frequencies.
     *
     * <p>The variants pass through four named predicate filters from {@link VariantFunctions},
     * are mapped with {@link GetAlleleFreq}, and are finally filtered with {@link FilterFreq}.
     *
     * @param pCollection the variants to process
     * @param d minimum frequency passed to {@link FilterFreq}
     * @return allele frequency records keyed by position
     */
    static PCollection<KV<Position, AlleleFreq>> getFreq(PCollection<Variant> pCollection, double d) {
        PCollection<Variant> passing =
                pCollection.apply(Filter.byPredicate(VariantFunctions.IS_PASSING).named("PassingFilter"));
        PCollection<Variant> onChromosome =
                passing.apply(Filter.byPredicate(VariantFunctions.IS_ON_CHROMOSOME).named("OnChromosomeFilter"));
        PCollection<Variant> notLowQuality =
                onChromosome.apply(Filter.byPredicate(VariantFunctions.IS_NOT_LOW_QUALITY).named("NotLowQualityFilter"));
        PCollection<Variant> snps =
                notLowQuality.apply(Filter.byPredicate(VariantFunctions.IS_SINGLE_ALTERNATE_SNP).named("SNPFilter"));

        PCollection<KV<Position, AlleleFreq>> frequencies = snps.apply(ParDo.of(new GetAlleleFreq()));
        return frequencies.apply(Filter.byPredicate(new FilterFreq(d)));
    }

    /**
     * Filters and samples the reads, splits them into per-position bases, and joins them by
     * position with the allele frequencies to produce per-position read counts.
     *
     * @param pCollection the reads to process
     * @param d sampling fraction passed to {@link SampleReads}
     * @param str sampling prefix passed to {@link SampleReads}
     * @param pCollection2 allele frequency records keyed by position
     * @return read counts keyed by position, as emitted by {@link PileupAndJoinReads}
     */
    static PCollection<KV<Position, ReadCounts>> combineReads(PCollection<Read> pCollection, double d, String str, PCollection<KV<Position, AlleleFreq>> pCollection2) {
        PCollection<KV<Position, ReadBaseQuality>> sampledBases = pCollection
                .apply(Filter.byPredicate(ReadFunctions.IS_ON_CHROMOSOME).named("IsOnChromosome"))
                .apply(Filter.byPredicate(ReadFunctions.IS_NOT_QC_FAILURE).named("IsNotQCFailure"))
                .apply(Filter.byPredicate(ReadFunctions.IS_NOT_DUPLICATE).named("IsNotDuplicate"))
                .apply(Filter.byPredicate(ReadFunctions.IS_PROPER_PLACEMENT).named("IsProperPlacement"))
                .apply(ParDo.of(new SplitReads()))
                .apply(Filter.byPredicate(new SampleReads(d, str)));

        // Typed tags keep the co-group result type-safe; raw tags erase the join's value types.
        TupleTag<ReadBaseQuality> readCountsTag = new TupleTag<ReadBaseQuality>();
        TupleTag<AlleleFreq> refFreqTag = new TupleTag<AlleleFreq>();

        PCollection<KV<Position, CoGbkResult>> joined = KeyedPCollectionTuple
                .of(readCountsTag, sampledBases)
                .and(refFreqTag, pCollection2)
                .apply(CoGroupByKey.<Position>create());

        return joined.apply(ParDo.of(new PileupAndJoinReads(readCountsTag, refFreqTag)));
    }
}
