package com.google.cloud.genomics.dataflow.pipelines;

import com.google.api.client.util.Strings;
import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.AnnotationSet;
import com.google.api.services.genomics.model.BatchCreateAnnotationsRequest;
import com.google.api.services.genomics.model.Position;
import com.google.cloud.genomics.dataflow.coders.GenericJsonCoder;
import com.google.cloud.genomics.dataflow.model.PosRgsMq;
import com.google.cloud.genomics.dataflow.readers.ReadGroupStreamer;
import com.google.cloud.genomics.dataflow.readers.bam.BAMShard;
import com.google.cloud.genomics.dataflow.readers.bam.ReadBAMTransform;
import com.google.cloud.genomics.dataflow.readers.bam.ReaderOptions;
import com.google.cloud.genomics.dataflow.readers.bam.ShardingPolicy;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;
import com.google.cloud.genomics.dataflow.utils.ShardOptions;
import com.google.cloud.genomics.utils.Contig;
import com.google.cloud.genomics.utils.GenomicsFactory;
import com.google.cloud.genomics.utils.GenomicsUtils;
import com.google.cloud.genomics.utils.OfflineAuth;
import com.google.cloud.genomics.utils.RetryPolicy;
import com.google.cloud.genomics.utils.ShardBoundary;
import com.google.cloud.genomics.utils.ShardUtils;
import com.google.common.collect.Lists;
import com.google.genomics.v1.CigarUnit;
import com.google.genomics.v1.Read;
import htsjdk.samtools.ValidationStringency;
import java.io.IOException;
import java.net.URISyntaxException;
import java.security.GeneralSecurityException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.CoderRegistry;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.Validation;
import org.apache.beam.sdk.transforms.ApproximateQuantiles;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

/* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage.class */
public class CalculateCoverage {
    // Field selector string handed to ReadGroupStreamer in main(); it names the alignment and
    // readGroupSetId fields of each read.
    private static final String READ_FIELDS = "alignments(alignment,readGroupSetId)";
    // Parsed command-line options. Assigned at the top of main(); also read by
    // createAnnotationSet() and by the BAM sharding policy built in main().
    private static Options options;
    // The pipeline being assembled; created from the options in main().
    private static Pipeline p;
    // Credentials derived from the options in main() and passed to the Genomics helpers.
    private static OfflineAuth auth;

    /* renamed from: com.google.cloud.genomics.dataflow.pipelines.CalculateCoverage$2, reason: invalid class name */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$2.class */
    // Compiler-synthesized lookup table backing a switch over CigarUnit.Operation.
    // The array is indexed by the enum constant's ordinal() and holds the case number
    // (1..6) that CoverageCounts.processElement switches on. Constants that are not listed
    // here keep the array default of 0 and therefore match no case.
    static /* synthetic */ class AnonymousClass2 {
        static final /* synthetic */ int[] $SwitchMap$com$google$genomics$v1$CigarUnit$Operation = new int[CigarUnit.Operation.values().length];

        static {
            // Each assignment sits in its own try block so that a constant missing at link
            // time (NoSuchFieldError) only leaves that one entry at 0 instead of aborting
            // the whole initializer.
            try {
                $SwitchMap$com$google$genomics$v1$CigarUnit$Operation[CigarUnit.Operation.ALIGNMENT_MATCH.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
                // Intentionally ignored: entry stays 0.
            }
            try {
                $SwitchMap$com$google$genomics$v1$CigarUnit$Operation[CigarUnit.Operation.SEQUENCE_MATCH.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
                // Intentionally ignored: entry stays 0.
            }
            try {
                $SwitchMap$com$google$genomics$v1$CigarUnit$Operation[CigarUnit.Operation.SEQUENCE_MISMATCH.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
                // Intentionally ignored: entry stays 0.
            }
            try {
                $SwitchMap$com$google$genomics$v1$CigarUnit$Operation[CigarUnit.Operation.PAD.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
                // Intentionally ignored: entry stays 0.
            }
            try {
                $SwitchMap$com$google$genomics$v1$CigarUnit$Operation[CigarUnit.Operation.DELETE.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
                // Intentionally ignored: entry stays 0.
            }
            try {
                $SwitchMap$com$google$genomics$v1$CigarUnit$Operation[CigarUnit.Operation.SKIP.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
                // Intentionally ignored: entry stays 0.
            }
        }
    }

    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$CalculateCoverageMean.class */
    /**
     * Composite transform turning reads into one mean-coverage value per {@link PosRgsMq} key.
     *
     * <p>It chains three steps: {@link CoverageCounts} (per-read, per-bucket base counts),
     * a per-key {@link SumCounts} combine, and {@link CoverageMeans}.
     */
    public static class CalculateCoverageMean extends PTransform<PCollection<Read>, PCollection<KV<PosRgsMq, Double>>> {
        private final long bucketWidth;

        /**
         * @param width bucket width forwarded to the {@link CoverageCounts} step
         */
        public CalculateCoverageMean(long width) {
            this.bucketWidth = width;
        }

        /**
         * Applies the count, sum and mean steps to the given reads.
         *
         * @param reads the reads to measure
         * @return mean coverage keyed by {@link PosRgsMq}
         */
        public PCollection<KV<PosRgsMq, Double>> expand(PCollection<Read> reads) {
            return reads
                    // Step 1: emit the number of covered bases per bucket for every read.
                    .apply(ParDo.of(new CoverageCounts(this.bucketWidth)))
                    // Step 2: add up the counts that share a key.
                    .apply(Combine.perKey(new SumCounts()))
                    // Step 3: turn each summed count into a mean.
                    .apply(ParDo.of(new CoverageMeans()));
        }
    }

    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$CalculateQuantiles.class */
    /**
     * Composite transform computing approximate quantiles of the mean coverage for every
     * (position, mapping-quality) pair.
     *
     * <p>The read group set id is first dropped from the key ({@link RemoveRgsId}), quantiles
     * are computed per remaining key, and the mapping quality is then moved from the key into
     * the value ({@link MoveMapping}).
     */
    public static class CalculateQuantiles extends PTransform<PCollection<KV<PosRgsMq, Double>>, PCollection<KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>>> {
        private final int numQuantiles;

        /**
         * @param quantileCount number of quantiles passed to {@code ApproximateQuantiles.perKey}
         */
        public CalculateQuantiles(int quantileCount) {
            this.numQuantiles = quantileCount;
        }

        /**
         * Applies the re-key, quantile and re-shape steps.
         *
         * @param means mean coverage keyed by {@link PosRgsMq}
         * @return per position, the mapping quality paired with its list of quantiles
         */
        public PCollection<KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>> expand(PCollection<KV<PosRgsMq, Double>> means) {
            return means
                    .apply(ParDo.of(new RemoveRgsId()))
                    .apply(ApproximateQuantiles.perKey(this.numQuantiles))
                    .apply(ParDo.of(new MoveMapping()));
        }
    }

    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$CheckMatchingReferenceSet.class */
    /**
     * Pass-through filter over read group set ids that fails the pipeline when an id has no
     * reference set, or a reference set different from the expected one.
     */
    static class CheckMatchingReferenceSet extends DoFn<String, String> {
        private final String referenceSetIdForAllReadGroupSets;
        private final OfflineAuth auth;

        /**
         * @param expectedReferenceSetId reference set id every input must be associated with
         * @param offlineAuth credentials used for the reference set lookup
         */
        public CheckMatchingReferenceSet(String expectedReferenceSetId, OfflineAuth offlineAuth) {
            this.referenceSetIdForAllReadGroupSets = expectedReferenceSetId;
            this.auth = offlineAuth;
        }

        /**
         * Looks up the reference set of the incoming read group set id and re-emits the id
         * unchanged when it matches the expected reference set.
         *
         * @throws IllegalArgumentException if the lookup yields null/empty or a different id
         */
        @DoFn.ProcessElement
        public void processElement(DoFn<String, String>.ProcessContext processContext) throws Exception {
            final String readGroupSetId = processContext.element();
            final String foundReferenceSetId = GenomicsUtils.getReferenceSetId(readGroupSetId, this.auth);

            final boolean missing = Strings.isNullOrEmpty(foundReferenceSetId);
            if (missing) {
                throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + readGroupSetId + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
            }

            final boolean sameAsExpected = this.referenceSetIdForAllReadGroupSets.equals(foundReferenceSetId);
            if (sameAsExpected) {
                processContext.output(readGroupSetId);
                return;
            }
            throw new IllegalArgumentException("ReadGroupSets in given input must all be associated with the same ReferenceSetId : " + foundReferenceSetId);
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$CoverageCounts.class */
    /**
     * Splits every read into the fixed-width buckets it overlaps and emits, per bucket, the
     * number of reference bases the read covers there.
     *
     * <p>Each bucket produces two outputs with the same count: one keyed by the read's
     * mapping-quality class (L below {@code LOW_MQ}, M from {@code LOW_MQ} up to but excluding
     * {@code HIGH_MQ}, H otherwise) and one keyed by {@code MappingQuality.A}.
     */
    public static class CoverageCounts extends DoFn<Read, KV<PosRgsMq, Long>> {
        private static final int LOW_MQ = 10;
        private static final int HIGH_MQ = 30;
        private final long bucketWidth;

        /**
         * @param j bucket width; must be strictly positive
         * @throws IllegalArgumentException if {@code j} is zero or negative. A zero width would
         *     divide by zero and a negative width would keep the bucket loop from ever
         *     reaching the end of the read.
         */
        public CoverageCounts(long j) {
            if (j <= 0) {
                throw new IllegalArgumentException("bucketWidth must be positive, got " + j);
            }
            this.bucketWidth = j;
        }

        /**
         * Emits the per-bucket covered-base counts for one read. Reads whose computed
         * reference length is zero produce no output.
         */
        @DoFn.ProcessElement
        public void processElement(DoFn<Read, KV<PosRgsMq, Long>>.ProcessContext processContext) {
            Read read = processContext.element();
            // Guard kept from the original implementation.
            if (read.getAlignment() == null) {
                return;
            }

            long readStart = read.getAlignment().getPosition().getPosition();
            long readEnd = readStart + referenceLength(read);
            String referenceName = read.getAlignment().getPosition().getReferenceName();
            String readGroupSetId = read.getReadGroupSetId();
            PosRgsMq.MappingQuality quality = classify(read.getAlignment().getMappingQuality());

            // Start of the bucket containing readStart.
            long bucketStart = (readStart / this.bucketWidth) * this.bucketWidth;
            long cursor = readStart;
            while (cursor < readEnd) {
                // Bases of this read inside the current bucket: up to the bucket end or the
                // read end, whichever comes first.
                long covered = Math.min(bucketStart + this.bucketWidth, readEnd) - cursor;
                cursor += covered;

                Position bucket = new Position().setPosition(Long.valueOf(bucketStart)).setReferenceName(referenceName);
                Long count = Long.valueOf(covered);
                processContext.output(KV.of(new PosRgsMq(bucket, readGroupSetId, quality), count));
                processContext.output(KV.of(new PosRgsMq(bucket, readGroupSetId, PosRgsMq.MappingQuality.A), count));

                bucketStart += this.bucketWidth;
            }
        }

        /** Sums the lengths of the CIGAR operations that this pipeline counts towards coverage. */
        private static long referenceLength(Read read) {
            long length = 0;
            for (CigarUnit cigarUnit : read.getAlignment().getCigarList()) {
                switch (cigarUnit.getOperation()) {
                    case ALIGNMENT_MATCH:
                    case SEQUENCE_MATCH:
                    case SEQUENCE_MISMATCH:
                    case PAD:
                    case DELETE:
                    case SKIP:
                        length += cigarUnit.getOperationLength();
                        break;
                    default:
                        // Every other operation contributes nothing.
                        break;
                }
            }
            return length;
        }

        /** Maps a numeric mapping quality onto the L / M / H classes. */
        private static PosRgsMq.MappingQuality classify(int mappingQuality) {
            if (mappingQuality < LOW_MQ) {
                return PosRgsMq.MappingQuality.L;
            }
            if (mappingQuality < HIGH_MQ) {
                return PosRgsMq.MappingQuality.M;
            }
            return PosRgsMq.MappingQuality.H;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$CoverageMeans.class */
    /**
     * Converts a summed base count per key into a mean by dividing it by the bucket width
     * taken from the pipeline {@link Options}.
     */
    public static class CoverageMeans extends DoFn<KV<PosRgsMq, Long>, KV<PosRgsMq, Double>> {
        CoverageMeans() {
        }

        /**
         * Emits the input key paired with {@code count / bucketWidth}.
         *
         * <p>The division is carried out in double precision. Dividing the {@code long} count
         * by the {@code int} width directly would truncate the quotient to a whole number
         * before it is widened to {@code Double}.
         */
        @DoFn.ProcessElement
        public void processElement(DoFn<KV<PosRgsMq, Long>, KV<PosRgsMq, Double>>.ProcessContext processContext) {
            KV<PosRgsMq, Long> summed = processContext.element();
            int bucketWidth = processContext.getPipelineOptions().as(Options.class).getBucketWidth();
            double mean = summed.getValue().doubleValue() / bucketWidth;
            processContext.output(KV.of(summed.getKey(), Double.valueOf(mean)));
        }
    }

    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$CreateAnnotations.class */
    /**
     * Builds one {@link Annotation} per position from the grouped quantile lists and, when
     * writing is enabled, uploads the annotations in batches.
     */
    static class CreateAnnotations extends DoFn<KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>>, Annotation> {
        /** Number of buffered annotations that triggers an upload. */
        private static final int BATCH_SIZE = 512;

        private final String asId;
        private final OfflineAuth auth;
        private final List<Annotation> currAnnotations = Lists.newArrayList();
        private final boolean write;

        /**
         * @param annotationSetId id stored on every annotation produced
         * @param offlineAuth credentials used when uploading
         * @param writeToApi whether annotations are buffered and uploaded in addition to
         *     being emitted
         */
        public CreateAnnotations(String annotationSetId, OfflineAuth offlineAuth, boolean writeToApi) {
            this.asId = annotationSetId;
            this.auth = offlineAuth;
            this.write = writeToApi;
        }

        /**
         * Creates the annotation for one position. Its info map holds, per mapping quality,
         * the quantiles rounded to six decimal places and rendered as strings. The annotation
         * is always emitted; it is also buffered for upload when writing is enabled.
         */
        @DoFn.ProcessElement
        public void processElement(DoFn<KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>>, Annotation>.ProcessContext processContext) throws GeneralSecurityException, IOException {
            Options pipelineOptions = processContext.getPipelineOptions().as(Options.class);
            KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>> grouped = processContext.element();
            Position bucket = grouped.getKey();

            Annotation annotation = new Annotation();
            annotation.setAnnotationSetId(this.asId);
            annotation.setStart(bucket.getPosition());
            annotation.setEnd(Long.valueOf(bucket.getPosition().longValue() + pipelineOptions.getBucketWidth()));
            annotation.setReferenceName(bucket.getReferenceName());
            annotation.setType("GENERIC");
            annotation.setInfo(new HashMap<String, List<Object>>());

            for (KV<PosRgsMq.MappingQuality, List<Double>> perQuality : grouped.getValue()) {
                List<Object> rendered = Lists.newArrayList();
                for (Double quantile : perQuality.getValue()) {
                    double rounded = Math.round(quantile.doubleValue() * 1000000.0d) / 1000000.0d;
                    rendered.add(Double.toString(rounded));
                }
                annotation.getInfo().put(perQuality.getKey().toString(), rendered);
            }

            if (this.write) {
                this.currAnnotations.add(annotation);
                if (this.currAnnotations.size() == BATCH_SIZE) {
                    batchCreateAnnotations();
                }
            }
            processContext.output(annotation);
        }

        /** Uploads whatever is still buffered at the end of a bundle. */
        @DoFn.FinishBundle
        public void finishBundle(DoFn<KV<Position, Iterable<KV<PosRgsMq.MappingQuality, List<Double>>>>, Annotation>.FinishBundleContext finishBundleContext) throws IOException, GeneralSecurityException {
            if (this.write && !this.currAnnotations.isEmpty()) {
                batchCreateAnnotations();
            }
        }

        /** Sends the buffered annotations in one batch request, then empties the buffer. */
        private void batchCreateAnnotations() throws IOException, GeneralSecurityException {
            BatchCreateAnnotationsRequest request = new BatchCreateAnnotationsRequest().setAnnotations(this.currAnnotations);
            RetryPolicy.nAttempts(4).execute(
                    GenomicsFactory.builder().build().fromOfflineAuth(this.auth).annotations().batchCreate(request));
            this.currAnnotations.clear();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$MoveMapping.class */
    /**
     * Re-shapes {@code ((position, mappingQuality), quantiles)} into
     * {@code (position, (mappingQuality, quantiles))}.
     */
    public static class MoveMapping extends DoFn<KV<KV<Position, PosRgsMq.MappingQuality>, List<Double>>, KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>> {
        MoveMapping() {
        }

        /** Moves the mapping quality out of the key and next to the quantile list. */
        @DoFn.ProcessElement
        public void processElement(DoFn<KV<KV<Position, PosRgsMq.MappingQuality>, List<Double>>, KV<Position, KV<PosRgsMq.MappingQuality, List<Double>>>>.ProcessContext processContext) {
            KV<KV<Position, PosRgsMq.MappingQuality>, List<Double>> input = processContext.element();
            KV<Position, PosRgsMq.MappingQuality> compositeKey = input.getKey();
            processContext.output(
                    KV.of(compositeKey.getKey(), KV.of(compositeKey.getValue(), input.getValue())));
        }
    }

    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$Options.class */
    /**
     * Command-line options of the coverage pipeline, on top of those inherited from
     * {@link ShardOptions}. The long help texts are split across lines for readability; the
     * resulting strings are unchanged.
     */
    public interface Options extends ShardOptions {
        /** BAM input location; empty by default. */
        @Description("Cloud storage prefix containing BAM and BAI files from which to read or a path to a "
                + "local file containing the newline-separated prefixes. "
                + "Use one of --bamInput, --inputDatasetId, or --readGroupSetIds.")
        @Default.String("")
        String getBamInput();

        void setBamInput(String str);

        /** Comma-delimited read group set ids; empty by default. */
        @Description("A comma delimited list of the IDs of the Google Genomics ReadGroupSets this pipeline "
                + "is working with. Default (empty) indicates all ReadGroupSets in InputDatasetId.  "
                + "This or InputDatasetId must be set.  "
                + "InputDatasetId overrides ReadGroupSetIds (if InputDatasetId is set, this field will be ignored).  "
                + "All of the referenceSetIds for all ReadGroupSets in this list must be the same for the "
                + "purposes of setting the referenceSetId of the output AnnotationSet. "
                + "Use one of --bamInput, --inputDatasetId, or --readGroupSetIds.")
        @Default.String("")
        String getReadGroupSetIds();

        void setReadGroupSetIds(String str);

        /** Input dataset id; empty by default. */
        @Description("The ID of the Google Genomics Dataset that the pipeline will get its input reads from.  "
                + "Default (empty) means to use ReadGroupSetIds instead.  "
                + "This or ReadGroupSetIds must be set.  "
                + "InputDatasetId overrides ReadGroupSetIds (if this field is set, ReadGroupSetIds will be ignored).  "
                + "All of the referenceSetIds for all ReadGroupSets in this Dataset must be the same for the "
                + "purposes of setting the referenceSetId of the output AnnotationSet. "
                + "Use one of --bamInput, --inputDatasetId, or --readGroupSetIds.")
        @Default.String("")
        String getInputDatasetId();

        void setInputDatasetId(String str);

        /** Dataset receiving the output annotation set. */
        @Description("The ID of the Google Genomics Dataset that the output AnnotationSet will be posted to.")
        @Default.String("")
        @Validation.Required
        String getOutputDatasetId();

        void setOutputDatasetId(String str);

        /** Reference set id for the annotation set. */
        @Description("The Google Genomics reference set id to use for the annotation set."
                + "Only used when --bamInput is used. Defaults to \"EMWV_ZfLxrDY-wE\" for hg19.")
        @Default.String("EMWV_ZfLxrDY-wE")
        String getReferenceSetId();

        void setReferenceSetId(String str);

        /** Maximum BAM shard size in bytes; the default equals 104857600. */
        @Description("For BAM file input, the maximum size in bytes of a shard processed by a single worker. "
                + "Only used when --bamInput is used. "
                + "(For --inputDatasetId or --readGroupSetIds, shard size is controlled by --basesPerShard.)")
        @Default.Integer(100 * 1024 * 1024)
        int getMaxShardSizeBytes();

        void setMaxShardSizeBytes(int i);

        /** Coverage bucket width. */
        @Description("The bucket width you would like to calculate coverage for.  "
                + "Default is 2048.  Buckets are non-overlapping.  "
                + "If bucket width does not divide into the reference size, then the remainder will be a "
                + "smaller bucket at the end of the reference.")
        @Default.Integer(2048)
        int getBucketWidth();

        void setBucketWidth(int i);

        /** Number of quantiles to compute. */
        @Description("The number of quantiles you would like to calculate for in the output.  Default"
                + "is 11 (minimum value for a particular grouping, 10-90 percentiles, and the maximum value).  "
                + "The InputDataset or list of ReadGroupSetIds specified must have an amount of "
                + "ReadGroupSetIds greater than or equal to the number of quantiles that you are asking for.")
        @Default.Integer(11)
        int getNumQuantiles();

        void setNumQuantiles(int i);

        /** Name for the output annotation set; empty by default. */
        @Description("This provides the name for the AnnotationSet. "
                + "Default (empty) will set the name to the input References. "
                + "For more information on AnnotationSets, please visit: "
                + "https://cloud.google.com/genomics/v1beta2/reference/annotationSets#resource")
        @Default.String("")
        String getAnnotationSetName();

        void setAnnotationSetName(String str);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$RemoveRgsId.class */
    /**
     * Replaces the {@link PosRgsMq} key with a (position, mapping quality) pair, leaving the
     * value untouched.
     */
    public static class RemoveRgsId extends DoFn<KV<PosRgsMq, Double>, KV<KV<Position, PosRgsMq.MappingQuality>, Double>> {
        RemoveRgsId() {
        }

        /** Emits the input value under a key built from the original key's position and mapping quality. */
        @DoFn.ProcessElement
        public void processElement(DoFn<KV<PosRgsMq, Double>, KV<KV<Position, PosRgsMq.MappingQuality>, Double>>.ProcessContext processContext) {
            KV<PosRgsMq, Double> input = processContext.element();
            PosRgsMq fullKey = input.getKey();
            KV<Position, PosRgsMq.MappingQuality> reducedKey = KV.of(fullKey.getPos(), fullKey.getMq());
            processContext.output(KV.of(reducedKey, input.getValue()));
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/CalculateCoverage$SumCounts.class */
    /** Combine function that adds up all the {@code Long} values it is given. */
    public static class SumCounts implements SerializableFunction<Iterable<Long>, Long> {
        SumCounts() {
        }

        /**
         * @param iterable the values to add
         * @return their sum; zero when there are no values
         */
        public Long apply(Iterable<Long> iterable) {
            long total = 0;
            for (Long count : iterable) {
                total += count.longValue();
            }
            return Long.valueOf(total);
        }
    }

    /**
     * Registers a {@link GenericJsonCoder} on the pipeline's coder registry for each of the
     * JSON model classes this pipeline passes between steps.
     *
     * @param pipeline the pipeline whose registry is updated
     */
    public static void registerPipelineCoders(Pipeline pipeline) {
        final CoderRegistry registry = pipeline.getCoderRegistry();

        // Annotation API model classes.
        registry.registerCoderForClass(Annotation.class, GenericJsonCoder.of(Annotation.class));
        registry.registerCoderForClass(AnnotationSet.class, GenericJsonCoder.of(AnnotationSet.class));
        registry.registerCoderForClass(
                BatchCreateAnnotationsRequest.class, GenericJsonCoder.of(BatchCreateAnnotationsRequest.class));

        // Key classes used by the coverage steps.
        registry.registerCoderForClass(PosRgsMq.class, GenericJsonCoder.of(PosRgsMq.class));
        registry.registerCoderForClass(Position.class, GenericJsonCoder.of(Position.class));
    }

    /**
     * Entry point: parses the options, builds the read source (BAM files or the Genomics API),
     * wires the coverage steps behind it and runs the pipeline.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if no input is specified, if fewer read group sets than
     *     requested quantiles are given, if the first read group set has no reference set, or
     *     if {@code --allReferences} is combined with BAM input
     */
    public static void main(String[] strArr) throws GeneralSecurityException, IOException, URISyntaxException {
        PipelineOptionsFactory.register(Options.class);
        options = PipelineOptionsFactory.fromArgs(strArr).withValidation().as(Options.class);
        auth = GenomicsOptions.Methods.getGenomicsAuth(options);
        p = Pipeline.create(options);
        registerPipelineCoders(p);

        final boolean noBamInput = options.getBamInput().isEmpty();
        if (noBamInput && options.getInputDatasetId().isEmpty() && options.getReadGroupSetIds().isEmpty()) {
            throw new IllegalArgumentException("BamInput or InputDatasetId or ReadGroupSetIds must be specified");
        }

        // Used as-is for BAM input; replaced by a looked-up id for API input.
        String referenceSetId = options.getReferenceSetId();
        PCollection<Read> reads;

        if (!noBamInput) {
            if (options.isAllReferences()) {
                throw new IllegalArgumentException("--allReferences is not currently supported for BAM file reading. Instead use --references and list all of the genomic regions in the file.");
            }
            List<Contig> contigs = Lists.newArrayList(Contig.parseContigsFromCommandLine(options.getReferences()));
            ReaderOptions readerOptions = new ReaderOptions(ValidationStringency.LENIENT, false);
            // Policy answering true once a shard's approximate size exceeds the configured limit.
            ShardingPolicy shardingPolicy = new ShardingPolicy() {
                final int MAX_BYTES_PER_SHARD = CalculateCoverage.options.getMaxShardSizeBytes();

                public Boolean apply(BAMShard shard) {
                    return Boolean.valueOf(shard.approximateSizeInBytes() > ((long) this.MAX_BYTES_PER_SHARD));
                }
            };
            reads = ReadBAMTransform.getReadsFromBAMFilesSharded(
                    p, options, auth, contigs, readerOptions, options.getBamInput(), shardingPolicy);
        } else {
            // A dataset id, when given, takes precedence over the explicit id list.
            List<String> readGroupSetIds;
            if (options.getInputDatasetId().isEmpty()) {
                readGroupSetIds = Lists.newArrayList(options.getReadGroupSetIds().split(","));
            } else {
                readGroupSetIds = GenomicsUtils.getReadGroupSetIds(options.getInputDatasetId(), auth);
            }
            if (readGroupSetIds.size() < options.getNumQuantiles()) {
                throw new IllegalArgumentException("Number of ReadGroupSets must be greater than or equal to the number of requested quantiles.");
            }

            // The first read group set decides the reference set all others must share.
            String firstReadGroupSetId = readGroupSetIds.get(0);
            referenceSetId = GenomicsUtils.getReferenceSetId(firstReadGroupSetId, auth);
            if (Strings.isNullOrEmpty(referenceSetId)) {
                throw new IllegalArgumentException("No ReferenceSetId associated with ReadGroupSetId " + firstReadGroupSetId + ". All ReadGroupSets in given input must have an associated ReferenceSet.");
            }
            reads = p.begin()
                    .apply(Create.of(readGroupSetIds))
                    .apply(ParDo.of(new CheckMatchingReferenceSet(referenceSetId, auth)))
                    .apply(new ReadGroupStreamer(auth, ShardBoundary.Requirement.STRICT, READ_FIELDS, ShardUtils.SexChromosomeFilter.INCLUDE_XY));
        }

        // The annotation set is created while the last step is being constructed, i.e. after
        // the preceding transforms have been applied.
        reads
                .apply("CalculateCoverateMean", new CalculateCoverageMean(options.getBucketWidth()))
                .apply("CalculateQuantiles", new CalculateQuantiles(options.getNumQuantiles()))
                .apply(GroupByKey.create())
                .apply("CreateAnnotations", ParDo.of(new CreateAnnotations(createAnnotationSet(referenceSetId).getId(), auth, true)));
        p.run();
    }

    /**
     * Creates the output annotation set in the configured output dataset through the
     * Genomics API.
     *
     * <p>The name is the configured annotation set name when that is not empty; otherwise it
     * is derived from the references option.
     *
     * @param referenceSetId reference set id stored on the annotation set
     * @return the annotation set returned by the API call
     * @throws IOException if {@code referenceSetId} is null
     */
    private static AnnotationSet createAnnotationSet(String referenceSetId) throws GeneralSecurityException, IOException {
        if (referenceSetId == null) {
            throw new IOException("ReferenceSetId was null for this readgroupset");
        }

        final String name;
        if (!"".equals(options.getAnnotationSetName())) {
            name = options.getAnnotationSetName();
        } else if (options.isAllReferences()) {
            name = "Read Depth for all references";
        } else {
            name = "Read Depth for " + options.getReferences();
        }

        AnnotationSet toCreate = new AnnotationSet();
        toCreate.setDatasetId(options.getOutputDatasetId());
        toCreate.setName(name);
        toCreate.setReferenceSetId(referenceSetId);
        toCreate.setType("GENERIC");

        return (AnnotationSet) GenomicsFactory.builder()
                .build()
                .fromOfflineAuth(auth)
                .annotationsets()
                .create(toCreate)
                .execute();
    }
}
