package com.google.cloud.genomics.dataflow.pipelines;

import com.google.api.client.util.Strings;
import com.google.api.services.genomics.Genomics;
import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.AnnotationSet;
import com.google.api.services.genomics.model.ListBasesResponse;
import com.google.api.services.genomics.model.QueryRange;
import com.google.api.services.genomics.model.RangePosition;
import com.google.api.services.genomics.model.SearchAnnotationsRequest;
import com.google.api.services.genomics.model.Variant;
import com.google.api.services.genomics.model.VariantAnnotation;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.io.TextIO;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.transforms.Create;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
import com.google.cloud.dataflow.sdk.transforms.ParDo;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.genomics.dataflow.utils.AnnotationUtils;
import com.google.cloud.genomics.dataflow.utils.DataflowWorkarounds;
import com.google.cloud.genomics.dataflow.utils.GenomicsDatasetOptions;
import com.google.cloud.genomics.dataflow.utils.GenomicsOptions;
import com.google.cloud.genomics.dataflow.utils.VariantUtils;
import com.google.cloud.genomics.utils.Contig;
import com.google.cloud.genomics.utils.GenomicsFactory;
import com.google.cloud.genomics.utils.Paginator;
import com.google.common.base.Joiner;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import htsjdk.samtools.util.IntervalTree;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.logging.Logger;

/* loaded from: input_file:com/google/cloud/genomics/dataflow/pipelines/AnnotateVariants.class */
/**
 * Dataflow {@link DoFn} that annotates the SNPs found in a {@link Contig}.
 *
 * <p>For every SNP variant in the contig, and for every allele of that variant (each alternate
 * base string plus the reference bases), the function emits:
 * <ul>
 *   <li>each variant annotation stored under exactly the same range whose alternate bases equal
 *       the allele, and</li>
 *   <li>a freshly built {@link VariantAnnotation} for each overlapping transcript on which the
 *       allele has a non-null, non-synonymous effect.</li>
 * </ul>
 * Output keys have the form {@code referenceName:start:allele:variantId}.
 */
public final class AnnotateVariants extends DoFn<Contig, KV<String, VariantAnnotation>> {
    private static final Logger LOG = Logger.getLogger(AnnotateVariants.class.getName());
    private static final int VARIANTS_PAGE_SIZE = 5000;
    /** A progress line is logged each time this many variants have been processed. */
    private static final int PROGRESS_LOG_INTERVAL = 1000;
    private static final String VARIANT_FIELDS = "nextPageToken,variants(id,referenceName,start,end,alternateBases,referenceBases)";
    private final GenomicsFactory.OfflineAuth auth;
    private final String varsetId;
    private final List<String> callSetIds;
    private final List<String> transcriptSetIds;
    private final List<String> variantAnnotationSetIds;
    /** Reference bases already fetched for a transcript range, keyed by that closed-open range. */
    private final Map<Range<Long>, String> refBaseCache = Maps.newHashMap();

    /**
     * @param auth credentials used to build a {@link Genomics} client inside the function
     * @param varsetId id passed to {@link Contig#getVariantsRequest} when searching variants
     * @param callSetIds call set ids set on the variant search request
     * @param transcriptSetIds annotation set ids searched for transcripts
     * @param variantAnnotationSetIds annotation set ids searched for variant annotations
     */
    public AnnotateVariants(GenomicsFactory.OfflineAuth auth, String varsetId, List<String> callSetIds,
            List<String> transcriptSetIds, List<String> variantAnnotationSetIds) {
        this.auth = auth;
        this.varsetId = varsetId;
        this.callSetIds = callSetIds;
        this.transcriptSetIds = transcriptSetIds;
        this.variantAnnotationSetIds = variantAnnotationSetIds;
    }

    /**
     * Annotates every SNP of the input contig and emits one {@code KV} per annotation found.
     *
     * @param processContext supplies the contig to process and receives the outputs
     * @throws Exception if any of the underlying API calls fails
     */
    @Override
    public void processElement(DoFn<Contig, KV<String, VariantAnnotation>>.ProcessContext processContext) throws Exception {
        Genomics genomics = this.auth.getGenomics(this.auth.getDefaultFactory());
        Contig contig = processContext.element();
        LOG.info("processing contig " + contig);

        FluentIterable<Variant> snps = FluentIterable
                .from(Paginator.Variants.create(genomics, Paginator.ShardBoundary.STRICT).search(
                        contig.getVariantsRequest(this.varsetId)
                                .setCallSetIds(this.callSetIds)
                                .setPageSize(VARIANTS_PAGE_SIZE),
                        VARIANT_FIELDS))
                .filter(VariantUtils.IS_SNP);
        // Skip the annotation queries entirely when there is nothing to annotate.
        if (snps.isEmpty()) {
            LOG.info("region has no variants, skipping");
            return;
        }

        IntervalTree<Annotation> transcripts = retrieveTranscripts(genomics, contig);
        ListMultimap<Range<Long>, Annotation> variantAnnotations = retrieveVariantAnnotations(genomics, contig);

        Stopwatch stopwatch = Stopwatch.createStarted();
        int variantCount = 0;
        for (Variant variant : snps) {
            List<String> alleles = ImmutableList.<String>builder()
                    .addAll(variant.getAlternateBases())
                    .add(variant.getReferenceBases())
                    .build();
            // Must be closed-open: retrieveVariantAnnotations keys its multimap with closed-open
            // ranges, and Range equality takes the bound types into account, so any other bound
            // type would never match a stored annotation.
            Range<Long> span = Range.closedOpen(variant.getStart(), variant.getEnd());
            for (String allele : alleles) {
                String outKey = Joiner.on(":").join(
                        variant.getReferenceName(), variant.getStart(), allele, variant.getId());

                // Exact-range join against the known variant annotations.
                for (Annotation match : variantAnnotations.get(span)) {
                    if (allele.equals(match.getVariant().getAlternateBases())) {
                        processContext.output(KV.of(outKey, match.getVariant()));
                    }
                }

                // The interval tree is queried with an inclusive end, hence the "- 1".
                Iterator<IntervalTree.Node<Annotation>> overlapping = transcripts.overlappers(
                        span.lowerEndpoint().intValue(), span.upperEndpoint().intValue() - 1);
                while (overlapping.hasNext()) {
                    Annotation transcript = overlapping.next().getValue();
                    AnnotationUtils.VariantEffect effect = AnnotationUtils.determineVariantTranscriptEffect(
                            variant.getStart().longValue(), allele, transcript,
                            getCachedTranscriptBases(genomics, transcript));
                    if (effect != null && !AnnotationUtils.VariantEffect.SYNONYMOUS_SNP.equals(effect)) {
                        processContext.output(KV.of(outKey, new VariantAnnotation()
                                .setAlternateBases(allele)
                                .setType("SNP")
                                .setEffect(effect.toString())
                                .setGeneId(transcript.getTranscript().getGeneId())
                                .setTranscriptIds(ImmutableList.of(transcript.getId()))));
                    }
                }
            }
            variantCount++;
            if (variantCount % PROGRESS_LOG_INTERVAL == 0) {
                LOG.info(String.format("read %d variants (%.2f / s)",
                        variantCount, ratePerSecond(variantCount, stopwatch)));
            }
        }
        LOG.info("finished reading " + variantCount + " variants in " + stopwatch);
    }

    /**
     * Loads the variant annotations overlapping the contig, keyed by the closed-open range
     * {@code [start, end)} of each annotation. A missing start is treated as 0.
     */
    private ListMultimap<Range<Long>, Annotation> retrieveVariantAnnotations(Genomics genomics, Contig contig) {
        Stopwatch stopwatch = Stopwatch.createStarted();
        ListMultimap<Range<Long>, Annotation> annotationsByRange = ArrayListMultimap.create();
        for (Annotation annotation : searchAnnotations(genomics, contig, this.variantAnnotationSetIds)) {
            RangePosition position = annotation.getPosition();
            annotationsByRange.put(
                    Range.closedOpen(Long.valueOf(startOrZero(position)), position.getEnd()), annotation);
        }
        LOG.info(String.format("read %d variant annotations in %s (%.2f / s)",
                annotationsByRange.size(), stopwatch, ratePerSecond(annotationsByRange.size(), stopwatch)));
        return annotationsByRange;
    }

    /**
     * Loads the transcripts overlapping the contig into an interval tree keyed by each
     * transcript's start and end. A missing start is treated as 0.
     */
    private IntervalTree<Annotation> retrieveTranscripts(Genomics genomics, Contig contig) {
        Stopwatch stopwatch = Stopwatch.createStarted();
        IntervalTree<Annotation> transcripts = new IntervalTree<>();
        for (Annotation annotation : searchAnnotations(genomics, contig, this.transcriptSetIds)) {
            RangePosition position = annotation.getPosition();
            transcripts.put((int) startOrZero(position), position.getEnd().intValue(), annotation);
        }
        LOG.info(String.format("read %d transcripts in %s (%.2f / s)",
                transcripts.size(), stopwatch, ratePerSecond(transcripts.size(), stopwatch)));
        return transcripts;
    }

    /** Searches the given annotation sets for annotations overlapping the contig. */
    private static Iterable<Annotation> searchAnnotations(
            Genomics genomics, Contig contig, List<String> annotationSetIds) {
        QueryRange range = new QueryRange()
                .setReferenceName(canonicalizeRefName(contig.referenceName))
                .setStart(Long.valueOf(contig.start))
                .setEnd(Long.valueOf(contig.end));
        SearchAnnotationsRequest request = new SearchAnnotationsRequest()
                .setAnnotationSetIds(annotationSetIds)
                .setRange(range);
        return Paginator.Annotations.create(genomics, Paginator.ShardBoundary.OVERLAPS).search(request);
    }

    /** Returns the start of the position, or 0 when the start is absent. */
    private static long startOrZero(RangePosition position) {
        Long start = position.getStart();
        return start == null ? 0L : start.longValue();
    }

    /**
     * Computes a throughput figure for log messages.
     *
     * <p>Uses millisecond resolution and floating-point arithmetic so that phases finishing in
     * under a second neither divide by zero nor report a truncated rate.
     */
    private static double ratePerSecond(long count, Stopwatch stopwatch) {
        long elapsedMillis = Math.max(1L, stopwatch.elapsed(TimeUnit.MILLISECONDS));
        return count * 1000.0d / elapsedMillis;
    }

    /**
     * Returns the reference bases spanned by the annotation, fetching them on first use and
     * serving later requests for the same range from {@link #refBaseCache}.
     *
     * @throws IOException if the bases have to be fetched and the request fails
     */
    private String getCachedTranscriptBases(Genomics genomics, Annotation annotation) throws IOException {
        RangePosition position = annotation.getPosition();
        Range<Long> span = Range.closedOpen(Long.valueOf(startOrZero(position)), position.getEnd());
        String bases = this.refBaseCache.get(span);
        if (bases == null) {
            bases = retrieveReferenceBases(genomics, position);
            this.refBaseCache.put(span, bases);
        }
        return bases;
    }

    /**
     * Fetches the reference bases for the given position, following page tokens until the
     * service stops returning one, and concatenates the pages.
     *
     * @throws IOException if any page request fails
     */
    private String retrieveReferenceBases(Genomics genomics, RangePosition rangePosition) throws IOException {
        StringBuilder bases = new StringBuilder();
        String pageToken = "";
        do {
            ListBasesResponse response = genomics.references().bases()
                    .list(rangePosition.getReferenceId())
                    .setStart(rangePosition.getStart())
                    .setEnd(rangePosition.getEnd())
                    .setPageToken(pageToken)
                    .execute();
            bases.append(response.getSequence());
            pageToken = response.getNextPageToken();
        } while (!Strings.isNullOrEmpty(pageToken));
        return bases.toString();
    }

    /** Removes every occurrence of {@code "chr"} from the reference name (e.g. "chr17" becomes "17"). */
    private static String canonicalizeRefName(String str) {
        return str.replace("chr", "");
    }

    /**
     * Builds and runs the pipeline: validates the options and annotation sets, creates the contigs
     * to process, annotates them, groups the annotations by key and writes one text line per key.
     *
     * @param strArr command-line arguments parsed into {@link GenomicsDatasetOptions}
     * @throws Exception if option validation, an API call or pipeline construction fails
     */
    public static void main(String[] strArr) throws Exception {
        PipelineOptionsFactory.register(GenomicsDatasetOptions.class);
        GenomicsDatasetOptions options =
                PipelineOptionsFactory.fromArgs(strArr).withValidation().as(GenomicsDatasetOptions.class);
        GenomicsDatasetOptions.Methods.validateOptions(options);
        GenomicsFactory.OfflineAuth genomicsAuth = GenomicsOptions.Methods.getGenomicsAuth(options);
        Genomics genomics = genomicsAuth.getGenomics(genomicsAuth.getDefaultFactory());

        // A blank flag means "no call set restriction" rather than a single empty id.
        List<String> callSetIds = ImmutableList.of();
        if (!Strings.isNullOrEmpty(options.getCallSetIds().trim())) {
            callSetIds = ImmutableList.copyOf(options.getCallSetIds().split(","));
        }
        List<String> transcriptSetIds =
                validateAnnotationSetsFlag(genomics, options.getTranscriptSetIds(), "TRANSCRIPT");
        List<String> variantAnnotationSetIds =
                validateAnnotationSetsFlag(genomics, options.getVariantAnnotationSetIds(), "VARIANT");
        validateRefsetForAnnotationSets(genomics, transcriptSetIds);

        Iterable<Contig> contigs = options.isAllReferences()
                ? Contig.getContigsInVariantSet(genomics, options.getDatasetId(), Contig.SexChromosomeFilter.INCLUDE_XY)
                : Contig.parseContigsFromCommandLine(options.getReferences());

        Pipeline pipeline = Pipeline.create(options);
        DataflowWorkarounds.registerGenomicsCoders(pipeline);
        pipeline.begin()
                .apply(Create.of(contigs))
                .apply(ParDo.of(new AnnotateVariants(
                        genomicsAuth, options.getDatasetId(), callSetIds, transcriptSetIds, variantAnnotationSetIds)))
                .apply(GroupByKey.<String, VariantAnnotation>create())
                .apply(ParDo.of(new DoFn<KV<String, Iterable<VariantAnnotation>>, String>() {
                    @Override
                    public void processElement(DoFn<KV<String, Iterable<VariantAnnotation>>, String>.ProcessContext processContext) {
                        // One output line per key: "<key>: <all annotations for that key>".
                        processContext.output(
                                processContext.element().getKey() + ": " + processContext.element().getValue());
                    }
                }))
                .apply(TextIO.Write.to(options.getOutput()));
        pipeline.run();
    }

    /**
     * Checks that all the given annotation sets report the same reference set id.
     *
     * @throws IllegalArgumentException if two annotation sets report different reference set ids
     * @throws IOException if an annotation set cannot be fetched
     */
    private static void validateRefsetForAnnotationSets(Genomics genomics, List<String> list) throws IOException {
        String expectedRefsetId = null;
        for (String annotationSetId : list) {
            String refsetId = genomics.annotationSets().get(annotationSetId).execute().getReferenceSetId();
            if (expectedRefsetId == null) {
                expectedRefsetId = refsetId;
            } else if (!expectedRefsetId.equals(refsetId)) {
                throw new IllegalArgumentException("want consistent reference sets across the provided annotation sets, got " + expectedRefsetId + " and " + refsetId);
            }
        }
    }

    /**
     * Splits a comma-separated flag value into annotation set ids and checks that every set has
     * the wanted type.
     *
     * @param str comma-separated annotation set ids
     * @param str2 the type every annotation set must report
     * @return the annotation set ids, in flag order
     * @throws IllegalArgumentException if an annotation set reports a different type
     * @throws IOException if an annotation set cannot be fetched
     */
    private static List<String> validateAnnotationSetsFlag(Genomics genomics, String str, String str2) throws IOException {
        List<String> annotationSetIds = ImmutableList.copyOf(str.split(","));
        for (String annotationSetId : annotationSetIds) {
            AnnotationSet annotationSet = genomics.annotationSets().get(annotationSetId).execute();
            if (!str2.equals(annotationSet.getType())) {
                throw new IllegalArgumentException("annotation set " + annotationSetId + " has type " + annotationSet.getType() + ", wanted type " + str2);
            }
        }
        return annotationSetIds;
    }
}
