package org.forester.ws.seqdb;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
import org.forester.go.GoTerm;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
import org.forester.util.SequenceAccessionTools;

/* loaded from: input_file:org/forester/ws/seqdb/SequenceDbWsTools.class */
public final class SequenceDbWsTools {
    /** Base URL that {@code queryUniprot} prepends to every UniProt query string. */
    public static final String BASE_UNIPROT_URL = "http://www.uniprot.org/";
    /** Line limit used by the overloads in this class that do not take an explicit limit. */
    public static final int DEFAULT_LINES_TO_RETURN = 4000;
    /** Not referenced in this class; presumably an EBI database name kept for external callers — TODO confirm. */
    public static final String EMBL_DBS_REFSEQ_N = "refseqn";
    /** Not referenced in this class; presumably an EBI database name kept for external callers — TODO confirm. */
    public static final String EMBL_DBS_REFSEQ_P = "refseqp";
    /** dbfetch URL prefix (GENBANK database); {@code queryEmblDb} uses it for accessions whose source is NCBI. */
    public static final String EMBL_GENBANK = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
    /** dbfetch URL prefix (REFSEQ database); used by {@code queryEmblDb} and {@code queryEmblDbForRefSeqEntry}. */
    public static final String EMBL_REFSEQ = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
    /** dbfetch URL prefix (EMBL database); {@code queryEmblDb} uses it for accessions whose source is EMBL. */
    public static final String EMBL_EMBL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
    // The four private constants below are not referenced by name in this class.
    // NOTE(review): the literals "UTF-8" (in encode) and 200L (in the Thread.sleep calls) look like
    // URL_ENC and SLEEP inlined at compile time — confirm before changing either value.
    private static final boolean DEBUG = false;
    private static final String URL_ENC = "UTF-8";
    private static final int SLEEP = 200;
    private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false;

    /**
     * Looks up UniProt taxonomies by common name and keeps only those whose common name
     * matches the query exactly (ignoring case).
     *
     * @param str the common name to search for
     * @param i   line limit handed to the underlying UniProt query
     * @return the exactly matching taxonomies (possibly an empty list), or {@code null} when
     *         the lookup produced no taxonomies at all
     * @throws IOException if the underlying query or parsing fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict(String str, int i) throws IOException {
        final List<UniProtTaxonomy> candidates = getTaxonomiesFromCommonName(str, i);
        if (candidates == null || candidates.isEmpty()) {
            return null;
        }
        final List<UniProtTaxonomy> exactMatches = new ArrayList<UniProtTaxonomy>();
        for (final UniProtTaxonomy candidate : candidates) {
            // Compare from the query side: a taxonomy without a common name is then a
            // non-match instead of a NullPointerException. str is non-null here because
            // the lookup above already dereferenced it.
            if (str.equalsIgnoreCase(candidate.getCommonName())) {
                exactMatches.add(candidate);
            }
        }
        return exactMatches;
    }

    /**
     * Queries UniProt for the taxonomy with the given id and parses the response.
     *
     * @param str the taxonomy id to search for
     * @param i   line limit handed to the underlying UniProt query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or parsing fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromId(String str, int i) throws IOException {
        final List<String> responseLines = getTaxonomyStringFromId(str, i);
        return responseLines.isEmpty() ? null : parseUniProtTaxonomy(responseLines);
    }

    /**
     * Looks up UniProt taxonomies by scientific name and keeps only those whose scientific
     * name matches the query exactly (ignoring case).
     *
     * @param str the scientific name to search for
     * @param i   line limit handed to the underlying UniProt query
     * @return the exactly matching taxonomies (possibly an empty list), or {@code null} when
     *         the lookup produced no taxonomies at all
     * @throws IOException if the underlying query or parsing fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromScientificNameStrict(String str, int i) throws IOException {
        final List<UniProtTaxonomy> candidates = getTaxonomiesFromScientificName(str, i);
        if (candidates == null || candidates.isEmpty()) {
            return null;
        }
        final List<UniProtTaxonomy> exactMatches = new ArrayList<UniProtTaxonomy>();
        for (final UniProtTaxonomy candidate : candidates) {
            // Compare from the query side: a taxonomy without a scientific name is then a
            // non-match instead of a NullPointerException. str is non-null here because
            // the lookup above already dereferenced it.
            if (str.equalsIgnoreCase(candidate.getScientificName())) {
                exactMatches.add(candidate);
            }
        }
        return exactMatches;
    }

    /**
     * Queries UniProt for taxonomies with the given taxonomy code (sent as the
     * {@code mnemonic} query field) and parses the response.
     *
     * @param str the taxonomy code to search for
     * @param i   line limit handed to the underlying UniProt query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or parsing fails
     */
    public static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode(String str, int i) throws IOException {
        // Strings are immutable, so the code is passed through as-is; no defensive copy is needed.
        final List<String> responseLines = getTaxonomyStringFromTaxonomyCode(str, i);
        if (responseLines.isEmpty()) {
            return null;
        }
        return parseUniProtTaxonomy(responseLines);
    }

    /**
     * Fetches the entry for {@code accession} using {@link #DEFAULT_LINES_TO_RETURN} as the line limit.
     *
     * @param accession the accession to look up
     * @return the entry built from the response
     * @throws IOException if the query fails
     */
    public static SequenceDatabaseEntry obtainEmblEntry(Accession accession) throws IOException {
        final int lineLimit = DEFAULT_LINES_TO_RETURN;
        return obtainEmblEntry(accession, lineLimit);
    }

    /**
     * Fetches the raw response for {@code accession} via {@code queryEmblDb} and turns it into
     * an entry with {@code EbiDbEntry.createInstanceFromPlainTextForRefSeq}.
     *
     * @param accession the accession to look up
     * @param i         line limit for the query
     * @return the entry built from the response
     * @throws IOException if the query fails
     */
    public static SequenceDatabaseEntry obtainEmblEntry(Accession accession, int i) throws IOException {
        final List<String> responseLines = queryEmblDb(accession, i);
        return EbiDbEntry.createInstanceFromPlainTextForRefSeq(responseLines);
    }

    /**
     * Parses a sequence database accessor out of {@code str} and fetches the matching entry.
     * REFSEQ, EMBL and NCBI accessions go through {@code obtainEmblEntry}; UNIPROT accessions
     * go through {@code obtainUniProtEntry}. Both use {@link #DEFAULT_LINES_TO_RETURN}.
     *
     * @param str text containing the accessor
     * @return the fetched entry
     * @throws IllegalArgumentException if {@code str} is empty, no accessor can be parsed from it,
     *                                  or the accessor's source is not one of the four handled here
     * @throws IOException              if the query fails
     */
    public static SequenceDatabaseEntry obtainEntry(String str) throws IOException {
        if (ForesterUtil.isEmpty(str)) {
            throw new IllegalArgumentException("cannot extract sequence db accessor from null or empty string");
        }
        final Accession accession = SequenceAccessionTools.parseAccessorFromString(str);
        if (accession == null) {
            throw new IllegalArgumentException("could not extract acceptable sequence db accessor from \"" + str + "\"");
        }
        // Read the source once and compare constant-first, so a missing source ends in the
        // descriptive IllegalArgumentException below rather than a NullPointerException.
        final String source = accession.getSource();
        final boolean servedByEmbl = Accession.Source.REFSEQ.toString().equals(source)
                || Accession.Source.EMBL.toString().equals(source)
                || Accession.Source.NCBI.toString().equals(source);
        if (servedByEmbl) {
            return obtainEmblEntry(accession, DEFAULT_LINES_TO_RETURN);
        }
        if (Accession.Source.UNIPROT.toString().equals(source)) {
            return obtainUniProtEntry(accession.getValue(), DEFAULT_LINES_TO_RETURN);
        }
        throw new IllegalArgumentException("don't know how to handle request for source \"" + source + "\"");
    }

    /**
     * Fetches the RefSeq entry for {@code accession} using {@link #DEFAULT_LINES_TO_RETURN}
     * as the line limit.
     *
     * @param accession the accession to look up
     * @return the entry built from the response
     * @throws IOException if the query fails
     */
    public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl(Accession accession) throws IOException {
        final int lineLimit = DEFAULT_LINES_TO_RETURN;
        return obtainRefSeqEntryFromEmbl(accession, lineLimit);
    }

    /**
     * Fetches the raw response for {@code accession} via {@code queryEmblDbForRefSeqEntry} and
     * turns it into an entry with {@code EbiDbEntry.createInstanceFromPlainTextForRefSeq}.
     *
     * @param accession the accession to look up
     * @param i         line limit for the query
     * @return the entry built from the response
     * @throws IOException if the query fails
     */
    public static SequenceDatabaseEntry obtainRefSeqEntryFromEmbl(Accession accession, int i) throws IOException {
        final List<String> responseLines = queryEmblDbForRefSeqEntry(accession, i);
        return EbiDbEntry.createInstanceFromPlainTextForRefSeq(responseLines);
    }

    /**
     * Determines the accession to use for a node: the one obtained from the node's sequence
     * accession if {@code isAccessionAcceptable} accepts it, otherwise whatever
     * {@code SequenceAccessionTools.obtainAccessorFromDataFields} yields (returned unchecked).
     *
     * @param phylogenyNode the node to inspect
     * @return the chosen accession, as returned by the helper that produced it
     */
    public static final Accession obtainSeqAccession(PhylogenyNode phylogenyNode) {
        final Accession fromSequence = SequenceAccessionTools.obtainFromSeqAccession(phylogenyNode);
        if (isAccessionAcceptable(fromSequence)) {
            return fromSequence;
        }
        return SequenceAccessionTools.obtainAccessorFromDataFields(phylogenyNode);
    }

    /**
     * Resolves the accession of a single node and, if it is acceptable, pulls data for it
     * from the database onto the node. Otherwise the node's string form is recorded in
     * {@code sortedSet}, but only for external or non-empty nodes.
     *
     * @param z             forwarded unchanged to {@code addDataFromDbToNode}
     * @param i             line limit for the database query
     * @param sortedSet     collects the string form of nodes for which no data was added
     * @param phylogenyNode the node to process
     * @throws IOException if the database query fails
     */
    public static final void obtainSeqInformation(boolean z, int i, SortedSet<String> sortedSet, PhylogenyNode phylogenyNode) throws IOException {
        final Accession accession = obtainSeqAccession(phylogenyNode);
        if (isAccessionAcceptable(accession)) {
            addDataFromDbToNode(z, i, sortedSet, phylogenyNode, accession);
            return;
        }
        final boolean worthReporting = phylogenyNode.isExternal() || !phylogenyNode.isEmpty();
        if (worthReporting) {
            sortedSet.add(phylogenyNode.toString());
        }
    }

    /**
     * Same as the four-argument overload, with {@link #DEFAULT_LINES_TO_RETURN} as the line limit.
     *
     * @param z             forwarded unchanged
     * @param sortedSet     collects the string form of nodes for which no data was added
     * @param phylogenyNode the node to process
     * @throws IOException if the database query fails
     */
    public static final void obtainSeqInformation(boolean z, SortedSet<String> sortedSet, PhylogenyNode phylogenyNode) throws IOException {
        final int lineLimit = DEFAULT_LINES_TO_RETURN;
        obtainSeqInformation(z, lineLimit, sortedSet, phylogenyNode);
    }

    /**
     * Walks {@code phylogeny} in post-order and obtains sequence information for its nodes.
     *
     * @param phylogeny the tree to process
     * @param z         if {@code true}, only external nodes are processed; otherwise every node
     * @param z2        forwarded unchanged to the per-node overload
     * @param i         line limit for each database query
     * @return the sorted string forms of the nodes for which no data could be added
     * @throws IOException if a database query fails
     */
    public static final SortedSet<String> obtainSeqInformation(Phylogeny phylogeny, boolean z, boolean z2, int i) throws IOException {
        // Properly parameterized so the set handed to the per-node overload is type-checked.
        final SortedSet<String> notFound = new TreeSet<String>();
        final PhylogenyNodeIterator nodes = phylogeny.iteratorPostorder();
        while (nodes.hasNext()) {
            final PhylogenyNode node = nodes.next();
            if (node.isExternal() || !z) {
                obtainSeqInformation(z2, i, notFound, node);
            }
        }
        return notFound;
    }

    /**
     * Obtains sequence information for a single node with the first flag set to {@code true}
     * and {@link #DEFAULT_LINES_TO_RETURN} as the line limit. The set of unresolved nodes is
     * created locally and discarded, so callers get no report of a failed lookup.
     *
     * @param phylogenyNode the node to process
     * @throws IOException if the database query fails
     */
    public static final void obtainSeqInformation(PhylogenyNode phylogenyNode) throws IOException {
        // Typed set instead of a raw TreeSet forced through an unchecked cast.
        final SortedSet<String> discardedNotFound = new TreeSet<String>();
        obtainSeqInformation(true, DEFAULT_LINES_TO_RETURN, discardedNotFound, phylogenyNode);
    }

    /**
     * Fetches the UniProt entry for {@code str} using {@link #DEFAULT_LINES_TO_RETURN}
     * as the line limit.
     *
     * @param str the UniProt identifier to look up
     * @return the entry built from the response
     * @throws IOException if the query fails
     */
    public static SequenceDatabaseEntry obtainUniProtEntry(String str) throws IOException {
        final int lineLimit = DEFAULT_LINES_TO_RETURN;
        return obtainUniProtEntry(str, lineLimit);
    }

    /**
     * Requests {@code uniprot/<str>.txt} from UniProt and builds an entry from the plain-text
     * response with {@code UniProtEntry.createInstanceFromPlainText}.
     *
     * @param str the UniProt identifier to look up
     * @param i   line limit for the query
     * @return the entry built from the response
     * @throws IOException if the query fails
     */
    public static SequenceDatabaseEntry obtainUniProtEntry(String str, int i) throws IOException {
        final String resourcePath = "uniprot/" + str + ".txt";
        final List<String> responseLines = queryUniprot(resourcePath, i);
        return UniProtEntry.createInstanceFromPlainText(responseLines);
    }

    /**
     * Opens {@code str2 + str} as a URL and returns the leading lines of the response.
     * After reading, the calling thread pauses for 200 ms before returning.
     *
     * <p>Reading stops once the result holds more than {@code i} lines, so up to
     * {@code i + 1} lines are returned. This is kept as-is because callers may rely on it.
     * NOTE(review): possibly intended to leave room for a header line — confirm.
     *
     * @param str  the query, appended to the base URL; must not be empty
     * @param i    line limit; values below 1 are treated as 1
     * @param str2 the base URL
     * @return the lines read, in response order
     * @throws IllegalArgumentException if {@code str} is empty
     * @throws IOException              if the connection cannot be opened or read
     */
    public static List<String> queryDb(String str, int i, String str2) throws IOException {
        if (ForesterUtil.isEmpty(str)) {
            throw new IllegalArgumentException("illegal attempt to use empty query ");
        }
        final int lineLimit = (i < 1) ? 1 : i;
        final List<String> lines = new ArrayList<String>();
        // NOTE(review): the reader decodes with the platform default charset; confirm the
        // services' response encoding before pinning one explicitly.
        final BufferedReader reader = new BufferedReader(new InputStreamReader(new URL(str2 + str).openConnection().getInputStream()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
                if (lines.size() > lineLimit) {
                    break;
                }
            }
        } finally {
            // Close even when readLine fails, so the connection's stream is not leaked.
            reader.close();
        }
        try {
            Thread.sleep(200L);
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
        return lines;
    }

    /**
     * Queries the dbfetch service for {@code accession}, choosing the database by the
     * accession's source: NCBI uses {@link #EMBL_GENBANK}, REFSEQ uses {@link #EMBL_REFSEQ},
     * EMBL uses {@link #EMBL_EMBL}.
     *
     * @param accession the accession to look up
     * @param i         line limit for the query
     * @return the response lines
     * @throws IllegalArgumentException if the accession's source is none of the three above
     * @throws IOException              if the query fails
     */
    public static List<String> queryEmblDb(Accession accession, int i) throws IOException {
        final String baseUrl;
        if (accession.getSource().equals(Accession.Source.NCBI.toString())) {
            baseUrl = EMBL_GENBANK;
        } else if (accession.getSource().equals(Accession.Source.REFSEQ.toString())) {
            baseUrl = EMBL_REFSEQ;
        } else if (accession.getSource().equals(Accession.Source.EMBL.toString())) {
            baseUrl = EMBL_EMBL;
        } else {
            throw new IllegalArgumentException("unable to handle source: " + accession.getSource());
        }
        return queryDb(accession.getValue(), i, baseUrl);
    }

    /**
     * Queries the dbfetch REFSEQ database ({@link #EMBL_REFSEQ}) for the value of
     * {@code accession}, regardless of the accession's source.
     *
     * @param accession the accession whose value is looked up
     * @param i         line limit for the query
     * @return the response lines
     * @throws IOException if the query fails
     */
    public static List<String> queryEmblDbForRefSeqEntry(Accession accession, int i) throws IOException {
        final String id = accession.getValue();
        return queryDb(id, i, EMBL_REFSEQ);
    }

    /**
     * Runs {@code str} as a query against {@link #BASE_UNIPROT_URL}.
     *
     * @param str the query, appended to the UniProt base URL
     * @param i   line limit for the query
     * @return the response lines
     * @throws IOException if the query fails
     */
    public static List<String> queryUniprot(String str, int i) throws IOException {
        final String baseUrl = BASE_UNIPROT_URL;
        return queryDb(str, i, baseUrl);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the trimmed text that follows the first occurrence of {@code str2} in {@code str}.
     *
     * @param str  the text to search
     * @param str2 the marker after which extraction starts
     * @return the trimmed remainder after the marker, or {@code ""} when the marker is absent
     *         (matching what {@code extractFromTo} returns for a missing marker)
     */
    public static final String extractFrom(String str, String str2) {
        final int markerStart = str.indexOf(str2);
        if (markerStart < 0) {
            // Without this guard the offset would be computed from -1, yielding an
            // arbitrary tail of the text or a StringIndexOutOfBoundsException.
            return "";
        }
        return str.substring(markerStart + str2.length()).trim();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the trimmed text between the first occurrence of {@code str2} and the next
     * occurrence of {@code str3} after it.
     *
     * <p>The end marker is searched only after the start marker. An end marker that also
     * occurs earlier in the text, or that is contained in the start marker itself
     * (e.g. a quote in {@code /gene="}), therefore does not get in the way.
     *
     * @param str  the text to search
     * @param str2 the start marker
     * @param str3 the end marker
     * @return the trimmed text between the markers, or {@code ""} when the start marker is
     *         absent or no end marker follows it
     */
    public static final String extractFromTo(String str, String str2, String str3) {
        final int markerStart = str.indexOf(str2);
        if (markerStart < 0) {
            return "";
        }
        final int valueStart = markerStart + str2.length();
        final int valueEnd = str.indexOf(str3, valueStart);
        if (valueEnd < 0) {
            return "";
        }
        return str.substring(valueStart, valueEnd).trim();
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Returns the trimmed text that precedes the first occurrence of {@code str2} in {@code str}.
     *
     * @param str  the text to search
     * @param str2 the marker at which extraction stops
     * @return the trimmed text before the marker, or {@code ""} when the marker is absent
     *         (matching what {@code extractFromTo} returns for a missing marker)
     */
    public static final String extractTo(String str, String str2) {
        final int markerStart = str.indexOf(str2);
        if (markerStart < 0) {
            // substring(0, -1) would throw StringIndexOutOfBoundsException here.
            return "";
        }
        return str.substring(0, markerStart).trim();
    }

    /**
     * Fetches the database entry for {@code accession} and copies its sequence and taxonomy
     * data onto {@code phylogenyNode}. When no usable entry is obtained, the node's string
     * form is added to {@code sortedSet} instead (external or non-empty nodes only). In
     * either case the calling thread then pauses for 200 ms.
     *
     * @param z             if {@code true}, the entry's taxonomy identifier is written to the node
     * @param i             line limit for the database query
     * @param sortedSet     collects the string form of nodes for which no entry was found
     * @param phylogenyNode the node to update
     * @param accession     the accession to look up; its source selects the database
     * @throws IOException if the query fails for a reason other than a missing resource
     */
    private static void addDataFromDbToNode(boolean z, int i, SortedSet<String> sortedSet, PhylogenyNode phylogenyNode, Accession accession) throws IOException {
        final SequenceDatabaseEntry entry = fetchEntryQuietly(accession, i);
        if (entry != null && !entry.isEmpty()) {
            // Reuse the node's existing sequence/taxonomy objects so data already present is kept.
            final Sequence sequence = phylogenyNode.getNodeData().isHasSequence()
                    ? phylogenyNode.getNodeData().getSequence()
                    : new Sequence();
            transferSequenceData(entry, accession, sequence);
            final Taxonomy taxonomy = phylogenyNode.getNodeData().isHasTaxonomy()
                    ? phylogenyNode.getNodeData().getTaxonomy()
                    : new Taxonomy();
            if (!ForesterUtil.isEmpty(entry.getTaxonomyScientificName())) {
                taxonomy.setScientificName(entry.getTaxonomyScientificName());
            }
            if (z && !ForesterUtil.isEmpty(entry.getTaxonomyIdentifier())) {
                taxonomy.setIdentifier(new Identifier(entry.getTaxonomyIdentifier(), PhyloXmlUtil.UNIPROT_TAX_PROVIDER));
            }
            phylogenyNode.getNodeData().setTaxonomy(taxonomy);
            phylogenyNode.getNodeData().setSequence(sequence);
        } else if (phylogenyNode.isExternal() || !phylogenyNode.isEmpty()) {
            sortedSet.add(phylogenyNode.toString());
        }
        try {
            Thread.sleep(200L);
        } catch (InterruptedException e) {
            // Keep the interruption visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    /** Fetches the entry from the database matching the accession's source; {@code null} if the source is unhandled or the resource is missing. */
    private static SequenceDatabaseEntry fetchEntryQuietly(Accession accession, int i) throws IOException {
        final String value = accession.getValue();
        try {
            if (accession.getSource().equals(Accession.Source.UNIPROT.toString())) {
                return obtainUniProtEntry(value, i);
            }
            if (accession.getSource().equals(Accession.Source.REFSEQ.toString())) {
                return obtainRefSeqEntryFromEmbl(new Accession(value), i);
            }
            if (accession.getSource().equals(Accession.Source.EMBL.toString()) || accession.getSource().equals(Accession.Source.NCBI.toString())) {
                return obtainEmblEntry(accession, i);
            }
            if (accession.getSource().equals(Accession.Source.GI.toString())) {
                return obtainRefSeqEntryFromEmbl(new Accession(value), i);
            }
        } catch (FileNotFoundException ignored) {
            // Deliberate best effort: a missing remote resource means "no entry";
            // the caller then records the node as not found.
        }
        return null;
    }

    /** Copies accession, names, symbol, molecular sequence, GO annotations, cross references and location from {@code entry} onto {@code sequence}. */
    private static void transferSequenceData(SequenceDatabaseEntry entry, Accession accession, Sequence sequence) throws IOException {
        if (!ForesterUtil.isEmpty(entry.getAccession())) {
            sequence.setAccession(new Accession(entry.getAccession(), accession.getSource()));
        }
        if (!ForesterUtil.isEmpty(entry.getSequenceName())) {
            sequence.setName(entry.getSequenceName());
        }
        if (!ForesterUtil.isEmpty(entry.getGeneName())) {
            sequence.setGeneName(entry.getGeneName());
        }
        if (!ForesterUtil.isEmpty(entry.getSequenceSymbol())) {
            try {
                sequence.setSymbol(entry.getSequenceSymbol());
            } catch (PhyloXmlDataFormatException ignored) {
                // Deliberate best effort: a symbol the setter rejects is simply not set.
            }
        }
        // A molecular sequence already present on the node is never overwritten.
        if (entry.getMolecularSequence() != null && !ForesterUtil.isEmpty(entry.getMolecularSequence().getMolecularSequenceAsString()) && sequence.getMolecularSequence().isEmpty()) {
            sequence.setMolecularSequence(entry.getMolecularSequence().getMolecularSequenceAsString());
            sequence.setMolecularSequenceAligned(false);
            if (entry.getMolecularSequence().getType() == MolecularSequence.TYPE.AA) {
                sequence.setType(PhyloXmlUtil.SEQ_TYPE_PROTEIN);
            } else if (entry.getMolecularSequence().getType() == MolecularSequence.TYPE.DNA) {
                sequence.setType(PhyloXmlUtil.SEQ_TYPE_DNA);
            } else if (entry.getMolecularSequence().getType() == MolecularSequence.TYPE.RNA) {
                sequence.setType(PhyloXmlUtil.SEQ_TYPE_RNA);
            }
        }
        if (entry.getGoTerms() != null && !entry.getGoTerms().isEmpty()) {
            for (final GoTerm goTerm : entry.getGoTerms()) {
                final Annotation annotation = new Annotation(goTerm.getGoId().getId());
                annotation.setDesc(goTerm.getName());
                sequence.addAnnotation(annotation);
            }
        }
        if (entry.getCrossReferences() != null && !entry.getCrossReferences().isEmpty()) {
            for (final Accession crossReference : entry.getCrossReferences()) {
                sequence.addCrossReference(crossReference);
            }
        }
        final boolean hasChromosome = !ForesterUtil.isEmpty(entry.getChromosome());
        final boolean hasMap = !ForesterUtil.isEmpty(entry.getMap());
        if (hasChromosome && hasMap) {
            sequence.setLocation("chr " + entry.getChromosome() + ", " + entry.getMap());
        } else if (hasChromosome) {
            sequence.setLocation("chr " + entry.getChromosome());
        } else if (hasMap) {
            sequence.setLocation(entry.getMap());
        }
    }

    /**
     * Trims {@code str} and URL-encodes it as UTF-8.
     *
     * @param str the text to encode
     * @return the URL-encoded form of the trimmed text
     * @throws UnsupportedEncodingException if the UTF-8 encoding is unavailable
     */
    private static String encode(String str) throws UnsupportedEncodingException {
        final String trimmed = str.trim();
        return URLEncoder.encode(trimmed, "UTF-8");
    }

    /**
     * Queries UniProt for taxonomies by common name and parses the response.
     *
     * @param str the common name to search for
     * @param i   line limit for the query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or parsing fails
     */
    private static List<UniProtTaxonomy> getTaxonomiesFromCommonName(String str, int i) throws IOException {
        final List<String> responseLines = getTaxonomyStringFromCommonName(str, i);
        return responseLines.isEmpty() ? null : parseUniProtTaxonomy(responseLines);
    }

    /**
     * Queries UniProt for taxonomies by scientific name and parses the response.
     *
     * @param str the scientific name to search for
     * @param i   line limit for the query
     * @return the parsed taxonomies, or {@code null} when the query returned no lines
     * @throws IOException if the query or parsing fails
     */
    private static List<UniProtTaxonomy> getTaxonomiesFromScientificName(String str, int i) throws IOException {
        final List<String> responseLines = getTaxonomyStringFromScientificName(str, i);
        return responseLines.isEmpty() ? null : parseUniProtTaxonomy(responseLines);
    }

    /**
     * Runs a tab-format UniProt taxonomy query on the {@code common} field.
     *
     * @param str the common name; trimmed and URL-encoded before use
     * @param i   line limit for the query
     * @return the raw response lines
     * @throws IOException if the query fails
     */
    private static List<String> getTaxonomyStringFromCommonName(String str, int i) throws IOException {
        final StringBuilder query = new StringBuilder("taxonomy/?query=common%3a%22");
        query.append(encode(str)).append("%22&format=tab");
        return queryUniprot(query.toString(), i);
    }

    /**
     * Runs a tab-format UniProt taxonomy query on the {@code id} field.
     *
     * @param str the taxonomy id; trimmed and URL-encoded before use
     * @param i   line limit for the query
     * @return the raw response lines
     * @throws IOException if the query fails
     */
    private static List<String> getTaxonomyStringFromId(String str, int i) throws IOException {
        final StringBuilder query = new StringBuilder("taxonomy/?query=id%3a%22");
        query.append(encode(str)).append("%22&format=tab");
        return queryUniprot(query.toString(), i);
    }

    /**
     * Runs a tab-format UniProt taxonomy query on the {@code scientific} field.
     *
     * @param str the scientific name; trimmed and URL-encoded before use
     * @param i   line limit for the query
     * @return the raw response lines
     * @throws IOException if the query fails
     */
    private static List<String> getTaxonomyStringFromScientificName(String str, int i) throws IOException {
        final StringBuilder query = new StringBuilder("taxonomy/?query=scientific%3a%22");
        query.append(encode(str)).append("%22&format=tab");
        return queryUniprot(query.toString(), i);
    }

    /**
     * Runs a tab-format UniProt taxonomy query on the {@code mnemonic} field.
     *
     * @param str the taxonomy code; trimmed and URL-encoded before use
     * @param i   line limit for the query
     * @return the raw response lines
     * @throws IOException if the query fails
     */
    private static List<String> getTaxonomyStringFromTaxonomyCode(String str, int i) throws IOException {
        final StringBuilder query = new StringBuilder("taxonomy/?query=mnemonic%3a%22");
        query.append(encode(str)).append("%22&format=tab");
        return queryUniprot(query.toString(), i);
    }

    /**
     * Tells whether {@code accession} carries enough information for a lookup: it must be
     * non-null and have a non-empty source and a non-empty value.
     *
     * <p>The previous version also contained a clause requiring the source to equal UNIPROT,
     * EMBL and REFSEQ simultaneously. That can never hold (assuming the source names are
     * distinct), so the clause had no effect and has been removed; results are unchanged.
     * NOTE(review): the clause looks like a mis-written source whitelist. No source filtering
     * happens here — confirm whether one is wanted.
     *
     * @param accession the accession to check; may be {@code null}
     * @return {@code true} if the accession is non-null with non-empty source and value
     */
    private static final boolean isAccessionAcceptable(Accession accession) {
        return accession != null
                && !ForesterUtil.isEmpty(accession.getSource())
                && !ForesterUtil.isEmpty(accession.getValue());
    }

    /**
     * Parses the lines of a tab-format UniProt taxonomy response.
     *
     * <p>Empty lines are skipped. A line starting with {@code "Taxon"} is treated as the
     * header and validated against the expected column names (case-insensitive). Any other
     * line with more than four tab-separated columns becomes a {@code UniProtTaxonomy};
     * shorter lines are ignored.
     *
     * @param list the raw response lines
     * @return the parsed taxonomies, in input order (possibly empty)
     * @throws IOException if a header line is too short or has unexpected column names
     */
    private static List<UniProtTaxonomy> parseUniProtTaxonomy(List<String> list) throws IOException {
        // Expected names of header columns 1..8; column 0 is the one starting with "Taxon".
        final String[] expectedHeader = { "Mnemonic", "Scientific name", "Common name", "Synonym", "Other Names", "Reviewed", "Rank", "Lineage" };
        final List<UniProtTaxonomy> taxonomies = new ArrayList<UniProtTaxonomy>();
        for (final String line : list) {
            if (ForesterUtil.isEmpty(line)) {
                continue;
            }
            final String[] columns = line.split("\t");
            if (line.startsWith("Taxon")) {
                // A truncated header must be reported as a format problem, not surface as
                // an ArrayIndexOutOfBoundsException.
                if (columns.length <= expectedHeader.length) {
                    throw new IOException("Unreconized UniProt Taxonomy format: " + line);
                }
                for (int c = 0; c < expectedHeader.length; ++c) {
                    if (!columns[c + 1].equalsIgnoreCase(expectedHeader[c])) {
                        throw new IOException("Unreconized UniProt Taxonomy format: " + line);
                    }
                }
            } else if (columns.length > 4) {
                taxonomies.add(new UniProtTaxonomy(line));
            }
        }
        return taxonomies;
    }
}
