package org.biojava.nbio.core.sequence.loader;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import org.biojava.nbio.core.exceptions.CompoundNotFoundException;
import org.biojava.nbio.core.sequence.AccessionID;
import org.biojava.nbio.core.sequence.DataSource;
import org.biojava.nbio.core.sequence.Strand;
import org.biojava.nbio.core.sequence.features.DBReferenceInfo;
import org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface;
import org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface;
import org.biojava.nbio.core.sequence.storage.SequenceAsStringHelper;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.CompoundSet;
import org.biojava.nbio.core.sequence.template.ProxySequenceReader;
import org.biojava.nbio.core.sequence.template.Sequence;
import org.biojava.nbio.core.sequence.template.SequenceMixin;
import org.biojava.nbio.core.sequence.template.SequenceProxyView;
import org.biojava.nbio.core.sequence.template.SequenceView;
import org.biojava.nbio.core.util.Equals;
import org.biojava.nbio.core.util.XMLHelper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/biojava/nbio/core/sequence/loader/UniprotProxySequenceReader.class */
/**
 * Sequence reader backed by a UniProt XML entry.
 *
 * <p>The entry is obtained either from a supplied DOM {@link Document}, from an XML string, or by
 * accession: first from the optional on-disk cache directory and otherwise over HTTP from
 * {@code <base URL>/uniprot/<ACCESSION>.xml}. The {@code <sequence>} element is parsed into
 * compounds of the supplied {@link CompoundSet}; a number of getters expose further annotations
 * (accessions, aliases, gene/organism names, keywords, database references) read from the same
 * document.
 *
 * <p>The base URL and the cache directory are process-wide static settings.
 *
 * @param <C> compound type of the sequence
 */
public class UniprotProxySequenceReader<C extends Compound> implements ProxySequenceReader<C>, FeaturesKeyWordInterface, DatabaseReferenceInterface {
    private static final Logger logger = LoggerFactory.getLogger(UniprotProxySequenceReader.class);

    private static final String SPID_PATTERN = "[OPQ][0-9][A-Z0-9]{3}[0-9]";
    private static final String TREMBLID_PATTERN = "[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}";

    /** Pattern an (upper-cased) accession must match to be accepted by the accession constructor. */
    public static final Pattern UP_AC_PATTERN = Pattern.compile("(" + SPID_PATTERN + "|" + TREMBLID_PATTERN + ")");

    /** Base URL used when none has been configured through {@link #setUniprotbaseURL(String)}. */
    public static final String DEFAULT_UNIPROT_BASE_URL = "https://www.uniprot.org";

    /** Connect and read timeout applied to every HTTP connection, in milliseconds. */
    private static final int HTTP_TIMEOUT_MILLIS = 5000;
    /** Upper bound on manually followed redirects, so a redirect loop cannot spin forever. */
    private static final int MAX_REDIRECTS = 20;
    /** Number of times a download is attempted before giving up. */
    private static final int MAX_FETCH_ATTEMPTS = 5;
    private static final String USER_AGENT = "BioJava";

    private static String uniprotbaseURL = DEFAULT_UNIPROT_BASE_URL;
    /** Cache directory; {@code null} or empty means caching is disabled. */
    private static String uniprotDirectoryCache = null;

    private String sequence;
    private CompoundSet<C> compoundSet;
    private List<C> parsedCompounds = new ArrayList<>();
    Document uniprotDoc;

    /**
     * Loads the entry for an accession (from the cache directory if configured, otherwise over
     * HTTP) and parses its sequence.
     *
     * @param str         UniProt accession; matched case-insensitively against {@link #UP_AC_PATTERN}
     * @param compoundSet compound set used to interpret the sequence text
     * @throws IllegalArgumentException  if the accession does not match {@link #UP_AC_PATTERN}
     * @throws CompoundNotFoundException if the sequence contains text the compound set cannot map
     * @throws IOException               if the entry cannot be read, downloaded or parsed
     */
    public UniprotProxySequenceReader(String str, CompoundSet<C> compoundSet) throws CompoundNotFoundException, IOException {
        // Locale.ROOT keeps the case mapping independent of the JVM default locale.
        if (!UP_AC_PATTERN.matcher(str.toUpperCase(Locale.ROOT)).matches()) {
            throw new IllegalArgumentException("Accession provided " + str + " doesn't comply with the uniprot acession pattern.");
        }
        setCompoundSet(compoundSet);
        this.uniprotDoc = getUniprotXML(str);
        setContents(getSequence(this.uniprotDoc));
    }

    /**
     * Wraps an already parsed UniProt XML document and parses its sequence.
     *
     * @param document    UniProt XML document
     * @param compoundSet compound set used to interpret the sequence text
     * @throws CompoundNotFoundException if the sequence contains text the compound set cannot map
     */
    public UniprotProxySequenceReader(Document document, CompoundSet<C> compoundSet) throws CompoundNotFoundException {
        setCompoundSet(compoundSet);
        this.uniprotDoc = document;
        setContents(getSequence(this.uniprotDoc));
    }

    /**
     * Builds a reader from UniProt XML held in a string.
     *
     * @param str         the XML text; it is handed to the parser as UTF-8 bytes
     * @param compoundSet compound set used to interpret the sequence text
     * @param <C>         compound type
     * @return the reader, or {@code null} if anything goes wrong while parsing (the failure is logged)
     */
    public static <C extends Compound> UniprotProxySequenceReader<C> parseUniprotXMLString(String str, CompoundSet<C> compoundSet) {
        try {
            Document document = XMLHelper.inputStreamToDocument(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)));
            return new UniprotProxySequenceReader<>(document, compoundSet);
        } catch (Exception e) {
            // Trailing throwable argument makes SLF4J log the stack trace as well.
            logger.error("Exception on xml parse of: {}", str, e);
            return null;
        }
    }

    /** Sets the compound set used by subsequent {@link #setContents(String)} calls. */
    @Override // org.biojava.nbio.core.sequence.template.SequenceReader
    public void setCompoundSet(CompoundSet<C> compoundSet) {
        this.compoundSet = compoundSet;
    }

    /**
     * Replaces the sequence: whitespace is removed from {@code str} and the remainder is split
     * into compounds, trying tokens of length 1 up to the compound set's maximum token length at
     * each position and taking the first one the compound set recognises.
     *
     * @param str raw sequence text
     * @throws CompoundNotFoundException if no token starting at some position maps to a compound
     */
    @Override // org.biojava.nbio.core.sequence.template.SequenceReader
    public void setContents(String str) throws CompoundNotFoundException {
        this.sequence = str.replaceAll("\\s", "").trim();
        this.parsedCompounds.clear();
        final int maxTokenLength = this.compoundSet.getMaxSingleCompoundStringLength();
        int position = 0;
        while (position < this.sequence.length()) {
            // Never try a token that would run past the end of the sequence; otherwise an
            // unrecognised tail would surface as StringIndexOutOfBoundsException rather than
            // CompoundNotFoundException.
            final int longestToken = Math.min(maxTokenLength, this.sequence.length() - position);
            String token = null;
            C compound = null;
            for (int length = 1; compound == null && length <= longestToken; length++) {
                token = this.sequence.substring(position, position + length);
                compound = this.compoundSet.getCompoundForString(token);
            }
            if (compound == null) {
                throw new CompoundNotFoundException("Compound " + token + " not found");
            }
            position += token.length();
            this.parsedCompounds.add(compound);
        }
    }

    /** @return number of parsed compounds */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public int getLength() {
        return this.parsedCompounds.size();
    }

    /**
     * @param i 1-based position
     * @return the compound at that position
     */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public C getCompoundAt(int i) {
        return this.parsedCompounds.get(i - 1);
    }

    /** @return 1-based position of the first occurrence of {@code c}, or 0 if it does not occur */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public int getIndexOf(C c) {
        return this.parsedCompounds.indexOf(c) + 1;
    }

    /** @return 1-based position of the last occurrence of {@code c}, or 0 if it does not occur */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public int getLastIndexOf(C c) {
        return this.parsedCompounds.lastIndexOf(c) + 1;
    }

    /** @return same text as {@link #getSequenceAsString()} */
    @Override
    public String toString() {
        return getSequenceAsString();
    }

    /** @return the whitespace-stripped sequence text last passed to {@link #setContents(String)} */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public String getSequenceAsString() {
        return this.sequence;
    }

    /** @return the internal (live, mutable) list of parsed compounds */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public List<C> getAsList() {
        return this.parsedCompounds;
    }

    /**
     * Two readers are equal when {@code Equals.classEqual} accepts the pair, they share the same
     * compound set instance, and their compounds match position by position under
     * {@code equalsIgnoreCase}.
     */
    @Override
    public boolean equals(Object obj) {
        if (!Equals.classEqual(this, obj)) {
            return false;
        }
        @SuppressWarnings("unchecked") // classEqual has already vetted obj's class
        Sequence<C> other = (Sequence<C>) obj;
        if (other.getCompoundSet() != getCompoundSet()) {
            return false;
        }
        List<C> mine = getAsList();
        List<C> theirs = other.getAsList();
        if (mine.size() != theirs.size()) {
            return false;
        }
        for (int i = 0; i < mine.size(); i++) {
            if (!mine.get(i).equalsIgnoreCase(theirs.get(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Hash of the sequence text.
     *
     * <p>NOTE(review): this hash is case-sensitive whereas {@link #equals(Object)} compares
     * compounds ignoring case, so two equal readers may hash differently - confirm whether that
     * matters to callers before relying on it in hash-based collections.
     */
    @Override
    public int hashCode() {
        return getSequenceAsString().hashCode();
    }

    /** @return the view produced by {@code SequenceMixin.inverse} for this sequence */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public SequenceView<C> getInverse() {
        return SequenceMixin.inverse(this);
    }

    /**
     * Renders part of the sequence through {@link SequenceAsStringHelper}.
     *
     * @param num    start position handed to the helper
     * @param num2   end position handed to the helper
     * @param strand strand handed to the helper
     * @return the helper's string for the requested range
     */
    public String getSequenceAsString(Integer num, Integer num2, Strand strand) {
        SequenceAsStringHelper<C> helper = new SequenceAsStringHelper<C>();
        return helper.getSequenceAsString(this.parsedCompounds, this.compoundSet, num, num2, strand);
    }

    /** @return a {@link SequenceProxyView} over this sequence for the given start and end */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public SequenceView<C> getSubSequence(Integer num, Integer num2) {
        return new SequenceProxyView<C>(this, num, num2);
    }

    /** @return iterator over the parsed compounds */
    @Override // java.lang.Iterable
    public Iterator<C> iterator() {
        return this.parsedCompounds.iterator();
    }

    /** @return the compound set currently in use */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public CompoundSet<C> getCompoundSet() {
        return this.compoundSet;
    }

    /**
     * @return an accession built from the text of the entry's {@code <name>} element with
     *         {@link DataSource#UNIPROT}; a default-constructed {@link AccessionID} when there is
     *         no document, the element is missing, or the lookup fails (failure is logged)
     */
    @Override // org.biojava.nbio.core.sequence.template.Accessioned
    public AccessionID getAccession() {
        AccessionID fallback = new AccessionID();
        if (this.uniprotDoc == null) {
            return fallback;
        }
        try {
            Element entry = selectEntry();
            if (entry == null) {
                return fallback;
            }
            Element name = XMLHelper.selectSingleElement(entry, "name");
            if (name == null) {
                return fallback;
            }
            return new AccessionID(name.getTextContent(), DataSource.UNIPROT);
        } catch (XPathExpressionException e) {
            logger.error("Exception: ", e);
            return fallback;
        }
    }

    /**
     * @return one {@link AccessionID} per {@code <accession>} element of the entry, in document
     *         order; empty when there is no document or no entry
     * @throws XPathExpressionException if the XML lookup fails
     */
    public ArrayList<AccessionID> getAccessions() throws XPathExpressionException {
        ArrayList<AccessionID> accessions = new ArrayList<>();
        if (this.uniprotDoc == null) {
            return accessions;
        }
        Element entry = selectEntry();
        if (entry == null) {
            return accessions;
        }
        for (Element accession : XMLHelper.selectElements(entry, "accession")) {
            accessions.add(new AccessionID(accession.getTextContent(), DataSource.UNIPROT));
        }
        return accessions;
    }

    /**
     * @return same as {@link #getProteinAliases()}
     * @throws XPathExpressionException if the XML lookup fails
     */
    public ArrayList<String> getAliases() throws XPathExpressionException {
        return getProteinAliases();
    }

    /**
     * Collects protein names from the entry's {@code <protein>} element: full and short names of
     * the recommended/alternative names found directly under it and under its
     * {@code component}, {@code domain} and {@code submittedName} children, followed by the
     * non-blank texts of {@code cdAntigenName}, {@code innName}, {@code biotechName} and
     * {@code allergenName}.
     *
     * @return the collected names; empty when there is no document, entry or protein element
     * @throws XPathExpressionException if the XML lookup fails
     */
    public ArrayList<String> getProteinAliases() throws XPathExpressionException {
        ArrayList<String> aliases = new ArrayList<>();
        if (this.uniprotDoc == null) {
            return aliases;
        }
        Element entry = selectEntry();
        if (entry == null) {
            return aliases;
        }
        Element protein = XMLHelper.selectSingleElement(entry, "protein");
        if (protein == null) {
            return aliases;
        }
        getProteinAliasesFromNameGroup(aliases, protein);
        // NOTE(review): submittedName is searched for nested recommended/alternative names just
        // like component and domain - verify against the UniProt schema that this is intended.
        for (String groupName : new String[] {"component", "domain", "submittedName"}) {
            for (Element group : XMLHelper.selectElements(protein, groupName)) {
                getProteinAliasesFromNameGroup(aliases, group);
            }
        }
        for (String simpleName : new String[] {"cdAntigenName", "innName", "biotechName", "allergenName"}) {
            addNonBlankTexts(aliases, protein, simpleName);
        }
        return aliases;
    }

    /** Appends the text of every {@code childName} child of {@code parent} that is not blank. */
    private static void addNonBlankTexts(List<String> target, Element parent, String childName) throws XPathExpressionException {
        for (Element child : XMLHelper.selectElements(parent, childName)) {
            String text = child.getTextContent();
            if (text != null && !text.trim().isEmpty()) {
                target.add(text);
            }
        }
    }

    /** Adds the names of all alternativeName children, then of all recommendedName children. */
    private void getProteinAliasesFromNameGroup(ArrayList<String> arrayList, Element element) throws XPathExpressionException {
        for (Element alternative : XMLHelper.selectElements(element, "alternativeName")) {
            getProteinAliasesFromElement(arrayList, alternative);
        }
        for (Element recommended : XMLHelper.selectElements(element, "recommendedName")) {
            getProteinAliasesFromElement(arrayList, recommended);
        }
    }

    /** Adds the element's fullName text (when present) and its shortName text (when non-blank). */
    private void getProteinAliasesFromElement(ArrayList<String> arrayList, Element element) throws XPathExpressionException {
        Element fullName = XMLHelper.selectSingleElement(element, "fullName");
        if (fullName != null) {
            arrayList.add(fullName.getTextContent());
        }
        Element shortName = XMLHelper.selectSingleElement(element, "shortName");
        if (shortName == null) {
            return;
        }
        String shortText = shortName.getTextContent();
        if (shortText != null && !shortText.trim().isEmpty()) {
            arrayList.add(shortText);
        }
    }

    /**
     * @return text of every {@code <name>} under every {@code <gene>} of the entry, in document
     *         order; empty when there is no document or no entry
     * @throws XPathExpressionException if the XML lookup fails
     */
    public ArrayList<String> getGeneAliases() throws XPathExpressionException {
        ArrayList<String> geneNames = new ArrayList<>();
        if (this.uniprotDoc == null) {
            return geneNames;
        }
        Element entry = selectEntry();
        if (entry == null) {
            return geneNames;
        }
        for (Element gene : XMLHelper.selectElements(entry, "gene")) {
            for (Element name : XMLHelper.selectElements(gene, "name")) {
                geneNames.add(name.getTextContent());
            }
        }
        return geneNames;
    }

    /**
     * Not implemented.
     *
     * @throws UnsupportedOperationException always
     */
    @Override // org.biojava.nbio.core.sequence.template.Sequence
    public int countCompounds(C... cArr) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /** @return the {@code entry} element selected from the document root (may be {@code null}) */
    private Element selectEntry() throws XPathExpressionException {
        return XMLHelper.selectSingleElement(this.uniprotDoc.getDocumentElement(), "entry");
    }

    /**
     * Obtains and parses the XML for an accession: the cache is consulted first when a cache
     * directory is configured; otherwise (or on a cache miss) the entry is downloaded, its
     * default-namespace declaration removed, and the result written to the cache.
     *
     * @param str accession
     * @return the parsed document, never {@code null}
     * @throws IOException if reading, downloading, caching or parsing fails
     */
    private Document getUniprotXML(String str) throws IOException, CompoundNotFoundException {
        final boolean cacheEnabled = uniprotDirectoryCache != null && uniprotDirectoryCache.length() > 0;
        StringBuilder xml = new StringBuilder();
        if (cacheEnabled) {
            xml = fetchFromCache(str);
        }
        if (xml.length() == 0) {
            String location = getUniprotbaseURL() + "/uniprot/" + str.toUpperCase(Locale.ROOT) + ".xml";
            logger.info("Loading: {}", location);
            xml = fetchUniprotXML(location);
            stripNamespaceDeclaration(xml);
            if (cacheEnabled) {
                writeCache(xml, str);
            }
        }
        logger.info("Load complete");
        try {
            return XMLHelper.inputStreamToDocument(new ByteArrayInputStream(xml.toString().getBytes(StandardCharsets.UTF_8)));
        } catch (ParserConfigurationException | SAXException e) {
            logger.error("Exception on xml parse of: {}", xml.toString(), e);
            // Surface the failure as the IOException the constructor already declares instead of
            // handing back null, which would only fail later with a NullPointerException.
            throw new IOException("Could not parse UniProt XML for accession " + str, e);
        }
    }

    /**
     * Deletes the text from the first {@code xmlns=} up to (not including) the next {@code >}.
     * Does nothing when either marker is absent.
     */
    private static void stripNamespaceDeclaration(StringBuilder xml) {
        int start = xml.indexOf("xmlns=");
        if (start == -1) {
            return;
        }
        int end = xml.indexOf(">", start);
        if (end == -1) {
            return;
        }
        xml.replace(start, end, "");
    }

    /** Writes the XML as UTF-8 to {@code <cache dir>/<str>.xml}, replacing any existing file. */
    private void writeCache(StringBuilder sb, String str) throws IOException {
        File target = new File(uniprotDirectoryCache + File.separatorChar + str + ".xml");
        Files.write(target.toPath(), sb.toString().getBytes(StandardCharsets.UTF_8));
    }

    /** Creates (without connecting) a connection carrying the reader's user agent and timeouts. */
    private static HttpURLConnection prepareConnection(URL url, String cookies) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        if (cookies != null) {
            connection.setRequestProperty("Cookie", cookies);
        }
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setInstanceFollowRedirects(true);
        connection.setConnectTimeout(HTTP_TIMEOUT_MILLIS);
        connection.setReadTimeout(HTTP_TIMEOUT_MILLIS);
        return connection;
    }

    /**
     * Opens a connection to {@code url}, manually following 301/302/303 responses (forwarding a
     * {@code Set-Cookie} value as {@code Cookie}) until a different status is returned.
     *
     * @return the connected connection for the final URL
     * @throws IOException on I/O failure, a redirect to the same URL, a redirect without a
     *                     {@code Location} header, or more than {@value #MAX_REDIRECTS} redirects
     */
    private static HttpURLConnection openURLConnection(URL url) throws IOException {
        HttpURLConnection connection = prepareConnection(url, null);
        int status = connection.getResponseCode();
        int redirects = 0;
        while (status == HttpURLConnection.HTTP_MOVED_TEMP
                || status == HttpURLConnection.HTTP_MOVED_PERM
                || status == HttpURLConnection.HTTP_SEE_OTHER) {
            if (++redirects > MAX_REDIRECTS) {
                connection.disconnect();
                throw new IOException("Too many redirects (more than " + MAX_REDIRECTS + "), last at " + url);
            }
            String location = connection.getHeaderField("Location");
            if (location == null) {
                connection.disconnect();
                throw new IOException("Redirect (HTTP " + status + ") without Location header at " + url);
            }
            // Resolving against the current URL also accepts relative Location values.
            URL target = new URL(url, location);
            if (location.equals(url.toString()) || target.toString().equals(url.toString())) {
                connection.disconnect();
                throw new IOException("Cyclic redirect detected at " + location);
            }
            String cookies = connection.getHeaderField("Set-Cookie");
            connection.disconnect();
            logger.info("Redirecting from {} to {}", url, target);
            url = target;
            connection = prepareConnection(url, cookies);
            connection.connect();
            status = connection.getResponseCode();
        }
        connection.connect();
        return connection;
    }

    /**
     * Downloads the document at {@code str}, decoding it as UTF-8 and keeping line breaks.
     * Up to {@value #MAX_FETCH_ATTEMPTS} attempts are made; any status other than 200 counts as a
     * failed attempt.
     *
     * @param str URL of the XML document
     * @return the downloaded text
     * @throws RemoteException if every attempt returned a non-200 status
     * @throws IOException     on any other I/O failure
     */
    private StringBuilder fetchUniprotXML(String str) throws IOException, CompoundNotFoundException {
        StringBuilder xml = new StringBuilder();
        URL url = new URL(str);
        List<String> errorCodes = new ArrayList<>();
        for (int attempt = 0; attempt < MAX_FETCH_ATTEMPTS; attempt++) {
            HttpURLConnection connection = openURLConnection(url);
            try {
                int status = connection.getResponseCode();
                if (status == HttpURLConnection.HTTP_OK) {
                    try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
                        String line;
                        while ((line = reader.readLine()) != null) {
                            // readLine drops the terminator; put one back so adjacent lines
                            // are not fused together.
                            xml.append(line).append('\n');
                        }
                    }
                    return xml;
                }
                errorCodes.add(String.valueOf(status));
            } finally {
                connection.disconnect();
            }
        }
        throw new RemoteException("Couldn't fetch accession from the url " + str + " error codes on " + MAX_FETCH_ATTEMPTS + " attempts are " + errorCodes.toString());
    }

    /**
     * Reads {@code <cache dir>/<str>.xml} as UTF-8 and removes its default-namespace declaration.
     *
     * @return the file's text, or an empty builder when the file does not exist
     */
    private StringBuilder fetchFromCache(String str) throws IOException {
        File cached = new File(uniprotDirectoryCache + File.separatorChar + str + ".xml");
        StringBuilder xml = new StringBuilder();
        if (cached.exists()) {
            // Read the whole file and decode it in one go: sizing a char buffer by the byte
            // length and issuing a single read() can leave trailing NUL characters.
            xml.append(new String(Files.readAllBytes(cached.toPath()), StandardCharsets.UTF_8));
            stripNamespaceDeclaration(xml);
        }
        return xml;
    }

    /**
     * @return text of the entry's {@code <sequence>} element, or an empty string when the XPath
     *         lookup fails (the failure is logged)
     */
    private String getSequence(Document document) {
        try {
            Element entry = XMLHelper.selectSingleElement(document.getDocumentElement(), "entry");
            Element sequenceElement = XMLHelper.selectSingleElement(entry, "sequence");
            return sequenceElement.getTextContent();
        } catch (XPathExpressionException e) {
            logger.error("Problems while parsing sequence in UniProt XML: {}. Sequence will be blank.", e.getMessage());
            return "";
        }
    }

    /** @return base URL entries are downloaded from */
    public static String getUniprotbaseURL() {
        return uniprotbaseURL;
    }

    /** @param str base URL to download entries from; {@code "/uniprot/<ACCESSION>.xml"} is appended to it */
    public static void setUniprotbaseURL(String str) {
        uniprotbaseURL = str;
    }

    /** @return configured cache directory, or {@code null}/empty when caching is disabled */
    public static String getUniprotDirectoryCache() {
        return uniprotDirectoryCache;
    }

    /**
     * Sets the cache directory, creating it if it does not exist yet.
     *
     * @param str directory path; {@code null} or empty disables caching
     */
    public static void setUniprotDirectoryCache(String str) {
        if (str != null && !str.isEmpty()) {
            File directory = new File(str);
            if (!directory.exists() && !directory.mkdirs()) {
                logger.warn("Could not create UniProt cache directory {}", str);
            }
        }
        uniprotDirectoryCache = str;
    }

    /**
     * @return text of the {@code <name>} under the entry's {@code <gene>}, or an empty string when
     *         there is no document, the elements are missing, or the lookup fails
     */
    public String getGeneName() {
        return nestedNameOrEmpty("gene", "Problems while parsing gene name in UniProt XML: {}. Gene name will be blank.");
    }

    /**
     * @return text of the {@code <name>} under the entry's {@code <organism>}, or an empty string
     *         when there is no document, the elements are missing, or the lookup fails
     */
    public String getOrganismName() {
        return nestedNameOrEmpty("organism", "Problems while parsing organism name in UniProt XML: {}. Organism name will be blank.");
    }

    /**
     * Returns the text of {@code entry/<parentName>/name}, or {@code ""} if any step is missing.
     * An XPath failure is logged with {@code failureMessage} and also yields {@code ""}.
     */
    private String nestedNameOrEmpty(String parentName, String failureMessage) {
        if (this.uniprotDoc == null) {
            return "";
        }
        try {
            Element entry = selectEntry();
            if (entry == null) {
                return "";
            }
            Element parent = XMLHelper.selectSingleElement(entry, parentName);
            if (parent == null) {
                return "";
            }
            Element name = XMLHelper.selectSingleElement(parent, "name");
            return name == null ? "" : name.getTextContent();
        } catch (XPathExpressionException e) {
            logger.error(failureMessage, e.getMessage());
            return "";
        }
    }

    /**
     * @return text of every {@code <keyword>} of the entry, in document order; empty when there is
     *         no document, no entry, or the lookup fails (the failure is logged)
     */
    @Override // org.biojava.nbio.core.sequence.features.FeaturesKeyWordInterface
    public ArrayList<String> getKeyWords() {
        ArrayList<String> keywords = new ArrayList<>();
        if (this.uniprotDoc == null) {
            return keywords;
        }
        try {
            Element entry = selectEntry();
            if (entry == null) {
                return keywords;
            }
            for (Element keyword : XMLHelper.selectElements(entry, "keyword")) {
                keywords.add(keyword.getTextContent());
            }
            return keywords;
        } catch (XPathExpressionException e) {
            logger.error("Problems while parsing keywords in UniProt XML: {}. No keywords will be available.", e.getMessage());
            return new ArrayList<>();
        }
    }

    /**
     * Groups the entry's {@code <dbReference>} elements by their {@code type} attribute, keeping
     * first-seen order of types. Each reference carries its {@code id} and the
     * {@code type}/{@code value} pairs of its {@code <property>} children.
     *
     * @return the grouped references; empty when there is no document, no entry, or the lookup
     *         fails (the failure is logged)
     */
    @Override // org.biojava.nbio.core.sequence.features.DatabaseReferenceInterface
    public Map<String, List<DBReferenceInfo>> getDatabaseReferences() {
        Map<String, List<DBReferenceInfo>> referencesByType = new LinkedHashMap<>();
        if (this.uniprotDoc == null) {
            return referencesByType;
        }
        try {
            Element entry = selectEntry();
            if (entry == null) {
                return referencesByType;
            }
            for (Element reference : XMLHelper.selectElements(entry, "dbReference")) {
                String type = reference.getAttribute("type");
                String id = reference.getAttribute("id");
                List<DBReferenceInfo> sameType = referencesByType.get(type);
                if (sameType == null) {
                    sameType = new ArrayList<>();
                    referencesByType.put(type, sameType);
                }
                DBReferenceInfo info = new DBReferenceInfo(type, id);
                for (Element property : XMLHelper.selectElements(reference, "property")) {
                    info.addProperty(property.getAttribute("type"), property.getAttribute("value"));
                }
                sameType.add(info);
            }
            return referencesByType;
        } catch (XPathExpressionException e) {
            logger.error("Problems while parsing db references in UniProt XML: {}. No db references will be available.", e.getMessage());
            return new LinkedHashMap<>();
        }
    }
}
