/*
 * Decompiled with CFR 0.152.
 */
package org.forester.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Sequence;
import org.forester.util.ForesterUtil;

public final class SequenceAccessionTools {
    public static final Pattern GENBANK_NUC_PATTERN_1 = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)");
    public static final Pattern GENBANK_NUC_PATTERN_2 = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}\\d{6}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)");
    public static final Pattern GENBANK_PROT_PATTERN = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]{3}\\d{5}(?:\\.\\d+)?)(?:[^a-zA-Z0-9]|\\Z)");
    public static final Pattern GI_PATTERN = Pattern.compile("(?:\\b|_)(?:GI|gi)[|_=:](\\d+)(?:\\b|_)");
    public static final String UNIPROT_KB_BASE_PATTERN_STR = "((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))";
    public static final Pattern UNIPROT_KB_PATTERN_0 = Pattern.compile("(?:\\b|_)((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))(?:\\b|_)");
    public static final Pattern UNIPROT_KB_PATTERN_1 = Pattern.compile("(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}))(?:\\b|_)");
    public static final Pattern UNIPROT_KB_PATTERN_2 = Pattern.compile("(?:\\b|_)(?:[A-Z0-9]{2,5}|((?:[OPQ][0-9][A-Z0-9]{3}[0-9])|(?:[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2})))_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)(?:\\b|_)");
    public static final Pattern ENSEMBL_PATTERN = Pattern.compile("(?:\\b|_)(ENS[A-Z]*[0-9]+)(?:\\b|_)");
    private static final Pattern REFSEQ_PATTERN = Pattern.compile("(?:\\A|.*[^a-zA-Z0-9])([A-Z]{2}_\\d{6,})(?:[^a-zA-Z0-9]|\\Z)");

    private SequenceAccessionTools() {
    }

    public static final boolean isProteinDbQuery(String query) {
        String r1 = SequenceAccessionTools.parseRefSeqAccessorFromString(query);
        if (!ForesterUtil.isEmpty(r1) && r1.charAt(1) == 'P') {
            return true;
        }
        String r2 = SequenceAccessionTools.parseUniProtAccessorFromString(query);
        if (!ForesterUtil.isEmpty(r2)) {
            return true;
        }
        return GENBANK_PROT_PATTERN.matcher(query).lookingAt();
    }

    public static final Accession obtainAccessorFromDataFields(PhylogenyNode n) {
        String a = SequenceAccessionTools.obtainUniProtAccessorFromDataFields(n);
        if (!ForesterUtil.isEmpty(a)) {
            return new Accession(a, Accession.Source.UNIPROT);
        }
        a = SequenceAccessionTools.obtainGenbankAccessorFromDataFields(n);
        if (!ForesterUtil.isEmpty(a)) {
            return new Accession(a, Accession.Source.NCBI);
        }
        a = SequenceAccessionTools.obtainRefSeqAccessorFromDataFields(n);
        if (!ForesterUtil.isEmpty(a)) {
            return new Accession(a, Accession.Source.REFSEQ);
        }
        a = SequenceAccessionTools.obtainGiNumberFromDataFields(n);
        if (!ForesterUtil.isEmpty(a)) {
            return new Accession(a, Accession.Source.GI);
        }
        return null;
    }

    public static final Accession obtainFromSeqAccession(PhylogenyNode n) {
        if (n.getNodeData().isHasSequence() && n.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(n.getNodeData().getSequence().getAccession().getSource()) && !ForesterUtil.isEmpty(n.getNodeData().getSequence().getAccession().getValue())) {
            String source = n.getNodeData().getSequence().getAccession().getSource().toLowerCase();
            String value = n.getNodeData().getSequence().getAccession().getValue();
            if (source.startsWith("uniprot") || source.equals("swissprot") || source.equals("trembl") || source.equals("sp")) {
                return new Accession(value, Accession.Source.UNIPROT);
            }
            if (source.equals("embl") || source.equals("ebi")) {
                return new Accession(value, Accession.Source.EMBL);
            }
            if (source.equals("ncbi") || source.equals("genbank")) {
                return new Accession(value, Accession.Source.NCBI);
            }
            if (source.equals("refseq")) {
                return new Accession(value, Accession.Source.REFSEQ);
            }
            if (source.equals("gi")) {
                return new Accession(value, Accession.Source.GI);
            }
        }
        return null;
    }

    public static final String obtainGenbankAccessorFromDataFields(PhylogenyNode n) {
        String a = null;
        if (n.getNodeData().isHasSequence()) {
            Sequence seq = n.getNodeData().getSequence();
            if (!ForesterUtil.isEmpty(seq.getSymbol())) {
                a = SequenceAccessionTools.parseGenbankAccessorFromString(seq.getSymbol());
            }
            if (!ForesterUtil.isEmpty(seq.getGeneName())) {
                a = SequenceAccessionTools.parseGenbankAccessorFromString(seq.getGeneName());
            }
            if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(seq.getName())) {
                a = SequenceAccessionTools.parseGenbankAccessorFromString(seq.getName());
            }
            if (ForesterUtil.isEmpty(a) && n.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(seq.getAccession().getValue())) {
                a = SequenceAccessionTools.parseGenbankAccessorFromString(seq.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(n.getName())) {
            a = SequenceAccessionTools.parseGenbankAccessorFromString(n.getName());
        }
        return a;
    }

    public static final String obtainGiNumberFromDataFields(PhylogenyNode n) {
        String a = null;
        if (n.getNodeData().isHasSequence()) {
            Sequence seq = n.getNodeData().getSequence();
            if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(seq.getName())) {
                a = SequenceAccessionTools.parseGInumberFromString(seq.getName());
            }
            if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(seq.getGeneName())) {
                a = SequenceAccessionTools.parseGInumberFromString(seq.getGeneName());
            }
            if (ForesterUtil.isEmpty(a) && n.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(seq.getAccession().getValue())) {
                a = SequenceAccessionTools.parseGInumberFromString(seq.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(n.getName())) {
            a = SequenceAccessionTools.parseGInumberFromString(n.getName());
        }
        return a;
    }

    public static final String obtainRefSeqAccessorFromDataFields(PhylogenyNode n) {
        String a = null;
        if (n.getNodeData().isHasSequence()) {
            Sequence seq = n.getNodeData().getSequence();
            if (!ForesterUtil.isEmpty(seq.getSymbol())) {
                a = SequenceAccessionTools.parseRefSeqAccessorFromString(seq.getSymbol());
            }
            if (!ForesterUtil.isEmpty(seq.getGeneName())) {
                a = SequenceAccessionTools.parseRefSeqAccessorFromString(seq.getGeneName());
            }
            if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(seq.getName())) {
                a = SequenceAccessionTools.parseRefSeqAccessorFromString(seq.getName());
            }
            if (ForesterUtil.isEmpty(a) && n.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(seq.getAccession().getValue())) {
                a = SequenceAccessionTools.parseRefSeqAccessorFromString(seq.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(n.getName())) {
            a = SequenceAccessionTools.parseRefSeqAccessorFromString(n.getName());
        }
        return a;
    }

    public static final String obtainUniProtAccessorFromDataFields(PhylogenyNode n) {
        String a = null;
        if (n.getNodeData().isHasSequence()) {
            Sequence seq = n.getNodeData().getSequence();
            if (!ForesterUtil.isEmpty(seq.getSymbol())) {
                a = SequenceAccessionTools.parseUniProtAccessorFromString(seq.getSymbol());
            }
            if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(seq.getName())) {
                a = SequenceAccessionTools.parseUniProtAccessorFromString(seq.getName());
            }
            if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(seq.getGeneName())) {
                a = SequenceAccessionTools.parseUniProtAccessorFromString(seq.getGeneName());
            }
            if (ForesterUtil.isEmpty(a) && n.getNodeData().getSequence().getAccession() != null && !ForesterUtil.isEmpty(seq.getAccession().getValue())) {
                a = SequenceAccessionTools.parseUniProtAccessorFromString(seq.getAccession().getValue());
            }
        }
        if (ForesterUtil.isEmpty(a) && !ForesterUtil.isEmpty(n.getName())) {
            a = SequenceAccessionTools.parseUniProtAccessorFromString(n.getName());
        }
        return a;
    }

    public static final Accession parseAccessorFromString(String s2) {
        if (!ForesterUtil.isEmpty(s2)) {
            String v = SequenceAccessionTools.parseUniProtAccessorFromString(s2);
            if (!ForesterUtil.isEmpty(v)) {
                return new Accession(v, Accession.Source.UNIPROT);
            }
            v = SequenceAccessionTools.parseGenbankAccessorFromString(s2);
            if (!ForesterUtil.isEmpty(v)) {
                return new Accession(v, Accession.Source.NCBI);
            }
            v = SequenceAccessionTools.parseRefSeqAccessorFromString(s2);
            if (!ForesterUtil.isEmpty(v)) {
                return new Accession(v, Accession.Source.REFSEQ);
            }
            v = SequenceAccessionTools.parseGInumberFromString(s2);
            if (!ForesterUtil.isEmpty(v)) {
                return new Accession(v, Accession.Source.GI);
            }
            v = SequenceAccessionTools.parseEnsemlAccessorFromString(s2);
            if (!ForesterUtil.isEmpty(v)) {
                return new Accession(v, Accession.Source.ENSEMBL);
            }
        }
        return null;
    }

    public static final String parseGenbankAccessorFromString(String s2) {
        Matcher m3 = GENBANK_NUC_PATTERN_1.matcher(s2);
        if (m3.lookingAt()) {
            return m3.group(1);
        }
        m3 = GENBANK_NUC_PATTERN_2.matcher(s2);
        if (m3.lookingAt()) {
            return m3.group(1);
        }
        m3 = GENBANK_PROT_PATTERN.matcher(s2);
        if (m3.lookingAt()) {
            return m3.group(1);
        }
        return null;
    }

    public static final String parseGenbankProteinAccessorFromString(String s2) {
        Matcher m3 = GENBANK_PROT_PATTERN.matcher(s2);
        if (m3.lookingAt()) {
            return m3.group(1);
        }
        return null;
    }

    public static final String parseGInumberFromString(String s2) {
        Matcher m3 = GI_PATTERN.matcher(s2);
        if (m3.find()) {
            return m3.group(1);
        }
        return null;
    }

    public static final String parseEnsemlAccessorFromString(String s2) {
        Matcher m3 = ENSEMBL_PATTERN.matcher(s2);
        if (m3.find()) {
            return m3.group(1);
        }
        return null;
    }

    public static final String parseRefSeqAccessorFromString(String s2) {
        Matcher m3 = REFSEQ_PATTERN.matcher(s2);
        if (m3.lookingAt()) {
            return m3.group(1);
        }
        return null;
    }

    public static final String parseUniProtAccessorFromString(String s2) {
        Matcher m3 = UNIPROT_KB_PATTERN_1.matcher(s2);
        if (m3.find()) {
            return m3.group(1);
        }
        m3 = UNIPROT_KB_PATTERN_2.matcher(s2);
        if (m3.find()) {
            return m3.group();
        }
        m3 = UNIPROT_KB_PATTERN_0.matcher(s2);
        if (m3.find()) {
            return m3.group(1);
        }
        return null;
    }
}

