package org.clulab.processors.clu.bio;

import java.io.BufferedReader;
import java.util.regex.Pattern;
import org.clulab.struct.MutableNumber;
import org.clulab.utils.Files$;
import scala.Predef$;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.StringBuilder;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: BioTokenizerPostProcessor.scala */
/* loaded from: input_file:org/clulab/processors/clu/bio/BioTokenizerPostProcessor$.class */
/**
 * Singleton holder for the regular expressions, word lists and small helper predicates of
 * the bio tokenizer post-processor (Java view of the object compiled from
 * {@code BioTokenizerPostProcessor.scala}).
 *
 * <p>The single instance is created by the static initializer and published through
 * {@link #MODULE$}. Members whose names start with {@code org$clulab$...$$} are kept under
 * exactly those names because that is how they appear in the compiled class; presumably
 * they are referenced from the companion class -- verify before renaming anything.
 */
public final class BioTokenizerPostProcessor$ {
    public static BioTokenizerPostProcessor$ MODULE$;

    // Compiled once instead of on every isConnectingToken() call. These are declared before
    // the static initializer block so that they are already set when the singleton is built.
    /** Full-token match: a lone '-' or '/', or a dash followed by digits. */
    private static final Pattern CONNECTING_TOKEN_PATTERN =
            Pattern.compile("[\\-/]|\\-\\d+", Pattern.CASE_INSENSITIVE);
    /** Prefix match: a dash, digits, and then another '-' or '/'. */
    private static final Pattern CONNECTING_TOKEN_PREFIX_PATTERN =
            Pattern.compile("^\\-\\d+[\\-/]", Pattern.CASE_INSENSITIVE);

    private final boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES;
    /** Regex fragments accepted as the right-hand side of a dash (see {@link #mkDashSuffixes()}). */
    private final Set<String> VALID_DASH_SUFFIXES;
    private final Set<String> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes;
    /** Regex source: a letter followed by at least two word-or-dash characters. */
    private final String VALID_PROTEIN;
    /** Same as {@link #VALID_PROTEIN} but without dashes. */
    private final String VALID_PROTEIN_NO_DASH;
    /** {@link #VALID_PROTEIN} compiled case-insensitively, cached for {@code isValidProtein}. */
    private final Pattern validProteinPattern;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN;
    private final Pattern SITE1_PATTERN;
    /** Alternation of full amino-acid names. */
    private final String SITE2;
    /** Alternation of three-letter amino-acid codes. */
    private final String SITE3;
    private final Pattern SITE2_PATTERN;
    private final Pattern SITE3_PATTERN;
    private final Pattern MUTATION1_PATTERN;
    private final Pattern MUTATION2_PATTERN;
    private final Pattern MUTATION3_PATTERN;
    private final Set<Pattern> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes;
    private final Set<String> PARENS;
    private final Pattern COMPLEX;
    private final Pattern MUTANT;
    private final Pattern MEASUREMENT_UNIT_WITHSLASH;

    static {
        // The constructor stores the new instance in MODULE$.
        new BioTokenizerPostProcessor$();
    }

    // ---------------------------------------------------------------------------------
    // Plain accessors: each one returns the field of the same name.
    // ---------------------------------------------------------------------------------

    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES;
    }

    private Set<String> VALID_DASH_SUFFIXES() {
        return this.VALID_DASH_SUFFIXES;
    }

    public Set<String> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES;
    }

    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes;
    }

    private String VALID_PROTEIN() {
        return this.VALID_PROTEIN;
    }

    private String VALID_PROTEIN_NO_DASH() {
        return this.VALID_PROTEIN_NO_DASH;
    }

    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN;
    }

    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN;
    }

    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN;
    }

    private Pattern SITE1_PATTERN() {
        return this.SITE1_PATTERN;
    }

    private String SITE2() {
        return this.SITE2;
    }

    private String SITE3() {
        return this.SITE3;
    }

    private Pattern SITE2_PATTERN() {
        return this.SITE2_PATTERN;
    }

    private Pattern SITE3_PATTERN() {
        return this.SITE3_PATTERN;
    }

    private Pattern MUTATION1_PATTERN() {
        return this.MUTATION1_PATTERN;
    }

    private Pattern MUTATION2_PATTERN() {
        return this.MUTATION2_PATTERN;
    }

    private Pattern MUTATION3_PATTERN() {
        return this.MUTATION3_PATTERN;
    }

    public Set<Pattern> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS;
    }

    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes;
    }

    private Set<String> PARENS() {
        return this.PARENS;
    }

    private Pattern COMPLEX() {
        return this.COMPLEX;
    }

    private Pattern MUTANT() {
        return this.MUTANT;
    }

    private Pattern MEASUREMENT_UNIT_WITHSLASH() {
        return this.MEASUREMENT_UNIT_WITHSLASH;
    }

    // ---------------------------------------------------------------------------------
    // Predicates
    // ---------------------------------------------------------------------------------

    /**
     * @param str token text
     * @return {@code true} if {@code str} is one of {@code ( ) [ ]}
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isParen(String str) {
        return PARENS().contains(str);
    }

    /**
     * @param str token text
     * @return {@code true} if the whole of {@code str} matches {@code \w+/(ml|l|cm|m)},
     *         ignoring case
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isMeasurementUnit(String str) {
        return MEASUREMENT_UNIT_WITHSLASH().matcher(str).matches();
    }

    /**
     * @param str token text
     * @return {@code true} if {@code str} is exactly {@code complex}, {@code dimer} or
     *         {@code heterodimer} (case-sensitive)
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isComplex(String str) {
        return COMPLEX().matcher(str).matches();
    }

    /**
     * @param str token text
     * @return {@code true} if {@code str} is exactly {@code mutant}, {@code mutants},
     *         {@code mutation} or {@code mutations} (case-sensitive)
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isMutant(String str) {
        return MUTANT().matcher(str).matches();
    }

    /**
     * @param str token text
     * @return {@code true} if the whole of {@code str} matches {@link #VALID_PROTEIN},
     *         ignoring case
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isValidProtein(String str) {
        // Uses the pattern compiled once in the constructor rather than recompiling per call.
        return this.validProteinPattern.matcher(str).matches();
    }

    /**
     * Counts the run of consecutive connecting tokens that starts at index {@code i}.
     *
     * <p>A token is counted when it satisfies {@link #isConnectingToken(String)}, its begin
     * position equals the previous token's end position, and its end position equals the
     * next token's begin position. Scanning stops at the first token that fails any of
     * these checks, or once the last token of the array is reached.
     *
     * @param postProcessorTokenArr tokens to inspect
     * @param i index at which to start scanning; indices below 1 have no previous token and
     *        therefore never count
     * @param mutableNumber counter that is incremented in place, once per counted token
     * @return {@code true} if the counter's value is greater than zero afterwards (this
     *         includes any value the counter already held on entry)
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$countConnectingTokens(PostProcessorToken[] postProcessorTokenArr, int i, MutableNumber<Object> mutableNumber) {
        // idx >= 1 protects the [idx - 1] access below: a token without a left neighbour
        // cannot be attached to a previous token, so it is simply not a connecting token.
        for (int idx = i; idx >= 1 && idx < postProcessorTokenArr.length - 1; idx++) {
            PostProcessorToken current = postProcessorTokenArr[idx];
            boolean connects = isConnectingToken(current.word())
                    && postProcessorTokenArr[idx - 1].endPosition() == current.beginPosition()
                    && current.endPosition() == postProcessorTokenArr[idx + 1].beginPosition();
            if (!connects) {
                break;
            }
            mutableNumber.value_$eq(BoxesRunTime.boxToInteger(BoxesRunTime.unboxToInt(mutableNumber.value()) + 1));
        }
        return BoxesRunTime.unboxToInt(mutableNumber.value()) > 0;
    }

    /**
     * @param str token text
     * @return {@code true} if the whole token is {@code -}, {@code /} or a dash followed by
     *         digits, or if the token starts with a dash, digits and then {@code -} or
     *         {@code /}
     */
    private boolean isConnectingToken(String str) {
        return CONNECTING_TOKEN_PATTERN.matcher(str).matches()
                || CONNECTING_TOKEN_PREFIX_PATTERN.matcher(str).find();
    }

    // ---------------------------------------------------------------------------------
    // Construction helpers
    // ---------------------------------------------------------------------------------

    /**
     * Builds the case-insensitive pattern {@code ([\w/]+)(-)(suffix1|suffix2|...)} from
     * {@link #VALID_DASH_SUFFIXES}.
     */
    private Pattern mkDashSuffixes() {
        return Pattern.compile("([\\w/]+)(-)(" + makeRegexOr(VALID_DASH_SUFFIXES()) + ")", Pattern.CASE_INSENSITIVE);
    }

    /**
     * Joins the given regex fragments into one alternation, wrapping every fragment in its
     * own group: {@code (a)|(b)|(c)}. Fragments appear in the set's iteration order.
     *
     * @param set regex fragments
     * @return the alternation, or the empty string for an empty set
     */
    private String makeRegexOr(Set<String> set) {
        StringBuilder stringBuilder = new StringBuilder();
        set.foreach(str -> {
            // Separator goes in front of every fragment except the first one.
            if (stringBuilder.nonEmpty()) {
                stringBuilder.append("|");
            }
            return stringBuilder.append("(" + str + ")");
        });
        return stringBuilder.toString();
    }

    /**
     * Collects, from every listed classpath resource, the lower-cased tokens that contain a
     * slash. See {@link #$anonfun$loadTokensWithValidSlash$1} for the per-resource parsing.
     *
     * @param seq classpath resource names
     * @return immutable set of the collected tokens
     */
    public Set<String> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$loadTokensWithValidSlash(Seq<String> seq) {
        HashSet<String> hashSet = new HashSet<>();
        seq.foreach(str -> {
            $anonfun$loadTokensWithValidSlash$1(hashSet, str);
            return BoxedUnit.UNIT;
        });
        return hashSet.toSet();
    }

    /**
     * Reads one classpath resource line by line and adds its slash-containing tokens to
     * {@code hashSet}.
     *
     * <p>Each line is trimmed; lines starting with {@code #} are skipped. Of the remaining
     * lines only the text before the first tab is used; it is split on whitespace and every
     * piece containing {@code /} is added in lower case.
     *
     * <p>The reader is closed even when reading fails; any exception raised while reading
     * propagates to the caller unchanged.
     *
     * @param hashSet accumulator that receives the tokens
     * @param str classpath resource name
     */
    public static final /* synthetic */ void $anonfun$loadTokensWithValidSlash$1(HashSet<String> hashSet, String str) {
        // try-with-resources: previously close() was skipped whenever readLine() or the
        // token processing threw, leaking the underlying stream.
        try (BufferedReader reader = Files$.MODULE$.loadStreamFromClasspath(str)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.startsWith("#")) {
                    continue;
                }
                new ArrayOps.ofRef(Predef$.MODULE$.refArrayOps(trimmed.split("\t")[0].split("\\s+"))).foreach(str2 -> {
                    return new StringOps(Predef$.MODULE$.augmentString(str2)).contains(BoxesRunTime.boxToCharacter('/')) ? hashSet.$plus$eq(str2.toLowerCase()) : BoxedUnit.UNIT;
                });
            }
        }
    }

    /**
     * Initializes every constant and publishes the instance through {@link #MODULE$}.
     * Assignment order matters: several patterns are built from fields set just before them.
     */
    private BioTokenizerPostProcessor$() {
        MODULE$ = this;
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES = true;
        this.VALID_DASH_SUFFIXES = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"\\w+ed", "\\w+ing", "\\w+ation", "(in)?dependent", "deficient", "response", "protein", "by", "specific", "like", "inducible", "responsive", "gene", "mRNA", "transcription", "cytoplasmic", "sensitive", "bound", "driven", "positive", "negative", "dominant", "family", "resistant", "activity", "proximal", "defective", "selective", "reporter", "fragment", "rich", "expression", "mechanisms?", "agonist", "heterozygous", "homozygous"}));
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"anti", "auto", "bi", "co", "de", "dis", "down", "extra", "homo", "hetero", "hyper", "macro", "micro", "mono", "omni", "over", "poly", "pre", "post", "re", "semi", "sub", "super", "trans", "under", "up"}));
        // Needs VALID_DASH_SUFFIXES, assigned above.
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes = mkDashSuffixes();
        this.VALID_PROTEIN = "[a-z][\\w\\-][\\w\\-]+";
        this.VALID_PROTEIN_NO_DASH = "[a-z][\\w][\\w]+";
        this.validProteinPattern = Pattern.compile(VALID_PROTEIN(), Pattern.CASE_INSENSITIVE);
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN = Pattern.compile("[/\\-]");
        // protein/protein and protein-protein, each with three capture groups.
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN = Pattern.compile("(" + VALID_PROTEIN() + ")(/)(" + VALID_PROTEIN() + ")", Pattern.CASE_INSENSITIVE);
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN = Pattern.compile("(" + VALID_PROTEIN_NO_DASH() + ")(\\-)(" + VALID_PROTEIN_NO_DASH() + ")", Pattern.CASE_INSENSITIVE);
        this.SITE1_PATTERN = Pattern.compile("[ACDEFGHIKLMNQRSTVWY]\\d+", Pattern.CASE_INSENSITIVE);
        // "isoleucine" was previously misspelled "soleucine", which can never match the real
        // amino-acid name in a full match. The repeated entries are harmless in an alternation.
        this.SITE2 = "glycine|phenylalanine|leucine|serine|tyrosine|cysteine|tryptophan|proline|histidine|arginine|isoleucine|methionine|threonine|asparagine|lysine|serine|arginine|valine|alanine|aspartate|glutamate|glycine";
        this.SITE3 = "Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val";
        this.SITE2_PATTERN = Pattern.compile(SITE2(), Pattern.CASE_INSENSITIVE);
        this.SITE3_PATTERN = Pattern.compile("(" + SITE3() + ")\\d*", Pattern.CASE_INSENSITIVE);
        this.MUTATION1_PATTERN = Pattern.compile("[ACDEFGHIKLMNQRSTVWY]\\d+[ACDEFGHIKLMNPQRSTVWY]", Pattern.CASE_INSENSITIVE);
        // No CASE_INSENSITIVE flag here: the leading 'P' must be upper case.
        this.MUTATION2_PATTERN = Pattern.compile("P\\d+[ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy]");
        this.MUTATION3_PATTERN = Pattern.compile("(Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val)\\d+(Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val)", Pattern.CASE_INSENSITIVE);
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new Pattern[]{SITE1_PATTERN(), SITE2_PATTERN(), SITE3_PATTERN(), MUTATION1_PATTERN(), MUTATION2_PATTERN(), MUTATION3_PATTERN()}));
        // (amino-acid name or code)(-)(rest), ignoring case.
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes = Pattern.compile("(" + SITE2() + "|" + SITE3() + ")(-)([\\w/]+)", Pattern.CASE_INSENSITIVE);
        this.PARENS = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"(", ")", "[", "]"}));
        this.COMPLEX = Pattern.compile("complex|dimer|heterodimer");
        this.MUTANT = Pattern.compile("mutant|mutants|mutation|mutations");
        this.MEASUREMENT_UNIT_WITHSLASH = Pattern.compile("\\w+/(ml|l|cm|m)", Pattern.CASE_INSENSITIVE);
    }
}
