package org.clulab.processors.bionlp;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import java.util.regex.Pattern;
import org.clulab.struct.MutableNumber;
import scala.Predef$;
import scala.StringContext;
import scala.collection.immutable.Set;
import scala.collection.mutable.StringBuilder;
import scala.runtime.BoxesRunTime;

/* compiled from: BioNLPTokenizerPostProcessor.scala */
/* loaded from: input_file:org/clulab/processors/bionlp/BioNLPTokenizerPostProcessor$.class */
/**
 * Singleton holder (the Java view of a Scala {@code object}) for the constants, regular
 * expressions and small predicates used when post-processing tokens of biomedical text.
 *
 * <p>All state is assigned once in the private constructor and exposed through accessor
 * methods that carry the same name as the underlying field.
 */
public final class BioNLPTokenizerPostProcessor$ {
    /**
     * The single instance of this class.
     *
     * <p>Not declared {@code final}: Java forbids assigning a {@code static final} field from
     * a constructor, and the constructor publishes {@code this} here as its first action so
     * that the instance is already reachable while the remaining fields are being set up.
     * It is assigned exactly once, during class initialization.
     */
    public static BioNLPTokenizerPostProcessor$ MODULE$;

    private final CoreLabelTokenFactory tokenFactory;
    private final boolean DISCARD_STANDALONE_DASHES;
    private final Set<String> VALID_DASH_SUFFIXES;
    private final Set<String> COMMON_PREFIXES;
    private final Pattern dashSuffixes;
    private final String VALID_PROTEIN;
    private final String VALID_PROTEIN_NO_DASH;
    private final Pattern VALID_COMPLEX_SEPARATOR_PATTERN;
    private final Pattern SINGLESLASH_PATTERN;
    private final Pattern SINGLEDASH_PATTERN;
    private final Pattern SITE1_PATTERN;
    private final String SITE2;
    private final String SITE3;
    private final Pattern SITE2_PATTERN;
    private final Pattern SITE3_PATTERN;
    private final Pattern MUTATION1_PATTERN;
    private final Pattern MUTATION2_PATTERN;
    private final Pattern MUTATION3_PATTERN;
    private final Set<Pattern> MODIFICATIONS;
    private final Pattern sitePrefixes;
    private final Set<String> PARENS;
    private final Pattern COMPLEX;
    private final Pattern MUTANT;
    private final Pattern MEASUREMENT_UNIT_WITHSLASH;

    // Pre-compiled forms of the regexes used by isValidProtein / isConnectingToken, so that
    // those predicates do not recompile a constant pattern on every call.
    private final Pattern validProteinPattern;
    private final Pattern connectingTokenPattern;
    private final Pattern connectingTokenPrefixPattern;

    static {
        // The constructor stores the new instance in MODULE$.
        new BioNLPTokenizerPostProcessor$();
    }

    /** @return the factory created for building {@link CoreLabel} tokens */
    public CoreLabelTokenFactory tokenFactory() {
        return this.tokenFactory;
    }

    /** @return the standalone-dash flag; always {@code true} as configured in the constructor */
    public boolean DISCARD_STANDALONE_DASHES() {
        return this.DISCARD_STANDALONE_DASHES;
    }

    /** @return the regex fragments accepted as the part following a dash (see {@link #mkDashSuffixes()}) */
    public Set<String> VALID_DASH_SUFFIXES() {
        return this.VALID_DASH_SUFFIXES;
    }

    /** @return the set of prefix strings such as {@code "anti"}, {@code "co"}, {@code "pre"} */
    public Set<String> COMMON_PREFIXES() {
        return this.COMMON_PREFIXES;
    }

    /** @return the pattern produced by {@link #mkDashSuffixes()} at construction time */
    public Pattern dashSuffixes() {
        return this.dashSuffixes;
    }

    /** @return regex source: a letter followed by at least two word-or-dash characters */
    public String VALID_PROTEIN() {
        return this.VALID_PROTEIN;
    }

    /** @return regex source: a letter followed by at least two word characters (no dash) */
    public String VALID_PROTEIN_NO_DASH() {
        return this.VALID_PROTEIN_NO_DASH;
    }

    /** @return pattern matching a single {@code /} or {@code -} */
    public Pattern VALID_COMPLEX_SEPARATOR_PATTERN() {
        return this.VALID_COMPLEX_SEPARATOR_PATTERN;
    }

    /** @return case-insensitive pattern {@code (VALID_PROTEIN)(/)(VALID_PROTEIN)} */
    public Pattern SINGLESLASH_PATTERN() {
        return this.SINGLESLASH_PATTERN;
    }

    /** @return case-insensitive pattern {@code (VALID_PROTEIN_NO_DASH)(\-)(VALID_PROTEIN_NO_DASH)} */
    public Pattern SINGLEDASH_PATTERN() {
        return this.SINGLEDASH_PATTERN;
    }

    /** @return case-insensitive pattern: one letter from {@code ACDEFGHIKLMNQRSTVWY} followed by digits */
    public Pattern SITE1_PATTERN() {
        return this.SITE1_PATTERN;
    }

    /** @return regex alternation of full amino-acid names */
    public String SITE2() {
        return this.SITE2;
    }

    /** @return regex alternation of three-letter amino-acid codes */
    public String SITE3() {
        return this.SITE3;
    }

    /** @return case-insensitive pattern compiled from {@link #SITE2()} */
    public Pattern SITE2_PATTERN() {
        return this.SITE2_PATTERN;
    }

    /** @return case-insensitive pattern: a {@link #SITE3()} code followed by optional digits */
    public Pattern SITE3_PATTERN() {
        return this.SITE3_PATTERN;
    }

    /** @return case-insensitive pattern: letter, digits, letter (first letter class excludes {@code P}) */
    public Pattern MUTATION1_PATTERN() {
        return this.MUTATION1_PATTERN;
    }

    /** @return pattern: upper-case {@code P}, digits, then one letter in either case */
    public Pattern MUTATION2_PATTERN() {
        return this.MUTATION2_PATTERN;
    }

    /** @return case-insensitive pattern: three-letter code, digits, three-letter code */
    public Pattern MUTATION3_PATTERN() {
        return this.MUTATION3_PATTERN;
    }

    /** @return the set holding the three site patterns and the three mutation patterns */
    public Set<Pattern> MODIFICATIONS() {
        return this.MODIFICATIONS;
    }

    /** @return case-insensitive pattern {@code (SITE2|SITE3)(-)([\w/]+)} */
    public Pattern sitePrefixes() {
        return this.sitePrefixes;
    }

    /** @return the set {@code "(", ")", "[", "]"} */
    public Set<String> PARENS() {
        return this.PARENS;
    }

    /** @return case-sensitive pattern {@code complex|dimer|heterodimer} */
    public Pattern COMPLEX() {
        return this.COMPLEX;
    }

    /** @return case-sensitive pattern {@code mutant|mutants|mutation|mutations} */
    public Pattern MUTANT() {
        return this.MUTANT;
    }

    /** @return case-insensitive pattern {@code \w+/(ml|l|cm|m)} */
    public Pattern MEASUREMENT_UNIT_WITHSLASH() {
        return this.MEASUREMENT_UNIT_WITHSLASH;
    }

    /**
     * @param str the text to test
     * @return {@code true} when {@code str} is a member of {@link #PARENS()}
     */
    public boolean isParen(String str) {
        return PARENS().contains(str);
    }

    /**
     * @param str the text to test
     * @return {@code true} when the whole of {@code str} matches {@link #MEASUREMENT_UNIT_WITHSLASH()}
     */
    public boolean isMeasurementUnit(String str) {
        return MEASUREMENT_UNIT_WITHSLASH().matcher(str).matches();
    }

    /**
     * @param str the text to test
     * @return {@code true} when the whole of {@code str} matches {@link #COMPLEX()}
     */
    public boolean isComplex(String str) {
        return COMPLEX().matcher(str).matches();
    }

    /**
     * @param str the text to test
     * @return {@code true} when the whole of {@code str} matches {@link #MUTANT()}
     */
    public boolean isMutant(String str) {
        return MUTANT().matcher(str).matches();
    }

    /**
     * Tests a string against {@link #VALID_PROTEIN()}, ignoring case.
     *
     * @param str the text to test
     * @return {@code true} when the whole of {@code str} matches
     */
    public boolean isValidProtein(String str) {
        return this.validProteinPattern.matcher(str).matches();
    }

    /**
     * Counts the run of connecting tokens that starts at index {@code i}, adding one to
     * {@code mutableNumber} for each.
     *
     * <p>The scan advances while the current token is a connecting token (see
     * {@link #isConnectingToken(String)}), is not the last token, and is character-adjacent to
     * both neighbours: the previous token ends where it begins and it ends where the next
     * token begins. A token at index 0 or below has no previous token to be adjacent to, so
     * the scan stops there instead of indexing outside the array.
     *
     * @param coreLabelArr the token sequence
     * @param i index at which to start scanning
     * @param mutableNumber counter holding a boxed {@code int}; incremented in place
     * @return {@code true} when the counter's value is greater than zero after the scan
     */
    public boolean countConnectingTokens(CoreLabel[] coreLabelArr, int i, MutableNumber<Object> mutableNumber) {
        int index = i;
        while (index > 0
                && index < coreLabelArr.length - 1
                && isConnectingToken(coreLabelArr[index].word())
                && coreLabelArr[index - 1].endPosition() == coreLabelArr[index].beginPosition()
                && coreLabelArr[index].endPosition() == coreLabelArr[index + 1].beginPosition()) {
            int current = BoxesRunTime.unboxToInt(mutableNumber.value());
            mutableNumber.value_$eq(BoxesRunTime.boxToInteger(current + 1));
            index++;
        }
        return BoxesRunTime.unboxToInt(mutableNumber.value()) > 0;
    }

    /**
     * Tests whether a token is a connector.
     *
     * @param str the token text
     * @return {@code true} when {@code str} is exactly {@code -}, {@code /}, or a dash followed
     *         by digits, or when {@code str} begins with a dash, digits, and then a dash or slash
     */
    public boolean isConnectingToken(String str) {
        return this.connectingTokenPattern.matcher(str).matches()
                || this.connectingTokenPrefixPattern.matcher(str).find();
    }

    /**
     * Builds the case-insensitive pattern {@code ([\w/]+)(-)(S)}, where {@code S} is the
     * result of {@link #makeRegexOr(Set)} applied to {@link #VALID_DASH_SUFFIXES()}.
     *
     * @return a newly compiled pattern
     */
    public Pattern mkDashSuffixes() {
        String suffixAlternatives = makeRegexOr(VALID_DASH_SUFFIXES());
        return Pattern.compile("([\\w/]+)(-)(" + suffixAlternatives + ")", Pattern.CASE_INSENSITIVE);
    }

    /**
     * Combines the given regex pieces into a single regex string.
     *
     * <p>Each element of {@code set} is handed to a compiler-generated closure that writes into
     * a shared builder; the closure's body is not part of this file.
     * NOTE(review): presumably it joins the pieces with {@code |} - confirm against the closure class.
     *
     * @param set the regex pieces
     * @return the accumulated builder contents
     */
    public String makeRegexOr(Set<String> set) {
        StringBuilder combined = new StringBuilder();
        set.foreach(new BioNLPTokenizerPostProcessor$$anonfun$makeRegexOr$1(combined));
        return combined.toString();
    }

    /** Publishes the instance in {@link #MODULE$} and then initializes every constant. */
    private BioNLPTokenizerPostProcessor$() {
        MODULE$ = this;
        this.connectingTokenPattern = Pattern.compile("[\\-/]|\\-\\d+", Pattern.CASE_INSENSITIVE);
        this.connectingTokenPrefixPattern = Pattern.compile("^\\-\\d+[\\-/]", Pattern.CASE_INSENSITIVE);
        this.tokenFactory = new CoreLabelTokenFactory();
        this.DISCARD_STANDALONE_DASHES = true;
        this.VALID_DASH_SUFFIXES = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"\\w+ed", "\\w+ing", "\\w+ation", "(in)?dependent", "deficient", "response", "protein", "by", "specific", "like", "inducible", "responsive", "gene", "mRNA", "transcription", "cytoplasmic", "sensitive", "bound", "driven", "positive", "negative", "dominant", "family", "resistant", "activity", "proximal", "defective", "selective", "reporter", "fragment", "rich", "expression", "mechanisms?", "agonist", "heterozygous", "homozygous"}));
        this.COMMON_PREFIXES = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"anti", "auto", "bi", "co", "de", "dis", "down", "extra", "homo", "hetero", "hyper", "macro", "micro", "mono", "omni", "over", "poly", "pre", "post", "re", "semi", "sub", "super", "trans", "under", "up"}));
        // Must come after VALID_DASH_SUFFIXES, which mkDashSuffixes() reads.
        this.dashSuffixes = mkDashSuffixes();
        this.VALID_PROTEIN = "[a-z][\\w\\-][\\w\\-]+";
        this.VALID_PROTEIN_NO_DASH = "[a-z][\\w][\\w]+";
        this.validProteinPattern = Pattern.compile(this.VALID_PROTEIN, Pattern.CASE_INSENSITIVE);
        this.VALID_COMPLEX_SEPARATOR_PATTERN = Pattern.compile("[/\\-]");
        this.SINGLESLASH_PATTERN = Pattern.compile("(" + this.VALID_PROTEIN + ")(/)(" + this.VALID_PROTEIN + ")", Pattern.CASE_INSENSITIVE);
        this.SINGLEDASH_PATTERN = Pattern.compile("(" + this.VALID_PROTEIN_NO_DASH + ")(\\-)(" + this.VALID_PROTEIN_NO_DASH + ")", Pattern.CASE_INSENSITIVE);
        this.SITE1_PATTERN = Pattern.compile("[ACDEFGHIKLMNQRSTVWY]\\d+", Pattern.CASE_INSENSITIVE);
        // "isoleucine" was previously misspelled "soleucine", which kept the full name from
        // matching as a whole token.
        // NOTE(review): "glutamine" does not appear in this list although "Gln" is in SITE3 - confirm whether intended.
        this.SITE2 = "glycine|phenylalanine|leucine|serine|tyrosine|cysteine|tryptophan|proline|histidine|arginine|isoleucine|methionine|threonine|asparagine|lysine|serine|arginine|valine|alanine|aspartate|glutamate|glycine";
        this.SITE3 = "Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val";
        this.SITE2_PATTERN = Pattern.compile(this.SITE2, Pattern.CASE_INSENSITIVE);
        this.SITE3_PATTERN = Pattern.compile("(" + this.SITE3 + ")\\d*", Pattern.CASE_INSENSITIVE);
        this.MUTATION1_PATTERN = Pattern.compile("[ACDEFGHIKLMNQRSTVWY]\\d+[ACDEFGHIKLMNPQRSTVWY]", Pattern.CASE_INSENSITIVE);
        this.MUTATION2_PATTERN = Pattern.compile("P\\d+[ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy]");
        this.MUTATION3_PATTERN = Pattern.compile("(Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val)\\d+(Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val)", Pattern.CASE_INSENSITIVE);
        this.MODIFICATIONS = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new Pattern[]{SITE1_PATTERN(), SITE2_PATTERN(), SITE3_PATTERN(), MUTATION1_PATTERN(), MUTATION2_PATTERN(), MUTATION3_PATTERN()}));
        this.sitePrefixes = Pattern.compile("(" + this.SITE2 + "|" + this.SITE3 + ")(-)([\\w/]+)", Pattern.CASE_INSENSITIVE);
        this.PARENS = Predef$.MODULE$.Set().apply(Predef$.MODULE$.wrapRefArray(new String[]{"(", ")", "[", "]"}));
        this.COMPLEX = Pattern.compile("complex|dimer|heterodimer");
        this.MUTANT = Pattern.compile("mutant|mutants|mutation|mutations");
        this.MEASUREMENT_UNIT_WITHSLASH = Pattern.compile("\\w+/(ml|l|cm|m)", Pattern.CASE_INSENSITIVE);
    }
}
