/*
 * Decompiled with CFR 0.152.
 */
package org.clulab.processors.clu.bio;

import java.io.BufferedReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.clulab.processors.clu.bio.PostProcessorToken;
import org.clulab.struct.MutableNumber;
import org.clulab.utils.Files$;
import scala.Function1;
import scala.Predef$;
import scala.Serializable;
import scala.StringContext;
import scala.collection.Seq;
import scala.collection.immutable.Set;
import scala.collection.immutable.StringOps;
import scala.collection.mutable.ArrayOps;
import scala.collection.mutable.HashSet;
import scala.collection.mutable.StringBuilder;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/**
 * Decompiled singleton holding the regular expressions and small string
 * predicates used by the bio tokenizer post-processor.
 *
 * <p>A single instance is created by the static initializer and published
 * through {@link #MODULE$}. All patterns are compiled once, in the private
 * constructor, and exposed through accessor methods whose (mangled) names are
 * kept exactly as generated so that existing callers keep linking.
 */
public final class BioTokenizerPostProcessor$ {
    /** The only instance of this class; assigned by the private constructor. */
    public static BioTokenizerPostProcessor$ MODULE$;

    // These two patterns were previously recompiled on every isConnectingToken call.
    // They are declared before the static initializer so they are ready before the
    // singleton instance is constructed.
    /** A whole token that is a lone dash or slash, or a dash followed by digits. */
    private static final Pattern CONNECTING_TOKEN_PATTERN = Pattern.compile("[\\-/]|\\-\\d+", Pattern.CASE_INSENSITIVE);
    /** A token that starts with a dash, digits, and then another dash or slash. */
    private static final Pattern CONNECTING_PREFIX_PATTERN = Pattern.compile("^\\-\\d+[\\-/]", Pattern.CASE_INSENSITIVE);

    private final boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES;
    private final Set<String> VALID_DASH_SUFFIXES;
    private final Set<String> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes;
    private final String VALID_PROTEIN;
    /** {@link #VALID_PROTEIN} compiled once, case-insensitively, for {@code isValidProtein}. */
    private final Pattern VALID_PROTEIN_PATTERN;
    private final String VALID_PROTEIN_NO_DASH;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN;
    private final Pattern SITE1_PATTERN;
    private final String SITE2;
    private final String SITE3;
    private final Pattern SITE2_PATTERN;
    private final Pattern SITE3_PATTERN;
    private final Pattern MUTATION1_PATTERN;
    private final Pattern MUTATION2_PATTERN;
    private final Pattern MUTATION3_PATTERN;
    private final Set<Pattern> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS;
    private final Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes;
    private final Set<String> PARENS;
    private final Pattern COMPLEX;
    private final Pattern MUTANT;
    private final Pattern MEASUREMENT_UNIT_WITHSLASH;

    static {
        // The constructor stores the new instance in MODULE$.
        new BioTokenizerPostProcessor$();
    }

    /** @return the flag controlling whether standalone dashes are discarded (always {@code true} here) */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES;
    }

    /** @return regex fragments accepted as the part after a dash */
    private Set<String> VALID_DASH_SUFFIXES() {
        return this.VALID_DASH_SUFFIXES;
    }

    /** @return the set of common word prefixes ("anti", "co", "pre", ...) */
    public Set<String> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES;
    }

    /** @return the pattern {@code (word)(-)(suffix)} built from {@link #VALID_DASH_SUFFIXES()} */
    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes;
    }

    /** @return regex source for a protein-like name that may contain dashes */
    private String VALID_PROTEIN() {
        return this.VALID_PROTEIN;
    }

    /** @return regex source for a protein-like name without dashes */
    private String VALID_PROTEIN_NO_DASH() {
        return this.VALID_PROTEIN_NO_DASH;
    }

    /** @return pattern matching a single slash or dash */
    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN;
    }

    /** @return pattern with three groups: name, slash, name */
    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN;
    }

    /** @return pattern with three groups: dash-free name, dash, dash-free name */
    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN;
    }

    /** @return pattern for one letter followed by digits */
    private Pattern SITE1_PATTERN() {
        return this.SITE1_PATTERN;
    }

    /** @return alternation of full amino-acid names */
    private String SITE2() {
        return this.SITE2;
    }

    /** @return alternation of three-letter amino-acid abbreviations */
    private String SITE3() {
        return this.SITE3;
    }

    /** @return {@link #SITE2()} compiled case-insensitively */
    private Pattern SITE2_PATTERN() {
        return this.SITE2_PATTERN;
    }

    /** @return a three-letter abbreviation optionally followed by digits */
    private Pattern SITE3_PATTERN() {
        return this.SITE3_PATTERN;
    }

    /** @return pattern letter-digits-letter (first letter class excludes P) */
    private Pattern MUTATION1_PATTERN() {
        return this.MUTATION1_PATTERN;
    }

    /** @return pattern for an upper-case {@code P}, digits, then one letter; case-sensitive */
    private Pattern MUTATION2_PATTERN() {
        return this.MUTATION2_PATTERN;
    }

    /** @return pattern abbreviation-digits-abbreviation */
    private Pattern MUTATION3_PATTERN() {
        return this.MUTATION3_PATTERN;
    }

    /** @return the set containing the three site patterns and the three mutation patterns */
    public Set<Pattern> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS;
    }

    /** @return pattern {@code (amino-acid name or abbreviation)(-)(rest)} */
    public Pattern org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes() {
        return this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes;
    }

    /** @return the four bracket strings {@code ( ) [ ]} */
    private Set<String> PARENS() {
        return this.PARENS;
    }

    /** @return pattern for the words complex, dimer, heterodimer (case-sensitive) */
    private Pattern COMPLEX() {
        return this.COMPLEX;
    }

    /** @return pattern for mutant(s) / mutation(s) (case-sensitive) */
    private Pattern MUTANT() {
        return this.MUTANT;
    }

    /** @return pattern for {@code word/unit} where unit is ml, l, cm or m */
    private Pattern MEASUREMENT_UNIT_WITHSLASH() {
        return this.MEASUREMENT_UNIT_WITHSLASH;
    }

    /**
     * @param s the string to test
     * @return {@code true} if {@code s} is one of {@code "("}, {@code ")"}, {@code "["}, {@code "]"}
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isParen(String s) {
        return this.PARENS().contains((Object)s);
    }

    /**
     * @param s the string to test
     * @return {@code true} if the whole string matches the measurement-unit pattern
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isMeasurementUnit(String s) {
        return this.MEASUREMENT_UNIT_WITHSLASH().matcher(s).matches();
    }

    /**
     * @param word the word to test
     * @return {@code true} if the whole word is "complex", "dimer" or "heterodimer"
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isComplex(String word) {
        return this.COMPLEX().matcher(word).matches();
    }

    /**
     * @param word the word to test
     * @return {@code true} if the whole word is "mutant", "mutants", "mutation" or "mutations"
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isMutant(String word) {
        return this.MUTANT().matcher(word).matches();
    }

    /**
     * @param word the word to test
     * @return {@code true} if the whole word matches {@link #VALID_PROTEIN()} ignoring case
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$isValidProtein(String word) {
        // Uses the pattern compiled once in the constructor instead of recompiling per call.
        return this.VALID_PROTEIN_PATTERN.matcher(word).matches();
    }

    /**
     * Counts the run of connecting tokens that starts at {@code offset}.
     *
     * <p>A token is counted while it is a connecting token (see
     * {@code isConnectingToken}) and its character span touches both the
     * previous and the next token. Each counted token increments
     * {@code howManyConnecting} by one; the counter is not reset first.
     *
     * @param tokens            the token array
     * @param offset            index of the first candidate token; an offset of
     *                          {@code 0} or less counts nothing because there is
     *                          no preceding token to be adjacent to
     * @param howManyConnecting counter that is incremented in place
     * @return {@code true} if the counter's value is greater than zero afterwards
     */
    public boolean org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$countConnectingTokens(PostProcessorToken[] tokens, int offset, MutableNumber<Object> howManyConnecting) {
        int i = offset;
        // "i > 0" protects the tokens[i - 1] access below; without it offset == 0
        // raised ArrayIndexOutOfBoundsException.
        while (i > 0
                && i < tokens.length - 1
                && this.isConnectingToken(tokens[i].word())
                && tokens[i - 1].endPosition() == tokens[i].beginPosition()
                && tokens[i].endPosition() == tokens[i + 1].beginPosition()) {
            int current = BoxesRunTime.unboxToInt((Object)howManyConnecting.value());
            howManyConnecting.value_$eq(BoxesRunTime.boxToInteger((int)(current + 1)));
            ++i;
        }
        return BoxesRunTime.unboxToInt((Object)howManyConnecting.value()) > 0;
    }

    /**
     * @param word the token text
     * @return {@code true} if the word is exactly a dash, a slash, or a dash
     *         followed by digits, or if it begins with dash-digits followed by
     *         a dash or slash
     */
    private boolean isConnectingToken(String word) {
        return CONNECTING_TOKEN_PATTERN.matcher(word).matches()
                || CONNECTING_PREFIX_PATTERN.matcher(word).find();
    }

    /**
     * Builds the case-insensitive pattern {@code ([\w/]+)(-)(suffix1|suffix2|...)}
     * from {@link #VALID_DASH_SUFFIXES()}.
     */
    private Pattern mkDashSuffixes() {
        String allSuffixes = this.makeRegexOr(this.VALID_DASH_SUFFIXES());
        String allSuffixesRegex = "([\\w/]+)(-)(" + allSuffixes + ")";
        return Pattern.compile(allSuffixesRegex, Pattern.CASE_INSENSITIVE);
    }

    /**
     * Joins the given regex fragments into one alternation, wrapping each
     * fragment in its own group: {@code (a)|(b)|(c)}.
     *
     * @param pieces regex fragments, used verbatim (not quoted)
     * @return the alternation, or the empty string for an empty set
     */
    private String makeRegexOr(Set<String> pieces) {
        StringBuilder suffixBuilder = new StringBuilder();
        pieces.foreach((Function1 & java.io.Serializable & Serializable)suffix -> {
            // Separator goes before every fragment except the first.
            if (suffixBuilder.nonEmpty()) {
                suffixBuilder.append("|");
            }
            return suffixBuilder.append("(" + suffix + ")");
        });
        return suffixBuilder.toString();
    }

    /**
     * Collects, from every given classpath resource, the lower-cased tokens
     * that contain a slash.
     *
     * @param kbs names of resources passed to {@code Files$.loadStreamFromClasspath}
     * @return an immutable set of the collected tokens
     */
    public Set<String> org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$loadTokensWithValidSlash(Seq<String> kbs) {
        HashSet specialTokens = new HashSet();
        kbs.foreach((Function1 & java.io.Serializable & Serializable)tkb -> {
            BioTokenizerPostProcessor$.$anonfun$loadTokensWithValidSlash$1(specialTokens, tkb);
            return BoxedUnit.UNIT;
        });
        return specialTokens.toSet();
    }

    /**
     * Reads one resource line by line and adds its slash-containing tokens to
     * {@code specialTokens$1}.
     *
     * <p>Lines whose trimmed form starts with {@code #} are skipped. For every
     * other line only the text before the first tab is used; it is split on
     * whitespace and each piece containing {@code '/'} is added in lower case.
     * The reader is closed even when reading fails, and the failure then
     * propagates to the caller.
     *
     * @param specialTokens$1 the mutable set receiving the tokens
     * @param tkb             the resource name to load
     */
    public static final /* synthetic */ void $anonfun$loadTokensWithValidSlash$1(HashSet specialTokens$1, String tkb) {
        // try-with-resources: the original closed the reader only on the success path.
        try (BufferedReader reader = Files$.MODULE$.loadStreamFromClasspath(tkb)) {
            String line;
            while ((line = reader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.startsWith("#")) {
                    continue;
                }
                String name = trimmed.split("\t")[0];
                for (String token : name.split("\\s+")) {
                    if (token.indexOf('/') >= 0) {
                        specialTokens$1.$plus$eq((Object)token.toLowerCase());
                    }
                }
            }
        }
    }

    /**
     * Initializes every constant and publishes the instance through
     * {@link #MODULE$}. Field order matters: later patterns are built from
     * earlier string constants.
     */
    private BioTokenizerPostProcessor$() {
        MODULE$ = this;
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$DISCARD_STANDALONE_DASHES = true;
        this.VALID_DASH_SUFFIXES = (Set)Predef$.MODULE$.Set().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"\\w+ed", "\\w+ing", "\\w+ation", "(in)?dependent", "deficient", "response", "protein", "by", "specific", "like", "inducible", "responsive", "gene", "mRNA", "transcription", "cytoplasmic", "sensitive", "bound", "driven", "positive", "negative", "dominant", "family", "resistant", "activity", "proximal", "defective", "selective", "reporter", "fragment", "rich", "expression", "mechanisms?", "agonist", "heterozygous", "homozygous"}));
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$COMMON_PREFIXES = (Set)Predef$.MODULE$.Set().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"anti", "auto", "bi", "co", "de", "dis", "down", "extra", "homo", "hetero", "hyper", "macro", "micro", "mono", "omni", "over", "poly", "pre", "post", "re", "semi", "sub", "super", "trans", "under", "up"}));
        // Needs VALID_DASH_SUFFIXES, so it must come after the assignment above.
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$dashSuffixes = this.mkDashSuffixes();
        this.VALID_PROTEIN = "[a-z][\\w\\-][\\w\\-]+";
        this.VALID_PROTEIN_PATTERN = Pattern.compile(this.VALID_PROTEIN, Pattern.CASE_INSENSITIVE);
        this.VALID_PROTEIN_NO_DASH = "[a-z][\\w][\\w]+";
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$VALID_COMPLEX_SEPARATOR_PATTERN = Pattern.compile("[/\\-]");
        // StringContext.s processes escape sequences in the literal parts, which is
        // why the parts below carry doubled backslashes.
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLESLASH_PATTERN = Pattern.compile(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"(", ")(/)(", ")"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.VALID_PROTEIN(), this.VALID_PROTEIN()})), Pattern.CASE_INSENSITIVE);
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$SINGLEDASH_PATTERN = Pattern.compile(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"(", ")(\\\\-)(", ")"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.VALID_PROTEIN_NO_DASH(), this.VALID_PROTEIN_NO_DASH()})), Pattern.CASE_INSENSITIVE);
        this.SITE1_PATTERN = Pattern.compile("[ACDEFGHIKLMNQRSTVWY]\\d+", Pattern.CASE_INSENSITIVE);
        // "isoleucine" was misspelled "soleucine", so the full name never matched.
        // NOTE(review): "glutamine" does not appear in this list - confirm whether that is intentional.
        this.SITE2 = "glycine|phenylalanine|leucine|serine|tyrosine|cysteine|tryptophan|proline|histidine|arginine|isoleucine|methionine|threonine|asparagine|lysine|serine|arginine|valine|alanine|aspartate|glutamate|glycine";
        this.SITE3 = "Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val";
        this.SITE2_PATTERN = Pattern.compile(this.SITE2(), Pattern.CASE_INSENSITIVE);
        this.SITE3_PATTERN = Pattern.compile(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"(", ")\\\\d*"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.SITE3()})), Pattern.CASE_INSENSITIVE);
        this.MUTATION1_PATTERN = Pattern.compile("[ACDEFGHIKLMNQRSTVWY]\\d+[ACDEFGHIKLMNPQRSTVWY]", Pattern.CASE_INSENSITIVE);
        // Deliberately compiled without CASE_INSENSITIVE: the leading P must be upper case.
        this.MUTATION2_PATTERN = Pattern.compile("P\\d+[ACDEFGHIKLMNPQRSTVWYacdefghiklmnpqrstvwy]");
        this.MUTATION3_PATTERN = Pattern.compile("(Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val)\\d+(Ala|Arg|Asn|Asp|Cys|Gln|Glu|Gly|His|Ile|Leu|Lys|Met|Phe|Pro|Ser|Thr|Trp|Tyr|Val)", Pattern.CASE_INSENSITIVE);
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$MODIFICATIONS = (Set)Predef$.MODULE$.Set().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new Pattern[]{this.SITE1_PATTERN(), this.SITE2_PATTERN(), this.SITE3_PATTERN(), this.MUTATION1_PATTERN(), this.MUTATION2_PATTERN(), this.MUTATION3_PATTERN()}));
        this.org$clulab$processors$clu$bio$BioTokenizerPostProcessor$$sitePrefixes = Pattern.compile(new StringContext((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"(", "|", ")(-)([\\\\w/]+)"})).s((Seq)Predef$.MODULE$.genericWrapArray((Object)new Object[]{this.SITE2(), this.SITE3()})), Pattern.CASE_INSENSITIVE);
        this.PARENS = (Set)Predef$.MODULE$.Set().apply((Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[]{"(", ")", "[", "]"}));
        this.COMPLEX = Pattern.compile("complex|dimer|heterodimer");
        this.MUTANT = Pattern.compile("mutant|mutants|mutation|mutations");
        this.MEASUREMENT_UNIT_WITHSLASH = Pattern.compile("\\w+/(ml|l|cm|m)", Pattern.CASE_INSENSITIVE);
    }
}

