package org.clulab.processors.clu.tokenizer;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import scala.Predef$;
import scala.collection.StringOps$;
import scala.collection.mutable.StringBuilder;
import scala.runtime.BoxedUnit;
import scala.util.matching.Regex;

/* compiled from: SentenceSplitter.scala */
/* loaded from: input_file:org/clulab/processors/clu/tokenizer/SentenceSplitter$.class */
public final class SentenceSplitter$ {
    public static final SentenceSplitter$ MODULE$ = new SentenceSplitter$();
    private static final Regex EOS = StringOps$.MODULE$.r$extension(Predef$.MODULE$.augmentString("^[\\.!\\?\\s]+$"));
    private static final Regex EOS_FOLLOWEDBY_BULLET = StringOps$.MODULE$.r$extension(Predef$.MODULE$.augmentString("\\.\\d+$"));
    private static final Regex IS_ENGLISH_ABBREVIATION = MODULE$.loadDictionary("org/clulab/processors/clu/tokenizer/english.abbreviations");
    private static final Regex IS_ENGLISH_SENTSTART = MODULE$.loadDictionary("org/clulab/processors/clu/tokenizer/english.sentstarts");
    private static final Regex IS_PORTUGUESE_ABBREVIATION = MODULE$.loadDictionary("org/clulab/processors/clu/tokenizer/portuguese.abbreviations");
    private static final Regex IS_PORTUGUESE_SENTSTART = MODULE$.loadDictionary("org/clulab/processors/clu/tokenizer/portuguese.sentstarts");
    private static final Regex IS_SPANISH_ABBREVIATION = MODULE$.loadDictionary("org/clulab/processors/clu/tokenizer/spanish.abbreviations");
    private static final Regex IS_SPANISH_SENTSTART = MODULE$.loadDictionary("org/clulab/processors/clu/tokenizer/spanish.sentstarts");

    public Regex EOS() {
        return EOS;
    }

    public Regex EOS_FOLLOWEDBY_BULLET() {
        return EOS_FOLLOWEDBY_BULLET;
    }

    public Regex IS_ENGLISH_ABBREVIATION() {
        return IS_ENGLISH_ABBREVIATION;
    }

    public Regex IS_ENGLISH_SENTSTART() {
        return IS_ENGLISH_SENTSTART;
    }

    public Regex IS_PORTUGUESE_ABBREVIATION() {
        return IS_PORTUGUESE_ABBREVIATION;
    }

    public Regex IS_PORTUGUESE_SENTSTART() {
        return IS_PORTUGUESE_SENTSTART;
    }

    public Regex IS_SPANISH_ABBREVIATION() {
        return IS_SPANISH_ABBREVIATION;
    }

    public Regex IS_SPANISH_SENTSTART() {
        return IS_SPANISH_SENTSTART;
    }

    private Regex loadDictionary(String str) {
        InputStream resourceAsStream = getClass().getClassLoader().getResourceAsStream(str);
        Predef$.MODULE$.assert(resourceAsStream != null, () -> {
            return new StringBuilder(42).append("Failed to find resource ").append(str).append(" in the classpath!").toString();
        });
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(resourceAsStream));
        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("^(");
        boolean z = false;
        boolean z2 = true;
        while (!z) {
            String readLine = bufferedReader.readLine();
            if (readLine == null) {
                z = true;
            } else if (!readLine.startsWith("#")) {
                if (z2) {
                    BoxedUnit boxedUnit = BoxedUnit.UNIT;
                } else {
                    stringBuilder.append("|");
                }
                stringBuilder.append(normalizeSpecialChars(readLine.trim()));
                z2 = false;
            }
        }
        stringBuilder.append(")$");
        bufferedReader.close();
        return StringOps$.MODULE$.r$extension(Predef$.MODULE$.augmentString(stringBuilder.toString()));
    }

    private String normalizeSpecialChars(String str) {
        return str.replaceAll("\\.", "\\\\.").replaceAll("\\-", "\\\\-");
    }

    private SentenceSplitter$() {
    }
}
