package de.versley.exml.annotators;

import com.google.re2j.Pattern;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import webcorp.tokens.Token;
import webcorp.tokens.TokenizerInterface;

/* loaded from: input_file:de/versley/exml/annotators/CoreNLPTokenizer.class */
/**
 * {@link TokenizerInterface} implementation that tokenizes text with Stanford CoreNLP's
 * {@link PTBTokenizer}, splits tokens where a sentence-final period was glued to the
 * following word, and groups the result into sentences with a
 * {@link WordToSentenceProcessor}.
 */
public class CoreNLPTokenizer implements TokenizerInterface {
    /**
     * Matches a whole token of the form lowercase letters, a period, one uppercase letter and
     * optional lowercase letters (for example {@code "end.Next"}).
     */
    private static final Pattern GLUED_TOKEN = Pattern.compile("[a-z]+\\.[A-Z][a-z]*");

    /**
     * Flag value set on the first token of each sentence, except a token starting at offset 0.
     * NOTE(review): presumably a sentence-start marker defined by webcorp.tokens.Token;
     * confirm and replace with the library's own constant if one exists.
     */
    private static final int FIRST_IN_SENTENCE_FLAG = 8;

    /** Language handled by this tokenizer; only {@code "en"} is supported. */
    public final String language = "en";

    private final WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<>();

    /**
     * Returns a copy of {@code list} in which every token matching {@link #GLUED_TOKEN} is
     * replaced by three tokens: the text before the period, the period itself, and the text
     * after it. All other tokens are passed through unchanged.
     *
     * @param list tokens to inspect
     * @param coreLabelTokenFactory factory used to build the replacement tokens; it is called
     *     with (text, begin offset, length)
     * @return a new list containing the original and the split tokens in order
     */
    private static List<CoreLabel> splitGluedCore(List<CoreLabel> list, CoreLabelTokenFactory coreLabelTokenFactory) {
        List<CoreLabel> result = new ArrayList<>(list.size());
        for (CoreLabel label : list) {
            String text = label.getString(CoreAnnotations.TextAnnotation.class);
            if (!GLUED_TOKEN.matches(text)) {
                result.add(label);
                continue;
            }
            // The pattern guarantees exactly one period, so the first match is the split point.
            int dot = text.indexOf('.');
            int begin = label.beginPosition();
            String head = text.substring(0, dot);
            String tail = text.substring(dot + 1);
            result.add(coreLabelTokenFactory.makeToken(head, begin, dot));
            result.add(coreLabelTokenFactory.makeToken(".", begin + dot, 1));
            result.add(coreLabelTokenFactory.makeToken(tail, begin + dot + 1, tail.length()));
        }
        return result;
    }

    /**
     * Tokenizes {@code str} and returns the tokens of all sentences as one flat list. The first
     * token of every sentence receives {@link #FIRST_IN_SENTENCE_FLAG}, unless that token
     * starts at position 0.
     *
     * <p>Token {@code start}/{@code end} values are the begin/end positions reported by the
     * CoreNLP tokens.
     *
     * @param str text to tokenize
     * @param i not used by this implementation.
     *     NOTE(review): possibly meant as a base offset for token positions; confirm against
     *     the {@link TokenizerInterface} contract.
     * @return the tokens in document order
     * @throws IllegalArgumentException if {@link #language} is not {@code "en"}
     */
    @Override // webcorp.tokens.TokenizerInterface
    public List<Token> tokenize(String str, int i) {
        if (!"en".equals(language)) {
            throw new IllegalArgumentException("No model for language: " + language);
        }
        CoreLabelTokenFactory coreLabelTokenFactory = new CoreLabelTokenFactory();
        PTBTokenizer<CoreLabel> ptbTokenizer =
                new PTBTokenizer<>(new StringReader(str), coreLabelTokenFactory, "");
        List<CoreLabel> labels = splitGluedCore(ptbTokenizer.tokenize(), coreLabelTokenFactory);

        List<Token> tokens = new ArrayList<>();
        for (List<CoreLabel> sentence : this.wts.process(labels)) {
            boolean firstInSentence = true;
            for (CoreLabel label : sentence) {
                Token token = new Token();
                token.value = label.getString(CoreAnnotations.TextAnnotation.class);
                token.start = label.beginPosition();
                token.end = label.endPosition();
                tokens.add(token);
                if (firstInSentence && token.start != 0) {
                    token.addFlag(FIRST_IN_SENTENCE_FLAG);
                }
                firstInSentence = false;
            }
        }
        return tokens;
    }
}
