package de.versley.exml.annotators.nlp4j;

import edu.emory.mathcs.nlp.tokenization.EnglishTokenizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import webcorp.tokens.Token;
import webcorp.tokens.TokenizerInterface;

/* loaded from: input_file:de/versley/exml/annotators/nlp4j/NLP4JTokenizer.class */
/**
 * {@link TokenizerInterface} implementation that delegates to NLP4J's
 * {@link EnglishTokenizer} and converts its tokens into {@link Token} objects.
 */
public class NLP4JTokenizer implements TokenizerInterface {
    /**
     * Flag set on the first token of every group returned by the NLP4J tokenizer,
     * unless that token starts at absolute offset 0.
     * NOTE(review): presumably a sentence-boundary marker defined by
     * {@code webcorp.tokens.Token} -- confirm and replace with the named constant
     * from that class if one exists.
     */
    private static final int FIRST_IN_GROUP_FLAG = 8;

    EnglishTokenizer tokenizer = new EnglishTokenizer();

    /**
     * Tokenizes {@code str} and returns all tokens as one flat list.
     *
     * <p>Each resulting token carries the NLP4J word form as its value, and start/end
     * offsets equal to the NLP4J offsets shifted by {@code i}. The first token of each
     * group produced by {@code segmentize} is flagged with {@link #FIRST_IN_GROUP_FLAG},
     * except when its shifted start offset is 0.
     *
     * @param str the text to tokenize
     * @param i   value added to every start and end offset reported by NLP4J
     * @return the converted tokens, in the order NLP4J produced them
     */
    @Override // webcorp.tokens.TokenizerInterface
    public List<Token> tokenize(String str, int i) {
        // Typed instead of raw: iterating a raw List only yields Object elements.
        List<List<edu.emory.mathcs.nlp.tokenization.Token>> groups = this.tokenizer.segmentize(str);
        List<Token> result = new ArrayList<Token>();
        for (List<edu.emory.mathcs.nlp.tokenization.Token> group : groups) {
            boolean firstInGroup = true;
            for (edu.emory.mathcs.nlp.tokenization.Token source : group) {
                Token converted = new Token();
                converted.value = source.getWordForm();
                converted.start = source.getStartOffset() + i;
                converted.end = source.getEndOffset() + i;
                // The check uses the shifted start, so a token at absolute offset 0 is never flagged.
                if (firstInGroup && converted.start != 0) {
                    converted.addFlag(FIRST_IN_GROUP_FLAG);
                }
                result.add(converted);
                firstInGroup = false;
            }
        }
        return result;
    }
}
