package ai.idylnlp.nlp.tokenizers;

import ai.idylnlp.model.nlp.Span;
import ai.idylnlp.model.nlp.Stemmer;
import ai.idylnlp.model.nlp.Tokenizer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import opennlp.tools.util.StringUtil;

/* loaded from: input_file:ai/idylnlp/nlp/tokenizers/WhitespaceTokenizer.class */
/**
 * A {@link Tokenizer} that splits text on whitespace, where whitespace is
 * whatever {@link StringUtil#isWhitespace(char)} reports as such.
 *
 * <p>The class has no instance fields and is exposed only through the shared
 * {@link #INSTANCE}; the constructor is private.
 */
public class WhitespaceTokenizer implements Tokenizer {
    /** The single shared instance of this tokenizer. */
    public static final WhitespaceTokenizer INSTANCE = new WhitespaceTokenizer();

    /** Marker for "not currently inside a token" while scanning. */
    private static final int NO_TOKEN = -1;

    private WhitespaceTokenizer() {
    }

    /**
     * Returns the language codes associated with this tokenizer.
     *
     * @return an empty list; this tokenizer declares no language codes
     */
    public List<String> getLanguageCodes() {
        // Type-safe equivalent of the raw Collections.EMPTY_LIST constant.
        return Collections.emptyList();
    }

    /**
     * Splits the input into its whitespace-delimited tokens.
     *
     * @param str the text to tokenize
     * @return the token strings, obtained by applying the spans from
     *         {@link #tokenizePos(String)} to {@code str}
     */
    public String[] tokenize(String str) {
        return Span.spansToStrings(tokenizePos(str), str);
    }

    /**
     * Splits the input into whitespace-delimited tokens and replaces each
     * token with the result of passing it through the given stemmer.
     *
     * @param str the text to tokenize
     * @param stemmer the stemmer applied to every token
     * @return the stemmed tokens, in the order they occur in {@code str}
     */
    public String[] tokenize(String str, Stemmer stemmer) {
        String[] tokens = tokenize(str);
        for (int index = 0; index < tokens.length; index++) {
            tokens[index] = stemmer.stem(tokens[index]);
        }
        return tokens;
    }

    /**
     * Locates every maximal run of non-whitespace characters in the input.
     *
     * @param str the text to scan
     * @return one span per token, in order of occurrence; each span starts at
     *         the index of the token's first character and ends at the index
     *         just past its last character
     */
    public Span[] tokenizePos(String str) {
        List<Span> spans = new ArrayList<>();
        int length = str.length();
        // Start index of the token being scanned, or NO_TOKEN between tokens.
        int tokenStart = NO_TOKEN;
        for (int pos = 0; pos < length; pos++) {
            if (StringUtil.isWhitespace(str.charAt(pos))) {
                if (tokenStart != NO_TOKEN) {
                    // Whitespace terminates the current token.
                    spans.add(new Span(tokenStart, pos));
                    tokenStart = NO_TOKEN;
                }
            } else if (tokenStart == NO_TOKEN) {
                // First non-whitespace character after a gap opens a token.
                tokenStart = pos;
            }
        }
        if (tokenStart != NO_TOKEN) {
            // Input ended while still inside a token; close it at the end.
            spans.add(new Span(tokenStart, length));
        }
        return spans.toArray(new Span[0]);
    }
}
