package org.wikibrain.core.nlp;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
import org.wikibrain.core.lang.Language;

/* loaded from: input_file:org/wikibrain/core/nlp/StringTokenizer.class */
/**
 * Splits text into words and sentences using the locale-specific boundary
 * rules of {@link BreakIterator}.
 *
 * <p>The class has no fields; every call obtains its own {@link BreakIterator}
 * for the locale reported by the supplied {@link Language}.
 *
 * <p>For the word-level methods, a segment between two word boundaries is kept
 * only when its first Unicode code point is a letter or a digit, so segments
 * that start with whitespace or punctuation are discarded.
 */
public class StringTokenizer {

    /**
     * Extracts the words of a string.
     *
     * @param language supplies the locale whose word-boundary rules are used
     * @param str the text to split
     * @return a new mutable list with the text of every segment that starts
     *         with a letter or digit, in order of appearance
     */
    public List<String> getWords(Language language, String str) {
        List<String> words = new ArrayList<String>();
        BreakIterator boundaries = BreakIterator.getWordInstance(language.getLocale());
        for (int[] span : findSpans(boundaries, str, true)) {
            words.add(str.substring(span[0], span[1]));
        }
        return words;
    }

    /**
     * Extracts the sentences of a string.
     *
     * @param language supplies the locale whose sentence-boundary rules are used
     * @param str the text to split
     * @return a new mutable list holding the text between each pair of
     *         consecutive sentence boundaries, untrimmed and in order
     */
    public List<String> getSentences(Language language, String str) {
        List<String> sentences = new ArrayList<String>();
        BreakIterator boundaries = BreakIterator.getSentenceInstance(language.getLocale());
        for (int[] span : findSpans(boundaries, str, false)) {
            sentences.add(str.substring(span[0], span[1]));
        }
        return sentences;
    }

    /**
     * Extracts the sentences of a string as {@link Token}s.
     *
     * @param language supplies the locale whose sentence-boundary rules are used
     * @param str the text to split
     * @return a new mutable list with one token per sentence segment, each
     *         constructed from the segment's begin offset, end offset and
     *         {@code str}
     */
    public List<Token> getSentenceTokens(Language language, String str) {
        List<Token> tokens = new ArrayList<Token>();
        BreakIterator boundaries = BreakIterator.getSentenceInstance(language.getLocale());
        for (int[] span : findSpans(boundaries, str, false)) {
            tokens.add(new Token(span[0], span[1], str));
        }
        return tokens;
    }

    /**
     * Extracts the words of a string as {@link Token}s.
     *
     * @param language supplies the locale whose word-boundary rules are used
     * @param str the text to split
     * @return a new mutable list with one token per segment that starts with a
     *         letter or digit, each constructed from the segment's begin
     *         offset, end offset and {@code str}
     */
    public List<Token> getWordTokens(Language language, String str) {
        List<Token> tokens = new ArrayList<Token>();
        BreakIterator boundaries = BreakIterator.getWordInstance(language.getLocale());
        for (int[] span : findSpans(boundaries, str, true)) {
            tokens.add(new Token(span[0], span[1], str));
        }
        return tokens;
    }

    /**
     * Extracts the words found inside an existing token.
     *
     * <p>Word boundaries are computed on {@code token.getToken()}. Each
     * resulting offset is shifted by {@code token.getBegin()} and the new
     * tokens are built over {@code token.getFullText()}.
     *
     * @param language supplies the locale whose word-boundary rules are used
     * @param token the token whose text is split further
     * @return a new mutable list with one token per segment that starts with a
     *         letter or digit
     */
    public List<Token> getWordTokens(Language language, Token token) {
        // Read the accessors once instead of once per boundary.
        // NOTE(review): assumes these are plain getters returning stable values.
        String text = token.getToken();
        int offset = token.getBegin();
        String fullText = token.getFullText();

        List<Token> tokens = new ArrayList<Token>();
        BreakIterator boundaries = BreakIterator.getWordInstance(language.getLocale());
        for (int[] span : findSpans(boundaries, text, true)) {
            tokens.add(new Token(span[0] + offset, span[1] + offset, fullText));
        }
        return tokens;
    }

    /**
     * Walks all boundaries of {@code text} and collects the segments between them.
     *
     * @param boundaries the iterator to use; its text is replaced by {@code text}
     * @param text the text to segment
     * @param wordsOnly when {@code true}, keep only segments whose first code
     *        point is a letter or digit
     * @return {@code {begin, end}} offset pairs into {@code text}, in order
     */
    private static List<int[]> findSpans(BreakIterator boundaries, String text, boolean wordsOnly) {
        List<int[]> spans = new ArrayList<int[]>();
        boundaries.setText(text);
        int begin = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
            if (!wordsOnly || startsWithLetterOrDigit(text, begin, end)) {
                spans.add(new int[] {begin, end});
            }
            begin = end;
        }
        return spans;
    }

    /**
     * Tells whether the non-empty segment {@code [begin, end)} of {@code text}
     * starts with a letter or digit.
     *
     * <p>The whole code point is inspected rather than a single {@code char},
     * so a segment that begins with a supplementary-plane letter (stored as a
     * surrogate pair) is recognised instead of being rejected because of its
     * leading surrogate.
     */
    private static boolean startsWithLetterOrDigit(String text, int begin, int end) {
        return end > begin && Character.isLetterOrDigit(text.codePointAt(begin));
    }
}
