package net.sf.okapi.steps.tokenization.engine;

import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.RuleBasedBreakIterator;
import com.ibm.icu.util.ULocale;
import java.util.Iterator;
import java.util.TreeMap;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Util;
import net.sf.okapi.steps.tokenization.common.AbstractLexer;
import net.sf.okapi.steps.tokenization.common.Lexem;
import net.sf.okapi.steps.tokenization.common.Lexems;
import net.sf.okapi.steps.tokenization.common.LexerRule;
import net.sf.okapi.steps.tokenization.tokens.Tokens;

/* loaded from: input_file:net/sf/okapi/steps/tokenization/engine/RbbiLexer.class */
/**
 * Lexer that cuts a text into lexems using an ICU {@link RuleBasedBreakIterator}.
 *
 * <p>For every locale a word break iterator is obtained from ICU, its rule source is
 * extended with the patterns of the applicable {@link LexerRule}s, and the resulting
 * iterator is cached so the rules are only compiled once per locale.
 *
 * <p>The instance keeps mutable iteration state ({@code text}, {@code start},
 * {@code end}) between {@code lexer_open} and {@code lexer_next} calls.
 */
public class RbbiLexer extends AbstractLexer {
    /** Token in the rule source in front of which custom rules are inserted. */
    private static final String INSERTION_MARKER = ".;";

    /** Break iterators already built, keyed by locale. */
    private final TreeMap<LocaleId, RuleBasedBreakIterator> iterators = new TreeMap<>();
    /** Iterator for the text currently opened, or {@code null} before the first successful open. */
    private RuleBasedBreakIterator iterator = null;
    /** Offset where the next lexem starts. */
    private int start;
    /** Last boundary returned by the iterator; {@link BreakIterator#DONE} when exhausted. */
    private int end;
    /** Text currently being tokenized. */
    private String text;

    /** No initialization is required; iterators are created lazily in {@link #lexer_open}. */
    @Override // net.sf.okapi.steps.tokenization.common.AbstractLexer
    public void lexer_init() {
    }

    /**
     * Tells whether the iterator has not yet reported the end of the text.
     *
     * @return {@code false} once the last boundary was {@link BreakIterator#DONE}
     */
    @Override // net.sf.okapi.steps.tokenization.common.AbstractLexer
    public boolean lexer_hasNext() {
        return this.end != BreakIterator.DONE;
    }

    /**
     * Advances to the next boundary and returns the span between the previous
     * boundary and the new one.
     *
     * @return the next lexem, built from the iterator's rule status, the covered
     *         substring and its start/end offsets; {@code null} when the text is
     *         exhausted, the span is empty, or no text has been opened
     */
    @Override // net.sf.okapi.steps.tokenization.common.AbstractLexer
    public Lexem lexer_next() {
        if (this.iterator == null) {
            // Nothing was opened successfully: report exhaustion instead of failing with an NPE.
            this.end = BreakIterator.DONE;
            return null;
        }
        this.end = this.iterator.next();
        if (this.end == BreakIterator.DONE || this.start >= this.end) {
            return null;
        }
        Lexem lexem = new Lexem(this.iterator.getRuleStatus(), this.text.substring(this.start, this.end), this.start, this.end);
        this.start = this.end;
        return lexem;
    }

    /**
     * Inserts a variable definition and a status rule for a lexer rule into an RBBI
     * rule source, directly in front of the first {@code ".;"} it contains.
     *
     * <p>The inserted text is {@code $name = pattern;$name {lexemId};}. The insertion is
     * done with plain string operations, so {@code $} and {@code \} in the name or the
     * pattern are copied literally, and the lexem id is always written with ASCII digits.
     *
     * @param rules the RBBI rule source to extend; newlines are normalized first
     * @param name the variable name to define (without the leading {@code $})
     * @param description unused
     * @param pattern the RBBI expression assigned to the variable
     * @param lexemId the status value attached to the rule
     * @return the extended rule source, or the normalized input when it contains no {@code ".;"}
     */
    public static String formatRule(String rules, String name, String description, String pattern, int lexemId) {
        String normalized = Util.normalizeNewlines(rules);
        int insertAt = normalized.indexOf(INSERTION_MARKER);
        if (insertAt < 0) {
            return normalized;
        }
        String definition = "$" + name + " = " + pattern + ";";
        String statusRule = "$" + name + " {" + lexemId + "};";
        // Both pieces go in at once so a pattern that itself ends in "." cannot shift the insertion point.
        return normalized.substring(0, insertAt) + definition + statusRule + normalized.substring(insertAt);
    }

    /**
     * Prepares the lexer for a new text: selects (or builds and caches) the break
     * iterator for the locale and positions it at the first boundary.
     *
     * @param text the text to tokenize; when empty the lexer is cancelled
     * @param localeId the locale whose word-break rules are used
     * @param tokens not used by this lexer
     */
    @Override // net.sf.okapi.steps.tokenization.common.AbstractLexer
    public void lexer_open(String text, LocaleId localeId, Tokens tokens) {
        if (Util.isEmpty(text)) {
            cancel();
            // Make lexer_hasNext() false so state left over from a previous text is not replayed.
            this.end = BreakIterator.DONE;
            return;
        }
        this.text = text;
        RuleBasedBreakIterator cached = this.iterators.get(localeId);
        if (cached == null) {
            cached = createIterator(localeId);
            this.iterators.put(localeId, cached);
        }
        this.iterator = cached;
        this.iterator.setText(text);
        this.start = this.iterator.first();
        this.end = this.start;
    }

    /** Builds a break iterator from ICU's word rules for the locale plus the applicable lexer rules. */
    private RuleBasedBreakIterator createIterator(LocaleId localeId) {
        ULocale locale = ULocale.createCanonical(localeId.toString());
        // getWordInstance is declared to return BreakIterator; the rule source is only
        // available from the rule-based implementation, hence the cast.
        RuleBasedBreakIterator base = (RuleBasedBreakIterator) BreakIterator.getWordInstance(locale);
        String rules = base.toString();
        for (LexerRule rule : getRules()) {
            boolean patternMissing = Util.isEmpty(rule.getPattern());
            if (checkRule(rule, localeId) && !patternMissing) {
                rules = formatRule(rules, rule.getName(), rule.getDescription(), rule.getPattern(), rule.getLexemId());
            }
        }
        return new RuleBasedBreakIterator(rules);
    }

    /**
     * Not implemented by this lexer; always returns {@code null}.
     * NOTE(review): presumably the framework drives this lexer through the
     * {@code lexer_*} methods instead — confirm against AbstractLexer.
     */
    @Override // net.sf.okapi.steps.tokenization.common.ILexer
    public Lexems process(String str, LocaleId localeId, Tokens tokens) {
        return null;
    }
}
