package net.sf.okapi.steps.tokenization.engine;

import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.regex.Pattern;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.steps.tokenization.common.AbstractLexer;
import net.sf.okapi.steps.tokenization.common.InputTokenAnnotation;
import net.sf.okapi.steps.tokenization.common.Lexem;
import net.sf.okapi.steps.tokenization.common.Lexems;
import net.sf.okapi.steps.tokenization.common.LexerRule;
import net.sf.okapi.steps.tokenization.common.LexerRules;
import net.sf.okapi.steps.tokenization.common.RegexRule;
import net.sf.okapi.steps.tokenization.common.RegexRules;
import net.sf.okapi.steps.tokenization.common.Token;
import net.sf.okapi.steps.tokenization.tokens.Tokens;

/* loaded from: input_file:net/sf/okapi/steps/tokenization/engine/TokenMatcher.class */
/**
 * Lexer that produces lexems from tokens that already exist, rather than from raw text.
 *
 * <p>Each rule is handled as a {@link RegexRule}. In {@link #process(String, LocaleId, Tokens)},
 * a token whose ID is listed in the rule's input token IDs and whose whole value matches the
 * rule's regular expression yields a new {@link Lexem} carrying the rule's lexem ID.
 *
 * <p>The incremental callbacks ({@link #lexer_open}, {@link #lexer_hasNext()},
 * {@link #lexer_next()}) are inert in this class; all work is done in {@code process}.
 */
public class TokenMatcher extends AbstractLexer {
    /** Rule set fetched via {@code getRules()} in {@link #lexer_init()}. */
    private LexerRules rules;

    /**
     * Compiled regular expression for every rule, in rule iteration order.
     * The mapped value is {@code null} for a rule whose pattern string is {@code null}.
     */
    private LinkedHashMap<LexerRule, Pattern> patterns;

    /**
     * Reports the rules class this lexer works with.
     *
     * @return {@code RegexRules.class}
     */
    @Override
    protected Class<? extends LexerRules> lexer_getRulesClass() {
        return RegexRules.class;
    }

    /**
     * Always reports that no further lexem is available.
     * NOTE(review): presumably because this lexer only works through
     * {@code process(...)} - confirm against AbstractLexer.
     *
     * @return {@code false}, unconditionally
     */
    @Override
    protected boolean lexer_hasNext() {
        return false;
    }

    /**
     * Loads the rules and compiles each rule's regular expression once, so that
     * {@link #process(String, LocaleId, Tokens)} can reuse the compiled patterns.
     * Rules without a pattern string are still registered, mapped to {@code null}.
     */
    @Override
    protected void lexer_init() {
        patterns = new LinkedHashMap<>();
        rules = getRules();
        for (LexerRule item : rules) {
            RegexRule rule = (RegexRule) item;
            Pattern compiled = rule.getPattern() == null
                    ? null
                    : Pattern.compile(rule.getPattern(), rule.getRegexOptions());
            patterns.put(rule, compiled);
        }
    }

    /**
     * Never produces a lexem.
     *
     * @return {@code null}, unconditionally
     */
    @Override
    protected Lexem lexer_next() {
        return null;
    }

    /**
     * Does nothing; this lexer keeps no per-text state.
     */
    @Override
    protected void lexer_open(String str, LocaleId localeId, Tokens tokens) {
        // Intentionally empty.
    }

    /**
     * Applies every rule to the given tokens and collects the resulting lexems.
     *
     * <p>A rule is applied only if {@code checkRule(rule, localeId)} accepts it and a compiled
     * pattern exists for it. For each token whose ID is among the rule's input token IDs and
     * whose value matches the pattern in full, a lexem is created with the rule's lexem ID and
     * the token's value and range, annotated with the source token. Unless the rule asks to
     * keep its input, {@code delete()} is then called on the matched token.
     *
     * @param str      not read by this implementation
     * @param localeId locale passed to {@code checkRule}
     * @param tokens   tokens to test against the rules
     * @return the lexems created, in rule order and then token order; empty if nothing matched
     */
    @Override
    public Lexems process(String str, LocaleId localeId, Tokens tokens) {
        Lexems result = new Lexems();
        for (LexerRule item : rules) {
            RegexRule rule = (RegexRule) item;
            if (!checkRule(rule, localeId)) {
                continue;
            }
            List<Integer> acceptedIds = rule.getInTokenIDs();
            Pattern compiled = patterns.get(rule);
            if (compiled == null) {
                // Rule had no pattern string at init time; nothing to match with.
                continue;
            }
            for (Token token : tokens) {
                if (!acceptedIds.contains(Integer.valueOf(token.getTokenId()))) {
                    continue;
                }
                // matches() requires the entire token value to fit the pattern.
                if (!compiled.matcher(token.getValue()).matches()) {
                    continue;
                }
                Lexem matched = new Lexem(rule.getLexemId(), token.getValue(), token.getRange());
                matched.setAnnotation(new InputTokenAnnotation(token));
                result.add(matched);
                if (!rule.getKeepInput()) {
                    token.delete();
                }
            }
        }
        return result;
    }
}
