package net.sf.okapi.steps.tokenization;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.ListUtil;
import net.sf.okapi.common.LocaleId;
import net.sf.okapi.common.Range;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.resource.ITextUnit;
import net.sf.okapi.common.resource.TextContainer;
import net.sf.okapi.common.resource.TextUnitUtil;
import net.sf.okapi.lib.extra.Notification;
import net.sf.okapi.lib.extra.steps.AbstractPipelineStep;
import net.sf.okapi.steps.tokenization.common.Config;
import net.sf.okapi.steps.tokenization.common.ILexer;
import net.sf.okapi.steps.tokenization.common.Lexem;
import net.sf.okapi.steps.tokenization.common.Lexems;
import net.sf.okapi.steps.tokenization.common.LexerRule;
import net.sf.okapi.steps.tokenization.common.LexerRules;
import net.sf.okapi.steps.tokenization.common.StructureParameters;
import net.sf.okapi.steps.tokenization.common.StructureParametersItem;
import net.sf.okapi.steps.tokenization.common.Token;
import net.sf.okapi.steps.tokenization.common.TokensAnnotation;
import net.sf.okapi.steps.tokenization.tokens.Tokens;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Pipeline step that runs a configurable set of lexers over the translatable text units
 * of a document and stores the resulting tokens on the source and/or the targets as a
 * {@link TokensAnnotation}.
 *
 * <p>Lexers are loaded reflectively from the structure configuration referenced by the
 * step's {@link Config}. Lexers whose rules declare out tokens are kept in the main lexer
 * list; lexers without rules, or whose rules declare no out tokens, are kept in a separate
 * "service" list that is run after the main list.
 *
 * <p>Tokenization keeps per-call scratch state in instance fields ({@code positions},
 * {@code rawtextLexems}, {@code allowNewRawText}), so one instance is presumably not meant
 * to tokenize on several threads at once.
 */
@UsingParameters(Parameters.class)
public class TokenizationStep extends AbstractPipelineStep {

    /** Lexem ID of a lexem whose text still has to be run through the main lexers. */
    public static final int RAWTEXT = -1;

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private Parameters params;
    private StructureParameters structureParams;
    private Config config = new Config();

    /** Lexers whose rules produce out tokens. */
    private List<ILexer> lexers = new ArrayList<>();
    /** Lexers without rules, or whose rules produce no out tokens. */
    private final List<ILexer> serviceLexers = new ArrayList<>();
    /** Rules that are disabled or reference no token IDs at all; lexems matching them are dropped. */
    private final List<LexerRule> idleRules = new ArrayList<>();

    /** True while lexers are allowed to queue new {@link #RAWTEXT} lexems. */
    private boolean allowNewRawText = true;
    /** Marker positions collected while extracting plain text; later passed to {@code Tokens.fixRanges}. */
    private final ArrayList<Integer> positions = new ArrayList<>();
    /** Queue of {@link #RAWTEXT} lexems waiting to be re-processed by the main lexers. */
    private final LinkedList<Lexem> rawtextLexems = new LinkedList<>();

    /** Creates the step and loads its default configuration from the {@code config.tprm} resource. */
    public TokenizationStep() {
        setName("Tokenization");
        setDescription("Extracts tokens from the text units content of a document. Expects: filter events. Sends back: filter events.");
        setConfiguration(getClass(), "config.tprm");
    }

    /**
     * Loads the step configuration from a class-path resource, (re)creates the lexers listed
     * in the engine configuration it references, and installs a fresh {@link Parameters}.
     *
     * @param cls class used to resolve the resource
     * @param str resource name of the step configuration
     */
    public void setConfiguration(Class<?> cls, String str) {
        if (this.config == null) {
            this.config = new Config();
        }
        this.config.loadFromResource(cls, str);

        this.structureParams = new StructureParameters();
        String engineConfig = this.config.getEngineConfig();
        if (Util.isEmpty(engineConfig) || !this.structureParams.loadFromResource(cls, engineConfig)) {
            this.logger.debug("Lexers' config file not found.");
        }

        instantiateLexers();
        setParameters(new Parameters());
    }

    /**
     * Rebuilds {@code lexers} and {@code serviceLexers} from the enabled items of the
     * structure configuration. A lexer that cannot be created, or whose rules fail to
     * load, is skipped.
     */
    private void instantiateLexers() {
        if (this.lexers == null) {
            return;
        }
        this.lexers.clear();
        this.serviceLexers.clear();

        for (StructureParametersItem item : this.structureParams.getItems()) {
            if (item == null || !item.isEnabled()) {
                continue;
            }
            ILexer lexer = createLexer(item.getLexerClass());
            if (lexer == null) {
                continue;
            }
            LexerRules rules = lexer.getRules();
            if (rules != null && !rules.loadFromResource(lexer.getClass(), item.getRulesLocation())) {
                // The lexer declares rules but they could not be loaded: leave it out.
                continue;
            }
            lexer.init();
            if (rules != null && rules.hasOutTokens()) {
                this.lexers.add(lexer);
            } else {
                this.serviceLexers.add(lexer);
            }
        }
    }

    /**
     * Reflectively creates a lexer through its no-argument constructor.
     *
     * @param className fully qualified name of the lexer class
     * @return the new lexer, or {@code null} if the name is empty, the class is not an
     *         {@link ILexer}, or instantiation fails (the reason is logged at debug level)
     */
    private ILexer createLexer(String className) {
        if (Util.isEmpty(className)) {
            this.logger.debug("Lexer instantiation skipped: no lexer class name configured.");
            return null;
        }
        try {
            Class<?> lexerClass = Class.forName(className);
            if (!ILexer.class.isAssignableFrom(lexerClass)) {
                this.logger.debug("Lexer instantiation skipped: {} is not an ILexer.", className);
                return null;
            }
            return (ILexer) lexerClass.getDeclaredConstructor().newInstance();
        } catch (ReflectiveOperationException e) {
            // Trailing throwable argument makes SLF4J log the stack trace, keeping the cause.
            this.logger.debug("Lexer instantiation falied: {}", e.getMessage(), e);
            return null;
        }
    }

    /** Refreshes the cached parameters and recomputes the idle rules. */
    @Override // net.sf.okapi.lib.extra.OkapiComponent
    public void component_init() {
        updateParameters();
        setFilters();
    }

    /** Caches the step's current {@link Parameters}. */
    private void updateParameters() {
        this.params = (Parameters) getParameters(Parameters.class);
    }

    /**
     * Handles a notification. Anything the superclass handles is left to it; a
     * {@link Notification#PARAMETERS_CHANGED} command (case-insensitive) refreshes the
     * cached parameters and the idle rules.
     *
     * @return {@code true} if the notification was handled
     */
    @Override // net.sf.okapi.lib.extra.Component, net.sf.okapi.lib.extra.INotifiable
    public boolean exec(Object obj, String str, Object obj2) {
        if (super.exec(obj, str, obj2)) {
            return true;
        }
        // Constant-first comparison so a null command is simply "not handled".
        if (!Notification.PARAMETERS_CHANGED.equalsIgnoreCase(str)) {
            return false;
        }
        updateParameters();
        setFilters();
        return true;
    }

    /**
     * Recomputes {@code idleRules}: every rule of the main lexers that is disabled or that
     * has no in, out and user token IDs. Each such rule is listed once.
     */
    private void setFilters() {
        if (this.params == null || this.idleRules == null) {
            return;
        }
        this.idleRules.clear();
        if (this.lexers == null) {
            return;
        }
        for (ILexer lexer : this.lexers) {
            if (lexer == null || lexer.getRules() == null) {
                continue;
            }
            Iterator<LexerRule> rules = lexer.getRules().iterator();
            while (rules.hasNext()) {
                LexerRule rule = rules.next();
                boolean hasNoTokenIds = rule.getInTokenIDs().size() == 0
                        && rule.getOutTokenIDs().size() == 0
                        && rule.getUserTokenIDs().size() == 0;
                if (!rule.isEnabled() || hasNoTokenIds) {
                    this.idleRules.add(rule);
                }
            }
        }
    }

    /**
     * Tokenizes the source and/or the targets of a non-empty, translatable text unit,
     * depending on the {@code tokenizeSource} and {@code tokenizeTargets} parameters.
     *
     * @return the event returned by the superclass handler
     */
    @Override // net.sf.okapi.lib.extra.steps.AbstractPipelineStep
    public Event handleTextUnit(Event event) {
        Event result = super.handleTextUnit(event);
        if (result == null) {
            return null;
        }
        ITextUnit textUnit = result.getTextUnit();
        if (textUnit == null || textUnit.isEmpty() || !textUnit.isTranslatable()) {
            return result;
        }
        if (this.params.tokenizeSource) {
            tokenizeSource(textUnit);
        }
        if (this.params.tokenizeTargets) {
            tokenizeTargets(textUnit);
        }
        return result;
    }

    /**
     * Turns one lexem into tokens.
     *
     * <p>A {@link #RAWTEXT} lexem is queued for re-processing (only while new raw text is
     * allowed) and yields no token. Any other lexem yields one token per out token ID of
     * its rule, unless the rule is unknown, idle, or does not support the locale.
     *
     * @param lexem    the lexem to convert; {@code null} is ignored
     * @param iLexer   the lexer that produced the lexem
     * @param localeId locale of the text being tokenized
     * @param tokens   list receiving the new tokens
     * @param i        offset added to the lexem's range when greater than zero
     */
    private void processLexem(Lexem lexem, ILexer iLexer, LocaleId localeId, Tokens tokens, int i) {
        if (lexem == null) {
            return;
        }
        if (lexem.getId() == RAWTEXT) {
            if (this.allowNewRawText) {
                this.rawtextLexems.add(lexem);
            }
            return;
        }
        LexerRule rule = iLexer.getRules().getRule(lexem.getId());
        if (rule == null || this.idleRules.contains(rule) || !rule.supportsLanguage(localeId)) {
            return;
        }
        lexem.setLexerId(this.lexers.indexOf(iLexer) + 1);

        Iterator<Integer> outTokenIds = rule.getOutTokenIDs().iterator();
        // Shift the range once: every token created below shares this lexem, so shifting
        // per out token ID would move the range several times.
        if (i > 0 && outTokenIds.hasNext()) {
            Range range = lexem.getRange();
            range.start += i;
            range.end += i;
        }
        while (outTokenIds.hasNext()) {
            tokens.add(new Token(outTokenIds.next().intValue(), lexem, 100));
        }
    }

    /**
     * Runs every lexer of the list over the text, first through its single-call
     * {@code process} method and then through its {@code open}/{@code hasNext}/{@code next}
     * iteration, converting all produced lexems with {@link #processLexem}.
     *
     * @param list     lexers to run; {@code null} entries are skipped
     * @param str      text to analyze
     * @param localeId locale of the text
     * @param tokens   list receiving the tokens (also handed to the lexers)
     * @param i        range offset applied to lexems produced by the iteration
     */
    private void runLexers(List<ILexer> list, String str, LocaleId localeId, Tokens tokens, int i) {
        for (ILexer lexer : list) {
            if (lexer == null) {
                continue;
            }
            Lexems lexems = lexer.process(str, localeId, tokens);
            if (lexems != null) {
                Iterator<Lexem> it = lexems.iterator();
                while (it.hasNext()) {
                    // Lexems from the single-call way are processed without an offset.
                    processLexem(it.next(), lexer, localeId, tokens, 0);
                }
            }
            lexer.open(str, localeId, tokens);
            try {
                while (lexer.hasNext()) {
                    processLexem(lexer.next(), lexer, localeId, tokens, i);
                }
            } finally {
                // Exactly one close() per open(), also when nothing was produced or on failure.
                lexer.close();
            }
        }
    }

    /**
     * Tokenizes the content of a text container.
     *
     * <p>The main and service lexers run over the whole text first; any {@link #RAWTEXT}
     * lexems they queue are then run through the main lexers again, with ranges offset by
     * the start of the raw fragment, and the service lexers run once more afterwards.
     *
     * @param textContainer content to tokenize
     * @param localeId      locale of the content
     * @return the tokens filtered by the configured token names, or {@code null} if the
     *         container is {@code null}, the locale is null/empty, or the locale is not
     *         supported by the parameters
     */
    private Tokens tokenize(TextContainer textContainer, LocaleId localeId) {
        if (textContainer == null || Util.isNullOrEmpty(localeId) || !this.params.supportsLanguage(localeId)) {
            return null;
        }
        this.positions.clear();
        this.rawtextLexems.clear();
        Tokens tokens = new Tokens();
        Tokens fragmentTokens = new Tokens();

        String text = textContainer.contentIsOneSegment()
                ? TextUnitUtil.getText(textContainer.getFirstContent(), this.positions)
                : TextUnitUtil.getText(textContainer.getUnSegmentedContentCopy(), this.positions);

        this.allowNewRawText = true;
        runLexers(this.lexers, text, localeId, tokens, 0);
        runLexers(this.serviceLexers, text, localeId, tokens, 0);
        this.allowNewRawText = false;

        if (!this.rawtextLexems.isEmpty()) {
            int previousSize = 0;
            while (!this.rawtextLexems.isEmpty()) {
                int size = this.rawtextLexems.size();
                // The queue must shrink on every pass; otherwise stop instead of looping forever.
                if (previousSize > 0 && size >= previousSize) {
                    if (size == previousSize) {
                        this.logger.debug("RAWTEXT lexems are not processed in tokenize()");
                    } else {
                        this.logger.debug("RAWTEXT lexems are creating a chain reaction in tokenize()");
                    }
                    break;
                }
                previousSize = size;
                fragmentTokens.clear();
                Lexem rawLexem = this.rawtextLexems.poll();
                text = rawLexem.getValue();
                runLexers(this.lexers, text, localeId, fragmentTokens, rawLexem.getRange().start);
                fragmentTokens.setImmutable(true);
                tokens.addAll(fragmentTokens);
            }
            // NOTE(review): `text` is the last raw fragment here, not the full text - confirm intended.
            runLexers(this.serviceLexers, text, localeId, tokens, 0);
        }

        tokens.fixRanges(this.positions);
        return tokens.getFilteredList(ListUtil.stringListAsArray(this.params.getTokenNames()));
    }

    /**
     * Tokenizes the source of a text unit in the step's source locale and stores the tokens
     * in the source's {@link TokensAnnotation}, creating the annotation if it is missing.
     */
    private void tokenizeSource(ITextUnit iTextUnit) {
        if (iTextUnit == null) {
            return;
        }
        Tokens tokens = tokenize(iTextUnit.getSource(), getSourceLocale());
        if (tokens == null) {
            return;
        }
        TokensAnnotation annotation = (TokensAnnotation) TextUnitUtil.getSourceAnnotation(iTextUnit, TokensAnnotation.class);
        if (annotation == null) {
            TextUnitUtil.setSourceAnnotation(iTextUnit, new TokensAnnotation(tokens));
        } else {
            annotation.addTokens(tokens);
        }
    }

    /**
     * Tokenizes every target of a text unit in its own locale and stores the tokens in that
     * target's {@link TokensAnnotation}, creating the annotation if it is missing.
     */
    private void tokenizeTargets(ITextUnit iTextUnit) {
        if (iTextUnit == null) {
            return;
        }
        for (LocaleId localeId : iTextUnit.getTargetLocales()) {
            Tokens tokens = tokenize(iTextUnit.getTarget(localeId), localeId);
            if (tokens == null) {
                continue;
            }
            TokensAnnotation annotation = (TokensAnnotation) TextUnitUtil.getTargetAnnotation(iTextUnit, localeId, TokensAnnotation.class);
            if (annotation == null) {
                TextUnitUtil.setTargetAnnotation(iTextUnit, localeId, new TokensAnnotation(tokens));
            } else {
                annotation.addTokens(tokens);
            }
        }
    }

    /** Returns the live list of rules currently treated as idle. */
    public List<LexerRule> getIdleRules() {
        return this.idleRules;
    }

    /** Replaces the main lexer list with the given list (stored as is, not copied). */
    public void setLexers(List<ILexer> list) {
        this.lexers = list;
    }

    /** Returns the live main lexer list. */
    public List<ILexer> getLexers() {
        return this.lexers;
    }

    /** Returns the engine configuration reference of the loaded config, or an empty string if there is no config. */
    public String getConfigInfo() {
        return this.config == null ? "" : this.config.getEngineConfig();
    }
}
