package org.cleartk.clearnlp;

import com.googlecode.clearnlp.engine.EngineGetter;
import com.googlecode.clearnlp.tokenization.AbstractTokenizer;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.uimafit.component.JCasAnnotator_ImplBase;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.util.JCasUtil;

/* loaded from: input_file:org/cleartk/clearnlp/Tokenizer.class */
public class Tokenizer extends JCasAnnotator_ImplBase {
    public static final String DEFAULT_DICTIONARY_FILE_NAME = "dictionary-1.2.0.zip";

    @ConfigurationParameter(description = "Language code for the tokenizer (default value=en).", defaultValue = {"en"})
    private String languageCode;

    @ConfigurationParameter(description = "This parameter provides the URI of the tokenizer dictionary file.")
    private URI dictionaryUri;
    private AbstractTokenizer tokenizer;
    public static final String PARAM_LANGUAGE_CODE = ConfigurationParameterFactory.createConfigurationParameterName(Tokenizer.class, "languageCode");
    public static final String PARAM_DICTIONARY_URI = ConfigurationParameterFactory.createConfigurationParameterName(Tokenizer.class, "dictionaryUri");

    public static AnalysisEngineDescription getDescription() throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(Tokenizer.class, new Object[0]);
    }

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            this.tokenizer = EngineGetter.getTokenizer(this.languageCode, (this.dictionaryUri == null ? Tokenizer.class.getResource("dictionary-1.2.0.zip").toURI().toURL() : this.dictionaryUri.toURL()).openStream());
        } catch (Exception e) {
            throw new ResourceInitializationException(e);
        }
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
            String coveredText = sentence.getCoveredText();
            int begin = sentence.getBegin();
            int i = 0;
            for (String str : this.tokenizer.getTokens(sentence.getCoveredText())) {
                int indexOf = coveredText.indexOf(str, i);
                int length = indexOf + str.length();
                new Token(jCas, begin + indexOf, begin + length).addToIndexes();
                i = length;
            }
        }
    }
}
