package org.canova.nlp.tokenization.tokenizerfactory;

import java.io.InputStream;
import java.util.Collection;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.canova.nlp.annotator.PoStagger;
import org.canova.nlp.annotator.SentenceAnnotator;
import org.canova.nlp.annotator.StemmerAnnotator;
import org.canova.nlp.annotator.TokenizerAnnotator;
import org.canova.nlp.tokenization.tokenizer.PosUimaTokenizer;
import org.canova.nlp.tokenization.tokenizer.TokenPreProcess;
import org.canova.nlp.tokenization.tokenizer.Tokenizer;

/* loaded from: input_file:org/canova/nlp/tokenization/tokenizerfactory/PosUimaTokenizerFactory.class */
/**
 * {@link TokenizerFactory} that produces {@link PosUimaTokenizer} instances.
 *
 * <p>Every tokenizer created by one factory receives the same {@link AnalysisEngine}
 * and the same collection of allowed part-of-speech tags that were supplied at
 * construction time. How the tags are applied is decided by {@link PosUimaTokenizer};
 * presumably tokens whose tag is not in the collection are filtered out — confirm
 * against that class.
 */
public class PosUimaTokenizerFactory implements TokenizerFactory {

    /** Engine handed to every tokenizer this factory creates; fixed at construction. */
    private final AnalysisEngine tokenizer;

    /** Part-of-speech tags handed to every tokenizer this factory creates; fixed at construction. */
    private final Collection<String> allowedPoSTags;

    /** Optional pre-processor applied to created tokenizers; stays {@code null} until set. */
    private TokenPreProcess tokenPreProcess;

    /**
     * Creates a factory backed by the engine returned from {@link #defaultAnalysisEngine()}.
     *
     * @param collection part-of-speech tags passed to each created tokenizer
     * @throws RuntimeException if the default engine cannot be built
     */
    public PosUimaTokenizerFactory(Collection<String> collection) {
        this(defaultAnalysisEngine(), collection);
    }

    /**
     * Creates a factory backed by a caller-supplied engine.
     *
     * @param analysisEngine engine passed to each created tokenizer
     * @param collection part-of-speech tags passed to each created tokenizer
     */
    public PosUimaTokenizerFactory(AnalysisEngine analysisEngine, Collection<String> collection) {
        this.tokenizer = analysisEngine;
        this.allowedPoSTags = collection;
    }

    /**
     * Builds the default engine: an aggregate of the sentence annotator, the tokenizer
     * annotator, the part-of-speech tagger (configured with {@code "en"}) and the stemmer
     * (configured with {@code "English"}), in that order.
     *
     * @return a newly created engine
     * @throws RuntimeException wrapping any exception raised while the descriptions or
     *     the engine are being created
     */
    public static AnalysisEngine defaultAnalysisEngine() {
        try {
            AnalysisEngineDescription[] stages = {
                SentenceAnnotator.getDescription(),
                TokenizerAnnotator.getDescription(),
                PoStagger.getDescription("en"),
                StemmerAnnotator.getDescription("English")
            };
            AnalysisEngineDescription aggregate = AnalysisEngineFactory.createEngineDescription(stages);
            // An explicit empty array means "no extra configuration parameters".
            return AnalysisEngineFactory.createEngine(aggregate, new Object[0]);
        } catch (Exception e) {
            // Keep the original exception as the cause so the underlying failure stays visible.
            throw new RuntimeException("Failed to create the default UIMA analysis engine", e);
        }
    }

    /**
     * Creates a tokenizer over the given text.
     *
     * <p>The current pre-processor is always forwarded to the new tokenizer, even when
     * it has never been set (in which case {@code null} is forwarded).
     *
     * @param str text to tokenize
     * @return a new {@link PosUimaTokenizer} using this factory's engine and tags
     */
    @Override
    public Tokenizer create(String str) {
        PosUimaTokenizer created = new PosUimaTokenizer(str, this.tokenizer, this.allowedPoSTags);
        created.setTokenPreProcessor(this.tokenPreProcess);
        return created;
    }

    /**
     * Not supported by this factory.
     *
     * @param inputStream ignored
     * @return never returns normally
     * @throws UnsupportedOperationException always
     */
    @Override
    public Tokenizer create(InputStream inputStream) {
        throw new UnsupportedOperationException(
                "PosUimaTokenizerFactory cannot tokenize an InputStream; use create(String) instead");
    }

    /**
     * Sets the pre-processor forwarded to tokenizers created after this call.
     *
     * @param tokenPreProcess pre-processor to forward; stored as given
     */
    @Override
    public void setTokenPreProcessor(TokenPreProcess tokenPreProcess) {
        this.tokenPreProcess = tokenPreProcess;
    }
}
