package org.deeplearning4j.text.tokenization.tokenizer;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import lombok.NonNull;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.deeplearning4j.text.annotator.PoStagger;
import org.deeplearning4j.text.annotator.SentenceAnnotator;
import org.deeplearning4j.text.annotator.StemmerAnnotator;
import org.deeplearning4j.text.annotator.TokenizerAnnotator;

/* loaded from: input_file:org/deeplearning4j/text/tokenization/tokenizer/PosUimaTokenizer.class */
/**
 * Tokenizer that runs a UIMA analysis engine over a text and filters the resulting tokens by
 * part-of-speech tag.
 *
 * <p>The whole text is processed eagerly in the constructor. Each token found inside a
 * {@link Sentence} annotation contributes exactly one entry to the token list: its lemma if
 * present, otherwise its stem, otherwise its covered text. Tokens that are rejected (markup-like
 * tags such as {@code <TAG>} / {@code </TAG>}, or tokens whose POS tag is not in the allowed set)
 * contribute the placeholder string {@code "NONE"} instead.
 *
 * <p><b>Shared state:</b> the analysis engine and the CAS are held in static fields. The first
 * engine passed to any constructor is cached and reused by every later instance; the CAS is
 * created once from that engine and reset for each new text. These static fields are read and
 * written without synchronization, so instances must not be constructed concurrently.
 */
public class PosUimaTokenizer implements Tokenizer {
    /** Placeholder stored in place of every token rejected by {@code valid}. */
    private static final String NONE = "NONE";

    /**
     * Matches tokens that look like an opening or closing upper-case tag, e.g. {@code <PER>} or
     * {@code </PER>}. Compiled once instead of once per token.
     */
    private static final Pattern MARKUP_TAG = Pattern.compile("</?[A-Z]+>");

    /** Engine shared by all instances; set by the first constructor call that sees it unset. */
    private static AnalysisEngine engine;

    /** CAS shared by all instances; created lazily and reset before each document. */
    private static CAS cas;

    private final List<String> tokens;
    private final Collection<String> allowedPosTags;
    private final boolean stripNones;
    private int index;
    private TokenPreProcess preProcessor;

    /**
     * Tokenizes {@code text}, keeping {@code "NONE"} placeholders in {@link #getTokens()}.
     *
     * @param text           the document text to analyse
     * @param analysisEngine engine to use if no engine has been cached yet
     * @param allowedPosTags POS tags a token may carry to be accepted
     */
    public PosUimaTokenizer(String text, AnalysisEngine analysisEngine, Collection<String> allowedPosTags) {
        this(text, analysisEngine, allowedPosTags, false);
    }

    /**
     * Tokenizes {@code text} with the shared analysis engine.
     *
     * @param text           the document text to analyse
     * @param analysisEngine engine to cache and use if none has been cached yet; ignored
     *                       otherwise
     * @param allowedPosTags POS tags a token may carry to be accepted; consulted only for tokens
     *                       that have a non-null POS tag
     * @param stripNones     if {@code true}, {@link #getTokens()} omits {@code "NONE"}
     *                       placeholders
     * @throws RuntimeException wrapping any exception raised while creating the CAS, running the
     *                          engine, or reading annotations
     */
    public PosUimaTokenizer(String text, AnalysisEngine analysisEngine, Collection<String> allowedPosTags,
                            boolean stripNones) {
        if (engine == null) {
            engine = analysisEngine;
        }
        this.allowedPosTags = allowedPosTags;
        this.tokens = new ArrayList<>();
        this.stripNones = stripNones;
        try {
            if (cas == null) {
                // Create the CAS from the same (cached) engine that will process it, rather than
                // from whatever engine this particular call happened to pass in.
                cas = engine.newCAS();
            }
            cas.reset();
            cas.setDocumentText(text);
            engine.process(cas);
            for (Sentence sentence : JCasUtil.select(cas.getJCas(), Sentence.class)) {
                for (Token token : JCasUtil.selectCovered(Token.class, sentence)) {
                    this.tokens.add(entryFor(token));
                }
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /** Returns the list entry for one token: placeholder, lemma, stem, or covered text. */
    private String entryFor(Token token) {
        if (!valid(token)) {
            return NONE;
        }
        String lemma = token.getLemma();
        if (lemma != null) {
            return lemma;
        }
        String stem = token.getStem();
        if (stem != null) {
            return stem;
        }
        return token.getCoveredText();
    }

    /**
     * Decides whether a token is kept. Markup-like tokens are always rejected; any other token is
     * accepted when it has no POS tag or when its POS tag is in the allowed set.
     */
    private boolean valid(Token token) {
        if (MARKUP_TAG.matcher(token.getCoveredText()).matches()) {
            return false;
        }
        String pos = token.getPos();
        return pos == null || this.allowedPosTags.contains(pos);
    }

    /** Returns {@code true} while the cursor has not reached the end of the token list. */
    public boolean hasMoreTokens() {
        return this.index < this.tokens.size();
    }

    /** Returns the total number of entries, including {@code "NONE"} placeholders. */
    public int countTokens() {
        return this.tokens.size();
    }

    /**
     * Returns the entry at the cursor and advances the cursor. The token pre-processor is not
     * applied here.
     *
     * @throws IndexOutOfBoundsException if there are no more tokens
     */
    public String nextToken() {
        String current = this.tokens.get(this.index);
        this.index++;
        return current;
    }

    /**
     * Drains the remaining tokens from the cursor position to the end.
     *
     * <p>Placeholders are dropped when {@code stripNones} was set; every other entry is passed
     * through the pre-processor if one is configured. Because this advances the cursor, a second
     * call returns an empty list.
     *
     * @return a new list with the remaining (possibly pre-processed) tokens
     */
    public List<String> getTokens() {
        List<String> remaining = new ArrayList<>();
        while (hasMoreTokens()) {
            String token = nextToken();
            if (this.stripNones && NONE.equals(token)) {
                continue;
            }
            remaining.add(this.preProcessor != null ? this.preProcessor.preProcess(token) : token);
        }
        return remaining;
    }

    /**
     * Builds an engine that aggregates the sentence annotator, the tokenizer annotator, the POS
     * tagger (requested with {@code "en"}) and the stemmer (requested with {@code "English"}).
     *
     * @return a newly created analysis engine
     * @throws RuntimeException wrapping any exception raised while building the engine
     */
    public static AnalysisEngine defaultAnalysisEngine() {
        try {
            AnalysisEngineDescription[] stages = new AnalysisEngineDescription[]{
                SentenceAnnotator.getDescription(),
                TokenizerAnnotator.getDescription(),
                PoStagger.getDescription("en"),
                StemmerAnnotator.getDescription("English")
            };
            return AnalysisEngineFactory.createEngine(
                AnalysisEngineFactory.createEngineDescription(stages), new Object[0]);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Sets the pre-processor applied to each token returned by {@link #getTokens()}.
     *
     * @param tokenPreProcess the pre-processor; must not be {@code null}
     * @throws NullPointerException if {@code tokenPreProcess} is {@code null}
     */
    public void setTokenPreProcessor(@NonNull TokenPreProcess tokenPreProcess) {
        if (tokenPreProcess == null) {
            throw new NullPointerException("tokenPreProcessor");
        }
        this.preProcessor = tokenPreProcess;
    }
}
