package org.cleartk.examples.chunking;

import java.util.ArrayList;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkSequenceAnnotator;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instances;
import org.cleartk.classifier.chunking.BIOChunking;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.simple.CharacterCategoryPatternExtractor;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.extractor.simple.TypePathExtractor;
import org.cleartk.ne.type.NamedEntityMention;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.uimafit.util.JCasUtil;

/* loaded from: input_file:org/cleartk/examples/chunking/NamedEntityChunker.class */
public class NamedEntityChunker extends CleartkSequenceAnnotator<String> {
    private SimpleFeatureExtractor extractor;
    private CleartkExtractor contextExtractor;
    private BIOChunking<Token, NamedEntityMention> chunking;

    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this.extractor = new CombinedExtractor(new SimpleFeatureExtractor[]{new CoveredTextExtractor(), new CharacterCategoryPatternExtractor(CharacterCategoryPatternExtractor.PatternType.REPEATS_MERGED), new TypePathExtractor(Token.class, "pos")});
        this.contextExtractor = new CleartkExtractor(Token.class, this.extractor, new CleartkExtractor.Context[]{new CleartkExtractor.Preceding(3), new CleartkExtractor.Following(3)});
        this.chunking = new BIOChunking<>(Token.class, NamedEntityMention.class, "mentionType");
    }

    public void process(JCas jCas) throws AnalysisEngineProcessException {
        for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
            List<Token> selectCovered = JCasUtil.selectCovered(jCas, Token.class, sentence);
            ArrayList arrayList = new ArrayList();
            for (Token token : selectCovered) {
                ArrayList arrayList2 = new ArrayList();
                arrayList2.addAll(this.extractor.extract(jCas, token));
                arrayList2.addAll(this.contextExtractor.extract(jCas, token));
                arrayList.add(arrayList2);
            }
            if (isTraining()) {
                this.dataWriter.write(Instances.toInstances(this.chunking.createOutcomes(jCas, selectCovered, JCasUtil.selectCovered(jCas, NamedEntityMention.class, sentence)), arrayList));
            } else {
                this.chunking.createChunks(jCas, selectCovered, this.classifier.classify(arrayList));
            }
        }
    }
}
