package org.cleartk.examples.documentclassification.advanced;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.List;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.jcas.tcas.DocumentAnnotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.CleartkAnnotator;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.Instance;
import org.cleartk.classifier.feature.extractor.CleartkExtractor;
import org.cleartk.classifier.feature.extractor.CleartkExtractorException;
import org.cleartk.classifier.feature.extractor.simple.CombinedExtractor;
import org.cleartk.classifier.feature.extractor.simple.CoveredTextExtractor;
import org.cleartk.classifier.feature.extractor.simple.SimpleFeatureExtractor;
import org.cleartk.classifier.feature.transform.extractor.CentroidTfidfSimilarityExtractor;
import org.cleartk.classifier.feature.transform.extractor.MinMaxNormalizationExtractor;
import org.cleartk.classifier.feature.transform.extractor.TfidfExtractor;
import org.cleartk.classifier.feature.transform.extractor.ZeroMeanUnitStddevExtractor;
import org.cleartk.classifier.jar.GenericJarClassifierFactory;
import org.cleartk.examples.type.UsenetDocument;
import org.cleartk.token.type.Sentence;
import org.cleartk.token.type.Token;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.util.JCasUtil;

/* loaded from: input_file:org/cleartk/examples/documentclassification/advanced/DocumentClassificationAnnotator.class */
/**
 * Document-level classification annotator.
 *
 * <p>For every CAS, features are extracted over the document annotation by four extractors
 * (tf*idf over token text, centroid tf*idf similarity, and two normalizations of the
 * sentence/token counts). In training mode the instance is written to the data writer with the
 * category of the document's {@link UsenetDocument}; otherwise the instance is classified and a
 * new {@link UsenetDocument} carrying the predicted category is added to the CAS.
 */
public class DocumentClassificationAnnotator extends CleartkAnnotator<String> {

    public static final String PREDICTION_VIEW_NAME = "ExampleDocumentClassificationPredictionView";

    /** Name given to the tf*idf extractor; also the prefix of its data file name. */
    public static final String TFIDF_EXTRACTOR_KEY = "Token";

    /** Name given to the centroid similarity extractor; also used verbatim as its data file name. */
    public static final String CENTROID_TFIDF_SIM_EXTRACTOR_KEY = "CentroidTfIdfSimilarity";

    /** Name given to the zero-mean/unit-stddev extractor (same value as {@link #MINMAX_EXTRACTOR_KEY}). */
    public static final String ZMUS_EXTRACTOR_KEY = "LengthFeatures";

    /** Name given to the min-max extractor (same value as {@link #ZMUS_EXTRACTOR_KEY}). */
    public static final String MINMAX_EXTRACTOR_KEY = "LengthFeatures";

    public static final String PARAM_TF_IDF_URI = ConfigurationParameterFactory.createConfigurationParameterName(DocumentClassificationAnnotator.class, "tfIdfUri");
    public static final String PARAM_TF_IDF_CENTROID_SIMILARITY_URI = ConfigurationParameterFactory.createConfigurationParameterName(DocumentClassificationAnnotator.class, "tfIdfCentroidSimilarityUri");
    public static final String PARAM_ZMUS_URI = ConfigurationParameterFactory.createConfigurationParameterName(DocumentClassificationAnnotator.class, "zmusUri");
    public static final String PARAM_MINMAX_URI = ConfigurationParameterFactory.createConfigurationParameterName(DocumentClassificationAnnotator.class, "minmaxUri");

    /** Optional; when set, the tf*idf extractor loads its data from this URI during initialization. */
    @ConfigurationParameter(mandatory = false, description = "provides a URI where the tf*idf map will be written")
    protected URI tfIdfUri;

    /** Optional; when set, the centroid similarity extractor loads its data from this URI during initialization. */
    @ConfigurationParameter(mandatory = false, description = "provides a URI where the tf*idf centroid data will be written")
    protected URI tfIdfCentroidSimilarityUri;

    /** Optional; when set, the zero-mean/unit-stddev extractor loads its data from this URI during initialization. */
    @ConfigurationParameter(mandatory = false, description = "provides a URI where the Zero Mean, Unit Std Dev feature data will be written")
    protected URI zmusUri;

    /** Optional; when set, the min-max extractor loads its data from this URI during initialization. */
    @ConfigurationParameter(mandatory = false, description = "provides a URI where the min-max feature normalizaation data will be written")
    protected URI minmaxUri;

    /** Combination of the four extractors; built in {@link #initialize(UimaContext)}. */
    private CombinedExtractor extractor;

    /**
     * Feature extractor producing a single feature whose value is the number of annotations of a
     * given type covered by the focus annotation.
     */
    public static class CountAnnotationExtractor implements SimpleFeatureExtractor {
        private final Class<? extends Annotation> annotationType;

        /**
         * @param cls the annotation type to count
         */
        public CountAnnotationExtractor(Class<? extends Annotation> cls) {
            this.annotationType = cls;
        }

        /**
         * @param jCas       the CAS view (not consulted directly; the covered annotations are
         *                   selected relative to {@code annotation})
         * @param annotation the focus annotation whose covered annotations are counted
         * @return a one-element list holding the feature {@code "Count_" + <type name>} with the
         *         count as an {@code Integer} value
         */
        @Override
        public List<Feature> extract(JCas jCas, Annotation annotation) throws CleartkExtractorException {
            int count = JCasUtil.selectCovered(this.annotationType, annotation).size();
            String featureName = "Count_" + this.annotationType.getName();
            return Arrays.asList(new Feature(featureName, Integer.valueOf(count)));
        }
    }

    /**
     * @param file directory holding the extractor data
     * @return URI of {@code Token_tfidf_extractor.dat} inside {@code file}
     */
    public static URI createTokenTfIdfDataURI(File file) {
        return new File(file, TFIDF_EXTRACTOR_KEY + "_tfidf_extractor.dat").toURI();
    }

    /**
     * @param file directory holding the extractor data
     * @return URI of the file named by {@link #CENTROID_TFIDF_SIM_EXTRACTOR_KEY} (no suffix)
     *         inside {@code file}
     */
    public static URI createIdfCentroidSimilarityDataURI(File file) {
        return new File(file, CENTROID_TFIDF_SIM_EXTRACTOR_KEY).toURI();
    }

    /**
     * @param file directory holding the extractor data
     * @return URI of {@code LengthFeatures_zmus_extractor.dat} inside {@code file}
     */
    public static URI createZmusDataURI(File file) {
        return new File(file, ZMUS_EXTRACTOR_KEY + "_zmus_extractor.dat").toURI();
    }

    /**
     * @param file directory holding the extractor data
     * @return URI of {@code LengthFeatures_minmax_extractor.dat} inside {@code file}
     */
    public static URI createMinMaxDataURI(File file) {
        return new File(file, MINMAX_EXTRACTOR_KEY + "_minmax_extractor.dat").toURI();
    }

    /**
     * Initializes the parent annotator and builds the combined feature extractor, loading stored
     * data for each sub-extractor whose URI parameter is set.
     *
     * @throws ResourceInitializationException if the parent fails to initialize or loading any
     *         extractor data raises an {@link IOException}
     */
    @Override
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        try {
            this.extractor = new CombinedExtractor(new SimpleFeatureExtractor[] {
                initTfIdfExtractor(),
                initCentroidTfIdfSimilarityExtractor(),
                initZmusExtractor(),
                initMinMaxExtractor()
            });
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
    }

    /** Builds the extractor that counts the covered text of the tokens covered by the focus annotation. */
    private static CleartkExtractor createTokenCountExtractor() {
        CleartkExtractor.Context covered = new CleartkExtractor.Covered();
        CleartkExtractor.Context counts = new CleartkExtractor.Count(new CleartkExtractor.Context[] {covered});
        return new CleartkExtractor(Token.class, new CoveredTextExtractor(), new CleartkExtractor.Context[] {counts});
    }

    /** Builds the extractor producing the sentence-count and token-count features. */
    private static CombinedExtractor createLengthFeatureExtractor() {
        return new CombinedExtractor(new SimpleFeatureExtractor[] {
            new CountAnnotationExtractor(Sentence.class),
            new CountAnnotationExtractor(Token.class)
        });
    }

    /** Creates the tf*idf extractor over token counts, loading its data when {@link #tfIdfUri} is set. */
    private TfidfExtractor<String> initTfIdfExtractor() throws IOException {
        TfidfExtractor<String> tfidf = new TfidfExtractor<>(TFIDF_EXTRACTOR_KEY, createTokenCountExtractor());
        if (this.tfIdfUri != null) {
            tfidf.load(this.tfIdfUri);
        }
        return tfidf;
    }

    /**
     * Creates the centroid tf*idf similarity extractor over token counts, loading its data when
     * {@link #tfIdfCentroidSimilarityUri} is set.
     */
    private CentroidTfidfSimilarityExtractor<String> initCentroidTfIdfSimilarityExtractor() throws IOException {
        CentroidTfidfSimilarityExtractor<String> similarity =
            new CentroidTfidfSimilarityExtractor<>(CENTROID_TFIDF_SIM_EXTRACTOR_KEY, createTokenCountExtractor());
        if (this.tfIdfCentroidSimilarityUri != null) {
            similarity.load(this.tfIdfCentroidSimilarityUri);
        }
        return similarity;
    }

    /** Creates the zero-mean/unit-stddev extractor over the length features, loading its data when {@link #zmusUri} is set. */
    private ZeroMeanUnitStddevExtractor<String> initZmusExtractor() throws IOException {
        ZeroMeanUnitStddevExtractor<String> zmus =
            new ZeroMeanUnitStddevExtractor<>(ZMUS_EXTRACTOR_KEY, createLengthFeatureExtractor());
        if (this.zmusUri != null) {
            zmus.load(this.zmusUri);
        }
        return zmus;
    }

    /** Creates the min-max extractor over the length features, loading its data when {@link #minmaxUri} is set. */
    private MinMaxNormalizationExtractor<String> initMinMaxExtractor() throws IOException {
        MinMaxNormalizationExtractor<String> minMax =
            new MinMaxNormalizationExtractor<>(MINMAX_EXTRACTOR_KEY, createLengthFeatureExtractor());
        if (this.minmaxUri != null) {
            minMax.load(this.minmaxUri);
        }
        return minMax;
    }

    /**
     * Extracts features over the document annotation and either writes a training instance or
     * records a prediction.
     *
     * <p>Training: the outcome is the category of the single {@link UsenetDocument} in the CAS.
     * Classification: a new {@link UsenetDocument} spanning {@code [0, documentText.length())} is
     * created with the predicted category and added to the indexes.
     */
    @Override
    public void process(JCas jCas) throws AnalysisEngineProcessException {
        // The document annotation is handed back as a more general type; the extractor needs an Annotation.
        DocumentAnnotation document = (DocumentAnnotation) jCas.getDocumentAnnotationFs();

        Instance<String> instance = new Instance<>();
        instance.addAll(this.extractor.extract(jCas, document));

        if (isTraining()) {
            UsenetDocument gold = JCasUtil.selectSingle(jCas, UsenetDocument.class);
            instance.setOutcome(gold.getCategory());
            this.dataWriter.write(instance);
            return;
        }

        String category = this.classifier.classify(instance.getFeatures());
        UsenetDocument predicted = new UsenetDocument(jCas, 0, jCas.getDocumentText().length());
        predicted.setCategory(category);
        predicted.addToIndexes();
    }

    /**
     * Creates a description of this annotator configured for classification.
     *
     * @param file value passed (via {@code toString()}) as
     *             {@link GenericJarClassifierFactory#PARAM_CLASSIFIER_JAR_PATH}
     */
    public static AnalysisEngineDescription getClassifierDescription(File file) throws ResourceInitializationException {
        return AnalysisEngineFactory.createPrimitiveDescription(
            DocumentClassificationAnnotator.class,
            new Object[] {GenericJarClassifierFactory.PARAM_CLASSIFIER_JAR_PATH, file.toString()});
    }
}
