package org.cleartk.examples.documentclassification.basic;

import java.io.File;
import java.util.Arrays;
import java.util.List;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReader;
import org.cleartk.classifier.jar.DefaultDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.JarClassifierBuilder;
import org.cleartk.classifier.libsvm.LIBSVMStringOutcomeDataWriter;
import org.cleartk.examples.documentclassification.advanced.GoldDocumentCategoryAnnotator;
import org.cleartk.syntax.opennlp.SentenceAnnotator;
import org.cleartk.token.stem.snowball.DefaultSnowballStemmer;
import org.cleartk.token.tokenizer.TokenAnnotator;
import org.cleartk.util.Options_ImplBase;
import org.cleartk.util.ae.UriToDocumentTextAnnotator;
import org.cleartk.util.cr.UriCollectionReader;
import org.kohsuke.args4j.Option;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.pipeline.SimplePipeline;

/* loaded from: input_file:org/cleartk/examples/documentclassification/basic/TrainModel.class */
/**
 * Command-line entry point of the basic document classification example.
 *
 * <p>{@link #main(String[])} reads the documents found under the configured training directory,
 * runs them through an aggregate of annotators ending in
 * {@code BasicDocumentClassificationAnnotator} configured with a LIBSVM data writer, and then
 * calls {@link JarClassifierBuilder#trainAndPackage} on the models directory.
 */
public class TrainModel {

    /** Command-line options understood by {@link TrainModel#main(String[])}. */
    public static class Options extends Options_ImplBase {

        /** Directory holding the training documents. */
        @Option(name = "--train-dir", usage = "Specify the directory containing the training documents.  This is used for cross-validation, and for training in a holdout set evaluation. When we run this example we point to a directory containing training data from a subset of the 20 newsgroup corpus - i.e. a directory called '3news-bydate/train'")
        public File trainDirectory = new File("src/main/resources/data/3news-bydate/train");

        /** Directory that receives the training data and the trained model files. */
        @Option(name = "--models-dir", usage = "specify the directory in which to write out the trained model files")
        public File modelsDirectory = new File("target/simple_document_classification/models");

        /**
         * Arguments handed to the learner when the model is trained.
         *
         * <p>NOTE(review): the usage text mentions a {@code -ta} flag, but no such alias is
         * declared on this option in this file - confirm whether an alias is expected.
         * NOTE(review): the default is a fixed-size {@link Arrays#asList} list; verify how the
         * option parser stores user-supplied values into it.
         */
        @Option(name = "--training-args", usage = "specify training arguments to be passed to the learner.  For multiple values specify -ta for each - e.g. '-ta -t -ta 0'")
        public List<String> trainingArguments = Arrays.asList("-t", "0");
    }

    /**
     * Parses the command line, writes training data for every document in the training directory
     * and then trains and packages the classifier.
     *
     * @param strArr command-line arguments, see {@link Options}
     * @throws Exception if option parsing, pipeline execution or training fails
     */
    public static void main(String[] strArr) throws Exception {
        Options options = new Options();
        options.parseOptions(strArr);

        // The reader is created before the pipeline description is assembled.
        CollectionReader trainingDocuments = UriCollectionReader.getCollectionReaderFromDirectory(
                options.trainDirectory,
                UriCollectionReader.RejectSystemFiles.class,
                UriCollectionReader.RejectSystemDirectories.class);

        AnalysisEngineDescription trainingPipeline = buildTrainingPipeline(options.modelsDirectory);
        SimplePipeline.runPipeline(trainingDocuments, trainingPipeline);

        // Train on the data written by the pipeline, passing the learner arguments through.
        String[] learnerArguments = options.trainingArguments.toArray(new String[0]);
        JarClassifierBuilder.trainAndPackage(options.modelsDirectory, learnerArguments);
    }

    /**
     * Assembles the aggregate description used for training: document text loading, gold category
     * annotation, sentence splitting, tokenization, stemming and finally the classification
     * annotator configured to write LIBSVM training data into {@code modelsDirectory}.
     */
    private static AnalysisEngineDescription buildTrainingPipeline(File modelsDirectory) throws Exception {
        AnalysisEngineDescription goldCategoryAnnotator =
                AnalysisEngineFactory.createPrimitiveDescription(GoldDocumentCategoryAnnotator.class);
        AnalysisEngineDescription classificationAnnotator =
                AnalysisEngineFactory.createPrimitiveDescription(
                        BasicDocumentClassificationAnnotator.class,
                        DefaultDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
                        LIBSVMStringOutcomeDataWriter.class.getName(),
                        DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
                        modelsDirectory);

        AggregateBuilder pipeline = new AggregateBuilder();
        pipeline.add(UriToDocumentTextAnnotator.getDescription());
        pipeline.add(goldCategoryAnnotator);
        pipeline.add(SentenceAnnotator.getDescription());
        pipeline.add(TokenAnnotator.getDescription());
        pipeline.add(DefaultSnowballStemmer.getDescription("English"));
        pipeline.add(classificationAnnotator);
        return pipeline.createAggregateDescription();
    }
}
