package org.cleartk.examples.chunking;

import java.io.File;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.collection.CollectionReaderDescription;
import org.cleartk.classifier.CleartkSequenceAnnotator;
import org.cleartk.classifier.jar.DefaultSequenceDataWriterFactory;
import org.cleartk.classifier.jar.DirectoryDataWriterFactory;
import org.cleartk.classifier.jar.Train;
import org.cleartk.classifier.mallet.MalletCRFStringOutcomeDataWriter;
import org.cleartk.examples.chunking.util.MASCGoldAnnotator;
import org.cleartk.syntax.opennlp.PosTaggerAnnotator;
import org.cleartk.util.Options_ImplBase;
import org.cleartk.util.ae.UriToDocumentTextAnnotator;
import org.cleartk.util.cr.UriCollectionReader;
import org.kohsuke.args4j.Option;
import org.uimafit.factory.AggregateBuilder;
import org.uimafit.factory.AnalysisEngineFactory;
import org.uimafit.pipeline.SimplePipeline;

/* loaded from: input_file:org/cleartk/examples/chunking/TrainNamedEntityChunker.class */
public class TrainNamedEntityChunker {

    /* loaded from: input_file:org/cleartk/examples/chunking/TrainNamedEntityChunker$MASCTextFileFilter.class */
    /**
     * File filter that accepts entries whose path (or supplied name) ends with
     * the {@code .txt} suffix. The comparison is case-sensitive and does not
     * check whether the entry is a regular file or a directory.
     */
    public static class MASCTextFileFilter implements IOFileFilter {

        /** Suffix an entry must end with to be accepted. */
        private static final String TEXT_SUFFIX = ".txt";

        /**
         * Tests a file by its full path string.
         *
         * @param file the candidate file
         * @return {@code true} when {@code file.getPath()} ends with {@code .txt}
         */
        public boolean accept(File file) {
            final String path = file.getPath();
            return path.endsWith(TEXT_SUFFIX);
        }

        /**
         * Tests an entry by name only; the directory argument is not consulted.
         *
         * @param file the directory containing the entry (unused)
         * @param str  the entry name
         * @return {@code true} when {@code str} ends with {@code .txt}
         */
        public boolean accept(File file, String str) {
            final boolean hasTextSuffix = str.endsWith(TEXT_SUFFIX);
            return hasTextSuffix;
        }
    }

    /* loaded from: input_file:org/cleartk/examples/chunking/TrainNamedEntityChunker$Options.class */
    /**
     * Command-line options for the training run, declared as args4j
     * {@code @Option}-annotated public fields. Each field initializer supplies the
     * value {@code main} sees when the corresponding flag is not given
     * (presumably the parser leaves unset fields untouched; confirm against
     * {@code Options_ImplBase}).
     */
    public static class Options extends Options_ImplBase {

        // Input location: main() hands this directory to the URI collection reader.
        @Option(name = "--train-dir", usage = "The directory containing MASC-annotated files")
        public File trainDirectory = new File("src/main/resources/data/MASC-1.0.3/data/written");

        // Output location: main() uses it as the data writer's output directory
        // and then passes the same directory to Train.main.
        @Option(name = "--model-dir", usage = "The directory where the model should be written")
        public File modelDirectory = new File("target/chunking/ne-model");
    }

    /**
     * Runs the named-entity chunker training example end to end.
     *
     * <p>Steps, in order: parse the command-line options; build a collection
     * reader description over the training directory using
     * {@link MASCTextFileFilter}; assemble an aggregate of the URI-to-text
     * annotator, the MASC gold annotator, the POS tagger and
     * {@code NamedEntityChunker} configured for training with a Mallet CRF data
     * writer targeting the model directory; run the pipeline; finally invoke
     * {@code Train.main} on the model directory.
     *
     * @param strArr command-line arguments, handed to {@link Options#parseOptions}
     * @throws Exception if option parsing, pipeline execution or training fails
     */
    public static void main(String[] strArr) throws Exception {
        Options cli = new Options();
        cli.parseOptions(strArr);

        // Reader over the training directory, restricted by MASCTextFileFilter.
        // The third argument is passed as null exactly as before.
        CollectionReaderDescription reader = UriCollectionReader.getDescriptionFromDirectory(
                cli.trainDirectory,
                MASCTextFileFilter.class,
                (Class) null);

        // Pre-processing stages are added in the order the chunker depends on them.
        AggregateBuilder pipeline = new AggregateBuilder();
        pipeline.add(UriToDocumentTextAnnotator.getDescription());
        pipeline.add(MASCGoldAnnotator.getDescription());
        pipeline.add(PosTaggerAnnotator.getDescription());

        // Chunker in training mode, writing Mallet CRF training data to the model directory.
        AnalysisEngineDescription chunkerTrainer = AnalysisEngineFactory.createPrimitiveDescription(
                NamedEntityChunker.class,
                CleartkSequenceAnnotator.PARAM_IS_TRAINING,
                true,
                DirectoryDataWriterFactory.PARAM_OUTPUT_DIRECTORY,
                cli.modelDirectory,
                DefaultSequenceDataWriterFactory.PARAM_DATA_WRITER_CLASS_NAME,
                MalletCRFStringOutcomeDataWriter.class);
        pipeline.add(chunkerTrainer);

        SimplePipeline.runPipeline(reader, pipeline.createAggregateDescription());

        // Train from the data the pipeline just wrote.
        Train.main(cli.modelDirectory);
    }
}
