package opennlp.tools.cmdline.postag;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import opennlp.model.TrainUtil;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.postag.POSDictionary;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.postag.WordTagSampleStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelType;

/* loaded from: input_file:opennlp/tools/cmdline/postag/POSTaggerTrainerTool.class */
/**
 * Command line tool that trains a model for the part-of-speech tagger and
 * writes it to the model file given on the command line.
 */
public final class POSTaggerTrainerTool implements CmdLineTool {

    /**
     * Command line parameters accepted by this tool: the combination of this
     * package's {@link TrainingParams} and the shared {@link TrainingToolParams}.
     */
    interface TrainerToolParams extends TrainingParams, TrainingToolParams {
    }

    /** Returns the name under which this tool is invoked. */
    @Override // opennlp.tools.cmdline.CmdLineTool
    public String getName() {
        return "POSTaggerTrainer";
    }

    /** Returns a one-line description of what this tool does. */
    @Override // opennlp.tools.cmdline.CmdLineTool
    public String getShortDescription() {
        return "trains a model for the part-of-speech tagger";
    }

    /** Returns the usage line, generated from {@link TrainerToolParams}. */
    @Override // opennlp.tools.cmdline.CmdLineTool
    public String getHelp() {
        return "Usage: opennlp " + getName() + " " + ArgumentParser.createUsage(TrainerToolParams.class);
    }

    /**
     * Opens a file of word/tag samples as a stream of {@link POSSample}s.
     *
     * <p>The file is first checked with {@link CmdLineUtil#checkInputFile},
     * using {@code str + " Data"} as its display name. The caller owns the
     * returned stream and is responsible for closing it.
     *
     * @param str     label describing the data (for example "Training"), used in the input-file check
     * @param file    the sample file to read
     * @param charset the encoding used to decode the file
     * @return a sample stream reading the file line by line
     */
    public static ObjectStream<POSSample> openSampleData(String str, File file, Charset charset) {
        CmdLineUtil.checkInputFile(str + " Data", file);
        FileInputStream sampleDataIn = CmdLineUtil.openInFile(file);
        return new WordTagSampleStream(new PlainTextByLineStream(sampleDataIn.getChannel(), charset));
    }

    /**
     * Parses the arguments, trains a POS tagger model and writes it to the
     * requested model file.
     *
     * <p>Steps: validate and parse the arguments, load the optional training
     * parameters file, optionally build an ngram dictionary from the samples,
     * optionally load a tag dictionary, train, and write the model.
     *
     * @param strArr the command line arguments for this tool
     * @throws TerminateToolException with code 1 if the arguments are invalid or
     *         name an unknown model type, and with code -1 if the training
     *         parameters are invalid or an I/O error occurs
     */
    @Override // opennlp.tools.cmdline.CmdLineTool
    public void run(String[] strArr) {
        if (!ArgumentParser.validateArguments(strArr, TrainerToolParams.class)) {
            System.err.println(getHelp());
            throw new TerminateToolException(1);
        }
        TrainerToolParams params = (TrainerToolParams) ArgumentParser.parse(strArr, TrainerToolParams.class);

        TrainingParameters trainingParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true);
        if (trainingParams != null && !TrainUtil.isValid(trainingParams.getSettings())) {
            System.err.println("Training parameters file is invalid!");
            throw new TerminateToolException(-1);
        }

        // The model type argument is only consulted when no training parameters
        // were loaded. getModelType returns null for unrecognized names, so report
        // that here instead of handing a null model type to the trainer.
        ModelType modelType = null;
        if (trainingParams == null) {
            modelType = getModelType(params.getType());
            if (modelType == null) {
                System.err.println("Unknown model type: " + params.getType());
                throw new TerminateToolException(1);
            }
        }

        File trainingData = params.getData();
        File modelFile = params.getModel();
        CmdLineUtil.checkOutputFile("pos tagger model", modelFile);

        ObjectStream<POSSample> samples = openSampleData("Training", trainingData, params.getEncoding());
        try {
            Dictionary ngramDictionary = null;
            Integer ngramCutoff = params.getNgram();
            if (ngramCutoff != null) {
                System.err.print("Building ngram dictionary ... ");
                ngramDictionary = POSTaggerME.buildNGramDictionary(samples, ngramCutoff.intValue());
                // The samples were consumed above; rewind so training sees them again.
                samples.reset();
                System.err.println("done");
            }

            POSDictionary tagDictionary = null;
            if (params.getDict() != null) {
                FileInputStream dictIn = new FileInputStream(params.getDict());
                try {
                    tagDictionary = POSDictionary.create(dictIn);
                } finally {
                    try {
                        dictIn.close();
                    } catch (IOException ignored) {
                        // Best effort: a failed close of a read-only stream is not actionable.
                    }
                }
            }

            CmdLineUtil.writeModel("pos tagger", modelFile,
                    trainingParams == null
                            ? POSTaggerME.train(params.getLang(), samples, modelType, tagDictionary,
                                    ngramDictionary, params.getCutoff().intValue(),
                                    params.getIterations().intValue())
                            : POSTaggerME.train(params.getLang(), samples, trainingParams, tagDictionary,
                                    ngramDictionary));
        } catch (IOException e) {
            CmdLineUtil.printTrainingIoError(e);
            throw new TerminateToolException(-1);
        } finally {
            // Close the sample stream on every exit path, including a failed
            // ngram dictionary build.
            try {
                samples.close();
            } catch (IOException ignored) {
                // Best effort: nothing useful can be done if closing the input fails.
            }
        }
    }

    /**
     * Maps a model type name to its {@link ModelType}.
     *
     * @param str one of "maxent", "perceptron" or "perceptron_sequence";
     *            {@code null} is treated as "maxent"
     * @return the matching model type, or {@code null} if the name is not recognized
     */
    static ModelType getModelType(String str) {
        if (str == null || str.equals("maxent")) {
            return ModelType.MAXENT;
        }
        if (str.equals("perceptron")) {
            return ModelType.PERCEPTRON;
        }
        if (str.equals("perceptron_sequence")) {
            return ModelType.PERCEPTRON_SEQUENCE;
        }
        return null;
    }
}
