package org.maochen.nlp.parser.stanford;

import edu.stanford.nlp.io.ExtensionFileFilter;
import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.tagger.maxent.TaggerConfig;
import edu.stanford.nlp.trees.BobChrisTreeNormalizer;
import edu.stanford.nlp.trees.DiskTreebank;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileFilter;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;

/* loaded from: input_file:org/maochen/nlp/parser/stanford/StanfordPOSTaggerTrainer.class */
/**
 * Command-line utility that trains a Stanford {@link MaxentTagger} POS model.
 *
 * <p>{@link #main(String[])} builds a {@link TaggerConfig} in TRAIN mode and invokes the tagger's
 * {@code trainAndSaveModel} method reflectively. The training file is expected at
 * {@link #tempLocation}; {@code convertTrainingData()} can produce it from treebank files, but it
 * is not called from {@code main}, so the file must already exist when training starts.
 *
 * <p>All input and output locations are hard-coded absolute paths.
 */
public class StanfordPOSTaggerTrainer {
    /** Directory scanned for the WSJ treebank files. */
    public static final String wsj = "/Users/Maochen/Desktop/treebank_3/parsed/mrg/wsj/";
    /** Directory scanned for additional {@code .mrg} treebank files. */
    public static final String extra = "/Users/Maochen/Desktop/extra/treebank_extra_data/";
    /** File holding the tagged sentences used as the tagger's {@code trainFile}. */
    public static final String tempLocation = "/Users/Maochen/Desktop/tmp.txt";
    /** Destination passed to the tagger as its {@code model} property. */
    public static final String outputModelPath = "/Users/Maochen/Desktop/english-left3words-distsim.tagger";
    /**
     * Location of the distributional-similarity cluster file, resolved against the classpath root.
     *
     * <p>NOTE(review): {@code getResource("/")} returns {@code null} when no root resource is
     * available, which would make class initialisation fail; the resulting path is also the raw
     * URL path. Confirm both are acceptable for the way this tool is launched.
     */
    public static final String egw4_reut_512_clusters = StanfordPOSTaggerTrainer.class.getResource("/").getPath() + "/egw4-reut.512.clusters";

    /**
     * Writes every element of {@code lines} to the file at {@code path}, one element per line.
     *
     * <p>The file is encoded as UTF-8 so that it matches the {@code encoding} property handed to
     * the trainer in {@link #main(String[])}. The writer is closed even when a write fails.
     * I/O failures are reported on standard error and otherwise ignored, as before.
     *
     * @param lines the lines to write; iteration order of the set decides the line order
     * @param path  the file to create or overwrite
     */
    private static void writeToFile(Set<String> lines, String path) {
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(path), StandardCharsets.UTF_8)) {
            for (String line : lines) {
                writer.write(line);
                writer.write(System.lineSeparator());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the trees found under {@code str} that are accepted by {@code fileFilter} and adds one
     * tagged sentence per tree to {@code collection}.
     *
     * <p>Each tree is normalised with a {@link BobChrisTreeNormalizer}; its tagged yield is then
     * rendered as a single string using {@code "_"} as the separator argument.
     *
     * @param fileFilter selects which files below {@code str} are read
     * @param str        the path handed to {@link DiskTreebank#loadPath}
     * @param collection receives the rendered sentences
     */
    private static void loadTreeBank(FileFilter fileFilter, String str, Collection<String> collection) {
        DiskTreebank treebank = new DiskTreebank();
        treebank.loadPath(str, fileFilter);
        BobChrisTreeNormalizer normalizer = new BobChrisTreeNormalizer();
        treebank.apply(tree -> {
            collection.add(Sentence.listToString(normalizer.normalizeWholeTree(tree, tree.treeFactory()).taggedYield(), false, "_"));
        });
    }

    /**
     * Converts the treebanks under {@link #wsj} and {@link #extra} into the training file at
     * {@link #tempLocation}.
     *
     * <p>Both treebanks are loaded concurrently on the common {@link ForkJoinPool} into one
     * concurrent set, so duplicate sentences collapse into a single entry and the output order
     * is unspecified. The file is written only after both loads have completed successfully;
     * if either load fails or the waiting thread is interrupted, nothing is written, because the
     * set would be incomplete and might still be receiving entries from the other task.
     */
    private static void convertTrainingData() {
        Set<String> sentences = Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
        ForkJoinPool pool = ForkJoinPool.commonPool();
        ForkJoinTask<Void> wsjTask = pool.submit(() -> {
            loadTreeBank(new NumberRangeFileFilter(1, 2502, true), wsj, sentences);
            return null;
        });
        ForkJoinTask<Void> extraTask = pool.submit(() -> {
            loadTreeBank(new ExtensionFileFilter(".mrg", true), extra, sentences);
            return null;
        });
        try {
            extraTask.get();
            wsjTask.get();
        } catch (InterruptedException e) {
            // Restore the interrupt status so callers further up can observe it.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            return;
        } catch (ExecutionException e) {
            e.printStackTrace();
            return;
        }
        writeToFile(sentences, tempLocation);
    }

    /**
     * Trains the tagger with a fixed configuration and saves the model to
     * {@link #outputModelPath}.
     *
     * <p>{@code trainAndSaveModel} is looked up with {@code getDeclaredMethod} and made accessible
     * before being invoked statically. Reflection failures, including exceptions thrown by the
     * training itself, are printed to standard error.
     *
     * @param strArr command-line arguments; not used
     */
    public static void main(String[] strArr) {
        Properties properties = new Properties();
        properties.setProperty("mode", TaggerConfig.Mode.TRAIN.toString());
        properties.setProperty("model", outputModelPath);
        properties.setProperty("trainFile", tempLocation);
        properties.setProperty("wordFunction", "edu.stanford.nlp.process.AmericanizeFunction");
        properties.setProperty("closedClassTagThreshold", "40");
        properties.setProperty("curWordMinFeatureThresh", "2");
        properties.setProperty("encoding", "UTF-8");
        properties.setProperty("iterations", "100");
        properties.setProperty("lang", "english");
        properties.setProperty("learnClosedClassTags", "false");
        properties.setProperty("minFeatureThresh", "2");
        properties.setProperty("rareWordMinFeatureThresh", "10");
        properties.setProperty("rareWordThresh", "5");
        properties.setProperty("sgml", "false");
        properties.setProperty("sigmaSquared", "0.0");
        properties.setProperty("regL1", "0.75");
        properties.setProperty("tokenize", "true");
        properties.setProperty("verbose", "false");
        properties.setProperty("verboseResults", "true");
        properties.setProperty("veryCommonWordThresh", "250");
        properties.setProperty("outputFormat", "slashTags");
        properties.setProperty("nthreads", "8");
        // Same separator that loadTreeBank uses when rendering word/tag pairs.
        properties.setProperty("tagSeparator", "_");
        properties.setProperty("arch", "left3words,naacl2003unknowns,wordshapes(-1,1),distsim(" + egw4_reut_512_clusters + ",-1,1),distsimconjunction(" + egw4_reut_512_clusters + ",-1,1)");
        TaggerConfig taggerConfig = new TaggerConfig(properties);
        try {
            // The training entry point is not publicly accessible, hence the reflective call.
            Method trainAndSave = MaxentTagger.class.getDeclaredMethod("trainAndSaveModel", TaggerConfig.class);
            trainAndSave.setAccessible(true);
            trainAndSave.invoke(null, taggerConfig);
        } catch (IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
            e.printStackTrace();
        }
    }
}
