package org.maochen.nlp.sentencetypeclassifier;

import com.google.common.collect.Sets;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.maochen.nlp.datastructure.DTree;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/sentencetypeclassifier/TrainingFeatureExtractor.class */
/**
 * Training-time feature extractor: builds bigram/trigram count models from the training
 * sentences, serializes them under {@code filepathPrefix}, and writes one feature string per
 * training entry to {@code featureVector.txt}.
 *
 * <p>Parsing and n-gram counting run on the calling thread; the per-entry feature extraction is
 * fanned out to a fixed-size thread pool. Instances are not designed for concurrent calls to
 * {@link #extractFeature(Set)}: the parse cache and the inherited n-gram maps are mutated without
 * synchronization during the first phase.
 */
public class TrainingFeatureExtractor extends FeatureExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(TrainingFeatureExtractor.class);

    /** Parsed dependency tree per sentence text; filled before worker tasks are submitted. */
    private final Map<String, DTree> depTreeCache;

    /** Pool that runs the per-entry feature extraction tasks. */
    private final ExecutorService executorService;

    /**
     * Creates the extractor and its worker pool (one thread per available processor).
     *
     * @param str  forwarded unchanged as the first superclass constructor argument
     * @param str2 forwarded unchanged as the second superclass constructor argument
     */
    public TrainingFeatureExtractor(String str, String str2) {
        super(str, str2);
        this.depTreeCache = new HashMap<>();
        // Daemon threads: the pool is never shut down explicitly, and getFeats(Set) always waits
        // for its tasks, so idle workers must not keep the JVM alive after extraction is done.
        this.executorService = Executors.newFixedThreadPool(
                Runtime.getRuntime().availableProcessors(),
                new ThreadFactoryBuilder()
                        .setNameFormat("MaxEnt-FeatureExtractor-%d")
                        .setDaemon(true)
                        .build());
    }

    /**
     * Extracts features for every training entry and writes them, one per line, to
     * {@code <filepathPrefix>/featureVector.txt}. Does nothing if that file already exists.
     * I/O failures are logged, not propagated.
     *
     * @param set training entries; the text before the first {@code delimiter} match is the sentence
     */
    public void extractFeature(Set<String> set) {
        File file = new File(this.filepathPrefix + "/featureVector.txt");
        if (file.exists()) {
            return;
        }
        try {
            file.createNewFile();
            // try-with-resources so the writer is closed even when extraction or a write fails.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(file.getAbsoluteFile()))) {
                for (String feats : getFeats(set)) {
                    writer.write(feats);
                    writer.write(System.lineSeparator());
                }
            }
        } catch (IOException e) {
            LOG.error("Failed to write feature vectors to " + file.getAbsolutePath(), e);
        }
    }

    /**
     * Increments the count stored under the key formed by joining {@code strArr} with {@code "_"}.
     *
     * @param map    n-gram counts to update
     * @param strArr tokens of the n-gram, in order
     */
    private void addToMap(Map<String, Integer> map, String... strArr) {
        map.merge(String.join("_", strArr), 1, Integer::sum);
    }

    /**
     * Counts every adjacent pair and triple of {@code tokens} into the given maps.
     *
     * @param tokens   token sequence
     * @param biGrams  receives the bigram counts
     * @param triGrams receives the trigram counts
     */
    private void countNGrams(String[] tokens, Map<String, Integer> biGrams, Map<String, Integer> triGrams) {
        for (int i = 0; i + 1 < tokens.length; i++) {
            addToMap(biGrams, tokens[i], tokens[i + 1]);
            if (i + 2 < tokens.length) {
                addToMap(triGrams, tokens[i], tokens[i + 1], tokens[i + 2]);
            }
        }
    }

    /**
     * Adds the dependency-label bigrams and trigrams of {@code dTree} to the DEP n-gram maps.
     * The label sequence is whatever {@code getDEPString} returns, split on {@code "_"}.
     */
    private void generateDEPNGram(DTree dTree) {
        countNGrams(super.getDEPString(dTree).split("_"), this.biGramDepMap, this.triGramDepMap);
    }

    /**
     * Adds the word bigrams and trigrams of {@code str} to the word n-gram maps. The sentence is
     * lower-cased, wrapped in {@code <sentence>} / {@code </sentence>} markers and split on
     * {@code "_"}.
     */
    private void generateWordNGram(String str) {
        String[] tokens = ("<sentence>_" + str.toLowerCase() + "_</sentence>").split("_");
        countNGrams(tokens, this.biGramWordMap, this.triGramWordMap);
    }

    /**
     * Builds the n-gram models from all entries, persists them, then computes the feature string
     * of every entry on the worker pool.
     *
     * <p>A failing task is logged and its entry is skipped. If the calling thread is interrupted
     * while waiting, the interrupt flag is restored, outstanding tasks are cancelled and the
     * features collected so far are returned.
     *
     * @param set training entries
     * @return the distinct feature strings produced (backed by a concurrent set)
     */
    private Set<String> getFeats(Set<String> set) {
        LOG.info("Extracting Features ...");
        Set<String> extracted = Sets.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

        LOG.info("Generating NGram Model ...");
        for (String entry : set) {
            String sentence = entry.split(this.delimiter)[0];
            DTree tree = this.parser.parse(sentence.replaceAll("_", SentenceTypeClassifier.DELIMITER));
            this.depTreeCache.put(sentence, tree);
            generateDEPNGram(tree);
            generateWordNGram(sentence);
        }
        persistNGram();
        LOG.info("NGram Model completed ...");

        // The cache is fully populated before any task is submitted, so workers only read it.
        String entryDelimiter = this.delimiter;
        List<Future<String>> pending = new ArrayList<>(set.size());
        for (String entry : set) {
            pending.add(this.executorService.submit(() -> {
                String feats = getFeats(entry, this.depTreeCache.get(entry.split(entryDelimiter)[0]));
                extracted.add(feats);
                return feats;
            }));
        }

        for (Future<String> task : pending) {
            try {
                task.get();
            } catch (InterruptedException e) {
                // Restore the flag for callers and stop waiting; further get() calls would
                // fail immediately anyway.
                Thread.currentThread().interrupt();
                LOG.warn("Interrupted while extracting features; returning partial result.", e);
                for (Future<String> outstanding : pending) {
                    outstanding.cancel(true);
                }
                break;
            } catch (ExecutionException e) {
                LOG.error("Feature extraction task failed", e.getCause());
            }
        }
        LOG.info("Extracting features completed.");
        return extracted;
    }

    /**
     * Writes {@code map} to the file {@code str} using Java object serialization.
     * I/O failures are logged, not propagated.
     *
     * @param str destination file path
     * @param map n-gram counts to persist
     */
    private void serialize(String str, Map<String, Integer> map) {
        try (ObjectOutputStream out = new ObjectOutputStream(new FileOutputStream(str))) {
            out.writeObject(map);
        } catch (IOException e) {
            LOG.error("Failed to serialize n-gram model to " + str, e);
        }
    }

    /** Serializes the four n-gram maps to their fixed file names under {@code filepathPrefix}. */
    private void persistNGram() {
        serialize(this.filepathPrefix + "/bigram_word", this.biGramWordMap);
        serialize(this.filepathPrefix + "/trigram_word", this.triGramWordMap);
        serialize(this.filepathPrefix + "/bigram_dep", this.biGramDepMap);
        serialize(this.filepathPrefix + "/trigram_dep", this.triGramDepMap);
    }
}
