package org.maochen.nlp.sentencetypeclassifier;

import com.google.common.collect.Sets;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import org.maochen.nlp.datastructure.DNode;
import org.maochen.nlp.datastructure.DTree;
import org.maochen.nlp.parser.IParser;
import org.maochen.nlp.parser.stanford.pcfg.StanfordPCFGParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/sentencetypeclassifier/FeatureExtractor.class */
/**
 * Builds the delimiter-separated feature string used by the sentence type classifier.
 *
 * <p>Features are emitted as bare names joined by {@link #delimiter}; a feature that does not
 * fire is simply absent, and a feature may be repeated several times to give it more weight.
 * The word and dependency n-gram vocabularies are loaded from serialized maps located under
 * the path prefix given to the constructor.
 */
public class FeatureExtractor {
    private static final Logger LOG = LoggerFactory.getLogger(FeatureExtractor.class);

    /** POS tags tested for the {@code first_word_pos} / {@code last_word_pos} features. */
    private static final Set<String> WH_POS_TAGS = Sets.newHashSet("WRB", "WDT", "WP", "WP$");

    /** Lower-case sentence prefixes that trigger the {@code question_over_head} feature. */
    private static final Set<String> QUESTION_OVERHEAD_PREFIXES =
            Sets.newHashSet("tell me", "let me know", "clarify for me", "name");

    /** Lemmas of the first token that trigger {@code has_imperative_keyword} when it is a root. */
    private static final Set<String> IMPERATIVE_LEMMAS =
            Sets.newHashSet("verify", "ask", "say", "solve", "run", "execute");

    final String filepathPrefix;
    String delimiter;
    Map<String, Integer> biGramWordMap;
    Map<String, Integer> triGramWordMap;
    Map<String, Integer> biGramDepMap;
    Map<String, Integer> triGramDepMap;
    boolean isRealFeature = false;
    IParser parser = new StanfordPCFGParser();

    /**
     * Creates an extractor and loads the four n-gram maps from {@code str + "/bigram_word"},
     * {@code "/trigram_word"}, {@code "/bigram_dep"} and {@code "/trigram_dep"}. A map whose
     * file is missing or unreadable is left empty.
     *
     * @param str  path prefix of the directory holding the serialized n-gram maps
     * @param str2 delimiter placed between fields and features; it is also passed to
     *             {@link String#split(String)}, so it is interpreted as a regular expression there
     */
    public FeatureExtractor(String str, String str2) {
        this.delimiter = str2;
        this.filepathPrefix = str;
        this.biGramWordMap = deserialize(str + "/bigram_word");
        this.triGramWordMap = deserialize(str + "/trigram_word");
        this.biGramDepMap = deserialize(str + "/bigram_dep");
        this.triGramDepMap = deserialize(str + "/trigram_dep");
    }

    /**
     * Appends {@code name} followed by the delimiter {@code times} times when {@code fires} is
     * true; does nothing otherwise.
     */
    private void addFeats(StringBuilder sb, String name, boolean fires, int times) {
        if (!fires) {
            return;
        }
        for (int k = 0; k < times; k++) {
            sb.append(name).append(this.delimiter);
        }
    }

    /**
     * Emits one feature ({@code prefix + ngram}) for every key of {@code ngrams} that occurs in
     * {@code haystack} surrounded by underscores.
     */
    private void addNGramFeats(StringBuilder sb, String prefix, Map<String, Integer> ngrams, String haystack) {
        for (String ngram : ngrams.keySet()) {
            addFeats(sb, prefix + ngram, haystack.contains("_" + ngram + "_"), 1);
        }
    }

    /**
     * Serializes the dependency labels of the tree into a single underscore-joined string,
     * visiting nodes breadth-first starting from the first root.
     *
     * @param dTree dependency tree; must have at least one root
     * @return a string of the form {@code _<DEP>_label1_label2_..._</DEP>_}
     */
    public String getDEPString(DTree dTree) {
        StringBuilder sb = new StringBuilder();
        sb.append("_<DEP>_");
        // LinkedList (rather than ArrayDeque) is kept on purpose: it tolerates null entries,
        // which the null check below silently skips.
        LinkedList<DNode> pending = new LinkedList<>();
        pending.add(dTree.getRoots().get(0));
        while (!pending.isEmpty()) {
            DNode node = pending.poll();
            if (node != null) {
                sb.append(node.getDepLabel()).append("_");
                pending.addAll(node.getChildren());
            }
        }
        sb.append("</DEP>_");
        return sb.toString();
    }

    /**
     * Computes the feature string for one sentence.
     *
     * @param str   raw sentence text (underscores are replaced by
     *              {@code SentenceTypeClassifier.DELIMITER} before processing)
     * @param dTree parse of the sentence; index 1 is read as the first word
     * @return the fired feature names joined by the delimiter, trimmed
     */
    private String generateFeats(String str, DTree dTree) {
        StringBuilder sb = new StringBuilder();
        String sentence = str.trim().replaceAll("_", SentenceTypeClassifier.DELIMITER);
        String lowerSentence = sentence.toLowerCase();

        // Strong cues are repeated once per whitespace-separated token, but at least 10 times.
        int weight = Math.max(sentence.split("\\s").length, 10);

        // Word n-grams are matched against the lower-cased sentence with trailing punctuation
        // stripped, wrapped in sentence markers, and all whitespace turned into underscores.
        String wordString = (" <sentence> " + lowerSentence.replaceAll("\\p{Punct}*$", "") + " </sentence> ")
                .replaceAll("\\s", "_");
        addNGramFeats(sb, "biGramWord_", this.biGramWordMap, wordString);
        addNGramFeats(sb, "triGramWord_", this.triGramWordMap, wordString);

        String depString = getDEPString(dTree);
        addNGramFeats(sb, "biGramDEP_", this.biGramDepMap, depString);
        addNGramFeats(sb, "triGramDEP_", this.triGramDepMap, depString);

        DNode firstWord = dTree.get(1);
        String firstPos = firstWord.getPOS();
        addFeats(sb, "first_word_pos", WH_POS_TAGS.contains(firstPos), 1);

        // When the sentence ends with punctuation, the node before the final one is used.
        int lastWordIndex = sentence.matches(".*\\p{Punct}$") ? dTree.size() - 2 : dTree.size() - 1;
        addFeats(sb, "last_word_pos", WH_POS_TAGS.contains(dTree.get(lastWordIndex).getPOS()), 1);

        addFeats(sb, "first_word_root_verb", firstPos.startsWith("VB") && firstWord.isRoot(), weight);

        // A sequential short-circuiting scan is equivalent to the former
        // parallel().filter().distinct().count() > 0 and avoids the parallel-stream overhead.
        boolean hasAux = dTree.stream().anyMatch(node -> "aux".equals(node.getDepLabel()));
        addFeats(sb, "has_aux", hasAux, 1);

        boolean questionOverHead = false;
        for (String prefix : QUESTION_OVERHEAD_PREFIXES) {
            if (lowerSentence.startsWith(prefix)) {
                questionOverHead = true;
                break;
            }
        }
        addFeats(sb, "question_over_head", questionOverHead, 1);

        addFeats(sb, "has_imperative_keyword",
                IMPERATIVE_LEMMAS.contains(firstWord.getLemma()) && firstWord.isRoot(), weight);

        // Guard against an empty sentence: charAt(length - 1) would otherwise throw.
        char lastChar = sentence.isEmpty() ? '\0' : sentence.charAt(sentence.length() - 1);
        // Only the punctuation feature that fires is emitted; non-firing features produce no output.
        switch (lastChar) {
            case '!':
                addFeats(sb, "punct_exclaim", true, weight);
                break;
            case '.':
            case ';':
                addFeats(sb, "punct_dot", true, 1);
                break;
            case '?':
                addFeats(sb, "punct_question", true, weight);
                break;
            default:
                break;
        }

        addFeats(sb, "whether", lowerSentence.contains("whether"), 1);
        return sb.toString().trim();
    }

    /**
     * Parses the first delimiter-separated field of {@code str} (with underscores replaced by
     * {@code SentenceTypeClassifier.DELIMITER}) using {@link #parser} and delegates to
     * {@link #getFeats(String, DTree)}.
     *
     * @param str input line; see {@link #getFeats(String, DTree)} for the expected layout
     * @return the feature line, or an empty string if the input does not have exactly two fields
     */
    public String getFeats(String str) {
        String sentenceField = str.split(this.delimiter)[0];
        DTree tree = this.parser.parse(sentenceField.replaceAll("_", SentenceTypeClassifier.DELIMITER));
        return getFeats(str, tree);
    }

    /**
     * Builds the feature line for an input made of exactly two delimiter-separated fields.
     *
     * @param str   input line; the first field is the sentence, the second field is copied
     *              through unchanged (presumably the label - confirm against callers)
     * @param dTree parse of the sentence field
     * @return {@code field0 + delimiter + features + delimiter + field1}, trimmed, or an empty
     *         string when splitting on the delimiter does not yield exactly two fields
     */
    public String getFeats(String str, DTree dTree) {
        String[] fields = str.split(this.delimiter);
        if (fields.length != 2) {
            return "";
        }
        String line = fields[0] + this.delimiter + generateFeats(fields[0], dTree) + this.delimiter + fields[1];
        return line.trim();
    }

    /**
     * Reads a serialized map from {@code str}.
     *
     * @param str path of the serialized map file
     * @return the deserialized map, or a new empty map when the file does not exist, is a
     *         directory, or cannot be read
     */
    private Map<String, Integer> deserialize(String str) {
        File file = new File(str);
        if (!file.exists() || file.isDirectory()) {
            return new HashMap<>();
        }
        // NOTE(review): Java-native deserialization is unsafe on untrusted data; this assumes the
        // model files under the path prefix are trusted - confirm, or add an ObjectInputFilter.
        try (FileInputStream fileIn = new FileInputStream(file);
             ObjectInputStream objectIn = new ObjectInputStream(fileIn)) {
            @SuppressWarnings("unchecked") // the file is expected to hold a Map<String, Integer>
            Map<String, Integer> loaded = (Map<String, Integer>) objectIn.readObject();
            return loaded;
        } catch (IOException | ClassNotFoundException e) {
            // Best effort: fall back to an empty map, but record the failure in the log.
            LOG.error("Failed to deserialize n-gram map from {}", str, e);
        }
        return new HashMap<>();
    }
}
