package org.maochen.nlp.app.chunker;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Scanner;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.maochen.nlp.app.ISeqTagger;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.classifier.maxent.MaxEntClassifier;
import org.maochen.nlp.ml.vector.LabeledVector;
import org.maochen.nlp.parser.stanford.StanfordParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/app/chunker/Chunker.class */
/**
 * Sequence tagger that assigns one tag per token using the inherited maximum-entropy classifier.
 *
 * <p>Training and validation data are read from delimiter-separated text files with one token per
 * line (columns: word, POS tag, gold tag) and a blank line between sentences. Raw sentences are
 * tokenized and POS-tagged with the Stanford tools before being tagged.
 *
 * <p>NOTE(review): the POS tagger is created lazily in a static field without synchronization and
 * {@link #TRAIN_FILE_DELIMITER} is mutable static state, so this class does not look safe for
 * concurrent use — confirm before sharing instances across threads.
 */
public class Chunker extends MaxEntClassifier implements ISeqTagger {
    /** Key under which the word column is stored in the feature map given to {@link SequenceTuple}. */
    public static final int WORD_INDEX = 0;
    /** Key under which the POS column is stored in the feature map given to {@link SequenceTuple}. */
    public static final int POS_INDEX = 1;
    private static final Logger LOG = LoggerFactory.getLogger(Chunker.class);
    private static final String POS_TAGGER_MODEL =
            "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    private static final String QUIT_COMMAND_REGEX = "q|quit|exit";
    private static final String DEFAULT_TRAIN_FILE = "/Users/mguan/Desktop/CoNLL_2000_Chunking/train.txt";
    private static final String DEFAULT_MODEL_FILE = "/Users/mguan/Desktop/chunker.model";
    private static final String DEFAULT_TEST_FILE = "/Users/mguan/Desktop/CoNLL_2000_Chunking/test.txt";
    /** Lazily created on the first call to {@link #predict(String)}. */
    private static MaxentTagger POS_TAGGER = null;
    /**
     * Column separator of the data files. It is handed to {@link String#split(String)}, so it is
     * interpreted as a regular expression.
     */
    public static String TRAIN_FILE_DELIMITER = "\t";

    /**
     * Reads a column-formatted data file into one {@link SequenceTuple} per sentence.
     *
     * <p>A blank line ends the current sentence. The last sentence is kept even when the file does
     * not end with a blank line, and runs of blank lines do not produce empty sequences. If an
     * {@link IOException} occurs it is logged and the sentences completed so far are returned.
     *
     * @param str path of the file to read
     * @return the sentences read; never {@code null}
     */
    private static Set<SequenceTuple> readFile(String str) {
        Set<SequenceTuple> sequences = new HashSet<>();
        List<String> words = new ArrayList<>();
        List<String> posTags = new ArrayList<>();
        List<String> tags = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(str))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                if (line.trim().isEmpty()) {
                    addSequence(sequences, words, posTags, tags);
                    // Start fresh lists: the finished tuple keeps references to the old ones.
                    words = new ArrayList<>();
                    posTags = new ArrayList<>();
                    tags = new ArrayList<>();
                } else {
                    String[] columns = line.split(TRAIN_FILE_DELIMITER);
                    words.add(columns[0]);
                    posTags.add(columns[1]);
                    tags.add(columns[2]);
                }
            }
            // Flush the final sentence when the file has no trailing blank line.
            addSequence(sequences, words, posTags, tags);
        } catch (IOException e) {
            LOG.error("load data err.", e);
        }
        return sequences;
    }

    /** Wraps the collected columns in a {@link SequenceTuple} and stores it, unless they are empty. */
    private static void addSequence(Set<SequenceTuple> target, List<String> words,
                                    List<String> posTags, List<String> tags) {
        if (words.isEmpty()) {
            return;
        }
        Map<Integer, List<String>> feats = new HashMap<>();
        feats.put(WORD_INDEX, words);
        feats.put(POS_INDEX, posTags);
        target.add(new SequenceTuple(feats, tags));
    }

    /** Collects, for every entry of the sequence, the feature name stored at {@code index}. */
    private static String[] featureColumn(SequenceTuple sequenceTuple, int index) {
        return sequenceTuple.entries.stream()
                .map(entry -> entry.vector.featsName[index])
                .toArray(String[]::new);
    }

    /**
     * Trains the classifier from a column-formatted data file.
     *
     * @param str path of the training file, in the format read by {@code readFile}
     */
    @Override // org.maochen.nlp.app.ISeqTagger
    public void train(String str) {
        Set<SequenceTuple> trainingData = readFile(str);
        LOG.info("Loaded Training data.");
        LOG.info("Generating feats");
        List<Tuple> features = ChunkerFeatureExtractor.extract(trainingData);
        LOG.info("Extracted Feats.");
        super.train(features);
    }

    /**
     * Tokenizes and POS-tags a raw sentence, then tags every token.
     *
     * @param str the sentence to tag
     * @return a sequence holding the tokens, their POS tags and the predicted tags
     */
    @Override // org.maochen.nlp.app.ISeqTagger
    public SequenceTuple predict(String str) {
        List<CoreLabel> tokens = StanfordParser.stanfordTokenize(str);
        if (POS_TAGGER == null) {
            POS_TAGGER = new MaxentTagger(POS_TAGGER_MODEL);
        }
        List<String> words = tokens.stream()
                .map(CoreLabel::originalText)
                .collect(Collectors.toList());
        List<String> posTags = POS_TAGGER.tagSentence(tokens).stream()
                .map(taggedWord -> taggedWord.tag())
                .collect(Collectors.toList());

        Map<Integer, List<String>> feats = new HashMap<>();
        feats.put(WORD_INDEX, words);
        feats.put(POS_INDEX, posTags);

        // One empty placeholder tag per token; predict(SequenceTuple) overwrites them in place.
        List<String> placeholderTags = IntStream.range(0, words.size())
                .mapToObj(ignored -> "")
                .collect(Collectors.toList());

        SequenceTuple sequenceTuple = new SequenceTuple(feats, placeholderTags);
        predict(sequenceTuple);
        return sequenceTuple;
    }

    /**
     * Tags every sentence of a gold-annotated file and prints error statistics to standard output.
     *
     * <p>Each sentence containing at least one wrong tag is printed once, with the expected tag
     * next to every mismatching token. The tags of the sequences read from the file are
     * overwritten by the predictions.
     *
     * @param str path of the gold file, in the format read by {@code readFile}
     */
    public void validate(String str) {
        int errors = 0;
        int total = 0;
        for (SequenceTuple sequence : readFile(str)) {
            total += sequence.tag.size();
            // Copy the gold tags first: predict() replaces sequence.tag in place.
            List<String> expected = new ArrayList<>(sequence.tag);
            predict(sequence);
            boolean printed = false;
            for (int i = 0; i < expected.size(); i++) {
                if (expected.get(i).equals(sequence.tag.get(i))) {
                    continue;
                }
                if (!printed) {
                    printSequenceTuple(sequence, expected);
                    System.out.println("");
                    printed = true;
                }
                errors++;
            }
        }
        System.out.println("Err/Total:\t" + errors + "/" + total);
        if (total == 0) {
            LOG.warn("No tokens found in {}; accuracy is undefined.", str);
            return;
        }
        // Divide in floating point: int / int would truncate the error rate to 0 or 1.
        double errorRate = (double) errors / total;
        System.out.println("Accurancy:\t" + ((1.0d - errorRate) * 100.0d) + "%");
    }

    /**
     * Tags the sequence in place, position by position from left to right.
     *
     * <p>For each position the highest-scoring label returned by the classifier is written to
     * {@code sequenceTuple.tag}, so later positions see the labels already chosen for earlier ones.
     * A {@code null} argument is ignored.
     *
     * @param sequenceTuple the sequence whose {@code tag} list is overwritten
     */
    @Override // org.maochen.nlp.app.ISeqTagger
    public void predict(SequenceTuple sequenceTuple) {
        if (sequenceTuple == null) {
            return;
        }
        String[] words = featureColumn(sequenceTuple, WORD_INDEX);
        String[] posTags = featureColumn(sequenceTuple, POS_INDEX);
        for (int position = 0; position < sequenceTuple.entries.size(); position++) {
            // Fresh snapshot of the current tags for every position.
            String[] currentTags = new String[words.length];
            int known = Math.min(currentTags.length, sequenceTuple.tag.size());
            for (int j = 0; j < known; j++) {
                currentTags[j] = sequenceTuple.tag.get(j);
            }
            String[] feats = ChunkerFeatureExtractor
                    .extractFeatSingle(position, words, posTags, currentTags)
                    .stream()
                    .toArray(String[]::new);
            Tuple candidate = new Tuple(new LabeledVector(feats));
            // get() throws NoSuchElementException if the classifier returns no labels at all.
            candidate.label = super.predict(candidate).entrySet().stream()
                    .max(Map.Entry.comparingByValue())
                    .map(Map.Entry::getKey)
                    .get();
            sequenceTuple.tag.set(position, candidate.label);
        }
    }

    /**
     * Prints one line per token: the token, a tab, and its current tag.
     *
     * @param sequenceTuple the sequence to print
     * @param list expected tags; when non-null, every token whose tag differs from the expected
     *             one is suffixed with {@code "\tExpected:\t"} and the expected tag. May be
     *             {@code null}.
     */
    public static void printSequenceTuple(SequenceTuple sequenceTuple, List<String> list) {
        String[] words = featureColumn(sequenceTuple, WORD_INDEX);
        for (int i = 0; i < words.length; i++) {
            String actual = sequenceTuple.tag.get(i);
            String line = words[i] + "\t" + actual;
            if (list != null && !actual.equals(list.get(i))) {
                line = line + "\tExpected:\t" + list.get(i);
            }
            System.out.println(line);
        }
    }

    /**
     * Trains a model, saves it, validates it, then tags sentences typed on standard input until
     * {@code q}, {@code quit} or {@code exit} is entered or the input ends.
     *
     * @param strArr optional overrides, in order: training file, model output file, test file;
     *               missing values fall back to the built-in default paths
     * @throws IOException declared for the model persistence step
     */
    public static void main(String[] strArr) throws IOException {
        String trainFile = strArr.length > 0 ? strArr[0] : DEFAULT_TRAIN_FILE;
        String modelFile = strArr.length > 1 ? strArr[1] : DEFAULT_MODEL_FILE;
        String testFile = strArr.length > 2 ? strArr[2] : DEFAULT_TEST_FILE;

        Chunker chunker = new Chunker();
        TRAIN_FILE_DELIMITER = " ";
        Properties properties = new Properties();
        properties.put("iterations", "1000");
        chunker.setParameter(properties);
        chunker.train(trainFile);
        chunker.persistModel(modelFile);
        chunker.validate(testFile);

        Scanner scanner = new Scanner(System.in);
        while (true) {
            System.out.println("Please enter sentence:");
            // Stop cleanly at end of input instead of letting nextLine() throw.
            if (!scanner.hasNextLine()) {
                break;
            }
            String line = scanner.nextLine();
            if (line.matches(QUIT_COMMAND_REGEX)) {
                break;
            }
            if (!line.trim().isEmpty()) {
                printSequenceTuple(chunker.predict(line), null);
            }
        }
    }
}
