package org.maochen.nlp.app.chunker;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;
import java.util.stream.Collectors;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.classifier.crfsuite.CRFClassifier;
import org.maochen.nlp.ml.util.TrainingDataUtils;
import org.maochen.nlp.ml.vector.LabeledVector;
import org.maochen.nlp.parser.stanford.util.StanfordConst;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/app/chunker/CRFChunker.class */
/**
 * CRF-based chunker built on top of {@link CRFClassifier}.
 *
 * <p>Sequences are read with {@link TrainingDataUtils#readSeqFile}, turned into feature vectors by
 * {@code ChunkerFeatureExtractor.extractFeat} and then handed to the parent classifier for
 * training or prediction.
 */
public class CRFChunker extends CRFClassifier {
    private static final Logger LOG = LoggerFactory.getLogger(CRFChunker.class);

    /** Column delimiter used when reading training / validation files. Mutable on purpose: {@link #main} overrides it. */
    public static String TRAIN_FILE_DELIMITER = "\t";

    /** Prefix of the extracted feature that carries the current token's surface form. */
    private static final String WORD_FEAT_PREFIX = "w0=";

    /** Prefix of the extracted feature that carries the current token's POS tag. */
    private static final String POS_FEAT_PREFIX = "pos0=";

    /** Input lines that terminate the interactive loop in {@link #main}. */
    private static final String QUIT_REGEX = "q|quit|exit";

    /** Model path used by {@link #main} when no first argument is supplied. */
    private static final String DEFAULT_MODEL_PATH = "/Users/mguan/Desktop/chunker.crf.model";

    /** Validation file used by {@link #main} when no second argument is supplied. */
    private static final String DEFAULT_TEST_FILE =
            "/Users/mguan/workspace/nlp-service_training-data/corpora/CoNLL_Shared_Task/CoNLL_2000_Chunking/test.txt";

    /**
     * Trains the model from a sequence file.
     *
     * @param trainFile path of the training file, columns separated by {@link #TRAIN_FILE_DELIMITER}
     * @throws FileNotFoundException if {@code trainFile} cannot be opened
     */
    public void train(String trainFile) throws FileNotFoundException {
        List<SequenceTuple> sequences = loadSequences(trainFile);
        LOG.info("Loaded Training data.");
        LOG.info("Generating feats");
        for (SequenceTuple sequence : sequences) {
            sequence.entries = ChunkerFeatureExtractor.extractFeat(sequence);
        }
        LOG.info("Extracted Feats.");
        super.train(sequences);
    }

    /**
     * Predicts a label for every token of a sentence.
     *
     * @param words tokens of the sentence
     * @param pos   POS tag for each token; must provide at least {@code words.length} entries
     * @return a sequence whose entries hold the extracted features and the predicted label
     * @throws IllegalArgumentException if {@code pos} has fewer entries than {@code words}
     */
    public SequenceTuple predict(String[] words, String[] pos) {
        if (pos.length < words.length) {
            // Previously surfaced as an opaque ArrayIndexOutOfBoundsException half-way through the loop.
            throw new IllegalArgumentException(
                    "Need one POS tag per word: " + words.length + " words but " + pos.length + " tags");
        }

        SequenceTuple sequence = new SequenceTuple();
        sequence.entries = new ArrayList<>();
        for (int i = 0; i < words.length; i++) {
            sequence.entries.add(new Tuple(new LabeledVector(new String[]{words[i], pos[i]})));
        }
        sequence.entries = ChunkerFeatureExtractor.extractFeat(sequence);

        List<String> labels = super.predict(sequence).stream()
                .map(pair -> (String) pair.getLeft())
                .collect(Collectors.toList());
        for (int i = 0; i < labels.size(); i++) {
            sequence.entries.get(i).label = labels.get(i);
        }
        return sequence;
    }

    /**
     * Runs the model over a labelled file and prints every mispredicted sequence followed by the
     * token-level error count and accuracy.
     *
     * @param testFile path of the labelled file, columns separated by {@link #TRAIN_FILE_DELIMITER}
     * @throws FileNotFoundException if {@code testFile} cannot be opened
     */
    public void validate(String testFile) throws FileNotFoundException {
        int errors = 0;
        int total = 0;

        for (SequenceTuple gold : loadSequences(testFile)) {
            total += gold.entries.size();
            List<String> expected = new ArrayList<>(gold.getLabel());

            // The first two raw features are fed to predict() as word and POS tag respectively.
            String[] words = gold.entries.stream().map(t -> t.vector.featsName[0]).toArray(String[]::new);
            String[] pos = gold.entries.stream().map(t -> t.vector.featsName[1]).toArray(String[]::new);
            SequenceTuple predicted = predict(words, pos);

            int sequenceErrors = 0;
            for (int i = 0; i < expected.size(); i++) {
                if (!expected.get(i).equals(predicted.entries.get(i).label)) {
                    sequenceErrors++;
                }
            }
            if (sequenceErrors > 0) {
                // Print each faulty sequence once, however many tokens in it are wrong.
                printSequenceTuple(predicted, expected);
                System.out.println("");
            }
            errors += sequenceErrors;
        }

        System.out.println("Err/Total:\t" + errors + "/" + total);
        if (total == 0) {
            // No tokens were read: the ratio is undefined, so report that instead of dividing by zero.
            System.out.println("Accuracy:\tN/A");
        } else {
            // Promote to double before dividing; int / int would truncate the error ratio to 0 or 1.
            double accuracy = (1.0d - ((double) errors / total)) * 100.0d;
            System.out.println("Accuracy:\t" + accuracy + "%");
        }
    }

    /**
     * Prints one line per token: word, POS tag and predicted label, tab separated. When
     * {@code list} is given and the predicted label differs from the expected one, the expected
     * label is appended to the line.
     *
     * @param sequenceTuple sequence whose entries already carry extracted features and labels
     * @param list          expected labels aligned with the entries, or {@code null} to print predictions only
     */
    public static void printSequenceTuple(SequenceTuple sequenceTuple, List<String> list) {
        List<Tuple> entries = sequenceTuple.entries;
        for (int i = 0; i < entries.size(); i++) {
            Tuple entry = entries.get(i);
            String line = featureValue(entry, WORD_FEAT_PREFIX)
                    + "\t" + featureValue(entry, POS_FEAT_PREFIX)
                    + "\t" + entry.label;
            if (list != null && !entry.label.equals(list.get(i))) {
                line = line + "\tExpected:\t" + list.get(i);
            }
            System.out.println(line);
        }
    }

    /**
     * Interactive demo: validates the model against a test file, then chunks sentences typed on
     * standard input until {@code q}, {@code quit}, {@code exit} or end of input.
     *
     * @param strArr optional arguments: {@code [0]} model path, {@code [1]} validation file;
     *               the built-in defaults are used for whichever is missing
     * @throws IOException if the validation file cannot be opened
     */
    public static void main(String[] strArr) throws IOException {
        String modelPath = strArr.length > 0 ? strArr[0] : DEFAULT_MODEL_PATH;
        String testFile = strArr.length > 1 ? strArr[1] : DEFAULT_TEST_FILE;

        CRFChunker chunker = new CRFChunker();
        TRAIN_FILE_DELIMITER = " ";

        Properties properties = new Properties();
        properties.setProperty("model", modelPath);
        properties.setProperty("algorithm", "l2sgd");
        properties.setProperty("feature.possible_transitions", "1");
        properties.setProperty("feature.possible_states", "1");
        chunker.setParameter(properties);

        chunker.validate(testFile);

        MaxentTagger tagger = new MaxentTagger(StanfordConst.STANFORD_DEFAULT_POS_EN_MODEL);
        try (Scanner scanner = new Scanner(System.in)) {
            while (true) {
                System.out.println("Please enter sentence:");
                if (!scanner.hasNextLine()) {
                    // Standard input was closed; nextLine() would throw NoSuchElementException.
                    break;
                }
                String sentence = scanner.nextLine().trim();
                if (sentence.isEmpty()) {
                    continue;
                }
                if (sentence.matches(QUIT_REGEX)) {
                    break;
                }

                // "\\s+" on the trimmed line avoids empty tokens for repeated or leading whitespace.
                String[] tokens = sentence.split("\\s+");
                List<CoreLabel> coreLabels = Arrays.stream(tokens)
                        .map(CRFChunker::toCoreLabel)
                        .collect(Collectors.toList());
                List<TaggedWord> tagged = tagger.tagSentence(coreLabels);
                String[] posTags = tagged.stream().map(TaggedWord::tag).toArray(String[]::new);

                printSequenceTuple(chunker.predict(tokens, posTags), null);
            }
        }
    }

    /**
     * Reads every sequence from {@code path} and closes the underlying stream afterwards.
     * The trailing {@code 2} is forwarded to {@code readSeqFile} unchanged
     * (NOTE(review): presumably the label column index - confirm against TrainingDataUtils).
     */
    private static List<SequenceTuple> loadSequences(String path) throws FileNotFoundException {
        FileInputStream in = new FileInputStream(new File(path));
        try {
            return TrainingDataUtils.readSeqFile(in, TRAIN_FILE_DELIMITER, 2);
        } finally {
            try {
                // Closing an already closed FileInputStream has no effect, so this is safe
                // even if readSeqFile closes the stream itself.
                in.close();
            } catch (IOException e) {
                LOG.warn("Failed to close " + path, e);
            }
        }
    }

    /**
     * Returns the text following {@code prefix} in the first feature that starts with it, or an
     * empty string when no feature matches. Taking the remainder of the string (instead of
     * splitting on '=') keeps values that are empty or themselves contain '='.
     */
    private static String featureValue(Tuple tuple, String prefix) {
        return Arrays.stream(tuple.vector.featsName)
                .filter(feat -> feat.startsWith(prefix))
                .map(feat -> feat.substring(prefix.length()))
                .findFirst()
                .orElse("");
    }

    /** Wraps a raw token in a {@link CoreLabel} carrying it as word, original text and value. */
    private static CoreLabel toCoreLabel(String token) {
        CoreLabel label = new CoreLabel();
        label.setWord(token);
        label.setOriginalText(token);
        label.setValue(token);
        return label;
    }
}
