package org.maochen.nlp.app.chunker;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.classifier.maxent.MaxEntClassifier;
import org.maochen.nlp.ml.util.TrainingDataUtils;
import org.maochen.nlp.ml.vector.LabeledVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/app/chunker/MaxEntChunker.class */
/**
 * Chunker built on top of {@link MaxEntClassifier}.
 *
 * <p>Trains from a delimited sequence file, predicts one label per token from the token's word and
 * POS tag, and can report token-level accuracy against a labeled file.
 */
public class MaxEntChunker extends MaxEntClassifier {
    private static final Logger LOG = LoggerFactory.getLogger(MaxEntChunker.class);

    /**
     * Column delimiter handed to {@link TrainingDataUtils#readSeqFile} by {@link #train(String)} and
     * {@link #validate(String)}. Deliberately mutable: {@link #main(String[])} switches it to a
     * single space before reading its corpus.
     */
    public static String TRAIN_FILE_DELIMITER = "\t";

    /** Paths used by {@link #main(String[])} when the matching command-line argument is absent. */
    private static final String DEFAULT_TRAIN_FILE =
            "/Users/mguan/workspace/nlp-service_training-data/corpora/CoNLL_Shared_Task/CoNLL_2000_Chunking/train.txt";
    private static final String DEFAULT_MODEL_FILE = "/Users/mguan/Desktop/chunker.maxent.model";
    private static final String DEFAULT_TEST_FILE =
            "/Users/mguan/workspace/nlp-service_training-data/corpora/CoNLL_Shared_Task/CoNLL_2000_Chunking/test.txt";

    /**
     * Reads the sequence file at {@code str}, extracts features for every sequence and trains the
     * underlying classifier on the flattened tuples.
     *
     * @param str path of the training file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public void train(String str) throws FileNotFoundException {
        List<SequenceTuple> sequences = readSequences(str);
        LOG.info("Loaded Training data.");
        LOG.info("Generating feats");
        List<Tuple> trainingTuples = sequences.parallelStream()
                .map(ChunkerFeatureExtractor::extractFeat)
                .flatMap(feats -> feats.stream())
                .collect(Collectors.toList());
        LOG.info("Extracted Feats.");
        super.train(trainingTuples);
    }

    /**
     * Labels every token of a sentence.
     *
     * <p>For each position the features come from
     * {@code ChunkerFeatureExtractor.extractFeatSingle}, every feature gets the value {@code 1.0},
     * and the label with the highest score returned by the classifier is stored on the tuple.
     *
     * @param strArr  the tokens of the sentence
     * @param strArr2 the POS tags, one per token (extra trailing tags are not used to build tuples)
     * @return a sequence holding one labeled tuple per token, in input order
     * @throws IllegalArgumentException if there are fewer POS tags than tokens
     * @throws IllegalStateException    if the classifier returns no outcome for a token
     */
    public SequenceTuple predict(String[] strArr, String[] strArr2) {
        if (strArr2.length < strArr.length) {
            // Fail up front with context instead of an ArrayIndexOutOfBoundsException mid-loop.
            throw new IllegalArgumentException(
                    "Expected one POS tag per token but got " + strArr.length + " tokens and "
                            + strArr2.length + " POS tags");
        }

        SequenceTuple result = new SequenceTuple();
        result.entries = new ArrayList<>(strArr.length);
        for (int idx = 0; idx < strArr.length; idx++) {
            Tuple tuple = new Tuple(new LabeledVector(new String[]{strArr[idx], strArr2[idx]}));

            String[] featNames = ChunkerFeatureExtractor.extractFeatSingle(idx, strArr, strArr2)
                    .stream()
                    .toArray(String[]::new);
            tuple.vector.featsName = featNames;
            // Every extracted feature is a binary indicator that is "on".
            tuple.vector.setVector(IntStream.range(0, featNames.length).mapToDouble(k -> 1.0d).toArray());

            final int position = idx;
            tuple.label = super.predict(tuple).entrySet().stream()
                    .max((left, right) -> Double.compare(left.getValue(), right.getValue()))
                    .map(best -> best.getKey())
                    .orElseThrow(() -> new IllegalStateException(
                            "Classifier returned no outcomes for token at index " + position));
            result.entries.add(tuple);
        }
        return result;
    }

    /**
     * Runs {@link #predict(String[], String[])} on every sequence of a labeled file and prints the
     * token-level error count and accuracy to standard output. The first mismatch in a sequence
     * also prints that sequence via {@code CRFChunker.printSequenceTuple}.
     *
     * @param str path of the labeled file to evaluate against
     * @throws FileNotFoundException if the file cannot be opened
     */
    public void validate(String str) throws FileNotFoundException {
        int errors = 0;
        int total = 0;
        for (SequenceTuple gold : readSequences(str)) {
            total += gold.entries.size();
            List<String> expected = new ArrayList<>(gold.getLabel());

            // featsName[0] / featsName[1] are the two raw columns that predict() takes as inputs.
            String[] words = gold.entries.stream().map(t -> t.vector.featsName[0]).toArray(String[]::new);
            String[] posTags = gold.entries.stream().map(t -> t.vector.featsName[1]).toArray(String[]::new);
            SequenceTuple predicted = predict(words, posTags);

            boolean printed = false;
            for (int k = 0; k < expected.size(); k++) {
                if (expected.get(k).equals(predicted.entries.get(k).label)) {
                    continue;
                }
                if (!printed) {
                    // Dump each faulty sequence only once, however many tokens are wrong.
                    CRFChunker.printSequenceTuple(predicted, expected);
                    System.out.println("");
                    printed = true;
                }
                errors++;
            }
        }
        System.out.println("Err/Total:\t" + errors + "/" + total);
        if (total == 0) {
            // Nothing was evaluated; avoid dividing by zero.
            System.out.println("Accuracy:\tN/A");
        } else {
            // Divide in floating point: int / int would truncate the error rate to 0 or 1.
            double accuracy = (1.0d - ((double) errors / total)) * 100.0d;
            System.out.println("Accuracy:\t" + accuracy + "%");
        }
    }

    /**
     * Trains a chunker, persists the model, reloads it and validates it.
     *
     * @param strArr optional overrides: {@code [0]} training file, {@code [1]} model file,
     *               {@code [2]} test file; missing entries fall back to the built-in defaults
     * @throws IOException if a file cannot be opened, or if persisting/loading the model fails
     */
    public static void main(String[] strArr) throws IOException {
        String trainFile = argOrDefault(strArr, 0, DEFAULT_TRAIN_FILE);
        String modelFile = argOrDefault(strArr, 1, DEFAULT_MODEL_FILE);
        String testFile = argOrDefault(strArr, 2, DEFAULT_TEST_FILE);

        MaxEntChunker chunker = new MaxEntChunker();
        TRAIN_FILE_DELIMITER = " ";
        chunker.setParameter(new Properties());
        chunker.train(trainFile);
        chunker.persistModel(modelFile);
        try (FileInputStream modelIn = new FileInputStream(new File(modelFile))) {
            chunker.loadModel(modelIn);
        }
        chunker.validate(testFile);
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /**
     * Reads all sequences from {@code path} using {@link #TRAIN_FILE_DELIMITER} and always closes
     * the underlying stream. The trailing {@code 2} is passed through to
     * {@code TrainingDataUtils.readSeqFile} unchanged (presumably the label column index; confirm
     * against that helper).
     */
    private static List<SequenceTuple> readSequences(String path) throws FileNotFoundException {
        FileInputStream in = new FileInputStream(new File(path));
        try {
            return TrainingDataUtils.readSeqFile(in, TRAIN_FILE_DELIMITER, 2);
        } finally {
            try {
                in.close();
            } catch (IOException e) {
                // The data has already been read (or reading already failed); a close failure
                // must not mask that outcome nor widen the declared checked exceptions.
                LOG.warn("Failed to close {}", path, e);
            }
        }
    }
}
