package org.maochen.nlp.app.ner;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Scanner;
import java.util.stream.Collectors;
import org.maochen.nlp.app.featextractor.IFeatureExtractor;
import org.maochen.nlp.ml.SequenceTuple;
import org.maochen.nlp.ml.Tuple;
import org.maochen.nlp.ml.classifier.crfsuite.CRFClassifier;
import org.maochen.nlp.ml.util.TrainingDataUtils;
import org.maochen.nlp.ml.vector.LabeledVector;
import org.maochen.nlp.util.ValidationUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/maochen/nlp/app/ner/CRFNER.class */
/**
 * CRF sequence tagger built on {@link CRFClassifier}.
 *
 * <p>Adds file-based training, prediction over a raw token array, and a simple
 * token-level validation report. Features are produced by the pluggable
 * {@link #featureExtractor}, which must be assigned before calling
 * {@link #train(String)}, {@link #predict(String[])} or {@link #validate(String)}.
 */
public class CRFNER extends CRFClassifier {
    private static final Logger LOG = LoggerFactory.getLogger(CRFNER.class);

    /** Column delimiter handed to {@link TrainingDataUtils#readSeqFile} for training/validation files. */
    public static String TRAIN_FILE_DELIMITER = "\t";

    /** Tag column index handed to {@link TrainingDataUtils#readSeqFile}. */
    public static final int TRAIN_FILE_TAG_COL = 1;

    /** Feature extractor applied to every sequence before training and prediction. */
    public IFeatureExtractor featureExtractor;

    /**
     * Reads the sequence file at {@code path} and makes sure the underlying stream is closed.
     *
     * @param path location of the delimited sequence file
     * @return the sequences parsed by {@link TrainingDataUtils#readSeqFile}
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static List<SequenceTuple> readSequences(String path) throws FileNotFoundException {
        FileInputStream in = new FileInputStream(new File(path));
        try {
            return TrainingDataUtils.readSeqFile(in, TRAIN_FILE_DELIMITER, TRAIN_FILE_TAG_COL);
        } finally {
            try {
                // Closing an already-closed FileInputStream is a no-op, so this is safe
                // even if the reader closed the stream itself.
                in.close();
            } catch (IOException e) {
                // The data has already been read; a failed close must not mask the result
                // or change the checked exceptions callers have to handle.
                LOG.warn("Failed to close " + path, e);
            }
        }
    }

    /**
     * Trains the model from a sequence file.
     *
     * @param str path of the training file
     * @throws FileNotFoundException if the training file cannot be opened
     */
    public void train(String str) throws FileNotFoundException {
        List<SequenceTuple> trainingData = readSequences(str);
        LOG.info("Loaded Training data.");
        LOG.info("Generating feats");
        for (SequenceTuple sequence : trainingData) {
            sequence.entries = this.featureExtractor.extractFeat(sequence);
        }
        LOG.info("Extracted Feats.");
        super.train(trainingData);
    }

    /**
     * Tags a tokenised sentence.
     *
     * @param strArr tokens, one entry per word
     * @return a sequence whose entries carry the extracted features and the predicted label
     */
    public SequenceTuple predict(String[] strArr) {
        SequenceTuple sequence = new SequenceTuple();
        sequence.entries = new ArrayList<>(strArr.length);
        for (String token : strArr) {
            sequence.entries.add(new Tuple(new LabeledVector(new String[]{token})));
        }
        sequence.entries = this.featureExtractor.extractFeat(sequence);

        // Only the left element of each prediction (the label) is used here.
        List<String> labels = super.predict(sequence).stream()
                .map(prediction -> (String) prediction.getLeft())
                .collect(Collectors.toList());
        for (int idx = 0; idx < labels.size(); idx++) {
            sequence.entries.get(idx).label = labels.get(idx);
        }
        return sequence;
    }

    /**
     * Runs the model over a labelled sequence file and prints token-level error count and accuracy
     * to standard output. Each sequence containing at least one mismatch is printed once.
     *
     * <p>If the file contains no tokens the accuracy is reported as {@code NaN}.
     *
     * @param str path of the labelled validation file
     * @throws FileNotFoundException if the validation file cannot be opened
     */
    public void validate(String str) throws FileNotFoundException {
        int errors = 0;
        int total = 0;
        for (SequenceTuple gold : readSequences(str)) {
            total += gold.entries.size();
            // Copy the gold labels before predicting.
            List<String> expected = new ArrayList<>(gold.getLabel());
            String[] tokens = gold.entries.stream()
                    .map(tuple -> tuple.vector.featsName[0])
                    .toArray(String[]::new);
            SequenceTuple predicted = predict(tokens);

            boolean printed = false;
            for (int idx = 0; idx < expected.size(); idx++) {
                if (expected.get(idx).equals(predicted.entries.get(idx).label)) {
                    continue;
                }
                if (!printed) {
                    ValidationUtils.printSequenceTuple(predicted, expected);
                    System.out.println("");
                    printed = true;
                }
                errors++;
            }
        }
        System.out.println("Err/Total:\t" + errors + "/" + total);
        // Divide in floating point: integer division would truncate the error rate to 0
        // (reporting 100% accuracy) and throw on an empty validation set.
        double errorRate = (double) errors / total;
        System.out.println("Accuracy:\t" + ((1.0d - errorRate) * 100.0d) + "%");
    }

    /**
     * Demo entry point: trains on a fixed file, reloads the model, validates on the same file and
     * then tags sentences typed on standard input until {@code q}, {@code quit} or {@code exit}
     * is entered or the input ends.
     *
     * @param strArr ignored
     * @throws IOException if the training or validation file cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        final String modelPath = "/Users/mguan/Desktop/ner.crf.model";
        final String dataPath = "/Users/mguan/Desktop/npaper.collx.txt";

        CRFNER crfner = new CRFNER();
        crfner.featureExtractor = new NERFeatureExtractor();
        Properties properties = new Properties();
        properties.setProperty("model", modelPath);
        properties.setProperty("algorithm", "l2sgd");
        properties.setProperty("feature.possible_transitions", "1");
        properties.setProperty("feature.possible_states", "1");
        crfner.setParameter(properties);
        crfner.train(dataPath);
        properties.setProperty("model", modelPath);
        crfner.setParameter(properties);
        crfner.loadModel(null);
        crfner.validate(dataPath);

        Scanner scanner = new Scanner(System.in);
        while (true) {
            System.out.println("Please enter sentence:");
            // Stop cleanly on end-of-input instead of failing with NoSuchElementException.
            if (!scanner.hasNextLine()) {
                break;
            }
            String line = scanner.nextLine().trim();
            if (line.matches("q|quit|exit")) {
                break;
            }
            if (line.isEmpty()) {
                continue;
            }
            // Split on runs of whitespace so repeated spaces do not yield empty tokens.
            ValidationUtils.printSequenceTuple(crfner.predict(line.split("\\s+")), null);
        }
    }
}
