package org.maochen.nlp.parser.stanford.util;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.process.Tokenizer;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.EnglishGrammaticalStructure;
import edu.stanford.nlp.trees.SemanticHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TypedDependency;
import java.io.FileFilter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.maochen.nlp.parser.DNode;
import org.maochen.nlp.parser.DTree;
import org.maochen.nlp.parser.LangTools;
import org.maochen.nlp.parser.stanford.StanfordParser;
import org.maochen.nlp.parser.stanford.nn.StanfordNNDepParser;
import org.maochen.nlp.parser.stanford.pcfg.StanfordTreeBuilder;

/* loaded from: input_file:org/maochen/nlp/parser/stanford/util/StanfordParserUtils.class */
/**
 * Static helpers built on Stanford CoreNLP: tokenization, sentence segmentation, conversion of
 * CoreNLP typed dependencies into a {@link DTree}, and dumping a treebank to a text file.
 */
public class StanfordParserUtils {

    /** Options handed to {@link PTBTokenizer#factory} by {@link #segmenter(String)}. */
    private static final String SEGMENTER_TOKENIZER_OPTIONS = "normalizeCurrency=false,ptb3Escaping=false";

    /** Relation label given to tokens that have no entry in the dependency collection. */
    private static final String ERASED_RELATION = "erased";

    /** Named-entity tag that marks "no entity"; such tokens get no named entity on the node. */
    private static final String NO_ENTITY_TAG = "O";

    /** Temporary feature key used to carry the head index until all nodes exist. */
    private static final String HEAD_FEATURE = "head";

    /**
     * Tokenizes the text with {@link StanfordParser#stanfordTokenize} and returns the original
     * text of every token.
     *
     * @param str text to tokenize; may be {@code null}
     * @return the original text of each token in order, or {@code null} when {@code str} is
     *         {@code null}
     */
    public static List<String> tokenize(String str) {
        if (str == null) {
            return null;
        }
        return StanfordParser.stanfordTokenize(str).stream()
                .map(token -> token.originalText())
                .collect(Collectors.toList());
    }

    /**
     * Splits the text into sentences. The text is tokenized with a {@link PTBTokenizer}, the
     * tokens are grouped by {@link WordToSentenceProcessor}, and each sentence is cut out of the
     * original string from the end of the previous sentence to the end offset of its last token,
     * then trimmed.
     *
     * @param str text to segment; may be {@code null}
     * @return the trimmed sentence strings in order, or {@code null} when {@code str} is
     *         {@code null}
     */
    public static List<String> segmenter(String str) {
        if (str == null) {
            return null;
        }

        Tokenizer<CoreLabel> tokenizer = PTBTokenizer
                .factory(new CoreLabelTokenFactory(), SEGMENTER_TOKENIZER_OPTIONS)
                .getTokenizer(new StringReader(str));
        List<CoreLabel> tokens = new ArrayList<>();
        while (tokenizer.hasNext()) {
            tokens.add(tokenizer.next());
        }

        List<List<CoreLabel>> sentences = new WordToSentenceProcessor<CoreLabel>().process(tokens);
        List<String> result = new ArrayList<>(sentences.size());
        int sentenceStart = 0;
        for (List<CoreLabel> sentence : sentences) {
            if (sentence.isEmpty()) {
                // Defensive: there is no last token to take an end offset from.
                continue;
            }
            int sentenceEnd = sentence.get(sentence.size() - 1).endPosition();
            result.add(str.substring(sentenceStart, sentenceEnd).trim());
            sentenceStart = sentenceEnd;
        }
        return result;
    }

    /**
     * Builds a {@link DTree} from CoreNLP tokens and their typed dependencies.
     *
     * <p>Tokens are numbered from 1 in list order. A token whose number has no dependency entry
     * gets head index 0 and the relation {@code "erased"}. If the first token has no lemma,
     * {@link StanfordNNDepParser#tagLemma} is run over the whole list first. Begin/end character
     * offsets are stored as the features {@code index_start} / {@code index_end} unless they are
     * {@code -1}.
     *
     * @param collection typed dependencies, looked up by the index of their dependent
     * @param list       tokens of the sentence, in order
     * @return the tree with heads and children linked
     */
    public static DTree getDTreeFromCoreNLP(Collection<TypedDependency> collection, List<CoreLabel> list) {
        Map<Integer, TypedDependency> dependencyByDependent = new HashMap<>(collection.size());
        for (TypedDependency dependency : collection) {
            dependencyByDependent.put(dependency.dep().index(), dependency);
        }

        // Guard against an empty sentence: get(0) would throw IndexOutOfBoundsException.
        if (!list.isEmpty() && list.get(0).lemma() == null) {
            StanfordNNDepParser.tagLemma(list);
        }

        DTree dTree = new DTree();
        int tokenId = 1;
        for (CoreLabel token : list) {
            String pos = token.tag();
            String coarseTag = token.get(CoreAnnotations.CoarseTagAnnotation.class);
            String cPos = coarseTag != null ? coarseTag : LangTools.getCPOSTag(pos);

            // Single lookup instead of containsKey + get for both the head and the relation.
            TypedDependency dependency = dependencyByDependent.get(tokenId);
            int headIndex = dependency != null ? dependency.gov().index() : 0;
            String relation = dependency != null ? dependency.reln().toString() : ERASED_RELATION;

            DNode node = new DNode(tokenId, token.originalText(), token.lemma(), cPos, pos, relation);

            String namedEntity = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
            if (namedEntity != null && !namedEntity.equalsIgnoreCase(NO_ENTITY_TAG)) {
                node.setNamedEntity(namedEntity);
            }

            // The head node may not exist yet, so park its index as a feature for the second pass.
            node.addFeature(HEAD_FEATURE, String.valueOf(headIndex));
            if (token.beginPosition() != -1) {
                node.addFeature("index_start", String.valueOf(token.beginPosition()));
            }
            if (token.endPosition() != -1) {
                node.addFeature("index_end", String.valueOf(token.endPosition()));
            }
            dTree.add(node);
            tokenId++;
        }

        // Second pass: resolve the parked head index into real head/child links, then drop it.
        dTree.stream()
                .filter(node -> node != dTree.getPaddingNode())
                .forEach(node -> {
                    int headIndex = Integer.parseInt(node.getFeature(HEAD_FEATURE));
                    DNode head = (DNode) dTree.get(headIndex);
                    node.setHead(head);
                    head.addChild(node);
                    node.getFeats().remove(HEAD_FEATURE);
                });
        return dTree;
    }

    /**
     * Prints a progress line to standard output for every 1000th item.
     *
     * @param i  zero-based index of the item being processed
     * @param i2 total number of items
     */
    private static void count(int i, int i2) {
        int current = i + 1;
        if (current % 1000 == 0) {
            System.out.println("Processing " + current + " of " + i2);
        }
    }

    /**
     * Loads a treebank from disk, converts every tree with
     * {@link #convertTreeBankToCoNLLX(String)} (in parallel), and writes the results to a file.
     * For each converted tree the element at index 0 is removed, then the tree's
     * {@code toString()} is written followed by two line separators.
     *
     * <p>I/O failures are reported with {@code printStackTrace} and not rethrown; writing stops
     * at the first failure and the writer is always closed.
     *
     * @param str        path handed to {@link DiskTreebank#loadPath}
     * @param fileFilter filter selecting which files under the path are loaded
     * @param str2       path of the output file
     */
    public static void convertTreebankToCoNLLX(String str, FileFilter fileFilter, String str2) {
        DiskTreebank treebank = new DiskTreebank();
        treebank.loadPath(str, fileFilter);

        int total = treebank.size();
        // Shared across worker threads of the parallel stream, hence atomic. A plain captured
        // int would stay at its initial value and the progress line would never be printed.
        AtomicInteger processed = new AtomicInteger();
        List<DTree> trees = treebank.parallelStream()
                .map(tree -> {
                    count(processed.getAndIncrement(), total);
                    return convertTreeBankToCoNLLX(tree.pennString());
                })
                .collect(Collectors.toList());

        // try-with-resources closes (and thereby flushes) the writer even when a write fails.
        try (FileWriter writer = new FileWriter(str2)) {
            for (DTree dTree : trees) {
                // Drop the element at index 0 before printing (presumably the padding node).
                dTree.remove(0);
                writer.write(dTree.toString());
                writer.write(System.lineSeparator());
                writer.write(System.lineSeparator());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts one bracketed tree string into a {@link DTree}. The string is parsed with
     * {@link Tree#valueOf(String)}, typed dependencies are extracted through an
     * {@link EnglishGrammaticalStructure} that uses an accept-everything filter and a
     * {@code SemanticHeadFinder(false)}, the tree's tagged yield is lemmatized via
     * {@link StanfordParser#tagLemma}, and the result is assembled by
     * {@link StanfordTreeBuilder#generate}.
     *
     * @param str tree in the textual form accepted by {@link Tree#valueOf(String)}
     * @return the dependency tree produced by {@link StanfordTreeBuilder#generate}
     */
    public static DTree convertTreeBankToCoNLLX(String str) {
        Tree constituencyTree = Tree.valueOf(str);
        Collection<TypedDependency> dependencies =
                new EnglishGrammaticalStructure(constituencyTree, label -> true, new SemanticHeadFinder(false))
                        .typedDependencies();
        List<CoreLabel> tokens = constituencyTree.taggedLabeledYield();
        StanfordParser.tagLemma(tokens);
        return StanfordTreeBuilder.generate(tokens, dependencies, null);
    }
}
