package com.gengoai.hermes.wordnet;

import com.gengoai.Language;
import com.gengoai.config.Config;
import com.gengoai.hermes.Annotation;
import com.gengoai.hermes.Document;
import com.gengoai.hermes.DocumentFactory;
import com.gengoai.hermes.Types;
import com.gengoai.hermes.corpus.DocumentCollection;
import com.gengoai.hermes.en.ENWordSenseAnnotator;
import com.gengoai.hermes.format.DocFormat;
import com.gengoai.hermes.format.DocFormatParameters;
import com.gengoai.hermes.format.DocFormatProvider;
import com.gengoai.hermes.format.POSCorrection;
import com.gengoai.hermes.format.WholeFileTextFormat;
import com.gengoai.hermes.morphology.PartOfSpeech;
import com.gengoai.io.resource.Resource;
import com.gengoai.string.Strings;
import com.gengoai.tuple.Tuple2;
import com.gengoai.tuple.Tuple4;
import com.gengoai.tuple.Tuples;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

/* loaded from: input_file:com/gengoai/hermes/wordnet/SemCor.class */
/**
 * Read-only document format for SemCor-style markup.
 *
 * <p>The reader scans the raw file content with regular expressions: a {@code <context filename=...>}
 * element supplies the document id, each {@code <s ...>...</s>} element is a sentence, and each
 * {@code <wf>} / {@code <punc>} element inside a sentence contributes one or more tokens. Elements that
 * carry {@code lemma}, {@code wnsn} and {@code pos} attributes are additionally looked up in
 * {@link WordNet} and, when a sense is found, annotated as {@code Types.WORD_SENSE}.
 *
 * <p>NOTE(review): the attribute patterns use {@code \S+}, which also consumes {@code '>'} and any
 * non-whitespace text following it when the attribute is the last one inside the tag. Left unchanged
 * here; confirm against real corpus files before tightening.
 */
public class SemCor extends WholeFileTextFormat {
    /** Captures the value following {@code <context filename=}; used as the document id. */
    static final Pattern docIdPattern = Pattern.compile("<context filename=(\\S+)");
    /** Captures the body of every {@code <s ...>...</s>} element; DOTALL lets a sentence span lines. */
    static final Pattern sentencePattern =
            Pattern.compile("<s\\s*[^>]+>(.*?)</s>", Pattern.MULTILINE | Pattern.DOTALL);
    /** Matches a whole {@code <wf>} or {@code <punc>} element; group 1 is its text content. */
    static final Pattern wordPattern = Pattern.compile("<(?:wf|punc)[^>]*>(.*?)</(?:wf|punc)>");
    /** Captures the value of a {@code cmd=} attribute (not used by this class itself). */
    static final Pattern cmd = Pattern.compile("cmd=(\\S+)");
    /** Captures the value of a {@code lemma=} attribute. */
    static final Pattern lemma = Pattern.compile("lemma=(\\S+)");
    /** Captures the numeric value of a {@code wnsn=} attribute. */
    static final Pattern wnsn = Pattern.compile("wnsn=(\\d+)");
    /** Captures the value of a {@code pos=} attribute. */
    static final Pattern posPattern = Pattern.compile("pos=(\\S+)");

    /** Provider that registers this format under the name {@code SEMCOR}. */
    public static class Provider implements DocFormatProvider {
        /**
         * Creates a new {@link SemCor} format instance.
         *
         * @param docFormatParameters ignored; the format takes no configuration
         * @return a fresh {@link SemCor}
         */
        public DocFormat create(DocFormatParameters docFormatParameters) {
            return new SemCor();
        }

        /**
         * @return the format name, {@code "SEMCOR"}
         */
        public String getName() {
            return "SEMCOR";
        }

        /**
         * @return {@code false}; both {@code write} methods of {@link SemCor} are no-ops
         */
        public boolean isWriteable() {
            return false;
        }
    }

    /**
     * Ad-hoc driver: loads a CoNLL corpus from a hard-coded local path, prints its size and then the
     * SGML form of every {@code Types.MWE} annotation of every document.
     *
     * @param strArr command line arguments (unused)
     * @throws Exception if configuration or corpus loading fails
     */
    public static void main(String[] strArr) throws Exception {
        Config.initializeTest();
        Config.setProperty("Corpus.reportLevel", Level.INFO.getName());
        Config.setProperty("Corpus.reportInterval", 5);
        DocumentCollection cache = DocumentCollection.create("conll::/home/ik/temp/documents/part-000;fields=INDEX,WORD,POS,CHUNK,ENTITY,SUPER_SENSE,IGNORE,IGNORE,IGNORE").cache();
        System.out.println(cache.size());
        cache.forEach(document -> {
            Iterator<Annotation> mweIterator = document.annotations(Types.MWE).iterator();
            while (mweIterator.hasNext()) {
                System.out.println(mweIterator.next().toSGML());
            }
        });
    }

    /**
     * @return a new default {@link DocFormatParameters}; this format defines no parameters of its own
     */
    public DocFormatParameters getParameters() {
        return new DocFormatParameters();
    }

    /**
     * Parses the full text of one SemCor file into a single document.
     *
     * <p>Tokens are collected across all sentences first, the document is built from that token list,
     * and sentence and word-sense annotations are then attached using token indices.
     *
     * @param str the complete content of the file
     * @return a one-element stream holding the parsed document, marked as having provided
     *         {@code SENTENCE}, {@code TOKEN} and {@code WORD_SENSE} annotations
     * @throws NumberFormatException if a {@code wnsn} value does not fit in an {@code int}
     */
    protected Stream<Document> readSingleFile(String str) {
        Matcher docIdMatcher = docIdPattern.matcher(str);
        String docId = docIdMatcher.find() ? docIdMatcher.group(1) : null;

        List<String> tokens = new ArrayList<>();
        // Inclusive (first token index, last token index) per sentence.
        List<Tuple2<Integer, Integer>> sentenceSpans = new ArrayList<>();
        // Parallel lists: sensePos.get(i) is the part of speech for senses.get(i).
        List<PartOfSpeech> sensePos = new ArrayList<>();
        // (first token index, last token index, lemma, sense number) per sense-tagged element.
        List<Tuple4<Integer, Integer, String, Integer>> senses = new ArrayList<>();

        Matcher sentenceMatcher = sentencePattern.matcher(str);
        while (sentenceMatcher.find()) {
            int sentenceStart = tokens.size();
            Matcher wordMatcher = wordPattern.matcher(sentenceMatcher.group(1));
            // Loop ends when the sentence has no further wf/punc elements.
            while (wordMatcher.find()) {
                String element = wordMatcher.group(0);
                String lemmaValue = Strings.firstMatch(lemma, element, 1);
                String senseNumber = Strings.firstMatch(wnsn, element, 1);
                String posValue = Strings.firstMatch(posPattern, element, 1);

                // Multi-word expressions are joined with '_' in the element text; each part is a token.
                int firstToken = tokens.size();
                for (String part : wordMatcher.group(1).split("_")) {
                    tokens.add(POSCorrection.word(part, posValue));
                }
                int lastToken = tokens.size() - 1;

                boolean senseTagged = Strings.isNotNullOrBlank(lemmaValue)
                        && Strings.isNotNullOrBlank(senseNumber)
                        && Strings.isNotNullOrBlank(posValue);
                // Skip elements that produced no token: their span would end before it starts.
                if (senseTagged && lastToken >= firstToken) {
                    // Sense numbers below 1 are clamped to 1.
                    int sense = Math.max(1, Integer.parseInt(senseNumber));
                    senses.add(Tuples.$(firstToken, lastToken, lemmaValue.replace('_', ' '), sense));
                    sensePos.add(PartOfSpeech.valueOf(posValue));
                }
            }
            // A sentence element without any wf/punc tokens has nothing to span.
            if (tokens.size() > sentenceStart) {
                sentenceSpans.add(Tuples.$(sentenceStart, tokens.size() - 1));
            }
        }

        Document fromTokens = DocumentFactory.getInstance().fromTokens(tokens, Language.ENGLISH);
        fromTokens.setId(docId);

        int sentenceIndex = 0;
        for (Tuple2<Integer, Integer> span : sentenceSpans) {
            fromTokens.annotationBuilder(Types.SENTENCE)
                    .start(fromTokens.tokenAt(span.v1).start())
                    .end(fromTokens.tokenAt(span.v2).end())
                    .attribute(Types.INDEX, sentenceIndex)
                    .createAttached();
            sentenceIndex++;
        }

        for (int i = 0; i < senses.size(); i++) {
            Tuple4<Integer, Integer, String, Integer> tagged = senses.get(i);
            Sense sense = WordNet.getInstance()
                    .getSense(tagged.v3, sensePos.get(i), tagged.v4, Language.ENGLISH)
                    .orElse(null);
            // Elements whose lemma/pos/sense number cannot be resolved are left unannotated.
            if (sense != null) {
                fromTokens.annotationBuilder(Types.WORD_SENSE)
                        .start(fromTokens.tokenAt(tagged.v1).start())
                        .end(fromTokens.tokenAt(tagged.v2).end())
                        .attribute(ENWordSenseAnnotator.SENSE, sense)
                        .createAttached();
            }
        }

        fromTokens.setCompleted(Types.SENTENCE, "PROVIDED");
        fromTokens.setCompleted(Types.TOKEN, "PROVIDED");
        fromTokens.setCompleted(Types.WORD_SENSE, "PROVIDED");
        return Stream.of(fromTokens);
    }

    /**
     * No-op: this format is read-only ({@link Provider#isWriteable()} returns {@code false}).
     *
     * @param document the document that would be written (ignored)
     * @param resource the target resource (ignored)
     * @throws IOException never thrown by this implementation
     */
    public void write(Document document, Resource resource) throws IOException {
        // Intentionally empty: writing SemCor is not supported.
    }

    /**
     * No-op: this format is read-only ({@link Provider#isWriteable()} returns {@code false}).
     *
     * @param documentCollection the collection that would be written (ignored)
     * @param resource           the target resource (ignored)
     * @throws IOException never thrown by this implementation
     */
    public void write(DocumentCollection documentCollection, Resource resource) throws IOException {
        // Intentionally empty: writing SemCor is not supported.
    }
}
