package lt.tokenmill.uima.dictionaryannotator;

import com.google.common.base.Charsets;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.stream.Collectors;
import lt.tokenmill.uima.dictionaryannotator.type.DictionaryEntry;
import opennlp.uima.tokenize.SimpleTokenizer;
import opennlp.uima.util.UimaUtil;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.AggregateBuilder;
import org.apache.uima.fit.factory.AnalysisEngineFactory;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.junit.Assert;
import org.junit.Test;

/* loaded from: input_file:lt/tokenmill/uima/dictionaryannotator/DictionaryAnnotatorTest.class */
/**
 * End-to-end tests for {@link DictionaryAnnotator}.
 *
 * <p>Each test runs a small aggregate pipeline (OpenNLP {@link SimpleTokenizer} followed by the
 * dictionary annotator) over a text loaded from the classpath and then checks the
 * {@link DictionaryEntry} annotations found in the resulting CAS: how many there are, which text
 * they cover and which feature values they carry.
 */
public class DictionaryAnnotatorTest {

    /**
     * Default matching with only the {@code base} feature mapped ("1 -> base").
     * The {@code id} feature is not mapped in this configuration, and the test expects every
     * entry to report {@code 0} for it.
     */
    @Test
    public void testFeatureAssignment() throws Exception {
        AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(
                DictionaryAnnotator.class,
                "dictionaryLocation", "classpath:nlproc-dictionary.csv",
                "tokenizerClass", SimpleOpenNlpTokenizer.class.getName(),
                "annotationType", DictionaryEntry.class.getName(),
                "featureMapping", Arrays.asList("1 -> base"));

        // Typed collection: a raw Collection would make the stream lambdas operate on Object.
        Collection<DictionaryEntry> entries =
                JCasUtil.select(process(description, loadText("wiki-nlproc.txt")), DictionaryEntry.class);

        Assert.assertEquals(8, entries.size());
        Assert.assertEquals(
                Lists.newArrayList(
                        "hand-written rules",
                        "machine learning",
                        "Natural language generation",
                        "Natural language understanding",
                        "Natural language search"),
                entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
        // Not de-duplicated on purpose: one base value is expected per annotation, in order.
        Assert.assertEquals(
                Lists.newArrayList("method", "method", "method", "method", "task", "task", "task", "task"),
                entries.stream().map(DictionaryEntry::getBase).collect(Collectors.toList()));
        Assert.assertEquals(
                Lists.newArrayList(0),
                entries.stream().map(DictionaryEntry::getId).distinct().collect(Collectors.toList()));
    }

    /**
     * Same dictionary and text as {@link #testFeatureAssignment()}, but with
     * {@code caseSensitive = false} and both {@code base} and {@code id} mapped.
     */
    @Test
    public void testCaseInsensitive() throws Exception {
        AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(
                DictionaryAnnotator.class,
                "dictionaryLocation", "classpath:nlproc-dictionary.csv",
                "tokenizerClass", SimpleOpenNlpTokenizer.class.getName(),
                "annotationType", DictionaryEntry.class.getName(),
                "caseSensitive", false,
                "featureMapping", Arrays.asList("1 -> base", "2 -> id"));

        Collection<DictionaryEntry> entries =
                JCasUtil.select(process(description, loadText("wiki-nlproc.txt")), DictionaryEntry.class);

        Assert.assertEquals(11, entries.size());
        Assert.assertEquals(
                Lists.newArrayList(
                        "Computing Machinery and Intelligence",
                        "hand-written rules",
                        "machine learning",
                        "Anaphora resolution",
                        "Natural language generation",
                        "Natural language understanding",
                        "Natural language search"),
                entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
        Assert.assertEquals(
                Lists.newArrayList("computing machinery", "computing intelligence", "method", "task"),
                entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
        Assert.assertEquals(
                Lists.newArrayList(3, 2, 1),
                entries.stream().map(DictionaryEntry::getId).distinct().collect(Collectors.toList()));
    }

    /**
     * Case- and accent-insensitive matching against the multi-language dictionary.
     * Eight annotations over four distinct phrases are expected, carrying both the plain
     * language codes and their {@code -no-accents} variants as {@code base} values.
     */
    @Test
    public void testAccentInsensitive() throws Exception {
        AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(
                DictionaryAnnotator.class,
                "dictionaryLocation", "classpath:language-dictionary.csv",
                "tokenizerClass", SimpleOpenNlpTokenizer.class.getName(),
                "annotationType", DictionaryEntry.class.getName(),
                "caseSensitive", false,
                "accentSensitive", false,
                "featureMapping", Arrays.asList("1 -> base"));

        Collection<DictionaryEntry> entries = JCasUtil.select(
                process(description, loadText("wiki-language-with-accents.txt")), DictionaryEntry.class);

        Assert.assertEquals(8, entries.size());
        Assert.assertEquals(
                Lists.newArrayList(
                        "capacité d'exprimer",
                        "lingvistinių ženklų",
                        "programmeringsspråk og språk",
                        "gemäß ihrer genetischen"),
                entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
        Assert.assertEquals(
                Lists.newArrayList(
                        "fr", "fr-no-accents", "lt", "lt-no-accents", "no", "no-no-accents", "de", "de-no-accents"),
                entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
    }

    /**
     * Case-insensitive but accent-sensitive matching against the multi-language dictionary.
     * Only four annotations are expected, carrying the plain language codes as {@code base}.
     */
    @Test
    public void testAccentSensitive() throws Exception {
        AnalysisEngineDescription description = AnalysisEngineFactory.createEngineDescription(
                DictionaryAnnotator.class,
                "dictionaryLocation", "classpath:language-dictionary.csv",
                "tokenizerClass", SimpleOpenNlpTokenizer.class.getName(),
                "annotationType", DictionaryEntry.class.getName(),
                "caseSensitive", false,
                "accentSensitive", true,
                "featureMapping", Arrays.asList("1 -> base"));

        Collection<DictionaryEntry> entries = JCasUtil.select(
                process(description, loadText("wiki-language-with-accents.txt")), DictionaryEntry.class);

        Assert.assertEquals(4, entries.size());
        Assert.assertEquals(
                Lists.newArrayList(
                        "capacité d'exprimer",
                        "lingvistinių ženklų",
                        "programmeringsspråk og språk",
                        "gemäß ihrer genetischen"),
                entries.stream().map(DictionaryEntry::getCoveredText).distinct().collect(Collectors.toList()));
        Assert.assertEquals(
                Lists.newArrayList("fr", "lt", "no", "de"),
                entries.stream().map(DictionaryEntry::getBase).distinct().collect(Collectors.toList()));
    }

    /**
     * Runs the OpenNLP {@link SimpleTokenizer} followed by the given engine over {@code str}.
     *
     * <p>The tokenizer is configured with {@code uima.tcas.DocumentAnnotation} as its sentence
     * type and {@link Token} as its token type.
     *
     * @param analysisEngineDescription description of the engine to run after tokenization
     * @param str document text to process
     * @return the JCas after the aggregate pipeline has processed it
     * @throws RuntimeException wrapping any failure while building the pipeline or processing
     *     the text (the message mentions engine creation only, but both phases are covered)
     */
    private JCas process(AnalysisEngineDescription analysisEngineDescription, String str) {
        try {
            AnalysisEngineDescription tokenizer = AnalysisEngineFactory.createEngineDescription(
                    SimpleTokenizer.class,
                    UimaUtil.SENTENCE_TYPE_PARAMETER, "uima.tcas.DocumentAnnotation",
                    "opennlp.uima.TokenType", Token.class.getName());

            AggregateBuilder pipeline = new AggregateBuilder();
            pipeline.add(tokenizer);
            pipeline.add(analysisEngineDescription);

            AnalysisEngine engine = AnalysisEngineFactory.createEngine(pipeline.createAggregateDescription());
            JCas jCas = engine.newJCas();
            jCas.setDocumentText(str);
            engine.process(jCas);
            return jCas;
        } catch (Exception e) {
            throw new RuntimeException("Failed to create UIMA engine", e);
        }
    }

    /**
     * Reads a classpath resource fully into a string using UTF-8.
     *
     * @param str name of the resource to load
     * @return the resource content
     * @throws RuntimeException wrapping the {@link IOException} if reading fails
     */
    private static String loadText(String str) {
        try {
            return Resources.toString(Resources.getResource(str), Charsets.UTF_8);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
