package de.versley.exml.annotators;

import com.google.re2j.Pattern;
import de.versley.exml.config.FileReference;
import de.versley.iwnlp.MappingLemmatizer;
import exml.tueba.TuebaDocument;
import exml.tueba.TuebaTerminal;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import org.eclipse.collections.api.map.primitive.ObjectIntMap;

/* loaded from: input_file:de/versley/exml/annotators/IWNLPAnnotator.class */
/**
 * Annotator that assigns a lemma to every terminal of a {@link TuebaDocument}
 * using a {@link MappingLemmatizer}.
 *
 * <p>{@link #loadModels()} reads the lemma mapping from {@link #lemma_file},
 * optionally frequencies from {@link #freq_file}, and optionally a tag
 * translation table from {@link #pos_file}. {@link #annotate(TuebaDocument)}
 * then looks up each terminal and falls back to a case-normalised form of the
 * word when the lemmatizer returns {@code null}.
 */
public class IWNLPAnnotator extends SimpleAnnotator {
    /**
     * Terminal categories whose fallback lemma gets an upper-case first letter
     * (presumably the noun / proper-noun tags of the tagset in use).
     */
    private static final Pattern UC_TAGS = Pattern.compile("NN|NE");

    /** Target categories accepted from the POS file; anything else is stored as {@code "X"}. */
    private static final Pattern KNOWN_POS = Pattern.compile("NOUN|VERB|ADJ|PRON|DET|PREP");

    /** Source of the lemma mapping; required by {@link #loadModels()}. */
    public FileReference lemma_file;
    /** Optional tag translation table: whitespace-separated "tag category" lines. */
    public FileReference pos_file;
    /** Optional frequency data passed to the lemmatizer. */
    public FileReference freq_file;

    private transient MappingLemmatizer lemmatizer;
    private transient Map<String, String> pos_map;
    // Not read or written anywhere in this class; kept so the field layout is unchanged.
    private transient ObjectIntMap<String> freq_map;

    /**
     * Upper-cases the first character of {@code str}. When the whole string is
     * already upper case, the remainder is lower-cased (e.g. "HAUS" becomes "Haus");
     * otherwise the remainder is left untouched.
     *
     * @param str the word to normalise
     * @return the normalised word; an empty input is returned unchanged
     */
    private static String ucfirst(String str) {
        if (str.isEmpty()) {
            // substring(0, 1) below would throw on an empty word.
            return str;
        }
        String rest = str.substring(1);
        if (str.equals(str.toUpperCase())) {
            rest = rest.toLowerCase();
        }
        return str.substring(0, 1).toUpperCase() + rest;
    }

    /**
     * Loads the lemmatizer and the tag translation table, unless a lemmatizer
     * has already been loaded.
     *
     * <p>All streams opened here are closed again, also when loading fails. The
     * fields are only assigned after everything has been read, so a failed
     * attempt leaves the annotator unloaded and a later call can retry.
     *
     * @throws RuntimeException wrapping the {@link IOException} if any file cannot be read
     */
    @Override // de.versley.exml.annotators.SimpleAnnotator, de.versley.exml.annotators.Annotator, de.versley.exml.async.Channel
    public void loadModels() {
        if (this.lemmatizer != null) {
            return;
        }
        // The lemma stream stays open until all loading is done, as in the original flow.
        try (InputStream lemmaStream = this.lemma_file.toStream()) {
            MappingLemmatizer loaded = MappingLemmatizer.load(lemmaStream);
            if (this.freq_file != null) {
                try (InputStream freqStream = this.freq_file.toStream()) {
                    loaded.loadFrequencies(freqStream);
                }
            }
            Map<String, String> mapping = readPosMap();
            this.pos_map = mapping;
            // Assigned last: a non-null lemmatizer marks the models as fully loaded.
            this.lemmatizer = loaded;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Reads the tag translation table from {@link #pos_file}.
     *
     * <p>Each line is split on whitespace; lines with fewer than two fields are
     * skipped. The second field is kept only if it is one of the categories in
     * {@link #KNOWN_POS}, otherwise {@code "X"} is stored. The file is decoded
     * as UTF-8 rather than with the platform default charset.
     *
     * @return the mapping from the first to the (normalised) second field;
     *         empty when no POS file is configured
     * @throws IOException if the file cannot be opened or read
     */
    private Map<String, String> readPosMap() throws IOException {
        Map<String, String> mapping = new HashMap<>();
        if (this.pos_file == null) {
            return mapping;
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(this.pos_file.toStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\\s+");
                if (fields.length < 2) {
                    continue;
                }
                String category = KNOWN_POS.matches(fields[1]) ? fields[1] : "X";
                mapping.put(fields[0], category);
            }
        }
        return mapping;
    }

    /**
     * Sets the lemma of every terminal in {@code tuebaDocument}.
     *
     * <p>The terminal's category is translated through the POS table when it has
     * an entry, then passed to the lemmatizer together with the word. If the
     * lemmatizer returns {@code null}, the lemma falls back to the word itself:
     * capitalised via {@link #ucfirst(String)} when the terminal's original
     * category matches {@link #UC_TAGS}, lower-cased otherwise.
     *
     * <p>NOTE(review): this method dereferences the lemmatizer and POS table
     * without a check, so {@link #loadModels()} has to have completed first.
     *
     * @param tuebaDocument the document whose terminals are annotated in place
     */
    @Override // de.versley.exml.annotators.Annotator
    public void annotate(TuebaDocument tuebaDocument) {
        for (TuebaTerminal terminal : tuebaDocument.getTerminals()) {
            String originalCat = terminal.getCat();
            // Stored values are never null, so this equals the containsKey/get pair.
            String lookupCat = this.pos_map.getOrDefault(originalCat, originalCat);
            String lemma = this.lemmatizer.lemmatizeSingle(terminal.getWord(), lookupCat, false);
            if (lemma == null) {
                lemma = UC_TAGS.matches(terminal.getCat())
                        ? ucfirst(terminal.getWord())
                        : terminal.getWord().toLowerCase();
            }
            terminal.setLemma(lemma);
        }
    }
}
