package nlp4j.mecab;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.regex.Pattern;
import net.moraleboost.mecab.Node;
import net.moraleboost.mecab.impl.StandardLattice;
import net.moraleboost.mecab.impl.StandardTagger;
import nlp4j.AbstractDocumentAnnotator;
import nlp4j.Document;
import nlp4j.DocumentAnnotator;
import nlp4j.impl.DefaultKeyword;
import nlp4j.util.RegexUtils;
import nlp4j.util.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:nlp4j/mecab/MecabAnnotator.class */
public class MecabAnnotator extends AbstractDocumentAnnotator implements DocumentAnnotator {
    private static final String JA_KOYUU_MEISHI = "固有名詞";
    private static final String JA_MEISHI = "名詞";
    private static final String JA_DOUSHI_SETSUBI = "動詞-接尾";
    private static final String JA_SETSUBI = "接尾";
    private static final String JA_DOUSHI = "動詞";
    private static final Logger logger = LogManager.getLogger(MecabAnnotator.class);
    private ArrayList<String> facetfilter = null;
    private String lexregexfilter = null;
    private String option = "";
    StandardTagger tagger;

    public void annotate(Document document) throws Exception {
        if (this.tagger == null) {
            this.tagger = new StandardTagger(this.option);
        }
        logger.debug("processing document");
        if (this.option == null) {
            this.option = "";
        }
        StandardLattice createLattice = this.tagger.createLattice();
        Iterator it = this.targets.iterator();
        while (it.hasNext()) {
            Object attribute = document.getAttribute((String) it.next());
            if (attribute != null && (attribute instanceof String)) {
                String replaceAll = StringUtils.filter((String) attribute, "MS932").replaceAll(RegexUtils.REGEX_URL, "");
                createLattice.setSentence(replaceAll);
                this.tagger.parse(createLattice);
                Node bosNode = createLattice.bosNode();
                int i = 0;
                int i2 = 1;
                while (bosNode != null) {
                    String surface = bosNode.surface();
                    String feature = bosNode.feature();
                    logger.debug("feature=" + feature);
                    String[] split = feature.split(",");
                    if (split[0].equals("BOS/EOS")) {
                        bosNode = bosNode.next();
                    } else {
                        if (split.length < 8) {
                            logger.warn("invalid: features.length:" + split.length + "," + replaceAll);
                        }
                        logger.debug(bosNode.surface() + "\t" + bosNode.feature());
                        DefaultKeyword defaultKeyword = new DefaultKeyword();
                        defaultKeyword.setLex(split[6]);
                        defaultKeyword.setStr(surface);
                        if (split.length >= 8) {
                            defaultKeyword.setReading(split[7]);
                        } else {
                            defaultKeyword.setReading("*");
                        }
                        if (split[0].equals(JA_DOUSHI) && split[1].equals(JA_SETSUBI)) {
                            defaultKeyword.setFacet(JA_DOUSHI_SETSUBI);
                        } else if (split[0].equals(JA_MEISHI) && split[1].equals(JA_KOYUU_MEISHI)) {
                            defaultKeyword.setFacet(JA_KOYUU_MEISHI);
                        } else {
                            defaultKeyword.setFacet(split[0]);
                        }
                        if (defaultKeyword.getLex().equals("*") && defaultKeyword.getReading().equals("*")) {
                            defaultKeyword.setLex(defaultKeyword.getStr());
                        }
                        defaultKeyword.setBegin(i);
                        defaultKeyword.setEnd(i + surface.length());
                        i += surface.length();
                        defaultKeyword.setSequence(i2);
                        if ((this.facetfilter == null || this.facetfilter.contains(defaultKeyword.getFacet())) && (this.lexregexfilter == null || defaultKeyword.getLex().matches(this.lexregexfilter))) {
                            document.addKeyword(defaultKeyword);
                        }
                        i2++;
                        bosNode = bosNode.next();
                    }
                }
            }
        }
        createLattice.destroy();
    }

    public void close() {
        this.tagger.destroy();
    }

    public void setProperty(String str, String str2) {
        super.setProperty(str, str2);
        if ("facetfilter".equals(str) && str2 != null) {
            if (this.facetfilter == null) {
                this.facetfilter = new ArrayList<>();
            }
            this.facetfilter.addAll(Arrays.asList(str2.split(",")));
        } else if ("lexregexfilter".equals(str) && str2 != null) {
            this.lexregexfilter = str2;
        } else {
            if (!"option".equals(str) || str2 == null) {
                return;
            }
            this.option = str2;
        }
    }
}
