package nlp4j.sudachi;

import com.worksap.nlp.sudachi.Dictionary;
import com.worksap.nlp.sudachi.DictionaryFactory;
import com.worksap.nlp.sudachi.Morpheme;
import com.worksap.nlp.sudachi.Tokenizer;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import nlp4j.AbstractDocumentAnnotator;
import nlp4j.Document;
import nlp4j.DocumentAnnotator;
import nlp4j.Keyword;
import nlp4j.KeywordBuilder;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:nlp4j/sudachi/SudachiAnnotator.class */
public class SudachiAnnotator extends AbstractDocumentAnnotator implements DocumentAnnotator, Closeable {
    private Tokenizer.SplitMode mode = Tokenizer.SplitMode.C;
    private Tokenizer tokenizer = null;
    private String posRegex;
    private static final Logger logger = LogManager.getLogger(SudachiAnnotator.class);
    public static String DEFAULT_FULLDIC = "system_full.dic";

    public void setProperty(String str, String str2) {
        super.setProperty(str, str2);
        if ("systemDict".equals(str)) {
            initTokenizer(str2);
            return;
        }
        if ("pos".equals(str)) {
            this.posRegex = str2;
            return;
        }
        if ("mode".equals(str)) {
            if ("A".equals(str2)) {
                this.mode = Tokenizer.SplitMode.A;
            } else if ("B".equals(str2)) {
                this.mode = Tokenizer.SplitMode.B;
            } else if ("C".equals(str2)) {
                this.mode = Tokenizer.SplitMode.C;
            }
        }
    }

    private void initTokenizer(String str) {
        if (!new File(str).exists()) {
            logger.info("File not found: " + new File(str).getAbsolutePath());
            return;
        }
        try {
            this.tokenizer = new DictionaryFactory().create((String) null, "{\"systemDict\":\"" + str + "\"}", true).create();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public void annotate(Document document) throws Exception {
        Iterator it = ((AbstractDocumentAnnotator) this).targets.iterator();
        while (it.hasNext()) {
            int i = -1;
            Iterator it2 = this.tokenizer.tokenizeSentences(this.mode, document.getAttributeAsString((String) it.next())).iterator();
            while (it2.hasNext()) {
                i++;
                for (Morpheme morpheme : (List) it2.next()) {
                    logger.debug("" + morpheme.begin() + "\t" + morpheme.end() + "\t" + morpheme.surface() + "\t" + String.join("-", morpheme.partOfSpeech()) + "," + morpheme.dictionaryForm() + "," + morpheme.readingForm() + "," + morpheme.normalizedForm());
                    String str = (String) morpheme.partOfSpeech().get(0);
                    String join = String.join("-", morpheme.partOfSpeech());
                    if (join.startsWith("名詞-固有名詞")) {
                        str = "固有名詞";
                    }
                    Keyword build = new KeywordBuilder().begin(morpheme.begin()).end(morpheme.end()).str(morpheme.surface()).facet(str).facet2(join).lex(morpheme.dictionaryForm()).reading(morpheme.readingForm()).sentenceIndex(i).build();
                    if (this.posRegex == null || build.getFacet().matches(this.posRegex)) {
                        document.addKeyword(build);
                    }
                }
            }
        }
    }

    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
    }
}
