package nlp4j.wiki;

import info.bliki.wiki.tags.WPATag;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import nlp4j.AbstractDocumentAnnotator;
import nlp4j.Document;
import nlp4j.DocumentAnnotator;
import nlp4j.FieldAnnotator;
import nlp4j.Keyword;
import nlp4j.impl.DefaultKeyword;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;

/* loaded from: input_file:nlp4j/wiki/WikiDocumentAnnotator.class */
/**
 * Annotator that reads the {@code "wikitext"} attribute of a document, parses it with
 * {@link WikiItemTextParser}, and for each configured path:
 * <ul>
 *   <li>collects the spells of every matching node (joined with {@code ","}),</li>
 *   <li>collects every line of the node text that starts with {@code "#"} but not
 *       {@code "#*"} as plain text prefixed with {@code "# "},</li>
 *   <li>adds the wiki links found in those lines to the document as keywords with the
 *       facet {@code "wiki.link"}.</li>
 * </ul>
 * The collected lines are joined with line feeds and stored in the {@code "text"}
 * attribute. Paths are configured through the comma-separated {@code "paths"} property.
 */
public class WikiDocumentAnnotator extends AbstractDocumentAnnotator implements DocumentAnnotator, FieldAnnotator {
    private static final Logger logger = LogManager.getLogger(MethodHandles.lookup().lookupClass());

    /** Node paths looked up on the parsed page root; filled from the {@code "paths"} property. */
    List<String> paths = new ArrayList<>();

    /**
     * Stores the property via the superclass and, for the key {@code "paths"}, appends the
     * comma-separated entries of the value to {@link #paths}.
     *
     * @param str  property key
     * @param str2 property value; a {@code null} value for {@code "paths"} adds nothing
     */
    @Override // nlp4j.AbstractDocumentAnnotator, nlp4j.DocumentAnnotator
    public void setProperty(String str, String str2) {
        super.setProperty(str, str2);
        // Guard against a null value so a misconfigured property does not end in an NPE.
        if ("paths".equals(str) && str2 != null) {
            this.paths.addAll(Arrays.asList(str2.split(",")));
        }
    }

    /**
     * Extracts definition-style lines and link keywords from the document's
     * {@code "wikitext"} attribute and writes the collected lines to the {@code "text"}
     * attribute. Nothing is written when the wikitext is missing/empty or when no line
     * was collected.
     *
     * @param document document to annotate
     * @throws Exception propagated from parsing or from the wiki conversion helpers
     */
    @Override // nlp4j.DocumentAnnotator
    public void annotate(Document document) throws Exception {
        String wikitext = document.getAttributeAsString("wikitext");
        if (wikitext == null || wikitext.isEmpty()) {
            logger.debug("wikitext is empty");
            return;
        }

        WikiItemTextParser parser = new WikiItemTextParser();
        parser.parse(wikitext);
        WikiPageNode root = parser.getRoot();

        List<String> lines = new ArrayList<>();
        for (String path : this.paths) {
            for (WikiPageNode node : root.get(path)) {
                if (node == null) {
                    continue;
                }
                String[] spells = node.getSpells();
                if (spells != null && spells.length > 0) {
                    lines.add(String.join(",", spells));
                }
                String nodeText = node.getText();
                if (nodeText == null) {
                    // NOTE(review): defensive guard; confirm whether getText() can actually be null.
                    continue;
                }
                for (String line : nodeText.split(StringUtils.LF)) {
                    // Only "#" lines are used; "#*" lines are deliberately skipped.
                    if (!line.startsWith("#") || line.startsWith("#*")) {
                        continue;
                    }
                    document.addKeywords(extractKeywordsFromWikiHtml(WikiUtils.toHtml(line)));
                    String entry = "# " + WikiUtils.toPlainText(line, "");
                    // De-duplicate against everything collected so far (spells included).
                    if (!lines.contains(entry)) {
                        lines.add(entry);
                    }
                }
            }
        }

        if (!lines.isEmpty()) {
            document.putAttribute("text", String.join(StringUtils.LF, lines));
        }
    }

    /**
     * Builds keywords from the titled anchors of the first {@code ol > li} element of the
     * given HTML. Anchors whose title starts with {@code "Template"} are ignored. For each
     * remaining anchor the title becomes the keyword's lex, the anchor text its str, and
     * the facet is {@code "wiki.link"}.
     *
     * @param str HTML produced from a single wiki line
     * @return the extracted keywords; empty when the HTML has no {@code ol > li} element
     */
    private static List<Keyword> extractKeywordsFromWikiHtml(String str) {
        List<Keyword> keywords = new ArrayList<>();
        Elements items = Jsoup.parse(str).select("ol > li");
        logger.debug("elements.size(): {}", items.size());
        if (items.isEmpty()) {
            return keywords;
        }

        // Debug aid only: show the item text and its first sentence (up to the first "。").
        String text = items.get(0).text();
        int sentenceEnd = text.indexOf("。");
        if (sentenceEnd != -1) {
            logger.debug("text0: {}", text);
            logger.debug("text1: {}", text.substring(0, sentenceEnd));
        }

        Elements links = items.get(0).select("a[title]");
        for (int i = 0; i < links.size(); i++) {
            String title = links.get(i).attr(WPATag.TITLE);
            if (title.startsWith("Template")) {
                continue;
            }
            DefaultKeyword keyword = new DefaultKeyword();
            keyword.setLex(title);
            keyword.setStr(links.get(i).text());
            keyword.setFacet("wiki.link");
            keywords.add(keyword);
        }
        return keywords;
    }
}
