package org.wikibrain.parser.wiki;

import de.tudarmstadt.ukp.wikipedia.parser.Content;
import de.tudarmstadt.ukp.wikipedia.parser.Link;
import de.tudarmstadt.ukp.wikipedia.parser.Paragraph;
import de.tudarmstadt.ukp.wikipedia.parser.ParsedPage;
import de.tudarmstadt.ukp.wikipedia.parser.Section;
import de.tudarmstadt.ukp.wikipedia.parser.Template;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser;
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LanguageInfo;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.model.NameSpace;
import org.wikibrain.core.model.RawPage;
import org.wikibrain.core.model.Title;
import org.wikibrain.parser.wiki.ParsedLink;

/* loaded from: input_file:org/wikibrain/parser/wiki/WikiTextParser.class */
/**
 * Parses the wiki markup of a {@link RawPage} with the JWPL parser and reports
 * what it finds (links, categories, interlanguage links, redirects and parse
 * errors) to a list of {@link ParserVisitor}s.
 */
public class WikiTextParser {
    public static final Logger LOG = LoggerFactory.getLogger(WikiTextParser.class);

    /**
     * Splits an interlanguage link target into the text before the first colon
     * (group 1, treated as a language code) and the text after it (group 2).
     */
    private static final Pattern ILL_PATTERN = Pattern.compile("(.+?)\\:\\s*(.+)");

    private final MediaWikiParser jwpl;
    private final SubarticleParser subarticleParser;
    private final LanguageInfo lang;
    private final List<ParserVisitor> visitors;

    /**
     * Creates a parser that is not given any interlanguage identifiers.
     *
     * @param languageInfo language of the pages that will be parsed
     * @param list         visitors notified of every parse event
     */
    public WikiTextParser(LanguageInfo languageInfo, List<ParserVisitor> list) {
        this(languageInfo, null, list);
    }

    /**
     * Creates a parser.
     *
     * @param languageInfo language of the pages that will be parsed
     * @param languageSet  languages whose codes are registered with the JWPL
     *                     parser as language identifiers; may be {@code null},
     *                     in which case none are registered
     * @param list         visitors notified of every parse event
     */
    public WikiTextParser(LanguageInfo languageInfo, LanguageSet languageSet, List<ParserVisitor> list) {
        this.lang = languageInfo;
        this.subarticleParser = new SubarticleParser(languageInfo);
        this.visitors = list;
        MediaWikiParserFactory factory = new MediaWikiParserFactory();
        factory.setCalculateSrcSpans(true);
        factory.setCategoryIdentifers(languageInfo.getCategoryNames());
        if (languageSet != null) {
            factory.setLanguageIdentifers(languageSet.getLangCodes());
        }
        this.jwpl = factory.createParser();
    }

    /**
     * Parses one page and notifies the visitors. {@code beginPage} and
     * {@code endPage} are always sent; in between, a redirect page yields a
     * single {@code redirect} event, while any other page is parsed with JWPL
     * and processed according to its namespace (only category and article
     * pages are processed).
     *
     * @param rawPage the page to parse
     * @throws WikiBrainException declared for callers; the visible code reports
     *                            visitor failures through the log instead
     */
    public void parse(RawPage rawPage) throws WikiBrainException {
        visitBeginPage(rawPage);
        if (rawPage.isRedirect()) {
            ParsedRedirect redirect = new ParsedRedirect();
            redirect.location = new ParsedLocation(rawPage, -1, -1, -1);
            visitRedirect(redirect);
        } else {
            try {
                ParsedPage parsed = this.jwpl.parse(rawPage.getBody());
                if (parsed == null) {
                    // Only logged here: for article and category pages the
                    // dereference below then raises a NullPointerException,
                    // which is reported to the visitors as a parse error.
                    LOG.debug("invalid page: " + rawPage.getBody());
                }
                if (rawPage.getNamespace() == NameSpace.CATEGORY) {
                    parseCategory(rawPage, parsed);
                } else if (rawPage.getNamespace() == NameSpace.ARTICLE) {
                    parseArticle(rawPage, parsed);
                }
            } catch (NullPointerException e) {
                visitParseError(rawPage, e);
            } catch (NoSuchElementException e) {
                visitParseError(rawPage, e);
            }
        }
        visitEndPage(rawPage);
    }

    /**
     * Walks every section of an article, reporting plain links and links found
     * in templates, then reports interlanguage links and categories.
     * A {@link WikiBrainException} raised while handling a section is logged
     * and the remaining content of that section is skipped.
     */
    private void parseArticle(RawPage rawPage, ParsedPage parsedPage) {
        int sectionNum = 0;
        // Starts at minus the first-paragraph number and advances once per Paragraph.
        int paragraphNum = -parsedPage.getFirstParagraphNr();

        for (Section section : parsedPage.getSections()) {
            try {
                ParsedLink.SubarticleType sectionType =
                        this.subarticleParser.isSeeAlsoHeader(this.lang, section.getTitle());
                for (Content content : section.getContentList()) {
                    visitContentLinks(rawPage, content, sectionType, sectionNum, paragraphNum);
                    visitTemplateLinks(rawPage, content, sectionNum, paragraphNum);
                    if (content instanceof Paragraph) {
                        paragraphNum++;
                    }
                }
            } catch (WikiBrainException e) {
                LOG.error(String.format("Could not store whole section in %s", rawPage), e);
            }
            // Exactly once per section, whether or not the section failed.
            sectionNum++;
        }

        parseIlls(rawPage, parsedPage);

        for (Link categoryLink : parsedPage.getCategories()) {
            // Plain substring test: String.contains takes literal text, so the
            // pipe must not be wrapped in Pattern.quote.
            if (categoryLink.getText().contains("|")) {
                continue;
            }
            Title categoryTitle = new Title(categoryLink.getTarget(), false, this.lang);
            emitCategory(new ParsedLocation(rawPage, -1, -1, categoryLink.getSrcSpan().getStart()), categoryTitle);
        }
    }

    /**
     * Reports every link of {@code content} that resolves to an article title.
     * Links with an empty target are logged and skipped. A failure on one link
     * is logged and does not affect the remaining links.
     *
     * @param sectionType subarticle type implied by the section header, or
     *                    {@code null} to classify each link individually
     */
    private void visitContentLinks(RawPage rawPage, Content content, ParsedLink.SubarticleType sectionType,
                                   int sectionNum, int paragraphNum) {
        for (Link link : content.getLinks()) {
            if (link.getTarget().isEmpty()) {
                LOG.debug("Found link with empty target: \t" + rawPage + "\t text=" + link.getText());
                continue;
            }
            Title target = link2Title(link);
            if (target == null || target.getNamespace() != NameSpace.ARTICLE) {
                continue;
            }
            try {
                ParsedLink.SubarticleType linkType = sectionType;
                if (linkType == null) {
                    linkType = this.subarticleParser.isInlineSubarticle(link.getSrcSpan().getStart(), rawPage);
                }
                ParsedLocation location =
                        new ParsedLocation(rawPage, sectionNum, paragraphNum, link.getSrcSpan().getStart());
                visitLink(location, target, link.getText(), linkType);
            } catch (WikiBrainException e) {
                LOG.warn(String.format("Could not process link\t%s\t%s", rawPage, link.toString()), e);
            }
        }
    }

    /**
     * Processes every template of {@code content}. A template recognised as a
     * subarticle template has each of its pipe-separated entries reported as a
     * link; any other template has its text re-parsed and the links and
     * categories found inside reported. Templates whose source text is shorter
     * than five characters are ignored.
     */
    private void visitTemplateLinks(RawPage rawPage, Content content, int sectionNum, int paragraphNum)
            throws WikiBrainException {
        for (Template template : content.getTemplates()) {
            String source = templateSource(rawPage, template);
            if (source.length() < 5) {
                continue;
            }
            // Drop the first and last two characters of the template source.
            String inner = source.substring(2, source.length() - 2);
            String templateName = new Title(template.getName(), false, this.lang).toString();
            ParsedLink.SubarticleType templateType = this.subarticleParser.isTemplateSubarticle(templateName, inner);
            if (templateType == null) {
                visitLinksInsideTemplate(rawPage, template, inner, sectionNum, paragraphNum);
            } else {
                visitSubarticleTemplate(rawPage, template, inner, templateType, sectionNum, paragraphNum);
            }
        }
    }

    /**
     * Returns the slice of the page body covered by {@code template}. When the
     * source span has no valid end (negative), the length is derived from the
     * template's position span instead.
     */
    private String templateSource(RawPage rawPage, Template template) {
        String body = rawPage.getBody();
        int start = template.getSrcSpan().getStart();
        int end = template.getSrcSpan().getEnd();
        if (end < 0) {
            int length = template.getPos().getEnd() - template.getPos().getStart();
            return body.substring(start, start + length + 1);
        }
        return body.substring(start, end);
    }

    /**
     * Strips nested template braces, comment openers and empty links from the
     * template text, re-parses it, and reports the article links and
     * categories it contains. An {@link IndexOutOfBoundsException} raised
     * while re-parsing is logged and the template is skipped.
     */
    private void visitLinksInsideTemplate(RawPage rawPage, Template template, String templateText,
                                          int sectionNum, int paragraphNum) throws WikiBrainException {
        String cleaned = templateText
                .replaceAll("\\{\\{", "")
                .replaceAll("\\}\\}", "")
                .replaceAll("<!--", "")
                .replaceAll("\\[\\[\\]\\]", "");
        try {
            for (Link link : this.jwpl.parse(cleaned).getLinks()) {
                Title target = link2Title(link);
                if (target == null) {
                    continue;
                }
                NameSpace namespace = target.getNamespace();
                ParsedLocation location =
                        new ParsedLocation(rawPage, sectionNum, paragraphNum, template.getSrcSpan().getStart());
                if (namespace == NameSpace.ARTICLE) {
                    // Not a subarticle template, so the link carries no subarticle type.
                    visitLink(location, target, link.getText(), null);
                } else if (namespace == NameSpace.CATEGORY) {
                    emitCategory(location, target);
                }
            }
        } catch (IndexOutOfBoundsException e) {
            LOG.error("Parsing error while doing templates -> ParsedPages:\t" + rawPage + "\t" + cleaned, e);
        }
    }

    /**
     * Reports each pipe-separated entry of a subarticle template as a link to
     * the title of that name. A failure on one entry is logged and the
     * remaining entries are still processed.
     */
    private void visitSubarticleTemplate(RawPage rawPage, Template template, String templateText,
                                         ParsedLink.SubarticleType templateType,
                                         int sectionNum, int paragraphNum) {
        Iterator<String> entries = this.subarticleParser.getContentsOfTemplatePipe(templateText).iterator();
        while (entries.hasNext()) {
            String destination = SubarticleParser.removeTemplateAnchor(entries.next());
            try {
                ParsedLocation location =
                        new ParsedLocation(rawPage, sectionNum, paragraphNum, template.getSrcSpan().getStart());
                visitLink(location, new Title(destination, this.lang), destination, templateType);
            } catch (WikiBrainException e) {
                LOG.error(String.format("Could not process template-based subarticle link: \t%s\t%s",
                        rawPage, template.toString()), e);
            }
        }
    }

    /**
     * Reports the page's interlanguage links. Links whose target does not
     * match the code/title pattern, whose language code is unknown, or that
     * point to this parser's own language are not reported. Any exception on
     * a single link is logged and the remaining links are still processed.
     */
    private void parseIlls(RawPage rawPage, ParsedPage parsedPage) {
        if (parsedPage.getLanguagesElement() == null) {
            return;
        }
        for (Link link : parsedPage.getLanguages()) {
            try {
                Matcher matcher = ILL_PATTERN.matcher(link.getTarget());
                if (!matcher.find()) {
                    LOG.debug("Invalid ILL:\t" + rawPage + "\t" + link.getTarget());
                    continue;
                }
                String langCode = matcher.group(1);
                String foreignTitle = matcher.group(2);
                Language language = Language.getByLangCode(langCode);
                if (language == null) {
                    LOG.warn("unkonwn lang code:\t" + langCode);
                } else if (!language.equals(this.lang.getLanguage())) {
                    ParsedIll ill = new ParsedIll();
                    ill.location = new ParsedLocation(rawPage, -1, -1, link.getSrcSpan().getStart());
                    ill.title = new Title(foreignTitle, false, LanguageInfo.getByLanguage(language));
                    visitIll(ill);
                }
            } catch (Exception e) {
                // Best effort: one malformed interlanguage link must not abort the page.
                LOG.warn(String.format("Error while parsing/storing ILL\t%s\t%s\t%s",
                        rawPage, link.toString().replaceAll("\n", ","), e.getMessage()));
            }
        }
    }

    /** Reports the categories and interlanguage links of a category page. */
    private void parseCategory(RawPage rawPage, ParsedPage parsedPage) {
        for (Link categoryLink : parsedPage.getCategories()) {
            Title categoryTitle = new Title(categoryLink.getTarget(), this.lang);
            emitCategory(new ParsedLocation(rawPage, -1, -1, categoryLink.getSrcSpan().getStart()), categoryTitle);
        }
        parseIlls(rawPage, parsedPage);
    }

    /** Builds a {@link ParsedCategory} from its parts and sends it to the visitors. */
    private void emitCategory(ParsedLocation location, Title category) {
        ParsedCategory parsedCategory = new ParsedCategory();
        parsedCategory.location = location;
        parsedCategory.category = category;
        visitCategory(parsedCategory);
    }

    /** Sends {@code beginPage} to every visitor; a failing visitor is logged and skipped. */
    private void visitBeginPage(RawPage rawPage) {
        for (ParserVisitor visitor : this.visitors) {
            try {
                visitor.beginPage(rawPage);
            } catch (WikiBrainException e) {
                LOG.warn("beginPage failed:", e);
            }
        }
    }

    /** Sends {@code endPage} to every visitor; a failing visitor is logged and skipped. */
    private void visitEndPage(RawPage rawPage) {
        for (ParserVisitor visitor : this.visitors) {
            try {
                visitor.endPage(rawPage);
            } catch (WikiBrainException e) {
                LOG.warn("endPage failed:", e);
            }
        }
    }

    /** Sends {@code redirect} to every visitor; a failing visitor is logged and skipped. */
    private void visitRedirect(ParsedRedirect parsedRedirect) {
        for (ParserVisitor visitor : this.visitors) {
            try {
                visitor.redirect(parsedRedirect);
            } catch (WikiBrainException e) {
                LOG.warn("redirect failed:", e);
            }
        }
    }

    /** Sends {@code parseError} for {@code rawPage} to every visitor. */
    private void visitParseError(RawPage rawPage, Exception exc) {
        for (ParserVisitor visitor : this.visitors) {
            visitor.parseError(rawPage, exc);
        }
    }

    /** Sends {@code ill} to every visitor; a failing visitor is logged and skipped. */
    private void visitIll(ParsedIll parsedIll) {
        for (ParserVisitor visitor : this.visitors) {
            try {
                visitor.ill(parsedIll);
            } catch (WikiBrainException e) {
                LOG.warn("ill failed:", e);
            }
        }
    }

    /** Sends {@code category} to every visitor; a failing visitor is logged and skipped. */
    private void visitCategory(ParsedCategory parsedCategory) {
        for (ParserVisitor visitor : this.visitors) {
            try {
                visitor.category(parsedCategory);
            } catch (WikiBrainException e) {
                LOG.warn("category failed:", e);
            }
        }
    }

    /**
     * Sends a link event to every visitor. Nothing is sent when the title of
     * the page at {@code parsedLocation} starts with {@code "#"} or equals the
     * link target (a self link).
     *
     * @param parsedLocation where the link was found
     * @param title          target of the link
     * @param str            display text of the link
     * @param subarticleType subarticle classification, or {@code null}
     * @throws WikiBrainException declared for callers; failures of individual
     *                            visitors are logged rather than rethrown
     */
    private void visitLink(ParsedLocation parsedLocation, Title title, String str, ParsedLink.SubarticleType subarticleType) throws WikiBrainException {
        Title sourceTitle = parsedLocation.getXml().getTitle();
        if (sourceTitle.toString().startsWith("#") || sourceTitle.equals(title)) {
            return;
        }
        ParsedLink parsedLink = new ParsedLink();
        parsedLink.location = parsedLocation;
        parsedLink.target = title;
        parsedLink.text = str;
        parsedLink.subarticleType = subarticleType;
        for (ParserVisitor visitor : this.visitors) {
            try {
                visitor.link(parsedLink);
            } catch (WikiBrainException e) {
                LOG.warn("link failed:", e);
            }
        }
    }

    /**
     * Returns the namespace of the title {@code link} points to, or
     * {@code null} when the link does not resolve to a title.
     */
    private NameSpace getLinkType(Link link) {
        Title target = link2Title(link);
        return target == null ? null : target.getNamespace();
    }

    /**
     * Converts a link of type INTERNAL or UNKNOWN into a {@link Title} in this
     * parser's language; returns {@code null} for every other link type.
     */
    private Title link2Title(Link link) {
        boolean resolvable = link.getType().equals(Link.type.INTERNAL)
                || link.getType().equals(Link.type.UNKNOWN);
        if (!resolvable) {
            return null;
        }
        return new Title(link.getTarget(), this.lang);
    }

    /**
     * Collects the language code of every entry, preserving order.
     *
     * @param list language infos to read
     * @return a new mutable list holding one language code per input element
     */
    public static List<String> getLangCodes(List<LanguageInfo> list) {
        List<String> codes = new ArrayList<String>(list.size());
        for (LanguageInfo info : list) {
            codes.add(info.getLanguage().getLangCode());
        }
        return codes;
    }
}
