/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.river.wikipedia.support;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.elasticsearch.river.wikipedia.support.InfoBox;

/**
 * Regex-based extractor for the wiki markup of a single page.
 *
 * <p>The redirect, stub and disambiguation flags are computed eagerly in the
 * constructor; categories, links and the info box are parsed lazily on first
 * access and then cached. Instances perform no synchronization.
 */
public class WikiTextParser {
    /** Matches {@code #REDIRECT [[Target]]}; group 1 is the target. */
    private static final Pattern REDIRECT_PATTERN = Pattern.compile("#REDIRECT\\s+\\[\\[(.*?)\\]\\]");
    /** Matches the tail of a stub template such as {@code {{geo-stub}}}. */
    private static final Pattern STUB_PATTERN = Pattern.compile("\\-stub\\}\\}");
    /** Matches the literal {@code {{disambig}}} template. */
    private static final Pattern DISAMBIGUATION_PATTERN = Pattern.compile("\\{\\{disambig\\}\\}");
    /** Matches {@code [[Category:...]]}; group 1 is everything after the colon. */
    private static final Pattern CATEGORY_PATTERN =
            Pattern.compile("\\[\\[Category:(.*?)\\]\\]", Pattern.MULTILINE);
    /** Matches any {@code [[...]]}; group 1 is the content between the brackets. */
    private static final Pattern LINK_PATTERN = Pattern.compile("\\[\\[(.*?)\\]\\]", Pattern.MULTILINE);

    // Patterns used when reducing markup to plain text.
    private static final Pattern BARE_REF_PATTERN = Pattern.compile("<ref>.*?</ref>");
    private static final Pattern ATTRIBUTED_REF_PATTERN = Pattern.compile("<ref.*?>.*?</ref>");
    private static final Pattern TAG_PATTERN = Pattern.compile("</?.*?>");
    private static final Pattern TEMPLATE_PATTERN = Pattern.compile("\\{\\{.*?\\}\\}");
    private static final Pattern NAMESPACED_LINK_PATTERN = Pattern.compile("\\[\\[.*?:.*?\\]\\]");
    private static final Pattern PIPED_LABEL_PATTERN = Pattern.compile("\\s(.*?)\\|(\\w+\\s)");
    private static final Pattern SINGLE_BRACKET_PATTERN = Pattern.compile("\\[.*?\\]");
    private static final Pattern QUOTE_RUN_PATTERN = Pattern.compile("\\'+");

    private static final String INFOBOX_PREFIX = "{{Infobox";
    private static final String CITE_PREFIX = "{{cite";

    private final String wikiText;
    private final boolean redirect;
    private final String redirectString;
    private final boolean stub;
    private final boolean disambiguation;

    // Lazily populated caches; null means "not parsed yet".
    private ArrayList<String> pageCats = null;
    private ArrayList<String> pageLinks = null;
    private InfoBox infoBox = null;

    /**
     * Creates a parser and immediately evaluates the redirect, stub and
     * disambiguation markers.
     *
     * @param wtext the raw wiki markup; must not be {@code null}
     */
    public WikiTextParser(String wtext) {
        this.wikiText = wtext;
        Matcher redirectMatcher = REDIRECT_PATTERN.matcher(wtext);
        if (redirectMatcher.find()) {
            this.redirect = true;
            this.redirectString = redirectMatcher.group(1);
        } else {
            this.redirect = false;
            this.redirectString = null;
        }
        this.stub = STUB_PATTERN.matcher(wtext).find();
        this.disambiguation = DISAMBIGUATION_PATTERN.matcher(wtext).find();
    }

    /** @return {@code true} if the text contains a {@code #REDIRECT [[...]]} directive */
    public boolean isRedirect() {
        return this.redirect;
    }

    /** @return {@code true} if the text contains a template ending in {@code -stub}}} */
    public boolean isStub() {
        return this.stub;
    }

    /** @return the redirect target, or {@code null} when the page is not a redirect */
    public String getRedirectText() {
        return this.redirectString;
    }

    /** @return the unmodified markup passed to the constructor */
    public String getText() {
        return this.wikiText;
    }

    /**
     * Returns the names found in {@code [[Category:...]]} tags, without any
     * {@code |sort key} suffix. The result is parsed once and the same list
     * instance is returned on subsequent calls.
     *
     * @return the category names in order of appearance; never {@code null}
     */
    public ArrayList<String> getCategories() {
        if (this.pageCats == null) {
            this.parseCategories();
        }
        return this.pageCats;
    }

    /**
     * Returns the targets of {@code [[...]]} links whose target contains no
     * colon. The result is parsed once and the same list instance is returned
     * on subsequent calls.
     *
     * @return the link targets in order of appearance; never {@code null}
     */
    public ArrayList<String> getLinks() {
        if (this.pageLinks == null) {
            this.parseLinks();
        }
        return this.pageLinks;
    }

    /** Fills {@link #pageCats} from every category tag in the text. */
    private void parseCategories() {
        ArrayList<String> categories = new ArrayList<String>();
        Matcher matcher = CATEGORY_PATTERN.matcher(this.wikiText);
        while (matcher.find()) {
            String[] parts = matcher.group(1).split("\\|");
            // split() drops trailing empty strings, so a tag such as
            // "[[Category:|]]" yields an empty array; skip it instead of
            // failing on parts[0].
            if (parts.length == 0) {
                continue;
            }
            categories.add(parts[0]);
        }
        this.pageCats = categories;
    }

    /** Fills {@link #pageLinks} from every double-bracket link without a colon in its target. */
    private void parseLinks() {
        ArrayList<String> links = new ArrayList<String>();
        Matcher matcher = LINK_PATTERN.matcher(this.wikiText);
        while (matcher.find()) {
            String[] parts = matcher.group(1).split("\\|");
            if (parts.length == 0) {
                continue;
            }
            String target = parts[0];
            // A colon marks a prefixed target (e.g. "Category:Foo"), which is not collected here.
            if (target.contains(":")) {
                continue;
            }
            links.add(target);
        }
        this.pageLinks = links;
    }

    /**
     * Produces a rough plain-text rendering of the markup by applying a fixed
     * sequence of replacements. The order matters: entities are decoded first
     * so that the tag patterns can see the angle brackets, and colon-prefixed
     * links are removed before the remaining links are unwrapped.
     *
     * @return the text with references, tags, single-line templates, links and
     *         apostrophe runs removed or unwrapped
     */
    public String getPlainText() {
        String text = this.wikiText.replace("&gt;", ">").replace("&lt;", "<");
        text = BARE_REF_PATTERN.matcher(text).replaceAll(" ");
        text = TAG_PATTERN.matcher(text).replaceAll(" ");
        text = TEMPLATE_PATTERN.matcher(text).replaceAll(" ");
        text = NAMESPACED_LINK_PATTERN.matcher(text).replaceAll(" ");
        text = LINK_PATTERN.matcher(text).replaceAll("$1");
        text = PIPED_LABEL_PATTERN.matcher(text).replaceAll(" $2");
        text = SINGLE_BRACKET_PATTERN.matcher(text).replaceAll(" ");
        text = QUOTE_RUN_PATTERN.matcher(text).replaceAll("");
        return text;
    }

    /**
     * Returns the page's info box, parsing it on first use. When the text has
     * no <code>{{Infobox</code> the result is {@code null} and the lookup is
     * repeated on the next call.
     *
     * @return the parsed info box, or {@code null} if none is present
     */
    public InfoBox getInfoBox() {
        if (this.infoBox == null) {
            this.infoBox = this.parseInfoBox();
        }
        return this.infoBox;
    }

    /**
     * Extracts the first <code>{{Infobox ...}}</code> template, strips citations,
     * references and tags from it, and wraps the result in an {@link InfoBox}.
     *
     * @return the info box, or {@code null} if the text has no <code>{{Infobox</code>
     */
    private InfoBox parseInfoBox() {
        int startPos = this.wikiText.indexOf(INFOBOX_PREFIX);
        if (startPos < 0) {
            return null;
        }
        int closePos = findTemplateEnd(this.wikiText, startPos + INFOBOX_PREFIX.length());
        // closePos equals length() for an unterminated template; clamp so the
        // substring call cannot run past the end of the text.
        int endExclusive = Math.min(closePos + 1, this.wikiText.length());
        String infoBoxText = this.wikiText.substring(startPos, endExclusive);
        infoBoxText = stripCite(infoBoxText);
        infoBoxText = infoBoxText.replace("&gt;", ">").replace("&lt;", "<");
        infoBoxText = ATTRIBUTED_REF_PATTERN.matcher(infoBoxText).replaceAll(" ");
        infoBoxText = TAG_PATTERN.matcher(infoBoxText).replaceAll(" ");
        return new InfoBox(infoBoxText);
    }

    /**
     * Removes every <code>{{cite ...}}</code> template from the text, including
     * its closing braces, and leaves the surrounding characters untouched.
     *
     * @param text the markup to clean
     * @return the markup without citation templates
     */
    private static String stripCite(String text) {
        String result = text;
        int startPos;
        // Each pass removes at least the prefix itself, so the loop terminates.
        while ((startPos = result.indexOf(CITE_PREFIX)) >= 0) {
            int closePos = findTemplateEnd(result, startPos + CITE_PREFIX.length());
            int resumePos = Math.min(closePos + 1, result.length());
            result = result.substring(0, startPos) + result.substring(resumePos);
        }
        return result;
    }

    /**
     * Scans forward from just after a template's opening prefix and locates the
     * brace that balances the two opening braces of that prefix.
     *
     * @param text the markup being scanned
     * @param from index of the first character after the opening prefix
     * @return index of the balancing closing brace, or {@code text.length()} if
     *         the braces never balance
     */
    private static int findTemplateEnd(String text, int from) {
        int depth = 2; // the two '{' of the opening prefix are already open
        for (int i = from; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '}') {
                depth--;
            } else if (c == '{') {
                depth++;
            }
            if (depth == 0) {
                return i;
            }
        }
        return text.length();
    }

    /** @return {@code true} if the text contains the {@code {{disambig}}} template */
    public boolean isDisambiguationPage() {
        return this.disambiguation;
    }

    /**
     * Looks for an interlanguage link {@code [[code:Title]]} that occupies a
     * whole line and returns its title.
     *
     * @param languageCode the language prefix to look for, matched literally
     * @return the title of the first matching link, or {@code null} if there is
     *         none or {@code languageCode} is {@code null}
     */
    public String getTranslatedTitle(String languageCode) {
        if (languageCode == null) {
            return null;
        }
        // Quote the code so regex metacharacters in it are matched literally.
        Pattern pattern = Pattern.compile(
                "^\\[\\[" + Pattern.quote(languageCode) + ":(.*?)\\]\\]$", Pattern.MULTILINE);
        Matcher matcher = pattern.matcher(this.wikiText);
        if (matcher.find()) {
            return matcher.group(1);
        }
        return null;
    }
}

