package org.languagetool.dev.wikipedia;

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.SAXParserFactory;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.MultiThreadedJLanguageTool;
import org.languagetool.language.German;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.tools.ContextTools;
import org.languagetool.tools.StringTools;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/languagetool/dev/wikipedia/WikipediaQuickCheck.class */
/**
 * Downloads the current revision of a Wikipedia page through the MediaWiki API
 * and checks its text with LanguageTool. Spelling rules and any rule ids set via
 * {@link #setDisabledRuleIds(List)} are disabled before checking.
 */
public class WikipediaQuickCheck {
    /** Regular page URL; group 1 is the two-character language code, group 2 the page title. */
    private static final Pattern WIKIPEDIA_URL_REGEX = Pattern.compile("https?://(..)\\.wikipedia\\.org/wiki/(.*)");
    /** Legacy secure.wikimedia.org URL; same groups as {@link #WIKIPEDIA_URL_REGEX}. */
    private static final Pattern SECURE_WIKIPEDIA_URL_REGEX = Pattern.compile("https://secure\\.wikimedia\\.org/wikipedia/(..)/wiki/(.*)");
    /** Matches links of the form {@code [[xx:...]]} with a 2-6 letter lowercase prefix. */
    private static final Pattern INTER_LANGUAGE_LINK_REGEX = Pattern.compile("\\[\\[[a-z]{2,6}:.*?\\]\\]");
    /** Context size handed to {@link ContextTools} when printing matches in {@link #main(String[])}. */
    private static final int CONTEXT_SIZE = 25;

    private List<String> disabledRuleIds = new ArrayList<String>();

    /**
     * SAX handler that collects the character data found inside {@code <rev>}
     * elements together with the value of their {@code timestamp} attribute.
     */
    public class RevisionContentHandler extends DefaultHandler {
        private String timestamp;
        private final StringBuilder revisionText = new StringBuilder();
        private boolean inRevision = false;

        RevisionContentHandler() {
        }

        /** Starts collecting text when a {@code rev} element opens and remembers its timestamp. */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if ("rev".equals(qName)) {
                this.timestamp = attributes.getValue("timestamp");
                this.inRevision = true;
            }
        }

        /** Stops collecting text when a {@code rev} element closes. */
        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("rev".equals(qName)) {
                this.inRevision = false;
            }
        }

        /** Appends character data to the revision text, but only while inside a {@code rev} element. */
        @Override
        public void characters(char[] chars, int start, int length) {
            if (this.inRevision) {
                // Append the range directly; no intermediate String is needed.
                this.revisionText.append(chars, start, length);
            }
        }

        /** @return all text collected from {@code rev} elements so far */
        public String getRevisionContent() {
            return this.revisionText.toString();
        }

        /** @return the {@code timestamp} attribute of the last {@code rev} element seen, or {@code null} if none was seen */
        public String getTimestamp() {
            return this.timestamp;
        }
    }

    /**
     * Fetches the MediaWiki API response (XML) containing the content and
     * timestamp of the page's revision.
     *
     * @param url a Wikipedia page URL accepted by {@link #validateWikipediaUrl(URL)}
     * @return the raw XML returned by the API
     * @throws IOException if the API request fails
     * @throws RuntimeException if {@code url} is not a recognized Wikipedia URL
     */
    public String getMediaWikiContent(URL url) throws IOException {
        // HTTPS is required: Wikipedia redirects plain HTTP to HTTPS and
        // HttpURLConnection does not follow redirects that switch protocol.
        return getContent(new URL("https://" + getLanguage(url).getShortName() + ".wikipedia.org/w/api.php?titles=" + getPageTitle(url) + "&action=query&prop=revisions&rvprop=content|timestamp&format=xml"));
    }

    /**
     * @param url a Wikipedia page URL
     * @return the language looked up from the language code in the URL
     * @throws RuntimeException if {@code url} is not a recognized Wikipedia URL
     */
    public Language getLanguage(URL url) {
        return Language.getLanguageForShortName(getUrlMatcher(url.toString()).group(1));
    }

    /**
     * @param url a Wikipedia page URL
     * @return the part of the URL following {@code /wiki/}, exactly as it appears in the URL
     * @throws RuntimeException if {@code url} is not a recognized Wikipedia URL
     */
    public String getPageTitle(URL url) {
        return getUrlMatcher(url.toString()).group(2);
    }

    /**
     * Returns a matcher that has already matched the given URL against one of
     * the supported Wikipedia URL patterns.
     *
     * @throws RuntimeException if neither pattern matches
     */
    private Matcher getUrlMatcher(String urlString) {
        Matcher regularMatcher = WIKIPEDIA_URL_REGEX.matcher(urlString);
        if (regularMatcher.matches()) {
            return regularMatcher;
        }
        Matcher secureMatcher = SECURE_WIKIPEDIA_URL_REGEX.matcher(urlString);
        if (secureMatcher.matches()) {
            return secureMatcher;
        }
        throw new RuntimeException("URL does not seem to be a valid Wikipedia URL: " + urlString);
    }

    /**
     * Sets the ids of rules to disable for subsequent checks. The list is stored
     * by reference; passing {@code null} leads to a {@link NullPointerException}
     * on the next check.
     */
    public void setDisabledRuleIds(List<String> list) {
        this.disabledRuleIds = list;
    }

    /** @return the list of disabled rule ids currently in use (not a copy) */
    public List<String> getDisabledRuleIds() {
        return this.disabledRuleIds;
    }

    /**
     * Downloads the page behind the given URL and checks it, mapping matches
     * back to the original wiki markup.
     *
     * @param url a Wikipedia page URL
     * @return the check result including applied suggestions
     * @throws IOException if downloading the page or creating the checker fails
     * @throws RuntimeException if {@code url} is not a recognized Wikipedia URL
     */
    public MarkupAwareWikipediaResult checkPage(URL url) throws IOException {
        validateWikipediaUrl(url);
        String xml = getMediaWikiContent(url);
        return checkWikipediaMarkup(url, getRevisionContent(xml), getLanguage(url));
    }

    /**
     * Checks the plain text extracted from the given wiki content and tries to
     * apply each match's suggestions to the original markup. Matches whose
     * suggestions cannot be applied are reported on {@code System.err} and
     * counted instead of aborting the check.
     *
     * @param url only used in error messages
     */
    MarkupAwareWikipediaResult checkWikipediaMarkup(URL url, MediaWikiContent mediaWikiContent, Language language) throws IOException {
        PlainTextMapping mapping = new SwebleWikipediaTextFilter().filter(mediaWikiContent.getContent());
        JLanguageTool languageTool = getLanguageTool(language);
        List<AppliedRuleMatch> appliedMatches = new ArrayList<AppliedRuleMatch>();
        int internalErrors = 0;
        for (RuleMatch ruleMatch : languageTool.check(mapping.getPlainText())) {
            try {
                SuggestionReplacer replacer = new SuggestionReplacer(mapping, mediaWikiContent.getContent());
                appliedMatches.add(new AppliedRuleMatch(ruleMatch, replacer.applySuggestionsToOriginalText(ruleMatch)));
            } catch (Exception e) {
                // Best effort: one failing match must not discard the others.
                System.err.println("Failed to apply suggestion for rule match '" + ruleMatch + "' for URL " + url + ": " + e.toString());
                internalErrors++;
            }
        }
        return new MarkupAwareWikipediaResult(mediaWikiContent, appliedMatches, internalErrors);
    }

    /**
     * Checks already extracted plain text.
     *
     * @param str the plain text to check
     * @param language the language whose rules are used
     * @return the text, its rule matches and the language's short name
     * @throws IOException if creating the checker or checking fails
     */
    public WikipediaQuickCheckResult checkPage(String str, Language language) throws IOException {
        return new WikipediaQuickCheckResult(str, getLanguageTool(language).check(str), language.getShortName());
    }

    /**
     * @throws RuntimeException if {@code url} matches none of the supported Wikipedia URL patterns
     */
    public void validateWikipediaUrl(URL url) {
        getUrlMatcher(url.toString());
    }

    /**
     * Extracts plain text from a MediaWiki API XML response, removing
     * inter-language links before the markup is filtered.
     *
     * @param str the XML as returned by {@link #getMediaWikiContent(URL)}
     */
    public String getPlainText(String str) {
        String wikiMarkup = removeInterLanguageLinks(getRevisionContent(str).getContent());
        return new SwebleWikipediaTextFilter().filter(wikiMarkup).getPlainText();
    }

    /**
     * Like {@link #getPlainText(String)} but returns the full mapping produced by
     * the filter; inter-language links are not removed here.
     *
     * @param str the XML as returned by {@link #getMediaWikiContent(URL)}
     */
    public PlainTextMapping getPlainTextMapping(String str) {
        return new SwebleWikipediaTextFilter().filter(getRevisionContent(str).getContent());
    }

    /** Removes every {@code [[xx:...]]} link whose prefix consists of 2-6 lowercase ASCII letters. */
    String removeInterLanguageLinks(String str) {
        return INTER_LANGUAGE_LINK_REGEX.matcher(str).replaceAll("");
    }

    /**
     * Parses a MediaWiki API XML response and returns the revision text and timestamp.
     *
     * @throws RuntimeException if the parser cannot be configured or the XML cannot be parsed
     */
    private MediaWikiContent getRevisionContent(String xml) {
        RevisionContentHandler handler = new RevisionContentHandler();
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            // The XML comes from the network: do not resolve external entities (XXE).
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            factory.newSAXParser().parse(new InputSource(new StringReader(xml)), handler);
            return new MediaWikiContent(handler.getRevisionContent(), handler.getTimestamp());
        } catch (Exception e) {
            throw new RuntimeException("Could not parse XML: " + xml, e);
        }
    }

    /**
     * Creates a checker for the given language with default pattern rules
     * activated and with the configured rule ids and all spelling rules disabled.
     */
    private JLanguageTool getLanguageTool(Language language) throws IOException {
        MultiThreadedJLanguageTool languageTool = new MultiThreadedJLanguageTool(language);
        languageTool.activateDefaultPatternRules();
        for (String ruleId : this.disabledRuleIds) {
            languageTool.disableRule(ruleId);
        }
        disableSpellingRules(languageTool);
        return languageTool;
    }

    /** Disables every currently active rule that reports itself as a spelling rule. */
    private void disableSpellingRules(JLanguageTool jLanguageTool) {
        for (Rule rule : jLanguageTool.getAllActiveRules()) {
            if (rule.isSpellingRule()) {
                jLanguageTool.disableRule(rule.getId());
            }
        }
    }

    /**
     * Reads the resource behind the given URL as UTF-8 text.
     *
     * @throws IOException if the resource cannot be opened or read
     */
    private String getContent(URL url) throws IOException {
        // openStream() always yields an InputStream, whereas getContent() may
        // return another type depending on the response's content handler.
        InputStream stream = url.openStream();
        try {
            return StringTools.streamToString(stream, "UTF-8");
        } finally {
            // Closing an already closed stream has no effect, so this is safe
            // whether or not streamToString closes it.
            stream.close();
        }
    }

    /**
     * Command line entry point: checks the Wikipedia page given as the single
     * argument and prints each match's message followed by its context.
     *
     * @param strArr exactly one element, the Wikipedia page URL
     * @throws IOException if downloading or checking the page fails
     */
    public static void main(String[] strArr) throws IOException {
        if (strArr.length != 1) {
            System.out.println("Usage: " + WikipediaQuickCheck.class.getName() + " <url>");
            System.exit(1);
        }
        WikipediaQuickCheck check = new WikipediaQuickCheck();
        URL url = new URL(strArr[0]);
        String plainText = check.getPlainText(check.getMediaWikiContent(url));
        // Use the language encoded in the URL rather than a fixed one, so that
        // non-German pages are checked with the matching rules.
        WikipediaQuickCheckResult result = check.checkPage(plainText, check.getLanguage(url));
        ContextTools contextTools = new ContextTools();
        contextTools.setContextSize(CONTEXT_SIZE);
        for (RuleMatch ruleMatch : result.getRuleMatches()) {
            System.out.println(ruleMatch.getMessage());
            System.out.println(contextTools.getPlainTextContext(ruleMatch.getFromPos(), ruleMatch.getToPos(), result.getText()));
        }
    }
}
