package org.languagetool.dev.wikipedia;

import java.util.Date;
import java.util.List;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.rules.RuleMatch;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/languagetool/dev/wikipedia/BaseWikipediaDumpHandler.class */
/**
 * SAX handler that walks a Wikipedia XML dump, collects the content of each
 * {@code <title>} and {@code <text>} element, runs the filtered article text
 * through a {@link JLanguageTool} instance and passes the resulting rule
 * matches to {@link #handleResult(String, List, String, Language)}.
 *
 * <p>Instances keep mutable per-article state (current title, text and parse
 * location) and contain no synchronization.
 */
abstract class BaseWikipediaDumpHandler extends DefaultHandler {
    // None of the constants below are read in this class; presumably they are
    // shared with subclasses that format their output - verify against subclasses.
    protected static final int CONTEXT_SIZE = 50;
    protected static final String MARKER_START = "<err>";
    protected static final String MARKER_END = "</err>";
    protected static final String LANG_MARKER = "XX";
    // NOTE(review): contains the same "XX" token as LANG_MARKER; presumably
    // subclasses substitute the language code for it - confirm.
    protected static final String URL_PREFIX = "http://XX.wikipedia.org/wiki/";

    protected Date dumpDate;
    protected String langCode;

    private final JLanguageTool languageTool;
    private final TextMapFilter textFilter;

    /** Which element's character data is currently being collected. */
    private Location location = Location.OTHER;

    // maxErrors and errorCount are only stored here, never evaluated;
    // presumably subclasses enforce the error limit - verify against subclasses.
    protected int maxErrors = 0;
    protected int errorCount = 0;

    private int ruleMatchCount = 0;
    /** Article limit; values {@code <= 0} disable the limit check. */
    private int maxArticles = 0;
    private int articleCount = 0;
    private StringBuilder title = new StringBuilder();
    private StringBuilder text = new StringBuilder();

    /** Position of the parser relative to the elements this handler collects. */
    private enum Location {
        TITLE,
        TEXT,
        OTHER
    }

    /**
     * Creates a handler.
     *
     * @param jLanguageTool checker used for every article
     * @param date stored in {@link #dumpDate}
     * @param str stored in {@link #langCode}
     * @param language used to select the text filter via {@code TextFilterTools.getTextFilter}
     */
    public BaseWikipediaDumpHandler(JLanguageTool jLanguageTool, Date date, String str, Language language) {
        this.languageTool = jLanguageTool;
        this.dumpDate = date;
        this.langCode = str;
        this.textFilter = TextFilterTools.getTextFilter(language);
    }

    /**
     * Sets the article limit.
     *
     * @param i maximum number of articles to check; {@code 0} or less means no limit
     */
    public void setMaximumArticles(int i) {
        this.maxArticles = i;
    }

    /**
     * Stores the error limit in {@link #maxErrors}; this class does not evaluate it.
     *
     * @param i the error limit
     */
    public void setMaximumErrors(int i) {
        this.maxErrors = i;
    }

    /**
     * Returns the number of non-redirect articles seen so far.
     *
     * @return the article counter
     */
    public int getArticleCount() {
        return this.articleCount;
    }

    /**
     * Returns the total number of rule matches over all checked articles.
     *
     * @return the accumulated match count
     */
    public int getRuleMatchCount() {
        return this.ruleMatchCount;
    }

    /**
     * Starts collecting character data when a {@code title} or {@code text}
     * element opens; all other elements leave the state untouched.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if ("title".equals(qName)) {
            this.title = new StringBuilder();
            this.location = Location.TITLE;
        } else if ("text".equals(qName)) {
            this.text = new StringBuilder();
            this.location = Location.TEXT;
        }
    }

    /**
     * Stops collecting on {@code </title>}; on {@code </text>} checks the
     * collected article. Limit exceptions propagate to the caller, any other
     * failure is reported on stderr and the document is skipped.
     */
    @Override
    public void endElement(String uri, String localName, String qName) {
        if ("title".equals(qName)) {
            this.location = Location.OTHER;
            return;
        }
        if ("text".equals(qName)) {
            try {
                handleEndText();
            } catch (ArticleLimitReachedException | ErrorLimitReachedException e) {
                // Limits are meant to abort the whole run, so they must not be swallowed below.
                throw e;
            } catch (Exception e) {
                // Deliberate best effort: one broken article must not stop the dump.
                System.err.println("Error checking text of '" + this.title + "', ignoring document. Stacktrace:");
                e.printStackTrace();
            }
            this.text = new StringBuilder();
            this.location = Location.OTHER;
        }
    }

    /**
     * Filters the collected wiki text, skips redirects, enforces the article
     * limit, runs the check and hands the matches to {@link #handleResult}.
     *
     * @throws ArticleLimitReachedException if the article limit is exceeded
     * @throws ErrorLimitReachedException if thrown while checking or handling the result
     * @throws RuntimeException wrapping any other failure of the check or of {@code handleResult}
     */
    private void handleEndText() {
        String plainText = this.textFilter.filter(this.text.toString()).getPlainText();
        if (plainText.contains("#REDIRECT")) {
            return;
        }
        // NOTE(review): the counter is incremented before the limit check, so once the
        // limit exception has been thrown getArticleCount() returns maxArticles + 1.
        this.articleCount++;
        if (this.maxArticles > 0 && this.articleCount > this.maxArticles) {
            throw new ArticleLimitReachedException(this.maxArticles);
        }
        try {
            List<RuleMatch> matches = this.languageTool.check(plainText);
            this.ruleMatchCount += matches.size();
            System.out.println("Checking article " + this.articleCount + " (" + (plainText.length() / 1024) + "KB, '" + this.title + "'), found " + matches.size() + " matches");
            handleResult(this.title.toString(), matches, plainText, this.languageTool.getLanguage());
        } catch (ErrorLimitReachedException e) {
            throw e;
        } catch (Exception e) {
            throw new RuntimeException("Error checking '" + this.title + "' (" + this.articleCount + ")", e);
        }
    }

    /**
     * Appends the character chunk to the title or text buffer, depending on the
     * current location; chunks outside those elements are dropped.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        // Append the array range directly instead of building a temporary String per chunk.
        if (this.location == Location.TITLE) {
            this.title.append(ch, start, length);
        } else if (this.location == Location.TEXT) {
            this.text.append(ch, start, length);
        }
    }

    /**
     * Called once per checked article with the check result.
     *
     * @param title the article title
     * @param ruleMatches the matches returned by the checker for {@code text}
     * @param text the filtered plain text that was checked
     * @param language the language reported by the checker
     * @throws Exception on any failure; an {@code ErrorLimitReachedException} propagates
     *     unchanged, every other exception is wrapped in a {@code RuntimeException} by the caller
     */
    protected abstract void handleResult(String title, List<RuleMatch> ruleMatches, String text, Language language) throws Exception;

    /** Implemented by subclasses; never called from within this class. */
    public abstract void close();
}
