package org.fbk.cit.hlt.thewikimachine.xmldump;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Locale;
import java.util.regex.Matcher;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/WikipediaPageCategoryMainExtractor.class */
/**
 * Extractor that scans content pages for matches of {@code categoryMainPattern}
 * and writes one tab-separated line per match to the file returned by
 * {@link ExtractorParameters#getWikipediaPageCategoryMainFileName()}.
 *
 * <p>Each output line has the form {@code <str2> TAB <normalized match> LF}, where
 * {@code str2} is the second argument passed to {@link #contentPage(String, String, int)}
 * (presumably the page title -- confirm against the base parser).
 *
 * <p>All page callbacks other than {@code contentPage} are intentionally no-ops.
 */
public class WikipediaPageCategoryMainExtractor extends AbstractWikipediaExtractor implements WikipediaExtractor {
    static Logger logger = Logger.getLogger(WikipediaPageCategoryMainExtractor.class.getName());

    /** Output writer; stays {@code null} until {@link #start(ExtractorParameters)} opens it successfully. */
    private PrintWriter pageCategoryWriter;

    /**
     * Selects which capture group of {@code categoryMainPattern} is emitted:
     * group 2 when {@code true}, group 1 otherwise.
     */
    private final boolean delCatLabel;

    /**
     * Creates the extractor.
     *
     * @param i      forwarded unchanged to the superclass constructor
     *               (presumably the number of worker threads -- confirm)
     * @param i2     forwarded unchanged to the superclass constructor
     *               (presumably the maximum number of pages -- confirm)
     * @param locale forwarded unchanged to the superclass constructor
     * @param z      when {@code true}, capture group 2 of the pattern is written
     *               instead of group 1
     */
    public WikipediaPageCategoryMainExtractor(int i, int i2, Locale locale, boolean z) {
        super(i, i2, locale);
        this.delCatLabel = z;
    }

    /**
     * Opens the UTF-8 output file and then starts processing the XML dump.
     *
     * <p>If the output file cannot be opened the error is logged and processing is
     * NOT started: without a writer every subsequent callback would fail with a
     * {@link NullPointerException}.
     *
     * @param extractorParameters supplies the output file name and the XML dump file name
     */
    @Override
    public void start(ExtractorParameters extractorParameters) {
        String outputFileName = extractorParameters.getWikipediaPageCategoryMainFileName();
        try {
            this.pageCategoryWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFileName), "UTF-8")));
        } catch (IOException e) {
            logger.error("Cannot open output file " + outputFileName + "; extraction aborted", e);
            return;
        }
        startProcess(extractorParameters.getWikipediaXmlFileName());
    }

    /** No-op: this extractor ignores file pages. */
    @Override
    public void filePage(String str, String str2, int i) {
    }

    /** No-op: this extractor ignores disambiguation pages. */
    @Override
    public void disambiguationPage(String str, String str2, int i) {
    }

    /** No-op: this extractor ignores category pages. */
    @Override
    public void categoryPage(String str, String str2, int i) {
    }

    /** No-op: this extractor ignores template pages. */
    @Override
    public void templatePage(String str, String str2, int i) {
    }

    /** No-op: this extractor ignores redirect pages. */
    @Override
    public void redirectPage(String str, String str2, int i) {
    }

    /** No-op: this extractor ignores portal pages. */
    @Override
    public void portalPage(String str, String str2, int i) {
    }

    /** No-op: this extractor ignores project pages. */
    @Override
    public void projectPage(String str, String str2, int i) {
    }

    /**
     * Emits one output line for every match of {@code categoryMainPattern} in {@code str}.
     *
     * @param str  text that is searched with {@code categoryMainPattern}
     * @param str2 value written as the first column of every emitted line
     * @param i    unused by this extractor
     */
    @Override
    public void contentPage(String str, String str2, int i) {
        Matcher matcher = this.categoryMainPattern.matcher(str);
        StringBuilder sb = new StringBuilder();
        int group = this.delCatLabel ? 2 : 1;
        while (matcher.find()) {
            // group() yields null when the group did not take part in the match;
            // substring(start, end) would throw in that case because start is -1.
            String category = matcher.group(group);
            if (category == null) {
                continue;
            }
            sb.append(str2);
            sb.append('\t');
            sb.append(normalizePageName(category.replace(' ', '_')));
            sb.append('\n');
        }
        if (sb.length() == 0) {
            // Nothing to write: do not take the lock for pages without matches.
            return;
        }
        // The lines of one page are written as a single unit under the instance lock.
        synchronized (this) {
            this.pageCategoryWriter.print(sb);
        }
    }

    /**
     * Runs the superclass end-of-processing hook, then flushes and closes the output
     * writer if it was opened. Write failures, which {@link PrintWriter} records
     * instead of throwing, are reported through the logger.
     */
    @Override
    public void endProcess() {
        super.endProcess();
        if (this.pageCategoryWriter == null) {
            return;
        }
        // checkError() flushes the stream and reports whether any earlier write failed.
        if (this.pageCategoryWriter.checkError()) {
            logger.error("An I/O error occurred while writing the page/category output file");
        }
        this.pageCategoryWriter.close();
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects two arguments, which are forwarded in order to the
     * {@link ExtractorParameters} constructor. The log4j configuration file is taken
     * from the {@code log-config} system property, defaulting to
     * {@code configuration/log-config.txt}.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        if (strArr == null || strArr.length < 2) {
            System.err.println("Usage: java " + WikipediaPageCategoryMainExtractor.class.getName()
                    + " <arg1> <arg2>  (both are forwarded to ExtractorParameters)");
            System.exit(1);
            return;
        }
        String str = strArr[0];
        String str2 = strArr[1];
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(property);
        ExtractorParameters extractorParameters = new ExtractorParameters(str, str2, true);
        new WikipediaPageCategoryMainExtractor(12, Integer.MAX_VALUE, extractorParameters.getLocale(), true).start(extractorParameters);
    }
}
