package org.fbk.cit.hlt.thewikimachine.xmldump;

import de.tudarmstadt.ukp.wikipedia.parser.Section;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.WikiMarkupParser;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/WikipediaSectionTitleExtractor.class */
/**
 * Extractor that writes the section titles of content pages to a tab-separated file.
 *
 * <p>For every content page, each non-blank section title that does not match the optional
 * {@code SECTION_TITLE_SKIP_PATTERN} resource is written as one line of the form
 * {@code <str2>\t<section title>\n}, where {@code str2} is the second argument passed to
 * {@link #contentPage(String, String, int)} (presumably the page title; confirm against the
 * dump parser). All other page kinds are ignored.
 *
 * <p>Writes to the output file are serialized on this instance, so
 * {@link #contentPage(String, String, int)} may be invoked from several threads.
 */
public class WikipediaSectionTitleExtractor extends AbstractWikipediaExtractor implements WikipediaExtractor {
    static Logger logger = Logger.getLogger(WikipediaSectionTitleExtractor.class.getName());

    /** Destination of the extracted lines; stays {@code null} if {@link #start} could not open the file. */
    private PrintWriter sectionTitleWriter;

    /** Titles matching this pattern are dropped; {@code null} means no title is filtered by pattern. */
    private Pattern sectionTitleSkipPattern;

    /**
     * Creates the extractor; all arguments are forwarded unchanged to the superclass constructor.
     *
     * @param i first superclass argument (meaning defined by {@code AbstractWikipediaExtractor})
     * @param i2 second superclass argument (meaning defined by {@code AbstractWikipediaExtractor})
     * @param locale locale forwarded to the superclass
     */
    public WikipediaSectionTitleExtractor(int i, int i2, Locale locale) {
        super(i, i2, locale);
    }

    /**
     * Opens the UTF-8 output file, compiles the optional skip pattern and starts processing the
     * XML dump.
     *
     * <p>If the output file cannot be opened the error is logged and processing is not started,
     * because every later write would otherwise fail on a missing writer.
     *
     * @param extractorParameters supplies the output file name and the XML dump file name
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void start(ExtractorParameters extractorParameters) {
        String outputFileName = extractorParameters.getWikipediaSectionTitleFilePrefixName();
        try {
            this.sectionTitleWriter = new PrintWriter(new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(outputFileName), "UTF-8")));
        } catch (IOException e) {
            logger.error("Cannot open section title output file " + outputFileName, e);
            // Without a writer there is nothing useful this extractor can do.
            return;
        }

        String skipRegex = this.resources.getString("SECTION_TITLE_SKIP_PATTERN");
        if (skipRegex != null) {
            this.sectionTitleSkipPattern = Pattern.compile(skipRegex, Pattern.CASE_INSENSITIVE);
        }
        startProcess(extractorParameters.getWikipediaXmlFileName());
    }

    /** File pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void filePage(String str, String str2, int i) {
    }

    /** Category pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void categoryPage(String str, String str2, int i) {
    }

    /**
     * Extracts the section titles of a content page and appends them to the output file.
     *
     * @param str wiki markup of the page (it is handed to the markup parser)
     * @param str2 value written in the first column of every output line
     * @param i not used by this extractor
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void contentPage(String str, String str2, int i) {
        try {
            // Build the text outside the lock so that only the actual write is serialized.
            String lines = sectionTitle(str, str2);
            synchronized (this) {
                this.sectionTitleWriter.print(lines);
            }
        } catch (IOException e) {
            logger.error("Cannot extract section titles of page " + str2, e);
        }
    }

    /**
     * Builds the output lines for one page.
     *
     * @param str wiki markup to parse
     * @param str2 value placed before the tab on every line
     * @return zero or more lines of the form {@code str2 + '\t' + title + '\n'}, one per section
     *     whose title is non-blank and not matched by the skip pattern; empty if there is none
     * @throws IOException declared for callers; not thrown by the code in this method itself
     */
    public String sectionTitle(String str, String str2) throws IOException {
        logger.debug(str2);
        StringBuilder lines = new StringBuilder();
        for (Section section : WikiMarkupParser.getInstance().parsePage(str).getSections()) {
            String title = section.getTitle();
            if (isWantedTitle(title)) {
                lines.append(str2).append('\t').append(title).append('\n');
            }
        }
        return lines.toString();
    }

    /** Returns true if the title is non-null, non-blank and not matched by the skip pattern. */
    private boolean isWantedTitle(String title) {
        if (title == null || title.trim().isEmpty()) {
            return false;
        }
        return this.sectionTitleSkipPattern == null || !this.sectionTitleSkipPattern.matcher(title).find();
    }

    /** Disambiguation pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void disambiguationPage(String str, String str2, int i) {
    }

    /** Template pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void templatePage(String str, String str2, int i) {
    }

    /** Redirect pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void redirectPage(String str, String str2, int i) {
    }

    /** Portal pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void portalPage(String str, String str2, int i) {
    }

    /** Project pages are ignored by this extractor. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void projectPage(String str, String str2, int i) {
    }

    /**
     * Runs the superclass end-of-process handling, then flushes and closes the output file.
     *
     * <p>{@link PrintWriter} never throws on write failures, so its error flag is checked here
     * and a failure is logged instead of being lost silently.
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaExtractor, org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaXmlDumpParser
    public void endProcess() {
        super.endProcess();
        PrintWriter writer = this.sectionTitleWriter;
        if (writer == null) {
            // The output file was never opened; nothing to flush or close.
            return;
        }
        // checkError() flushes the stream before reporting its error state.
        if (writer.checkError()) {
            logger.error("An I/O error occurred while writing the section title output file");
        }
        writer.close();
    }
}
