package org.fbk.cit.hlt.thewikimachine.xmldump;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/WikipediaCrossLanguageLinkExtractor.class */
/**
 * Extractor that collects cross-language links from content pages.
 *
 * <p>For every content page, the text is scanned for links of the form {@code [[xx:Target]]}
 * (two word characters, a colon, then anything up to the closing brackets). One line per page is
 * written to the output file: the value passed as the second argument of {@link #contentPage},
 * followed by each matched link separated by tab characters. Spaces inside a link are replaced by
 * underscores. The output file is written as UTF-8.
 *
 * <p>All other page callbacks are intentionally no-ops.
 */
public class WikipediaCrossLanguageLinkExtractor extends AbstractWikipediaExtractor implements WikipediaExtractor {
    static Logger logger = Logger.getLogger(WikipediaCrossLanguageLinkExtractor.class.getName());

    /** Destination of the extracted links; {@code null} until {@link #start} has opened it. */
    private PrintWriter crossLanguageWriter;

    /** Matches {@code [[xx:...]]}; group 1 is the text between the double brackets. */
    protected Pattern crossLanguagePattern;

    /**
     * Creates the extractor and compiles the cross-language link pattern.
     *
     * @param i      forwarded unchanged to the superclass constructor
     * @param i2     forwarded unchanged to the superclass constructor
     * @param locale forwarded unchanged to the superclass constructor
     */
    public WikipediaCrossLanguageLinkExtractor(int i, int i2, Locale locale) {
        super(i, i2, locale);
        this.crossLanguagePattern = Pattern.compile("\\[\\[(\\w\\w:[^\\]]+)\\]\\]");
        logger.info("crossLanguagePattern: " + this.crossLanguagePattern);
    }

    /**
     * Opens the output file named by the parameters and then starts processing the XML dump.
     *
     * <p>If the output file cannot be opened the error is logged (with its stack trace) and
     * processing is not started: running without a writer could only fail with a
     * {@link NullPointerException} on the first content page.
     *
     * @param extractorParameters supplies the output file name and the XML dump file name
     */
    @Override
    public void start(ExtractorParameters extractorParameters) {
        try {
            this.crossLanguageWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(extractorParameters.getWikipediaCrossLanguageLinkFileName()), "UTF-8")));
        } catch (IOException e) {
            logger.error("Cannot open cross-language link output file "
                    + extractorParameters.getWikipediaCrossLanguageLinkFileName(), e);
            return;
        }
        startProcess(extractorParameters.getWikipediaXmlFileName());
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void filePage(String str, String str2, int i) {
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void disambiguationPage(String str, String str2, int i) {
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void categoryPage(String str, String str2, int i) {
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void templatePage(String str, String str2, int i) {
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void redirectPage(String str, String str2, int i) {
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void portalPage(String str, String str2, int i) {
    }

    /** Ignored: this extractor only handles content pages. */
    @Override
    public void projectPage(String str, String str2, int i) {
    }

    /**
     * Writes one output line for a content page: {@code str2}, then every cross-language link found
     * in {@code str}, tab-separated. A line is written even when no link is found.
     *
     * @param str  text searched for cross-language links (presumably the page's wiki text; confirm
     *             against the caller)
     * @param str2 value written as the first column (presumably the page title; confirm against
     *             the caller)
     * @param i    unused here
     */
    @Override
    public void contentPage(String str, String str2, int i) {
        StringBuilder line = new StringBuilder();
        Matcher matcher = this.crossLanguagePattern.matcher(str);
        line.append(str2);
        while (matcher.find()) {
            // Group 1 is the "xx:Target" part without the surrounding brackets.
            line.append('\t').append(matcher.group(1).replace(' ', '_'));
        }
        // Writes are serialized on this instance so that each page's line is emitted as a whole.
        synchronized (this) {
            this.crossLanguageWriter.println(line.toString());
        }
    }

    /**
     * Finishes processing and closes the output file.
     *
     * <p>The writer is closed even if the superclass hook throws, and any I/O error that
     * {@link PrintWriter} silently recorded while writing is reported in the log.
     */
    @Override
    public void endProcess() {
        try {
            super.endProcess();
        } finally {
            PrintWriter writer = this.crossLanguageWriter;
            if (writer != null) {
                // close() flushes buffered output before releasing the file.
                writer.close();
                // PrintWriter never throws on write failures; this is the only way to notice them.
                if (writer.checkError()) {
                    logger.error("I/O error while writing cross-language links; the output file may be incomplete");
                }
            }
        }
    }
}
