package org.fbk.cit.hlt.thewikimachine.xmldump;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.SortedMap;
import org.apache.log4j.Logger;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.index.FirstNameIndexer;
import org.fbk.cit.hlt.thewikimachine.util.FreqSet;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.WikiTemplate;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.WikiTemplateParser;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/WikipediaTemplateExtractor.class */
/**
 * Extractor that records which templates are used by the pages of a Wikipedia XML dump.
 *
 * <p>Five UTF-8 text files are produced; their locations are looked up in
 * {@link ExtractorParameters#getWikipediaTemplateFileNames()} under the keys
 * {@code FirstNameIndexer.NAME_FIELD_NAME}, {@code "freq"}, {@code "map"}, {@code "map-rep"}
 * and {@code "map-prop"}:
 * <ul>
 *   <li><b>name</b>: one line per template page, containing the value passed as the second
 *       argument of {@link #templatePage(String, String, int)};</li>
 *   <li><b>freq</b>: one {@code count<TAB>template} line per template, followed by a summary
 *       line;</li>
 *   <li><b>map</b>: one line per distinct template of a content page;</li>
 *   <li><b>map-rep</b>: one line per template occurrence of a content page;</li>
 *   <li><b>map-prop</b>: one line per distinct (template, part key) pair of a content page.</li>
 * </ul>
 *
 * <p>All accesses to the shared writers and to the frequency set from the page callbacks are
 * guarded by {@code synchronized (this)}; presumably the callbacks are invoked from several
 * worker threads by the superclass (not visible from this file).
 */
public class WikipediaTemplateExtractor extends AbstractWikipediaExtractor implements WikipediaExtractor {
    static Logger logger = Logger.getLogger(WikipediaTemplateExtractor.class.getName());
    private PrintWriter templateNameWriter;
    private PrintWriter templateFreqWriter;
    private PrintWriter templateMapWriter;
    private PrintWriter templateMapWriterWithRepetitions;
    private PrintWriter templateMapWriterProp;
    private final FreqSet templateFreqSet;

    /**
     * Creates the extractor.
     *
     * @param i      forwarded unchanged to the superclass constructor (meaning defined there)
     * @param i2     forwarded unchanged to the superclass constructor (meaning defined there)
     * @param locale forwarded unchanged to the superclass constructor
     */
    public WikipediaTemplateExtractor(int i, int i2, Locale locale) {
        super(i, i2, locale);
        this.templateFreqSet = new FreqSet();
    }

    /**
     * Opens the five output files and then starts processing the XML dump.
     *
     * <p>If any output file cannot be opened, the error is logged, the writers that were
     * already opened are closed, and processing is <em>not</em> started: continuing would
     * only fail later with a {@code NullPointerException} in the page callbacks.
     *
     * @param extractorParameters supplies the output file names and the XML dump file name
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void start(ExtractorParameters extractorParameters) {
        try {
            this.templateNameWriter = openWriter(extractorParameters, FirstNameIndexer.NAME_FIELD_NAME);
            this.templateFreqWriter = openWriter(extractorParameters, "freq");
            this.templateMapWriter = openWriter(extractorParameters, "map");
            this.templateMapWriterWithRepetitions = openWriter(extractorParameters, "map-rep");
            this.templateMapWriterProp = openWriter(extractorParameters, "map-prop");
        } catch (IOException e) {
            logger.error("Cannot open the template output files; extraction aborted", e);
            closeWriters();
            return;
        }
        startProcess(extractorParameters.getWikipediaXmlFileName());
    }

    /**
     * Opens a buffered UTF-8 writer on the output file registered under {@code key}.
     *
     * @param extractorParameters source of the output file names
     * @param key                 key of the file in {@code getWikipediaTemplateFileNames()}
     * @return a writer on that file
     * @throws IOException if the file cannot be opened for writing
     */
    private static PrintWriter openWriter(ExtractorParameters extractorParameters, String key) throws IOException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(extractorParameters.getWikipediaTemplateFileNames().get(key)), "UTF-8")));
    }

    /** Ignored: file pages are not relevant to template extraction. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void filePage(String str, String str2, int i) {
    }

    /** Ignored: disambiguation pages are not relevant to template extraction. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void disambiguationPage(String str, String str2, int i) {
    }

    /** Ignored: category pages are not relevant to template extraction. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void categoryPage(String str, String str2, int i) {
    }

    /** Ignored: redirect pages are not relevant to template extraction. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void redirectPage(String str, String str2, int i) {
    }

    /** Ignored: portal pages are not relevant to template extraction. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void portalPage(String str, String str2, int i) {
    }

    /** Ignored: project pages are not relevant to template extraction. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void projectPage(String str, String str2, int i) {
    }

    /**
     * Appends one line containing {@code str2} to the template-name file.
     *
     * @param str  unused here (presumably the page text; confirm against the interface)
     * @param str2 value written to the file (presumably the template page title)
     * @param i    unused here (presumably the page id)
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void templatePage(String str, String str2, int i) {
        synchronized (this) {
            this.templateNameWriter.println(str2);
        }
    }

    /**
     * Parses the templates found in {@code str} and appends the corresponding lines to the
     * {@code map}, {@code map-rep} and {@code map-prop} files.
     *
     * <p>Templates whose first part is {@code null}, empty or starts with {@code '#'} are
     * skipped. The template name is the trimmed first part, passed through
     * {@code normalizePageName} with spaces replaced by underscores. Tab-separated columns:
     * <ul>
     *   <li>{@code map}: {@code str2}, template, zero-based rank of the template among the
     *       distinct templates of this page, {@code i};</li>
     *   <li>{@code map-rep}: {@code str2}, template, parts count, newline count, key/value
     *       parts (as reported by the {@link WikiTemplate});</li>
     *   <li>{@code map-prop}: {@code str2}, template, part key.</li>
     * </ul>
     * Each distinct template of the page is also added once to the frequency set.
     *
     * @param str  text that is parsed for templates
     * @param str2 first column of every emitted line (presumably the page title)
     * @param i    last column of the {@code map} lines (presumably the page id)
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void contentPage(String str, String str2, int i) {
        List<WikiTemplate> templates = WikiTemplateParser.parse(str, false);
        Set<String> seenTemplates = new HashSet<String>();
        Set<String> seenTemplateKeys = new HashSet<String>();
        // Lines are buffered locally so that the shared writers are locked only once per page.
        StringBuilder mapLines = new StringBuilder();
        StringBuilder mapLinesWithRepetitions = new StringBuilder();
        StringBuilder propLines = new StringBuilder();
        int distinctCount = 0;
        for (WikiTemplate template : templates) {
            String firstPart = template.getFirstPart();
            if (firstPart == null || firstPart.length() == 0 || firstPart.startsWith("#")) {
                continue;
            }
            String templateName = normalizePageName(firstPart.trim()).replace(' ', '_');
            // Set.add returns true only the first time the template is met in this page.
            if (seenTemplates.add(templateName)) {
                mapLines.append(str2 + StringTable.HORIZONTAL_TABULATION + templateName + StringTable.HORIZONTAL_TABULATION + distinctCount + StringTable.HORIZONTAL_TABULATION + i).append("\n");
                synchronized (this) {
                    this.templateFreqSet.add(templateName);
                }
                distinctCount++;
            }
            mapLinesWithRepetitions.append(str2 + StringTable.HORIZONTAL_TABULATION + templateName + StringTable.HORIZONTAL_TABULATION + template.getPartsCount() + StringTable.HORIZONTAL_TABULATION + template.getNlCount() + StringTable.HORIZONTAL_TABULATION + template.getKeyValueParts()).append("\n");
            for (String partKey : template.getHashMapOfParts().keySet()) {
                if (seenTemplateKeys.add(templateName + ";" + partKey)) {
                    propLines.append(str2 + StringTable.HORIZONTAL_TABULATION + templateName + StringTable.HORIZONTAL_TABULATION + partKey).append("\n");
                }
            }
        }
        synchronized (this) {
            this.templateMapWriter.print(mapLines.toString());
            this.templateMapWriterWithRepetitions.print(mapLinesWithRepetitions.toString());
            this.templateMapWriterProp.print(propLines.toString());
        }
    }

    /**
     * Finishes the extraction: delegates to the superclass, writes the template frequency
     * table followed by a summary line, and closes all output files.
     *
     * <p>The writers are closed even if writing the frequency table fails.
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaExtractor, org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaXmlDumpParser
    public void endProcess() {
        super.endProcess();
        try {
            SortedMap<Integer, List<String>> templatesByFrequency = this.templateFreqSet.toSortedMap();
            int total = 0;
            for (java.util.Map.Entry<Integer, List<String>> entry : templatesByFrequency.entrySet()) {
                Integer frequency = entry.getKey();
                for (String templateName : entry.getValue()) {
                    this.templateFreqWriter.println(frequency + StringTable.HORIZONTAL_TABULATION + templateName);
                    total += frequency.intValue();
                }
            }
            // NOTE(review): total is the sum of the frequencies of all templates, which looks
            // like the number of (page, template) pairs rather than the number of pages; the
            // message text is kept unchanged because consumers may rely on it. TODO confirm.
            logger.info(total + " pages with at least one template");
            this.templateFreqWriter.println(total + " pages with at least one template");
        } finally {
            closeWriters();
        }
    }

    /** Closes every output writer that has been opened so far. */
    private void closeWriters() {
        closeWriter(this.templateNameWriter, "name");
        closeWriter(this.templateFreqWriter, "freq");
        closeWriter(this.templateMapWriter, "map");
        closeWriter(this.templateMapWriterWithRepetitions, "map-rep");
        closeWriter(this.templateMapWriterProp, "map-prop");
    }

    /**
     * Flushes and closes {@code writer}, logging an error if any write to it failed.
     *
     * @param writer writer to close; may be {@code null} if it was never opened
     * @param label  short name of the output file, used in the log message
     */
    private static void closeWriter(PrintWriter writer, String label) {
        if (writer == null) {
            return;
        }
        // PrintWriter never throws on write failures; checkError() flushes and reports them.
        if (writer.checkError()) {
            logger.error("I/O error while writing the template '" + label + "' output file");
        }
        writer.close();
    }
}
