package org.fbk.cit.hlt.thewikimachine.xmldump;

import java.io.File;
import java.io.IOException;
import java.util.Locale;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.fbk.cit.hlt.thewikimachine.ExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.util.GenericFileUtils;
import org.fbk.cit.hlt.thewikimachine.wikipedia.StatisticsIndexer;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/xmldump/WikipediaCategoryContentExtractor.class */
/**
 * Extractor that stores the raw XML of every category page of a Wikipedia dump in a Lucene index.
 *
 * <p>Each category page becomes one Lucene document with two stored fields: {@code page}
 * (the lower-cased second callback argument, indexed but not analyzed) and {@code xml}
 * (the bytes of the first callback argument). All other page kinds are ignored.
 *
 * <p>Writes to the shared {@link IndexWriter} are serialized on {@code this}.
 */
public class WikipediaCategoryContentExtractor extends AbstractWikipediaExtractor implements WikipediaExtractor {
    static Logger logger = Logger.getLogger(WikipediaCategoryContentExtractor.class.getName());

    /** Index receiving one document per category page; stays {@code null} if {@link #start} could not open it. */
    private IndexWriter categoryWriter;

    /**
     * Creates the extractor; all arguments are forwarded unchanged to the superclass constructor
     * together with the fixed {@code "configuration/"} path.
     *
     * @param i      first numeric argument ({@code main} passes the thread count here)
     * @param i2     second numeric argument ({@code main} passes {@code Integer.MAX_VALUE})
     * @param locale locale handed to the superclass
     */
    public WikipediaCategoryContentExtractor(int i, int i2, Locale locale) {
        super(i, i2, locale, "configuration/");
    }

    /**
     * Opens the category index and then starts processing the XML dump.
     *
     * <p>If the index cannot be opened the error is logged and processing is NOT started:
     * without a writer every {@link #categoryPage} call would fail with a
     * {@code NullPointerException}.
     *
     * @param extractorParameters supplies the index folder and the dump file name
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void start(ExtractorParameters extractorParameters) {
        try {
            String indexFolder = GenericFileUtils.checkWriteableFolder(extractorParameters.getWikipediaPageCategoryXmlIndex(), true);
            this.categoryWriter = new IndexWriter(FSDirectory.open(new File(indexFolder)), new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
        } catch (IOException e) {
            logger.error("Unable to open the category index, extraction aborted", e);
            return;
        }
        startProcess(extractorParameters.getWikipediaXmlFileName());
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void disambiguationPage(String str, String str2, int i) {
    }

    /**
     * Adds one document for a category page to the index.
     *
     * @param str  stored as bytes in the {@code xml} field
     * @param str2 lower-cased and stored in the {@code page} field
     * @param i    not used by this extractor
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void categoryPage(String str, String str2, int i) {
        try {
            Document document = new Document();
            // NOTE(review): toLowerCase() and getBytes() depend on the platform default locale/charset.
            // Left as-is because existing index readers presumably apply the same defaults; confirm
            // before switching to an explicit Locale / UTF-8.
            document.add(new Field("page", str2.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("xml", str.getBytes(), Field.Store.YES));
            synchronized (this) {
                this.categoryWriter.addDocument(document);
            }
        } catch (IOException e) {
            logger.error("Unable to index category page " + str2, e);
        }
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void redirectPage(String str, String str2, int i) {
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void portalPage(String str, String str2, int i) {
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void projectPage(String str, String str2, int i) {
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void filePage(String str, String str2, int i) {
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void templatePage(String str, String str2, int i) {
    }

    /** Ignored: only category pages are indexed. */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExtractor
    public void contentPage(String str, String str2, int i) {
    }

    /**
     * Runs the superclass end-of-process step, then optimizes and closes the category index.
     *
     * <p>The writer is closed even when optimization fails so the index is not left open.
     * Failures are logged, not propagated.
     */
    @Override // org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaExtractor, org.fbk.cit.hlt.thewikimachine.xmldump.AbstractWikipediaXmlDumpParser
    public void endProcess() {
        super.endProcess();
        if (this.categoryWriter == null) {
            // start() never managed to open the index; nothing to finalize.
            return;
        }
        try {
            try {
                this.categoryWriter.optimize();
            } finally {
                this.categoryWriter.close();
            }
        } catch (Exception e) {
            logger.error("Unable to optimize/close the category index", e);
        }
    }

    /** Prints the command-line usage for this tool to standard output. */
    private static void printUsage(Options options) {
        new HelpFormatter().printHelp(400, "java -mx4g " + WikipediaCategoryContentExtractor.class.getName(), "\n", options, "\n", true);
    }

    /**
     * Command-line entry point.
     *
     * <p>Options: {@code --in-wiki} (required, input XML file), {@code --output}/{@code -o}
     * (required, output base folder), {@code --threads}/{@code -t} (optional, defaults to 1)
     * and {@code --help}/{@code -h}. The log4j configuration is read from the file named by the
     * {@code log-config} system property, or {@code configuration/log-config.txt} if unset.
     *
     * @param strArr raw command-line arguments
     */
    public static void main(String[] strArr) {
        PosixParser posixParser = new PosixParser();
        Options options = new Options();
        OptionBuilder.withLongOpt("in-wiki");
        OptionBuilder.withDescription("Input XML file");
        OptionBuilder.isRequired();
        OptionBuilder.hasArg();
        OptionBuilder.withArgName("filename");
        // NOTE(review): the constants used for the short option name here and the arg name of
        // --threads look like decompiler substitutions for plain string literals; values kept as found.
        options.addOption(OptionBuilder.create(AbstractBottomUpParser.INCOMPLETE));
        OptionBuilder.withLongOpt("output");
        OptionBuilder.withDescription("Output base folder");
        OptionBuilder.isRequired();
        OptionBuilder.hasArg();
        OptionBuilder.withArgName("folder");
        options.addOption(OptionBuilder.create("o"));
        OptionBuilder.withLongOpt("threads");
        OptionBuilder.withDescription("Number of threads");
        OptionBuilder.hasArg();
        OptionBuilder.withArgName(StatisticsIndexer.TRAFFIC_FIELD_NAME);
        options.addOption(OptionBuilder.create("t"));
        options.addOption("h", "help", false, "Print this message");

        CommandLine commandLine;
        try {
            commandLine = posixParser.parse(options, strArr);
            if (commandLine.hasOption("help")) {
                // Reuse the error path below to print the usage; the empty message suppresses "ERR:".
                throw new ParseException("");
            }
        } catch (ParseException e) {
            System.out.println();
            if (e.getMessage().length() > 0) {
                System.out.println("ERR: " + e.getMessage());
                System.out.println();
            }
            printUsage(options);
            System.out.println();
            System.exit(0);
            return;
        }

        String optionValue = commandLine.getOptionValue("in-wiki");
        String optionValue2 = commandLine.getOptionValue("output");
        int threads = 1;
        if (commandLine.hasOption('t')) {
            String rawThreads = commandLine.getOptionValue('t');
            try {
                threads = Integer.parseInt(rawThreads);
            } catch (NumberFormatException e) {
                System.out.println();
                System.out.println("ERR: Invalid number of threads: " + rawThreads);
                System.out.println();
                printUsage(options);
                System.out.println();
                System.exit(1);
                return;
            }
        }

        String property = System.getProperty("log-config");
        if (property == null) {
            property = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(property);

        ExtractorParameters extractorParameters = new ExtractorParameters(optionValue, optionValue2, true);
        new WikipediaCategoryContentExtractor(threads, Integer.MAX_VALUE, extractorParameters.getLocale()).start(extractorParameters);
    }
}
