package org.fbk.cit.hlt.thewikimachine.index;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.HashSet;
import java.util.Properties;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.math.Node;
import org.fbk.cit.hlt.core.util.HashIndexSet;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.wikipedia.WikipediaCategory;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/index/CategoryPageExtractor.class */
/**
 * Command-line tool that expands a category into its sub-categories (down to a
 * configurable depth) and enumerates the pages that belong to them.
 *
 * <p>Three indexes are opened by the constructor: a category/sub-category index,
 * a category/page index and a page/frequency index. The interactive mode reads
 * category keys from standard input and logs the matching pages whose frequency
 * lies in {@code [minFrequency, maxFrequency)}.
 */
public class CategoryPageExtractor {
    static Logger logger = Logger.getLogger(CategoryPageExtractor.class.getName());

    /** Lower frequency bound used when {@code --min-frequency} is not given. */
    public static final int DEFAULT_MIN_FREQUENCY = 0;

    /** Upper frequency bound used when {@code --max-frequency} is not given. */
    public static final int DEFAULT_MAX_FREQUENCY = Integer.MAX_VALUE;

    CategorySubCategorySearcher categorySubCategorySearcher;
    CategoryPageSearcher categoryPageSearcher;

    /**
     * NOTE(review): this field is never assigned anywhere in this class, so it is
     * {@code null} unless code elsewhere in the package sets it.
     */
    PageVectorSearcher pageVectorSearcher;
    PageFreqSearcher pageFreqSearcher;

    private int maxDepth;
    private int minFrequency;
    private int maxFrequency;

    DecimalFormat nf = new DecimalFormat("###,###,###");

    /**
     * Opens the three indexes and stores the search limits.
     *
     * @param categoryIndexName  name passed to {@link CategorySubCategorySearcher}
     * @param pageIndexName      name passed to {@link CategoryPageSearcher}
     * @param frequencyIndexName name passed to {@link PageFreqSearcher}
     * @param maxDepth           depth limit handed to the sub-category search
     * @param minFrequency       inclusive lower bound on page frequency
     * @param maxFrequency       exclusive upper bound on page frequency
     * @throws IOException if one of the searchers fails to open its index
     */
    public CategoryPageExtractor(String categoryIndexName, String pageIndexName, String frequencyIndexName,
                                 int maxDepth, int minFrequency, int maxFrequency) throws IOException {
        this.maxDepth = maxDepth;
        this.minFrequency = minFrequency;
        this.maxFrequency = maxFrequency;
        this.categorySubCategorySearcher = new CategorySubCategorySearcher(categoryIndexName);
        this.categoryPageSearcher = new CategoryPageSearcher(pageIndexName);
        this.pageFreqSearcher = new PageFreqSearcher(frequencyIndexName);
        // NOTE(review): the meaning of the argument 10 is defined by PageFreqSearcher - confirm.
        this.pageFreqSearcher.loadCache(10);
    }

    /** Opens a buffered UTF-8 {@link PrintWriter} on the given path. */
    private static PrintWriter openWriter(String path) throws IOException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path), "UTF-8")));
    }

    /**
     * Reads a tab-separated file of {@code category<TAB>label} lines and, for every
     * page found under each category (including sub-categories up to the depth
     * limit), writes one line to each of three output files named
     * {@code <outputName>-depth-<maxDepth>.bow}, {@code .lsa} and {@code .txt}.
     *
     * @param inputName  path of the UTF-8 input file
     * @param outputName prefix of the output files
     * @throws IOException           if reading or writing fails
     * @throws IllegalStateException if {@code pageVectorSearcher} has not been set
     */
    private void process(String inputName, String outputName) throws IOException {
        // Fail fast with a clear message instead of a NullPointerException deep in
        // the loop, and before any (empty) output file has been created.
        if (this.pageVectorSearcher == null) {
            throw new IllegalStateException("pageVectorSearcher has not been initialised");
        }
        String outputPrefix = outputName + "-depth-" + this.maxDepth;
        PrintWriter bowWriter = null;
        PrintWriter lsaWriter = null;
        PrintWriter txtWriter = null;
        LineNumberReader reader = null;
        try {
            bowWriter = openWriter(outputPrefix + ".bow");
            lsaWriter = openWriter(outputPrefix + ".lsa");
            txtWriter = openWriter(outputPrefix + ".txt");
            reader = new LineNumberReader(new InputStreamReader(new FileInputStream(inputName), "UTF-8"));

            int pageCount = 0;
            HashIndexSet labelIndex = new HashIndexSet();
            String line;
            for (int lineIndex = 0; (line = reader.readLine()) != null; lineIndex++) {
                logger.debug(line);
                String[] fields = line.split(StringTable.HORIZONTAL_TABULATION);
                if (fields.length < 2) {
                    // A line without the second column cannot be mapped to a label index.
                    logger.warn("Skipping malformed line " + reader.getLineNumber() + ": " + line);
                    continue;
                }
                labelIndex.add(fields[1]);
                int index = labelIndex.getIndex(fields[1]);

                HashSet<WikipediaCategory> categories = new HashSet<WikipediaCategory>();
                this.categorySubCategorySearcher.search(fields[0], categories, this.maxDepth);

                int categoryIndex = 0;
                for (WikipediaCategory category : categories) {
                    String[] pages = this.categoryPageSearcher.search(category.getLabel());
                    for (int pageIndex = 0; pageIndex < pages.length; pageIndex++) {
                        int ordinal = pageCount++;
                        logger.info(lineIndex + StringTable.HORIZONTAL_TABULATION + categoryIndex + "/" + pageIndex
                                + StringTable.HORIZONTAL_TABULATION + category
                                + StringTable.HORIZONTAL_TABULATION + pages[pageIndex]
                                + StringTable.HORIZONTAL_TABULATION + ordinal);
                        Node[][] vectors = this.pageVectorSearcher.search(pages[pageIndex]);
                        // Element 1 goes to the .bow file and element 0 to the .lsa file.
                        bowWriter.println(index + " " + Node.toString(vectors[1]));
                        lsaWriter.println(index + " " + Node.toString(vectors[0]));
                        txtWriter.println(line + StringTable.HORIZONTAL_TABULATION + index
                                + StringTable.HORIZONTAL_TABULATION + pages[pageIndex]);
                    }
                    categoryIndex++;
                }
            }

            // PrintWriter swallows IOExceptions; surface them explicitly.
            // Non-short-circuit '|' so that every writer is flushed and checked.
            if (bowWriter.checkError() | lsaWriter.checkError() | txtWriter.checkError()) {
                throw new IOException("Error while writing output files with prefix " + outputPrefix);
            }
        } finally {
            // Release every resource that was opened, even when an exception is in flight.
            if (bowWriter != null) {
                bowWriter.close();
            }
            if (lsaWriter != null) {
                lsaWriter.close();
            }
            if (txtWriter != null) {
                txtWriter.close();
            }
            if (reader != null) {
                reader.close();
            }
        }
    }

    /**
     * Repeatedly prompts on standard output for a category key, expands it and
     * logs every page whose frequency lies in {@code [minFrequency, maxFrequency)},
     * followed by summary counts and the elapsed time.
     *
     * <p>Returns when standard input reaches end-of-file.
     *
     * @throws Exception if reading the input or querying an index fails
     */
    public void interactive() throws Exception {
        // One reader for the whole session: a fresh BufferedReader per prompt could
        // drop input that the previous one had already buffered.
        BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            System.out.println("\nPlease write a key and type <return> to continue (CTRL C to exit):");
            String key = console.readLine();
            if (key == null) {
                // End of input (e.g. CTRL-D or a closed pipe).
                return;
            }
            int acceptedPages = 0;
            int totalPages = 0;
            HashSet<WikipediaCategory> categories = new HashSet<WikipediaCategory>();
            this.categorySubCategorySearcher.search(key, categories, this.maxDepth);
            int categoryCount = 0;
            long start = System.currentTimeMillis();
            for (WikipediaCategory category : categories) {
                String[] pages = this.categoryPageSearcher.search(category.getLabel());
                for (int pageIndex = 0; pageIndex < pages.length; pageIndex++) {
                    int frequency = this.pageFreqSearcher.search(pages[pageIndex]);
                    if (frequency >= this.minFrequency && frequency < this.maxFrequency) {
                        int ordinal = acceptedPages++;
                        logger.debug(categoryCount + StringTable.HORIZONTAL_TABULATION + pageIndex
                                + StringTable.HORIZONTAL_TABULATION + category
                                + StringTable.HORIZONTAL_TABULATION + pages[pageIndex]
                                + StringTable.HORIZONTAL_TABULATION + frequency
                                + StringTable.HORIZONTAL_TABULATION + ordinal);
                    }
                    totalPages++;
                }
                categoryCount++;
            }
            long elapsed = System.currentTimeMillis() - start;
            logger.info(this.nf.format(acceptedPages) + ParsedPageLink.START_SUFFIX_PATTERN + this.nf.format(totalPages) + ") pages found in " + this.nf.format(elapsed) + " ms");
            logger.info(this.nf.format(categoryCount) + " categories found in " + this.nf.format(elapsed) + " ms");
        }
    }

    /**
     * Returns the integer value of a command-line option, or a default when the
     * option is absent.
     *
     * @throws ParseException if the option value is not a valid integer, so that
     *                        the caller reports it together with the usage text
     */
    private static int parseIntOption(CommandLine commandLine, String longOpt, int defaultValue) throws ParseException {
        if (!commandLine.hasOption(longOpt)) {
            return defaultValue;
        }
        String value = commandLine.getOptionValue(longOpt);
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new ParseException("Invalid integer value for --" + longOpt + ": " + value);
        }
    }

    /**
     * Entry point: configures log4j, parses the command line, opens the indexes
     * and, when {@code --interactive-mode} is given, starts the interactive loop.
     *
     * <p>The log4j configuration file is taken from the {@code log-config} system
     * property, defaulting to {@code configuration/log-config.txt}.
     *
     * @param strArr command-line arguments
     * @throws Exception if opening an index or the interactive loop fails
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        Options options = new Options();
        try {
            // NOTE(review): the short-option constant below looks like a decompiler
            // substitution for a string literal (presumably "c") - confirm.
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("open the category/subcategory index with the specified name");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("category-index");
            options.addOption(OptionBuilder.create(AbstractBottomUpParser.COMPLETE));

            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("open the category/page index with the specified name");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("page-index");
            options.addOption(OptionBuilder.create("p"));

            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("open the page/frequency index with the specified name");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("frequency-index");
            options.addOption(OptionBuilder.create("f"));

            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("max depth");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("max-depth");
            options.addOption(OptionBuilder.create("d"));

            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("max page frequency (default is 2147483647)");
            OptionBuilder.withLongOpt("max-frequency");
            options.addOption(OptionBuilder.create("M"));

            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("min page frequency (default is 0)");
            OptionBuilder.withLongOpt("min-frequency");
            options.addOption(OptionBuilder.create("m"));

            OptionBuilder.withArgName("interactive-mode");
            OptionBuilder.withDescription("enter in the interactive mode");
            OptionBuilder.withLongOpt("interactive-mode");
            options.addOption(OptionBuilder.create("t"));

            OptionBuilder.withArgName("search");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("search for the specified key");
            OptionBuilder.withLongOpt("search");
            options.addOption(OptionBuilder.create("s"));

            OptionBuilder.withDescription("trace mode");
            OptionBuilder.withLongOpt("trace");
            options.addOption(OptionBuilder.create());

            OptionBuilder.withDescription("debug mode");
            OptionBuilder.withLongOpt("debug");
            options.addOption(OptionBuilder.create());

            OptionBuilder.withDescription("info mode");
            OptionBuilder.withLongOpt("info");
            options.addOption(OptionBuilder.create());

            options.addOption("h", "help", false, "print this message");
            options.addOption("v", "version", false, "output version information and exit");

            Properties properties = new Properties();
            try {
                InputStreamReader configReader = new InputStreamReader(new FileInputStream(logConfig), "UTF-8");
                try {
                    properties.load(configReader);
                } finally {
                    // The original never closed this stream.
                    configReader.close();
                }
            } catch (Exception e) {
                // Best effort: when the configuration file cannot be read, fall back
                // to a console appender so that logging still works.
                properties.setProperty("log4j.appender.stdout", "org.apache.log4j.ConsoleAppender");
                properties.setProperty("log4j.appender.stdout.layout.ConversionPattern", "[%t] %-5p (%F:%L) - %m %n");
                properties.setProperty("log4j.appender.stdout.layout", "org.apache.log4j.PatternLayout");
                properties.setProperty("log4j.appender.stdout.Encoding", "UTF-8");
            }

            CommandLine commandLine = new PosixParser().parse(options, strArr);

            // Command-line verbosity flags take precedence over the configuration file.
            if (commandLine.hasOption("trace")) {
                properties.setProperty("log4j.rootLogger", "trace,stdout");
            } else if (commandLine.hasOption("debug")) {
                properties.setProperty("log4j.rootLogger", "debug,stdout");
            } else if (commandLine.hasOption("info")) {
                properties.setProperty("log4j.rootLogger", "info,stdout");
            } else if (properties.getProperty("log4j.rootLogger") == null) {
                properties.setProperty("log4j.rootLogger", "info,stdout");
            }
            PropertyConfigurator.configure(properties);

            if (commandLine.hasOption("help") || commandLine.hasOption("version")) {
                // An empty message makes the handler below print only the usage text.
                throw new ParseException("");
            }

            int maxDepth = parseIntOption(commandLine, "max-depth", 0);
            int maxFrequency = parseIntOption(commandLine, "max-frequency", DEFAULT_MAX_FREQUENCY);
            int minFrequency = parseIntOption(commandLine, "min-frequency", DEFAULT_MIN_FREQUENCY);

            CategoryPageExtractor extractor = new CategoryPageExtractor(
                    commandLine.getOptionValue("category-index"),
                    commandLine.getOptionValue("page-index"),
                    commandLine.getOptionValue("frequency-index"),
                    maxDepth, minFrequency, maxFrequency);
            // NOTE(review): the "search" option is registered above but never consulted,
            // and process() is never invoked from here; without --interactive-mode the
            // program only opens the indexes and exits.
            if (commandLine.hasOption("interactive-mode")) {
                extractor.interactive();
            }
        } catch (ParseException e2) {
            if (e2.getMessage().length() > 0) {
                System.out.println("Parsing failed: " + e2.getMessage() + "\n");
            }
            new HelpFormatter().printHelp(400, "java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.index.CategoryPageExtractor", "\n", options, "\n", true);
        }
    }
}
