package org.fbk.cit.hlt.thewikimachine.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.ParseException;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.log4j.spi.LocationInfo;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.fbk.cit.hlt.thewikimachine.FakeExtractorParameters;
import org.fbk.cit.hlt.thewikimachine.index.PageTypeIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageTypeSearcher;
import org.fbk.cit.hlt.thewikimachine.index.QIDPageSearcher;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/util/NamNomGlobalIndex.class */
/**
 * Command-line tool working on a "big" Lucene index whose documents carry a
 * {@link QIDPageSearcher#QID_LABEL} field plus one field per language (named after the
 * two-character language folder found under the base folder) holding a page name.
 *
 * <p>Two modes are available:
 * <ul>
 *   <li><b>batch</b> (default, requires {@code --output}): for every document of the big index,
 *       counts in how many languages the page is present and in how many of those it is also
 *       listed in that language's page-type index. When the share of listed pages is strictly
 *       greater than {@link #MIN_KNOWN_RATIO}, the QID and every per-language page are written
 *       to new indexes under {@code <output>/<yyyyMMdd>/}.</li>
 *   <li><b>interactive</b> ({@code --interactive}): reads keys from standard input and logs the
 *       type found for the matching page in each language.</li>
 * </ul>
 */
public class NamNomGlobalIndex {
    static Logger logger = Logger.getLogger(NamNomGlobalIndex.class.getName());
    static final Pattern langPattern = Pattern.compile("^\\w{2}$");

    /** A document is exported only when strictly more than this share of its pages is known. */
    private static final double MIN_KNOWN_RATIO = 0.5d;

    /** Prefix typed in interactive mode to signal that the key already is a QID. */
    private static final String QID_PREFIX = "?";

    /**
     * Entry point.
     *
     * @param strArr command-line arguments: {@code -i/--big-index <folder>} (required),
     *               {@code -b/--base-folder <folder>} (required), {@code -o/--output <file>},
     *               {@code -t/--interactive}, {@code -n/--namnom-folder <folder>}
     * @throws ParseException if neither {@code --output} nor {@code --interactive} is given
     * @throws Exception      on any index access failure
     */
    public static void main(String[] strArr) throws Exception {
        CommandLineWithLogger cli = buildCommandLine();
        CommandLine commandLine = null;
        try {
            commandLine = cli.getCommandLine(strArr);
            PropertyConfigurator.configure(cli.getLoggerProps());
        } catch (Exception e) {
            // The logger may not be configured yet, so report on stderr before giving up.
            System.err.println("Unable to process the command line: " + e);
            System.exit(1);
        }

        boolean interactive = commandLine.hasOption("interactive");
        if (!interactive && !commandLine.hasOption("output")) {
            System.err.println("Parsing failed: Output folder and interactive cannot be both missing\n");
            cli.printHelp();
            throw new ParseException("");
        }

        String bigIndexFolder = withTrailingSeparator(commandLine.getOptionValue("big-index"));
        String baseFolder = withTrailingSeparator(commandLine.getOptionValue("base-folder"));
        String outputFolder = withTrailingSeparator(commandLine.getOptionValue("output"));
        String namNomFolder = withTrailingSeparator(commandLine.getOptionValue("namnom-folder"));

        List<String> languages = listLanguages(new File(baseFolder));

        Directory bigIndexDirectory = FSDirectory.open(new File(bigIndexFolder));
        IndexReader bigIndex = IndexReader.open(bigIndexDirectory, true);
        try {
            if (interactive) {
                runInteractive(bigIndex, languages, baseFolder, namNomFolder);
            } else {
                buildFilteredIndexes(bigIndex, languages, baseFolder, outputFolder);
            }
        } finally {
            bigIndex.close();
        }
    }

    /** Declares the command-line options understood by this tool. */
    private static CommandLineWithLogger buildCommandLine() {
        CommandLineWithLogger cli = new CommandLineWithLogger();
        // OptionBuilder accumulates its settings in static state; each chain ends with create().
        cli.addOption(OptionBuilder.withDescription("Big index").isRequired().hasArg()
                .withArgName("folder").withLongOpt("big-index").create("i"));
        cli.addOption(OptionBuilder.withDescription("Base folder").isRequired().hasArg()
                .withArgName("folder").withLongOpt("base-folder").create("b"));
        cli.addOption(OptionBuilder.withDescription("Output folder").hasArg()
                .withArgName("file").withLongOpt("output").create("o"));
        cli.addOption(OptionBuilder.withDescription("Interactive mode")
                .withLongOpt("interactive").create("t"));
        cli.addOption(OptionBuilder.withDescription("NAM-NOM global folder").hasArg()
                .withArgName("folder").withLongOpt("namnom-folder").create("n"));
        return cli;
    }

    /** Returns {@code folder} ending with the platform file separator; {@code null} stays {@code null}. */
    private static String withTrailingSeparator(String folder) {
        if (folder == null || folder.endsWith(File.separator)) {
            return folder;
        }
        return folder + File.separator;
    }

    /** Lists the sub-directories of {@code baseFolder} whose name matches {@link #langPattern}. */
    private static List<String> listLanguages(File baseFolder) {
        List<String> languages = new ArrayList<String>();
        File[] children = baseFolder.listFiles();
        if (children == null) {
            // Not a directory, or an I/O error occurred: no language available.
            return languages;
        }
        for (File child : children) {
            if (child.isDirectory() && langPattern.matcher(child.getName()).matches()) {
                languages.add(child.getName());
            }
        }
        return languages;
    }

    /** Opens the page-type searcher of {@code language} located under {@code baseFolder}. */
    private static PageTypeSearcher openTypeSearcher(String language, String baseFolder) throws Exception {
        return new PageTypeSearcher(new FakeExtractorParameters(language, baseFolder).getWikipediaTypeIndexName());
    }

    /** Reads the {@code page} field of every live document of the language's page-type index. */
    private static Set<String> loadKnownPages(String language, String baseFolder) throws Exception {
        PageTypeSearcher searcher = openTypeSearcher(language, baseFolder);
        try {
            IndexReader reader = searcher.getIndexReader();
            Set<String> pages = new HashSet<String>();
            int maxDoc = reader.maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                // Ids below maxDoc() may belong to deleted documents, which cannot be loaded.
                if (!reader.isDeleted(docId)) {
                    pages.add(reader.document(docId).get("page"));
                }
            }
            logger.debug(String.format("Added %d pages", Integer.valueOf(pages.size())));
            return pages;
        } finally {
            searcher.close();
        }
    }

    /**
     * Batch mode: writes the QID and the per-language pages of every big-index document whose
     * share of known pages exceeds {@link #MIN_KNOWN_RATIO} into date-stamped output indexes.
     */
    private static void buildFilteredIndexes(IndexReader bigIndex, List<String> languages,
                                             String baseFolder, String outputFolder) throws Exception {
        Map<String, Set<String>> knownPages = new HashMap<String, Set<String>>();
        for (String language : languages) {
            logger.info(String.format("Loading %s", language));
            try {
                knownPages.put(language, loadKnownPages(language, baseFolder));
            } catch (Exception e) {
                // Best effort: a language whose index cannot be read is simply left out.
                logger.error(e.getMessage(), e);
            }
        }
        // Drop the languages that failed to load so later lookups never hit a missing entry.
        languages.retainAll(knownPages.keySet());

        String datedFolder = outputFolder
                + new SimpleDateFormat("yyyyMMdd").format(Calendar.getInstance().getTime())
                + File.separator;
        Map<String, PageTypeIndexer> indexers = new HashMap<String, PageTypeIndexer>();
        try {
            indexers.put(QIDPageSearcher.QID_LABEL,
                    new PageTypeIndexer(datedFolder + QIDPageSearcher.QID_LABEL + File.separator, true));
            for (String language : languages) {
                indexers.put(language, new PageTypeIndexer(datedFolder + language + File.separator, true));
            }

            logger.info("Starting reading");
            int maxDoc = bigIndex.maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                if (!bigIndex.isDeleted(docId)) {
                    exportIfMostlyKnown(bigIndex.document(docId), languages, knownPages, indexers);
                }
                if ((docId + 1) % 10000 == 0) {
                    System.out.print(".");
                }
                if ((docId + 1) % 1000000 == 0) {
                    System.out.format(" %d/%d\n", Integer.valueOf(docId + 1), Integer.valueOf(maxDoc));
                }
            }
            System.out.println();
        } finally {
            logger.info("Closing indexes");
            for (PageTypeIndexer indexer : indexers.values()) {
                indexer.close();
            }
        }
    }

    /** Adds the document's QID and pages to the output indexers when enough of its pages are known. */
    private static void exportIfMostlyKnown(Document document, List<String> languages,
                                            Map<String, Set<String>> knownPages,
                                            Map<String, PageTypeIndexer> indexers) throws Exception {
        String qid = document.get(QIDPageSearcher.QID_LABEL);
        int known = 0;
        int present = 0;
        for (String language : languages) {
            String page = document.get(language);
            if (page != null) {
                present++;
                if (knownPages.get(language).contains(page)) {
                    known++;
                }
            }
        }
        // Floating-point division on purpose: an integer division would only ever yield 0 or 1
        // and would fail on documents that have no page in any of the loaded languages.
        double ratio = present == 0 ? 0.0d : (double) known / present;
        logger.trace(qid);
        logger.trace(String.format("%d / %d = %f",
                Integer.valueOf(known), Integer.valueOf(present), Double.valueOf(ratio)));
        if (ratio > MIN_KNOWN_RATIO) {
            indexers.get(QIDPageSearcher.QID_LABEL).add(qid);
            for (String language : languages) {
                String page = document.get(language);
                if (page != null) {
                    indexers.get(language).add(page);
                }
            }
        }
    }

    /**
     * Interactive mode: repeatedly reads a key from standard input and logs the page type found
     * in every language for the matching big-index document. An empty line or end of input
     * terminates the loop.
     */
    private static void runInteractive(IndexReader bigIndex, List<String> languages,
                                       String baseFolder, String namNomFolder) throws Exception {
        PageTypeSearcher namNomSearcher = null;
        Map<String, PageTypeSearcher> searchers = new HashMap<String, PageTypeSearcher>();
        try {
            if (namNomFolder != null) {
                namNomSearcher = new PageTypeSearcher(namNomFolder + QIDPageSearcher.QID_LABEL + File.separator);
            }
            for (String language : languages) {
                try {
                    searchers.put(language, openTypeSearcher(language, baseFolder));
                } catch (Exception e) {
                    // Best effort: a language whose index cannot be opened is simply left out.
                    logger.error(e.getMessage(), e);
                }
            }
            languages.retainAll(searchers.keySet());

            // A single reader for the whole session, so buffered input is never thrown away.
            BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
            while (true) {
                System.out.println("\nPlease write a key and type <return> to continue (<enter> without keys to exit):");
                String line = console.readLine();
                if (line == null || line.trim().length() == 0) {
                    return;
                }
                answer(bigIndex, line.trim(), languages, searchers, namNomSearcher);
            }
        } finally {
            for (PageTypeSearcher searcher : searchers.values()) {
                searcher.close();
            }
            if (namNomSearcher != null) {
                namNomSearcher.close();
            }
        }
    }

    /** Resolves one interactive key and logs the types found for it. */
    private static void answer(IndexReader bigIndex, String key, List<String> languages,
                               Map<String, PageTypeSearcher> searchers,
                               PageTypeSearcher namNomSearcher) throws Exception {
        String qid = resolveQid(bigIndex, key);
        if (qid == null) {
            logger.info(String.format("No QID stored for: %s", key));
            return;
        }

        Document document = findFirst(bigIndex, QIDPageSearcher.QID_LABEL, qid);
        if (document == null) {
            logger.info(String.format("Not found in the big index: %s", qid));
        } else {
            for (String language : languages) {
                String page = document.get(language);
                if (page == null) {
                    continue;
                }
                PageTypeSearcher.Entry entry = searchers.get(language).search(page);
                if (entry != null) {
                    logger.info(String.format("Result: [%s] %s --> %s", language, page, entry.getType()));
                }
            }
        }

        if (namNomSearcher != null) {
            PageTypeSearcher.Entry entry = namNomSearcher.search(qid);
            if (entry != null) {
                logger.info(String.format("From index: %s", entry.getType()));
            } else {
                logger.info(String.format("From index: no entry for %s", qid));
            }
        }
    }

    /**
     * Turns an interactive key into a QID. A key starting with {@code ?} is taken as the QID
     * itself; any other key is treated as a page title (spaces become underscores) looked up in
     * the {@code en} field and then in the {@code it} field. When no document matches, the
     * normalized title is returned unchanged.
     */
    private static String resolveQid(IndexReader bigIndex, String key) throws Exception {
        if (key.startsWith(QID_PREFIX)) {
            return key.substring(1).trim();
        }
        String title = key.replace(' ', '_');
        Document document = findFirst(bigIndex, "en", title);
        if (document == null) {
            document = findFirst(bigIndex, "it", title);
        }
        return document != null ? document.get(QIDPageSearcher.QID_LABEL) : title;
    }

    /** Returns the first document whose {@code field} contains the exact term {@code value}, or {@code null}. */
    private static Document findFirst(IndexReader reader, String field, String value) throws Exception {
        TermDocs termDocs = reader.termDocs(new Term(field, value));
        try {
            return termDocs.next() ? reader.document(termDocs.doc()) : null;
        } finally {
            termDocs.close();
        }
    }
}
