package org.fbk.cit.hlt.thewikimachine.index;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.math.Node;
import org.fbk.cit.hlt.thewikimachine.index.util.AbstractSearcher;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/index/PageIncomingOutgoingWeightedSearcher.class */
/**
 * Searcher that maps a page key to the weighted vector stored for it in the index.
 *
 * <p>A key is looked up as a term in the {@code "page"} field; the matching document's binary
 * field {@link PageIncomingOutgoingWeightedIndexer#VECTOR_FIELD_NAME} is decoded into an array of
 * {@link Node}s by {@link #toNode(byte[])}. Vectors of frequent keys can be preloaded into an
 * in-memory cache with {@link #loadCache(File, int)}.
 */
public class PageIncomingOutgoingWeightedSearcher extends AbstractSearcher {
    /** Minimum key frequency used by the {@code loadCache} overloads that take no threshold. */
    public static final int DEFAULT_MIN_FREQ = 1000;
    /** Thread-safety setting used by the single-argument constructor. */
    public static final boolean DEFAULT_THREAD_SAFE = false;
    // BOW_INDEX and LS_INDEX are not referenced in this class; kept for external users.
    public static final int BOW_INDEX = 1;
    public static final int LS_INDEX = 0;
    /** When true, {@link #loadCache(File, int)} wraps the cache in a synchronized map. */
    protected boolean threadSafe;
    /** Key to decoded vector; {@code null} until a {@code loadCache} overload has been called. */
    private Map<String, Node[]> cache;
    /** Template term on the "page" field; per-key terms are derived from it via createTerm. */
    private Term keyTerm;
    static Logger logger = Logger.getLogger(PageIncomingOutgoingWeightedSearcher.class.getName());
    protected static DecimalFormat df = new DecimalFormat("###,###,###,###");
    protected static DecimalFormat vf = new DecimalFormat("###,###,###,##0.000");
    private static final DecimalFormat tf = new DecimalFormat("000,000,000.#");
    private static final Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);

    /**
     * Opens the index with the default thread-safety setting.
     *
     * @param str index name/location, passed to the superclass constructor
     * @throws IOException if the superclass fails to open the index
     */
    public PageIncomingOutgoingWeightedSearcher(String str) throws IOException {
        this(str, DEFAULT_THREAD_SAFE);
    }

    /**
     * Opens the index.
     *
     * @param str index name/location, passed to the superclass constructor
     * @param z   whether a cache loaded later should be wrapped in a synchronized map
     * @throws IOException if the superclass fails to open the index
     */
    public PageIncomingOutgoingWeightedSearcher(String str, boolean z) throws IOException {
        super(str);
        this.threadSafe = z;
        this.keyTerm = new Term("page", "");
        logger.debug(this.keyTerm);
        logger.trace(toString(10));
    }

    /**
     * Returns the live cache map (not a copy).
     *
     * @return the cache, or {@code null} if no cache has been loaded
     */
    public Map<String, Node[]> getCache() {
        return this.cache;
    }

    /**
     * Loads the cache from the named file using {@link #DEFAULT_MIN_FREQ}.
     *
     * @param str path of the key-frequency file
     * @throws IOException if the file or the index cannot be read
     */
    public void loadCache(String str) throws IOException {
        loadCache(new File(str));
    }

    /**
     * Loads the cache from the named file.
     *
     * @param str path of the key-frequency file
     * @param i   minimum frequency a key must have to be cached
     * @throws IOException if the file or the index cannot be read
     */
    public void loadCache(String str, int i) throws IOException {
        loadCache(new File(str), i);
    }

    /**
     * Loads the cache from the given file using {@link #DEFAULT_MIN_FREQ}.
     *
     * @param file key-frequency file
     * @throws IOException if the file or the index cannot be read
     */
    public void loadCache(File file) throws IOException {
        loadCache(file, DEFAULT_MIN_FREQ);
    }

    /**
     * Replaces the cache with the vectors of the keys listed in {@code file}.
     *
     * <p>The file is read as UTF-8. Lines with exactly two tab-separated fields are interpreted
     * as {@code frequency<TAB>key}; other lines are ignored. Reading stops at the first such line
     * whose frequency is below {@code i}, so the file is expected to be sorted by decreasing
     * frequency. Keys that have no document in the index are skipped.
     *
     * @param file key-frequency file
     * @param i    minimum frequency a key must have to be cached
     * @throws IOException           if the file or the index cannot be read
     * @throws NumberFormatException if the frequency field of a two-field line is not an integer
     */
    public void loadCache(File file, int i) throws IOException {
        logger.info("loading cache from " + file + " (freq>" + i + ")...");
        long startNanos = System.nanoTime();
        if (this.threadSafe) {
            logger.info(getClass().getName() + "'s cache is thread safe");
            this.cache = Collections.synchronizedMap(new HashMap<String, Node[]>());
        } else {
            logger.warn(getClass().getName() + "'s cache isn't thread safe");
            this.cache = new HashMap<String, Node[]>();
        }
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            int linesRead = 1;
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = tabPattern.split(line);
                if (fields.length == 2) {
                    if (Integer.parseInt(fields[0]) < i) {
                        // First key below the threshold ends the load.
                        break;
                    }
                    Node[] vector = fetchVector(fields[1]);
                    if (vector != null) {
                        this.cache.put(fields[1], vector);
                    }
                }
                // Guard against a zero/negative notification point (would divide by zero).
                if (this.notificationPoint > 0 && linesRead % this.notificationPoint == 0) {
                    logger.debug(linesRead + " keys read (" + this.cache.size() + ") " + new Date());
                }
                linesRead++;
            }
        } finally {
            // Release the file handle even when parsing or index access fails.
            reader.close();
        }
        System.out.print('\n');
        logger.info(df.format(this.cache.size()) + ParsedPageLink.START_SUFFIX_PATTERN + df.format(this.indexReader.numDocs()) + ") keys cached in " + tf.format(System.nanoTime() - startNanos) + " ns");
    }

    /**
     * Reads the stored vector of {@code key} from the index, releasing the term enumeration.
     *
     * @param key page key to look up
     * @return the decoded vector, or {@code null} if no document matches the key
     * @throws IOException if the index cannot be read or the stored bytes cannot be decoded
     */
    private Node[] fetchVector(String key) throws IOException {
        TermDocs termDocs = this.indexReader.termDocs(this.keyTerm.createTerm(key));
        try {
            if (!termDocs.next()) {
                return null;
            }
            return toNode(this.indexReader.document(termDocs.doc()).getBinaryValue(PageIncomingOutgoingWeightedIndexer.VECTOR_FIELD_NAME));
        } finally {
            termDocs.close();
        }
    }

    /**
     * Decodes a serialized vector.
     *
     * <p>Layout, as read by {@link DataInputStream}: one {@code int} element count, followed by
     * that many ({@code int}, {@code double}) pairs, each passed to the {@link Node} constructor.
     *
     * @param bArr serialized vector
     * @return the decoded nodes, in stored order
     * @throws IOException if the bytes end before the announced number of elements is read
     */
    public static Node[] toNode(byte[] bArr) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bArr));
        int size = in.readInt();
        Node[] nodes = new Node[size];
        for (int k = 0; k < size; k++) {
            nodes[k] = new Node(in.readInt(), in.readDouble());
        }
        return nodes;
    }

    /**
     * Returns the vector of a key, from the cache when present, otherwise from the index.
     *
     * <p>A cache hit returns the cached array itself, so callers must not modify the result.
     * Lookup is best-effort: any failure is logged and reported as an empty vector.
     *
     * @param str page key
     * @return the key's vector; an empty array if the key is unknown or the lookup failed
     */
    public Node[] search(String str) {
        if (this.cache != null) {
            Node[] cached = this.cache.get(str);
            if (cached != null) {
                return cached;
            }
        }
        try {
            Node[] stored = fetchVector(str);
            return stored != null ? stored : new Node[0];
        } catch (Exception e) {
            // Keep the historical "empty on failure" contract, but do not hide the cause.
            logger.error("search failed for key '" + str + "'", e);
            return new Node[0];
        }
    }

    /**
     * Reads queries from standard input until end of input.
     *
     * <p>A line without tabs is searched and its vector is logged together with the lookup time.
     * A line with exactly two tab-separated keys logs the {@link #compare} and {@link #dot}
     * values of the two keys. Other lines are ignored.
     *
     * @throws Exception if reading standard input fails
     */
    public void interactive() throws Exception {
        // One reader for the whole session: a fresh BufferedReader per prompt could discard
        // input that the previous one had already buffered.
        BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            System.out.println("\nPlease write a query and type <return> to continue (CTRL C to exit):");
            String query = console.readLine();
            if (query == null) {
                // End of input (e.g. CTRL-D or exhausted pipe).
                return;
            }
            String[] keys = tabPattern.split(query);
            if (keys.length == 1) {
                long begin = System.nanoTime();
                Node[] result = search(query);
                long elapsed = System.nanoTime() - begin;
                logger.info(query + "\t<" + nodeToString(result) + ">\t" + tf.format(elapsed) + " ns");
            } else if (keys.length == 2) {
                double similarity = compare(keys[0], keys[1]);
                logger.info(query);
                logger.info("compare\t" + vf.format(similarity));
                double product = dot(keys[0], keys[1]);
                logger.info(query);
                logger.info("dot\t" + vf.format(product));
            }
        }
    }

    /**
     * Computes {@code Node.dot} of the two keys' vectors after {@code Node.normalize}.
     *
     * <p>Normalization is applied to copies, so vectors held in the cache are left untouched.
     *
     * @param str  first page key
     * @param str2 second page key
     * @return the dot product of the normalized vectors
     */
    public double dot(String str, String str2) {
        Node[] first = copyOf(search(str));
        Node[] second = copyOf(search(str2));
        Node.normalize(first);
        Node.normalize(second);
        return Node.dot(first, second);
    }

    /**
     * Copies a vector element by element so it can be modified without affecting the source.
     * NOTE(review): assumes a Node is fully described by its index and value fields.
     */
    private static Node[] copyOf(Node[] nodes) {
        Node[] copy = new Node[nodes.length];
        for (int k = 0; k < nodes.length; k++) {
            copy[k] = new Node(nodes[k].index, nodes[k].value);
        }
        return copy;
    }

    /**
     * Computes the cosine of the two keys' vectors:
     * {@code dot(a, b) / sqrt(dot(a, a) * dot(b, b))}.
     *
     * @param str  first page key
     * @param str2 second page key
     * @return the cosine, or {@code 0.0} when the norm product is zero (e.g. an unknown key),
     *         instead of the {@code NaN} a plain division would produce
     */
    public double compare(String str, String str2) {
        Node[] first = search(str);
        Node[] second = search(str2);
        double norm = Math.sqrt(Node.dot(first, first) * Node.dot(second, second));
        if (norm == 0.0) {
            return 0.0;
        }
        return Node.dot(first, second) / norm;
    }

    /**
     * Formats a vector as space-separated {@code index:value} pairs.
     *
     * @param nodeArr vector to format
     * @return the formatted vector; the empty string for an empty array
     */
    public static String nodeToString(Node[] nodeArr) {
        StringBuilder sb = new StringBuilder();
        for (int k = 0; k < nodeArr.length; k++) {
            if (k > 0) {
                sb.append(' ');
            }
            sb.append(nodeArr[k].index).append(':').append(nodeArr[k].value);
        }
        return sb.toString();
    }

    /**
     * Command-line entry point. Run with {@code --help} for the option list.
     *
     * <p>The log4j configuration is read from the file named by the {@code log-config} system
     * property, defaulting to {@code configuration/log-config.txt}.
     *
     * @param strArr command-line arguments
     * @throws Exception if the index, the key-frequency file or standard input cannot be read
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        Options options = new Options();
        try {
            // NOTE(review): the short-option names passed as AbstractBottomUpParser.INCOMPLETE and
            // SchemaSymbols.ATTVAL_INT look like decompiler constant substitutions for plain
            // string literals - confirm against the original source before replacing them.
            Option indexOption = OptionBuilder.withArgName("index").hasArg().withDescription("open an index with the specified name").isRequired().withLongOpt("index").create(AbstractBottomUpParser.INCOMPLETE);
            Option interactiveOption = OptionBuilder.withArgName("interactive-mode").withDescription("enter in the interactive mode").withLongOpt("interactive-mode").create("t");
            Option searchOption = OptionBuilder.withArgName("search").hasArg().withDescription("search for the specified key").withLongOpt("search").create("s");
            Option keyFreqOption = OptionBuilder.withArgName("key-freq").hasArg().withDescription("read the keys' frequencies from the specified file").withLongOpt("key-freq").create("f");
            Option minFreqOption = OptionBuilder.withArgName("minimum-freq").hasArg().withDescription("minimum key frequency of cached values (default is 1000)").withLongOpt("minimum-freq").create("m");
            Option notificationOption = OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT).hasArg().withDescription("receive notification every n pages (default is 10000)").withLongOpt("notification-point").create("b");
            options.addOption("h", "help", false, "print this message");
            options.addOption("v", "version", false, "output version information and exit");
            options.addOption(indexOption);
            options.addOption(interactiveOption);
            options.addOption(searchOption);
            options.addOption(keyFreqOption);
            options.addOption(minFreqOption);
            options.addOption(notificationOption);
            CommandLine line = new PosixParser().parse(options, strArr);
            if (line.hasOption("help") || line.hasOption("version")) {
                // An empty message makes the handler below print only the usage text.
                throw new ParseException("");
            }
            int minFreq = intOption(line, "minimum-freq", DEFAULT_MIN_FREQ);
            int notificationPoint = intOption(line, "notification-point", 10000);
            PageIncomingOutgoingWeightedSearcher searcher = new PageIncomingOutgoingWeightedSearcher(line.getOptionValue("index"));
            searcher.setNotificationPoint(notificationPoint);
            if (line.hasOption("key-freq")) {
                searcher.loadCache(line.getOptionValue("key-freq"), minFreq);
            }
            if (line.hasOption("search")) {
                logger.debug("searching " + line.getOptionValue("search") + "...");
                // Log the vector's contents; logging the array itself prints only its identity.
                logger.info(nodeToString(searcher.search(line.getOptionValue("search"))));
            }
            if (line.hasOption("interactive-mode")) {
                searcher.interactive();
            }
        } catch (ParseException e) {
            if (e.getMessage().length() > 0) {
                System.out.println("Parsing failed: " + e.getMessage() + "\n");
            }
            new HelpFormatter().printHelp(400, "java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.index.PageIncomingOutgoingWeightedSearcher", "\n", options, "\n", true);
        }
    }

    /**
     * Reads an integer-valued option, reporting a malformed value as a usage error.
     *
     * @param line         parsed command line
     * @param name         long option name
     * @param defaultValue value returned when the option is absent
     * @return the option's value, or {@code defaultValue}
     * @throws ParseException if the option is present but not a valid integer
     */
    private static int intOption(CommandLine line, String name, int defaultValue) throws ParseException {
        if (!line.hasOption(name)) {
            return defaultValue;
        }
        String raw = line.getOptionValue(name);
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            throw new ParseException("option --" + name + " expects an integer, got '" + raw + "'");
        }
    }
}
