package org.fbk.cit.hlt.thewikimachine.similarity;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.math.Node;
import org.fbk.cit.hlt.thewikimachine.index.PageVectorSearcher;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.tukaani.xz.common.Util;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/similarity/LSSimilarityExtractor.class */
/**
 * Compares every page key held in a {@link PageVectorSearcher} cache against all
 * cached keys and writes the highest-scoring matches to a tab-separated file.
 *
 * <p>One {@link PageSimilarity} task is submitted per page to a fixed-size thread
 * pool backed by a bounded queue; when the queue is full the submitting thread runs
 * the task itself ({@link ThreadPoolExecutor.CallerRunsPolicy}). Each output row has
 * the form {@code page<TAB>score<TAB>otherPage}.
 */
public class LSSimilarityExtractor {
    public static final int DEFAULT_THREADS_NUMBER = 1;
    public static final int DEFAULT_QUEUE_SIZE = 10000;
    public static final int DEFAULT_NOTIFICATION_POINT = 10000;
    /** Progress is logged every {@code notificationPoint} submitted pages; non-positive disables it. */
    private int notificationPoint;
    /** Upper bound on the number of pages submitted by {@link #start()}. */
    private final int numPages;
    public static final int DEFAULT_NUM_PAGES = Integer.MAX_VALUE;
    private final ExecutorService myExecutor;
    public static final int DEFAULT_MIN_FREQ = 10000;
    public static final int DEFAULT_OUTPUT_NUM_PAGES = 100;
    /** Snapshot of the searcher's cache reference; only its key set is used by this class. */
    Map<String, Node[][]> cache;
    PageVectorSearcher pageVectorSearcher;
    PrintWriter similarityWriter;
    /** Row limit used by {@link #write(String, Map)}. */
    int outputNumPages;
    /** Number of per-page result blocks handed to the writer so far. */
    AtomicInteger pageCount;
    static Logger logger = Logger.getLogger(LSSimilarityExtractor.class.getName());
    private static DecimalFormat df = new DecimalFormat("###,###,###,###");
    private static Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);

    /** Orders scores from highest to lowest; stateless, so it can be shared by all tasks. */
    private static final Comparator<Double> DESCENDING_SCORE = new Comparator<Double>() {
        @Override
        public int compare(Double left, Double right) {
            return right.compareTo(left);
        }
    };

    /**
     * Task that scores one page against every key in the cache and hands the
     * ranked result to {@link LSSimilarityExtractor#write(String, Map)}.
     */
    public class PageSimilarity implements Runnable {
        String page;

        PageSimilarity(String page) {
            this.page = page;
        }

        /**
         * Groups all cache keys by their similarity score with {@link #page}
         * (highest score first) and writes the result.
         */
        @Override
        public void run() {
            Map<Double, List<String>> ranked = new TreeMap<Double, List<String>>(DESCENDING_SCORE);
            for (String other : LSSimilarityExtractor.this.cache.keySet()) {
                Double score = Double.valueOf(LSSimilarityExtractor.this.pageVectorSearcher.compare(this.page, other));
                // Several keys may share the same score, so each score maps to a bucket.
                List<String> bucket = ranked.get(score);
                if (bucket == null) {
                    bucket = new ArrayList<String>();
                    ranked.put(score, bucket);
                }
                bucket.add(other);
            }
            LSSimilarityExtractor.this.write(this.page, ranked);
        }
    }

    /**
     * Creates an extractor writing to the file with the given path.
     *
     * @param numThreads         number of worker threads
     * @param numPages           maximum number of pages to process
     * @param pageVectorSearcher searcher providing the cache and the comparison
     * @param fileName           path of the output file
     * @throws IOException if the output file cannot be opened
     */
    public LSSimilarityExtractor(int numThreads, int numPages, PageVectorSearcher pageVectorSearcher, String fileName) throws IOException {
        this(numThreads, numPages, pageVectorSearcher, new File(fileName));
    }

    /**
     * Creates an extractor writing UTF-8 text to {@code file}.
     *
     * @param numThreads         number of worker threads (core and maximum pool size)
     * @param numPages           maximum number of pages to process
     * @param pageVectorSearcher searcher providing the cache and the comparison
     * @param file               output file; it is opened (and truncated) immediately
     * @throws IOException              if the output file cannot be opened
     * @throws IllegalArgumentException if {@code numThreads} is not accepted by
     *                                  {@link ThreadPoolExecutor}
     */
    public LSSimilarityExtractor(int numThreads, int numPages, PageVectorSearcher pageVectorSearcher, File file) throws IOException {
        this.numPages = numPages;
        this.pageVectorSearcher = pageVectorSearcher;
        this.notificationPoint = DEFAULT_NOTIFICATION_POINT;
        this.outputNumPages = DEFAULT_OUTPUT_NUM_PAGES;
        this.cache = pageVectorSearcher.getCache();
        this.pageCount = new AtomicInteger(0);
        // Build the pool before opening the file: an illegal thread count then fails
        // without leaving an open (and truncated) output stream behind.
        this.myExecutor = new ThreadPoolExecutor(numThreads, numThreads, 1L, TimeUnit.MINUTES,
                new ArrayBlockingQueue<Runnable>(DEFAULT_QUEUE_SIZE), new ThreadPoolExecutor.CallerRunsPolicy());
        this.similarityWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")));
    }

    /**
     * Returns the number of submitted pages between two progress log lines.
     *
     * @return the current notification interval
     */
    public int getNotificationPoint() {
        return this.notificationPoint;
    }

    /**
     * Sets how often {@link #start()} logs progress.
     *
     * @param notificationPoint number of submitted pages between two progress lines;
     *                          a non-positive value disables the periodic lines
     */
    public void setNotificationPoint(int notificationPoint) {
        this.notificationPoint = notificationPoint;
    }

    /**
     * Submits one task per cached page (up to the configured maximum), waits for all
     * tasks to finish and closes the output file.
     *
     * <p>The executor is shut down and the writer closed even if submission fails,
     * which can happen when a task runs on the calling thread and throws.
     */
    public void start() {
        long intervalStart = System.currentTimeMillis();
        logger.info("pages\ttime\tdate");
        int submitted = 0;
        boolean terminated = false;
        try {
            for (String page : this.cache.keySet()) {
                if (submitted >= this.numPages) {
                    break;
                }
                this.myExecutor.execute(new PageSimilarity(page));
                submitted++;
                // Guard against a zero interval, which would otherwise divide by zero.
                if (this.notificationPoint > 0 && submitted % this.notificationPoint == 0) {
                    logProgress(submitted, intervalStart);
                    intervalStart = System.currentTimeMillis();
                }
            }
            // Report the number of pages actually submitted.
            logProgress(submitted, intervalStart);
        } finally {
            try {
                terminated = shutdownAndAwait();
            } finally {
                closeWriter();
            }
        }
        logger.info("ending process " + terminated + " " + new Date() + "...");
    }

    /** Logs one progress line: page count, milliseconds since {@code since}, current date. */
    private static void logProgress(int pages, long since) {
        logger.info(df.format(pages) + StringTable.HORIZONTAL_TABULATION + df.format(System.currentTimeMillis() - since) + StringTable.HORIZONTAL_TABULATION + new Date());
    }

    /**
     * Stops accepting tasks and blocks until the queued ones have completed.
     *
     * @return {@code true} if the executor terminated, {@code false} if the wait
     *         timed out or the calling thread was interrupted
     */
    private boolean shutdownAndAwait() {
        this.myExecutor.shutdown();
        logger.debug("waiting to end " + new Date() + "...");
        try {
            // NOTE(review): Util.VLI_MAX looks like a decompiler substitution for
            // Long.MAX_VALUE (i.e. "wait forever") - confirm before replacing it.
            return this.myExecutor.awaitTermination(Util.VLI_MAX, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            logger.error(e);
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Closes the output writer and reports I/O failures that PrintWriter swallowed. */
    private void closeWriter() {
        this.similarityWriter.close();
        if (this.similarityWriter.checkError()) {
            logger.error("an I/O error occurred while writing the similarity file");
        }
    }

    /**
     * Formats the ranked matches of one page as {@code page<TAB>score<TAB>other}
     * rows, in the iteration order of {@code ranked}, and writes them as one block.
     *
     * <p>Emission stops as soon as more than {@link #outputNumPages} rows have been
     * produced, so at most {@code outputNumPages + 1} rows are written per page.
     * NOTE(review): the extra row presumably accounts for the page matching itself
     * - confirm before tightening the limit.
     *
     * @param page   the page whose matches are written
     * @param ranked scores mapped to the pages having that score
     */
    public void write(String page, Map<Double, List<String>> ranked) {
        StringBuilder rows = new StringBuilder();
        int written = 0;
        for (Map.Entry<Double, List<String>> entry : ranked.entrySet()) {
            double score = entry.getKey().doubleValue();
            for (String other : entry.getValue()) {
                if (written > this.outputNumPages) {
                    write(rows.toString());
                    return;
                }
                rows.append(page);
                rows.append('\t');
                rows.append(score);
                rows.append('\t');
                rows.append(other);
                rows.append('\n');
                written++;
            }
        }
        write(rows.toString());
    }

    /**
     * Appends one page's block of rows to the output and counts it.
     * The write is synchronized so that blocks from different tasks do not interleave.
     */
    private void write(String block) {
        this.pageCount.incrementAndGet();
        synchronized (this) {
            this.similarityWriter.print(block);
        }
    }

    /**
     * Command-line entry point: opens the index, optionally loads the cache from a
     * key-frequency file and runs the extraction.
     *
     * @param args command-line arguments; see the usage message for the options
     * @throws Exception if opening the index, loading the cache or opening the
     *                   output file fails
     */
    public static void main(String[] args) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        Options options = buildOptions();
        try {
            CommandLine line = new PosixParser().parse(options, args);
            if (line.hasOption("help") || line.hasOption("version")) {
                printUsage(options);
                return;
            }
            int numThreads = intOption(line, "num-threads", DEFAULT_THREADS_NUMBER);
            int minFreq = intOption(line, "minimum-freq", DEFAULT_MIN_FREQ);
            int numPages = intOption(line, "num-pages", DEFAULT_NUM_PAGES);
            int notificationPoint = intOption(line, "notification-point", DEFAULT_NOTIFICATION_POINT);

            PageVectorSearcher pageVectorSearcher = new PageVectorSearcher(line.getOptionValue("index"), true);
            pageVectorSearcher.setNotificationPoint(notificationPoint);
            if (line.hasOption("key-freq")) {
                pageVectorSearcher.loadCache(line.getOptionValue("key-freq"), minFreq);
            }
            LSSimilarityExtractor extractor = new LSSimilarityExtractor(numThreads, numPages, pageVectorSearcher, line.getOptionValue("similarity-file"));
            // Honour --notification-point for the extractor's own progress lines too.
            extractor.setNotificationPoint(notificationPoint);
            extractor.start();
        } catch (ParseException e) {
            String message = e.getMessage();
            if (message != null && message.length() > 0) {
                System.out.println("Parsing failed: " + message + "\n");
            }
            printUsage(options);
        }
    }

    /** Declares the command-line options accepted by {@link #main(String[])}. */
    private static Options buildOptions() {
        // NOTE(review): AbstractBottomUpParser.INCOMPLETE and SchemaSymbols.ATTVAL_INT
        // look like decompiler substitutions for plain string literals (presumably
        // "i" and "int") - confirm before inlining them.
        OptionBuilder.withArgName("dir");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("open an index with the specified name");
        OptionBuilder.isRequired();
        OptionBuilder.withLongOpt("index");
        Option indexOption = OptionBuilder.create(AbstractBottomUpParser.INCOMPLETE);

        OptionBuilder.withArgName("file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("read the keys' frequencies from the specified file");
        OptionBuilder.withLongOpt("key-freq");
        Option keyFreqOption = OptionBuilder.create("f");

        OptionBuilder.withArgName("file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("write the page similarities to the specified file");
        OptionBuilder.isRequired();
        OptionBuilder.withLongOpt("similarity-file");
        Option similarityFileOption = OptionBuilder.create("s");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("minimum key frequency of cached values (default is 10000)");
        OptionBuilder.withLongOpt("minimum-freq");
        Option minFreqOption = OptionBuilder.create("m");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("receive notification every n pages (default is 10000)");
        OptionBuilder.withLongOpt("notification-point");
        Option notificationOption = OptionBuilder.create("b");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("number of threads (default 1)");
        OptionBuilder.withLongOpt("num-threads");
        Option threadsOption = OptionBuilder.create("t");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("maximum number of pages to process (default is all)");
        OptionBuilder.withLongOpt("num-pages");
        Option pagesOption = OptionBuilder.create("p");

        Options options = new Options();
        options.addOption("h", "help", false, "print this message");
        options.addOption("v", "version", false, "output version information and exit");
        options.addOption(indexOption);
        options.addOption(similarityFileOption);
        options.addOption(keyFreqOption);
        options.addOption(minFreqOption);
        options.addOption(notificationOption);
        options.addOption(threadsOption);
        options.addOption(pagesOption);
        return options;
    }

    /**
     * Reads an integer option, falling back to {@code defaultValue} when it is absent.
     *
     * @throws ParseException if the supplied value is not a valid integer, so that
     *                        the caller reports it together with the usage message
     */
    private static int intOption(CommandLine line, String longName, int defaultValue) throws ParseException {
        if (!line.hasOption(longName)) {
            return defaultValue;
        }
        String raw = line.getOptionValue(longName);
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            throw new ParseException("invalid integer value for --" + longName + ": " + raw);
        }
    }

    /** Prints the command-line usage message to standard output. */
    private static void printUsage(Options options) {
        new HelpFormatter().printHelp(400, "java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.similarity.LSSimilarityExtractor", "\n", options, "\n", true);
    }
}
