package org.fbk.cit.hlt.thewikimachine.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.lsa.BOW;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.core.math.Node;
import org.fbk.cit.hlt.core.math.Vector;
import org.fbk.cit.hlt.thewikimachine.analysis.HardTokenizer;
import org.fbk.cit.hlt.thewikimachine.analysis.Tokenizer;
import org.fbk.cit.hlt.thewikimachine.index.PageSeeAlsoSearcher;
import org.fbk.cit.hlt.thewikimachine.index.PageVectorSearcher;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.PageMap;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/util/PageCategorization.class */
public class PageCategorization {
    /** Latent semantic model; {@link #classify(String)} uses it to map text to document and pseudo-document vectors. */
    private LSM lsm;
    /** Index queried by {@link #init(File)} to obtain the vector pair ({@code Node[][]}) stored for a page. */
    private PageVectorSearcher pageVectorSearcher;
    /** Index queried by {@link #init(File)} to obtain the "see also" pages of a seed page. */
    private PageSeeAlsoSearcher pageSeeAlsoSearcher;
    /** Redirect map handed to the constructor; it is stored but not read anywhere in this class. */
    private PageMap redirectPageMap;
    /** Tokenizer applied to the input text in {@link #classify(String)} before it is lower-cased. */
    Tokenizer tokenizer = HardTokenizer.getInstance();
    /** Seed pages loaded by {@link #init(File)}, keyed by page name. */
    Map<String, Entry> pageCategoryMap;
    /** Class-wide log4j logger. */
    static Logger logger = Logger.getLogger(PageCategorization.class.getName());
    /** Number format (three decimals, grouped thousands) used when printing scores and accuracies. */
    protected static DecimalFormat df = new DecimalFormat("###,###,##0.000");

    /* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/util/PageCategorization$Category.class */
    /**
     * A category label together with the two similarity scores computed for it
     * and their arithmetic mean.
     *
     * <p>Identity ({@link #equals(Object)} / {@link #hashCode()}) is based on the
     * label only, whereas the natural ordering ({@link #compareTo(Category)}) is
     * based on the combined score only. The ordering is therefore intentionally
     * not consistent with {@code equals}.
     */
    public class Category implements Comparable<Category> {
        /** First score passed to the constructor. */
        private final double bow;
        /** Second score passed to the constructor. */
        private final double ls;
        /** Mean of {@link #bow} and {@link #ls}. */
        private final double combo;
        /** Category label. */
        private final String category;

        /**
         * @param str category label
         * @param d   first score (exposed through {@link #getBow()})
         * @param d2  second score (exposed through {@link #getLs()})
         */
        Category(String str, double d, double d2) {
            this.category = str;
            this.bow = d;
            this.ls = d2;
            this.combo = (d + d2) / 2.0d;
        }

        /** @return the category label */
        public String getLabel() {
            return this.category;
        }

        /** @return the mean of the two scores */
        public double getCombo() {
            return this.combo;
        }

        /** @return the first score */
        public double getBow() {
            return this.bow;
        }

        /** @return the second score */
        public double getLs() {
            return this.ls;
        }

        /**
         * Two categories are equal when their labels are equal.
         *
         * <p>The previous implementation passed the label string to
         * {@code Category.equals}, which can never match, so distinct instances
         * with the same label were never equal although their hash codes were.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof Category)) {
                return false;
            }
            String otherLabel = ((Category) obj).category;
            return this.category == null ? otherLabel == null : this.category.equals(otherLabel);
        }

        /** @return the hash code of the label (0 for a {@code null} label) */
        @Override
        public int hashCode() {
            return this.category == null ? 0 : this.category.hashCode();
        }

        /**
         * Orders categories by ascending combined score.
         *
         * <p>{@link Double#compare(double, double)} is used instead of the sign of
         * a subtraction so that NaN scores still yield a consistent total order,
         * which {@code Arrays.sort} requires.
         */
        @Override
        public int compareTo(Category category) {
            return Double.compare(this.combo, category.getCombo());
        }

        /** @return the three formatted scores and the label, tab-separated */
        @Override
        public String toString() {
            return PageCategorization.df.format(this.bow) + StringTable.HORIZONTAL_TABULATION + PageCategorization.df.format(this.ls) + StringTable.HORIZONTAL_TABULATION + PageCategorization.df.format(this.combo) + StringTable.HORIZONTAL_TABULATION + this.category;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/util/PageCategorization$Entry.class */
    /**
     * Immutable record of one seed page: its name, the category label assigned
     * to it and the vectors retrieved for it from the page vector index.
     */
    public class Entry {
        /** Page name; stored for reference, there is no accessor for it. */
        private final String page;
        /** Category label associated with the page. */
        private final String category;
        /** Vectors of the page as returned by the page vector index. */
        private final Node[][] nodes;

        /**
         * @param pageName      name of the page
         * @param categoryLabel label of the category the page stands for
         * @param pageVectors   vectors retrieved for the page
         */
        Entry(String pageName, String categoryLabel, Node[][] pageVectors) {
            this.nodes = pageVectors;
            this.category = categoryLabel;
            this.page = pageName;
        }

        /** @return the category label of this entry */
        String getCategory() {
            return category;
        }

        /** @return the vectors stored for this entry (not copied) */
        Node[][] getNodes() {
            return nodes;
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/util/PageCategorization$Voting.class */
    /**
     * Accumulates (optionally weighted) votes per label and can report the
     * labels grouped by accumulated weight, heaviest first.
     */
    public class Voting {
        /** Accumulated weight per label, ordered by label. */
        protected Map<String, Weight> map = new TreeMap<String, Weight>();
        /** Sum of all weights added so far, truncated to an integer. */
        protected int total;
        /**
         * Exact running sum of the weights. {@link #total} is derived from it so
         * that fractional weights are not lost by truncating after every call.
         */
        private double exactTotal;

        /** Mutable holder for the weight accumulated by a single label. */
        public class Weight {
            double weight;

            /** @param d initial weight */
            public Weight(double d) {
                this.weight = d;
            }

            /** Adds one to the weight. */
            public void inc() {
                this.weight += 1.0d;
            }

            /** @param d amount added to the weight */
            public void inc(double d) {
                this.weight += d;
            }

            /** @return the accumulated weight */
            public double get() {
                return this.weight;
            }

            @Override
            public String toString() {
                return Double.toString(this.weight);
            }
        }

        public Voting() {
        }

        /**
         * Adds one vote of weight 1 for the given label.
         *
         * @param str label voted for
         */
        public void add(String str) {
            add(str, 1.0d);
        }

        /**
         * Adds a vote with the given weight for the given label.
         *
         * @param str label voted for
         * @param d   weight of the vote
         */
        public void add(String str, double d) {
            // Sum in double precision and truncate once; truncating the int field on
            // every call silently discarded any weight below 1.
            this.exactTotal += d;
            this.total = (int) this.exactTotal;
            Weight weight = this.map.get(str);
            if (weight == null) {
                this.map.put(str, new Weight(d));
            } else {
                weight.inc(d);
            }
        }

        /**
         * Groups the labels by their accumulated weight.
         *
         * @return a new map from weight to the labels holding exactly that weight,
         *         iterated from the highest weight to the lowest; labels within one
         *         list keep the (alphabetical) order of the underlying map
         */
        public SortedMap<Double, List<String>> toSortedMap() {
            SortedMap<Double, List<String>> byWeight = new TreeMap<Double, List<String>>(new Comparator<Double>() {
                @Override
                public int compare(Double d, Double d2) {
                    // Reversed natural order: heaviest weight first.
                    return d2.compareTo(d);
                }
            });
            for (Map.Entry<String, Weight> vote : this.map.entrySet()) {
                Double key = Double.valueOf(vote.getValue().get());
                List<String> labels = byWeight.get(key);
                if (labels == null) {
                    labels = new ArrayList<String>();
                    byWeight.put(key, labels);
                }
                labels.add(vote.getKey());
            }
            return byWeight;
        }

        @Override
        public String toString() {
            return this.map.toString();
        }
    }

    /**
     * Creates a categorizer and immediately loads the seed pages from the given
     * category file.
     *
     * <p>An {@link IOException} raised while loading is logged and not
     * propagated, so the instance may end up with an empty or partially filled
     * category map.
     *
     * @param lsm                 latent semantic model used to map input text
     * @param pageVectorSearcher  index providing the vectors of a page
     * @param file                category file read by {@link #init(File)}
     * @param pageMap             redirect page map (stored only)
     * @param pageSeeAlsoSearcher index providing the "see also" pages of a page
     */
    public PageCategorization(LSM lsm, PageVectorSearcher pageVectorSearcher, File file, PageMap pageMap, PageSeeAlsoSearcher pageSeeAlsoSearcher) {
        this.lsm = lsm;
        this.pageVectorSearcher = pageVectorSearcher;
        this.pageSeeAlsoSearcher = pageSeeAlsoSearcher;
        this.redirectPageMap = pageMap;
        try {
            init(file);
        } catch (IOException e) {
            // Pass the exception as the throwable argument so the stack trace is
            // logged; logging it as the message only prints e.toString().
            logger.error("Unable to load the category file " + file, e);
        }
    }

    /**
     * (Re)loads the seed pages from a UTF-8 category file.
     *
     * <p>Empty lines and lines starting with {@code #} are skipped. A line of the
     * form {@code page=category} maps the page to that category; any other line
     * uses the text before the first {@code =} as both page and category. Pages
     * for which the vector index returns nothing are ignored.
     *
     * @param file category file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void init(File file) throws IOException {
        this.pageCategoryMap = new HashMap<String, Entry>();
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0 || line.startsWith("#")) {
                    continue;
                }
                String[] parts = line.split("=");
                if (parts.length == 0) {
                    // split() drops trailing empty strings, so a line made only of '='
                    // yields an empty array; there is no page name to read.
                    continue;
                }
                String page = parts[0];
                String category = parts.length == 2 ? parts[1] : parts[0];
                logger.debug(page + StringTable.HORIZONTAL_TABULATION + category);
                Node[][] pageVectors = this.pageVectorSearcher.search(page);
                if (pageVectors == null) {
                    continue;
                }
                this.pageCategoryMap.put(page, new Entry(page, category, pageVectors));
                String[] seeAlso = this.pageSeeAlsoSearcher.search(page);
                if (seeAlso == null) {
                    // Defensive: the vector searcher signals "not found" with null,
                    // presumably this searcher may do the same.
                    continue;
                }
                for (int i = 0; i < seeAlso.length; i++) {
                    logger.debug(i + StringTable.HORIZONTAL_TABULATION + seeAlso[i] + StringTable.HORIZONTAL_TABULATION + category);
                    Node[][] seeAlsoVectors = this.pageVectorSearcher.search(seeAlso[i]);
                    if (seeAlsoVectors != null) {
                        // NOTE(review): the entry built for a "see also" page is discarded,
                        // it is never put into pageCategoryMap. Kept as-is because
                        // registering it would change classification results; confirm
                        // whether the expansion was meant to be enabled.
                        new Entry(seeAlso[i], category, seeAlsoVectors);
                    }
                }
            }
        } finally {
            // The reader was previously never closed, leaking the file handle.
            reader.close();
        }
    }

    /**
     * Scores the given text against every loaded seed page.
     *
     * <p>The text is tokenized, lower-cased and mapped by the LSM to a document
     * vector and a pseudo-document vector; both are normalized. For each seed
     * page the document vector is dotted with the page's vector at index 1 and
     * the pseudo-document vector with the page's vector at index 0.
     *
     * @param str text to classify
     * @return one {@link Category} per seed page, sorted by ascending combined
     *         score (the best match is the last element)
     */
    public Category[] classify(String str) {
        String normalizedText = this.tokenizer.tokenizedString(str).toLowerCase();
        Vector documentVector = this.lsm.mapDocument(new BOW(normalizedText));
        // The pseudo-document is derived before the document vector is normalized.
        Vector pseudoDocumentVector = this.lsm.mapPseudoDocument(documentVector);
        documentVector.normalize();
        pseudoDocumentVector.normalize();
        Node[] documentNodes = documentVector.toNodeArray();
        Node[] pseudoDocumentNodes = pseudoDocumentVector.toNodeArray();

        Category[] scored = new Category[this.pageCategoryMap.size()];
        int next = 0;
        for (Entry seed : this.pageCategoryMap.values()) {
            Node[][] seedVectors = seed.getNodes();
            double documentScore = Node.dot(documentNodes, seedVectors[1]);
            double pseudoDocumentScore = Node.dot(pseudoDocumentNodes, seedVectors[0]);
            scored[next] = new Category(seed.getCategory(), documentScore, pseudoDocumentScore);
            next++;
        }
        Arrays.sort(scored);
        return scored;
    }

    /**
     * Runs a read-classify-log loop on standard input.
     *
     * <p>Each line is taken as the path of a file to classify if such a file
     * exists, otherwise as the text to classify. The ten best categories are
     * written to the logger at debug level. The loop ends when standard input
     * reaches end of stream.
     *
     * @throws Exception if reading the input or the referenced file fails
     */
    public void interactive() throws Exception {
        // One reader for the whole session: a new BufferedReader per prompt may
        // read ahead and drop input that was already buffered (e.g. piped input).
        BufferedReader stdin = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            System.out.println("\nPlease write a query and type <return> to continue (CTRL C to exit):");
            String query = stdin.readLine();
            if (query == null) {
                // End of input; previously this caused a NullPointerException.
                return;
            }
            File queryFile = new File(query);
            Category[] ranked = queryFile.exists() ? classify(readFile(queryFile)) : classify(query);
            if (ranked != null) {
                // The array is sorted ascending, so the best ten are the last ten.
                int start = Math.max(0, ranked.length - 10);
                for (int i = start; i < ranked.length; i++) {
                    logger.debug(i + StringTable.HORIZONTAL_TABULATION + ranked[i]);
                }
            }
        }
    }

    /**
     * Reads the beginning of a UTF-8 text file.
     *
     * <p>Only the first two lines are returned, each terminated by {@code \n}.
     *
     * @param file file to read
     * @return at most the first two lines of the file
     * @throws IOException if the file cannot be opened or read
     */
    private String readFile(File file) throws IOException {
        final int maxLines = 2;
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        StringBuilder text = new StringBuilder();
        int linesRead = 0;
        try {
            String line;
            while (linesRead < maxLines && (line = reader.readLine()) != null) {
                text.append(line);
                text.append("\n");
                linesRead++;
            }
        } finally {
            // The reader was previously never closed, leaking one handle per file.
            reader.close();
        }
        logger.debug(linesRead + " lines read");
        return text.toString();
    }

    /**
     * Evaluates the classifier on a directory tree and logs the accuracy of
     * three decision strategies.
     *
     * <p>The given directory must contain one sub-directory per category, named
     * after the category label, each holding the documents of that category. For
     * every document the ten best categories are considered and three
     * predictions are made: the single best category ("best"), the label with
     * the highest sum of combined scores ("weighted vote") and the label that
     * occurs most often ("vote"). A prediction is correct when it equals the
     * sub-directory name, ignoring case.
     *
     * @param file root directory of the test set
     * @throws IOException if the root is not a listable directory or a document
     *                     cannot be read
     */
    public void test(File file) throws IOException {
        String[] strategyNames = {"best", "weighted vote", "vote"};
        int[] correct = new int[strategyNames.length];
        int processed = 0;
        File[] categoryDirs = file.listFiles();
        if (categoryDirs == null) {
            throw new IOException(file + " is not a readable directory");
        }
        for (int c = 0; c < categoryDirs.length; c++) {
            logger.info(c + StringTable.HORIZONTAL_TABULATION + categoryDirs[c]);
            File[] documents = categoryDirs[c].listFiles();
            if (documents == null) {
                // Plain file (or unreadable directory) next to the category folders.
                continue;
            }
            String expected = categoryDirs[c].getName();
            for (int d = 0; d < documents.length; d++) {
                logger.debug((processed + 1.0d) + StringTable.HORIZONTAL_TABULATION + d + StringTable.HORIZONTAL_TABULATION + documents[d]);
                Category[] ranked = classify(readFile(documents[d]));
                // An empty ranking (no seed page loaded) cannot produce a prediction.
                if (ranked != null && ranked.length > 0) {
                    // The array is sorted ascending, so the best ten are the last ten.
                    int start = Math.max(0, ranked.length - 10);
                    Voting unweighted = new Voting();
                    Voting weighted = new Voting();
                    for (int k = start; k < ranked.length; k++) {
                        unweighted.add(ranked[k].getLabel());
                        weighted.add(ranked[k].getLabel(), ranked[k].getCombo());
                        logger.debug("[" + k + StringTable.HORIZONTAL_TABULATION + ranked[k] + DefaultExpressionEngine.DEFAULT_ATTRIBUTE_END);
                    }
                    SortedMap<Double, List<String>> weightedRanking = weighted.toSortedMap();
                    logger.info(weightedRanking);
                    // Previously this ranking was also taken from the weighted votes,
                    // so the "vote" strategy merely repeated "weighted vote".
                    SortedMap<Double, List<String>> unweightedRanking = unweighted.toSortedMap();
                    logger.info(unweightedRanking);

                    if (isCorrectPrediction(ranked[ranked.length - 1].getLabel(), expected)) {
                        correct[0]++;
                    }
                    if (isCorrectPrediction(weightedRanking.get(weightedRanking.firstKey()).get(0), expected)) {
                        correct[1]++;
                    }
                    if (isCorrectPrediction(unweightedRanking.get(unweightedRanking.firstKey()).get(0), expected)) {
                        correct[2]++;
                    }
                }
                processed++;
            }
        }
        for (int s = 0; s < strategyNames.length; s++) {
            // Floating-point division: int / int always produced 0 or 1 here and
            // threw ArithmeticException when no document had been processed.
            double accuracy = processed == 0 ? 0.0d : (double) correct[s] / processed;
            logger.info(df.format(accuracy) + " = " + correct[s] + " / " + processed + StringTable.HORIZONTAL_TABULATION + strategyNames[s]);
        }
    }

    /** Compares a predicted label with the expected one (case-insensitive) and logs the outcome. */
    private boolean isCorrectPrediction(String predicted, String expected) {
        boolean hit = predicted.equalsIgnoreCase(expected);
        if (hit) {
            logger.debug("1\t" + predicted + " == " + expected);
        } else {
            logger.debug("0\t" + predicted + " != " + expected);
        }
        return hit;
    }

    /**
     * Command-line entry point.
     *
     * <p>Configures log4j from the file named by the {@code log-config} system
     * property (default {@code configuration/log-config.txt}), parses the
     * command line, loads the LSM and the indexes, builds a
     * {@link PageCategorization} and then runs the test-set evaluation and/or
     * the interactive mode, depending on the options given. On a parse error,
     * or when {@code help}/{@code version} is requested, the usage text is
     * printed instead.
     *
     * @param strArr command-line arguments
     * @throws Exception if loading a resource, evaluating the test set or the
     *                   interactive session fails
     */
    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(property);
        Options options = new Options();
        try {
            // OptionBuilder is a static builder: each run of calls below configures
            // the option returned by the create() call that follows it, so the
            // statement order matters.

            // --vector-index <dir> (required)
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("open a page vector index with the specified name");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("vector-index");
            Option create = OptionBuilder.create();
            // --see-also-index <dir> (required)
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("open a page see also index with the specified name");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("see-also-index");
            Option create2 = OptionBuilder.create();
            // --test-dir <dir>
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("dir from which to read the test set");
            OptionBuilder.withLongOpt("test-dir");
            Option create3 = OptionBuilder.create();
            // --redirect <file> (required)
            // NOTE(review): READ_ONLY here, and COMPLETE / ATTVAL_INT further down, look like
            // constants substituted by the decompiler for plain string literals
            // (presumably "r", "c" and "int") - confirm against the libraries.
            OptionBuilder.withArgName("file");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("redirect pages file");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("redirect");
            Option create4 = OptionBuilder.create(PrincetonRandomAccessDictionaryFile.READ_ONLY);
            // -t / --interactive-mode (flag)
            OptionBuilder.withDescription("enter in the interactive mode");
            OptionBuilder.withLongOpt("interactive-mode");
            Option create5 = OptionBuilder.create("t");
            // -l / --lsm <dir> (required)
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("lsm dir");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("lsm");
            Option create6 = OptionBuilder.create("l");
            // -d / --dim <n>
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("lsm dim");
            OptionBuilder.withLongOpt("dim");
            Option create7 = OptionBuilder.create("d");
            // --categories <file> (required)
            OptionBuilder.withArgName("file");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("category file");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("categories");
            Option create8 = OptionBuilder.create(AbstractBottomUpParser.COMPLETE);
            // --normalized (flag)
            OptionBuilder.withDescription("normalize vectors (default is false)");
            OptionBuilder.withLongOpt("normalized");
            Option create9 = OptionBuilder.create();
            options.addOption("h", "help", false, "print this message");
            options.addOption("v", "version", false, "output version information and exit");
            options.addOption(create);
            options.addOption(create4);
            options.addOption(create5);
            options.addOption(create8);
            options.addOption(create2);
            options.addOption(create6);
            options.addOption(create7);
            options.addOption(create9);
            options.addOption(create3);
            CommandLine parse = new PosixParser().parse(options, strArr);
            if (parse.hasOption("help") || parse.hasOption("version")) {
                // An empty message makes the catch block below print the usage text
                // without the "Parsing failed" line.
                throw new ParseException("");
            }
            // NOTE(review): "minimum-freq", "notification-point" and (below) "key-freq"
            // are never registered in `options` in this method, so these branches
            // presumably never run and the defaults are always used - confirm.
            int i = 1000;
            if (parse.hasOption("minimum-freq")) {
                i = Integer.parseInt(parse.getOptionValue("minimum-freq"));
            }
            int i2 = 10000;
            if (parse.hasOption("notification-point")) {
                i2 = Integer.parseInt(parse.getOptionValue("notification-point"));
            }
            // Make sure the LSM directory ends with a separator so the file names
            // below can simply be appended.
            String optionValue = parse.getOptionValue("lsm");
            if (!optionValue.endsWith(File.separator)) {
                optionValue = optionValue + File.separator;
            }
            boolean z = false;
            if (parse.hasOption("normalized")) {
                z = true;
            }
            // The five model files expected inside the LSM directory.
            File file = new File(optionValue + "X-Ut");
            File file2 = new File(optionValue + "X-S");
            File file3 = new File(optionValue + "X-row");
            File file4 = new File(optionValue + "X-col");
            File file5 = new File(optionValue + "X-df");
            // LSM dimension, 100 unless --dim is given.
            int i3 = 100;
            if (parse.hasOption("dim")) {
                i3 = Integer.parseInt(parse.getOptionValue("dim"));
            }
            File file6 = null;
            if (parse.hasOption("categories")) {
                file6 = new File(parse.getOptionValue("categories"));
            }
            logger.debug(parse.getOptionValue("lsm") + StringTable.HORIZONTAL_TABULATION + parse.getOptionValue("dim"));
            LSM lsm = new LSM(file, file2, file3, file4, file5, i3, true, z);
            PageVectorSearcher pageVectorSearcher = new PageVectorSearcher(parse.getOptionValue("vector-index"));
            pageVectorSearcher.setNotificationPoint(i2);
            if (parse.hasOption("key-freq")) {
                pageVectorSearcher.loadCache(parse.getOptionValue("key-freq"), i);
            }
            PageCategorization pageCategorization = new PageCategorization(lsm, pageVectorSearcher, file6, new PageMap(new File(parse.getOptionValue("redirect"))), new PageSeeAlsoSearcher(parse.getOptionValue("see-also-index")));
            // Both modes may be requested in one run; the evaluation runs first.
            if (parse.hasOption("test-dir")) {
                pageCategorization.test(new File(parse.getOptionValue("test-dir")));
            }
            if (parse.hasOption("interactive-mode")) {
                pageCategorization.interactive();
            }
        } catch (ParseException e) {
            // Report the parse problem (if any message was given) and print the usage text.
            if (e.getMessage().length() > 0) {
                System.out.println("Parsing failed: " + e.getMessage() + "\n");
            }
            new HelpFormatter().printHelp(400, "java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.util.PageCategorization", "\n", options, "\n", true);
        }
    }
}
