package org.fbk.cit.hlt.core.lsa.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.core.lsa.BOW;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.core.lsa.LSSimilarity;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExampleExtractor;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/util/WebPageComparator.class */
/**
 * Command-line tool that downloads web pages, turns their text into bags of words
 * ({@link BOW}) and scores them against each other with an {@link LSSimilarity}.
 *
 * <p>The constructor scores one reference page against a list of concept pages and then
 * enters an interactive loop ({@link #interactive}) in which the user types further page
 * URLs; each one is scored against the concept pages and the result is written both to
 * standard output and to {@code output.html} in the working directory.
 */
public class WebPageComparator {
    static Logger logger = Logger.getLogger(WebPageComparator.class.getName());

    /**
     * Orders {@link Float} values from largest to smallest.
     *
     * <p>The raw {@code Comparator} type is kept on purpose so that existing callers of
     * {@code compare(Object, Object)} keep compiling; both arguments must be {@code Float}
     * instances, otherwise a {@link ClassCastException} is thrown.
     */
    public class FloatComparator implements Comparator {
        public FloatComparator() {
        }

        /**
         * Compares two {@code Float} objects in descending numeric order.
         *
         * @param obj  first value, must be a {@code Float}
         * @param obj2 second value, must be a {@code Float}
         * @return a negative number if {@code obj} is larger, a positive number if it is
         *         smaller, and 0 if both are equal
         */
        @Override // java.util.Comparator
        public int compare(Object obj, Object obj2) {
            // Float.compare gives a total order (NaN included); subtracting the two values
            // would yield NaN for NaN operands and make NaN "equal" to every other value.
            return Float.compare(((Float) obj2).floatValue(), ((Float) obj).floatValue());
        }

        /**
         * Two comparators are equal when both are {@code FloatComparator}s, since every
         * instance imposes the same ordering.
         */
        @Override // java.util.Comparator
        public boolean equals(Object obj) {
            return obj instanceof FloatComparator;
        }

        /** Consistent with {@link #equals(Object)}: all instances share one hash code. */
        @Override
        public int hashCode() {
            return FloatComparator.class.hashCode();
        }
    }

    /**
     * Downloads {@code url} and every page in {@code urlArr}, logs the similarity between
     * the reference page and each concept page, then starts the interactive loop.
     *
     * @param url          reference page
     * @param urlArr       concept pages
     * @param lSSimilarity similarity function used to score two bags of words
     * @throws IOException if reading the console or writing {@code output.html} fails in
     *                     the interactive loop
     */
    public WebPageComparator(URL url, URL[] urlArr, LSSimilarity lSSimilarity) throws IOException {
        logger.info("parsing " + url + "...");
        BOW bow = new BOW(toText(url));
        logger.info("size bow " + bow.size());
        BOW[] bowArr = new BOW[urlArr.length];
        for (int i = 0; i < urlArr.length; i++) {
            logger.info("parsing concept " + urlArr[i]);
            bowArr[i] = new BOW(toText(urlArr[i]));
            logger.info("size concept " + i + " " + bowArr[i].size());
            logger.info(i + " = " + lSSimilarity.compare(bow, bowArr[i]));
        }
        interactive(urlArr, bowArr, lSSimilarity);
    }

    /**
     * Repeatedly reads a page URL from standard input, scores that page against every
     * non-empty concept bag of words and reports the concept URLs ordered by ascending
     * similarity, on standard output and in {@code output.html} (overwritten each round).
     *
     * <p>The loop ends when standard input reaches end-of-file. A line that is not a valid
     * URL is logged and the prompt is shown again.
     *
     * @param urlArr       concept page URLs, parallel to {@code bowArr}
     * @param bowArr       bags of words of the concept pages
     * @param lSSimilarity similarity function used to score two bags of words
     * @throws IOException if reading the console or opening {@code output.html} fails
     */
    public void interactive(URL[] urlArr, BOW[] bowArr, LSSimilarity lSSimilarity) throws IOException {
        // One reader for the whole session: a fresh BufferedReader per prompt could
        // read ahead and discard input meant for the next prompt.
        BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            System.out.println("\nPlease write a query and type <return> to continue (CTRL C to exit):");
            String line = console.readLine();
            if (line == null) {
                // End of input (closed pipe / CTRL-D): nothing more to process.
                return;
            }
            URL queryUrl;
            try {
                // NOTE(review): lower-casing the whole URL also lower-cases its path,
                // which is kept from the original behaviour.
                queryUrl = new URL(line.toLowerCase());
            } catch (MalformedURLException e) {
                // A typo must not terminate the interactive session.
                logger.error("invalid query URL: " + line, e);
                continue;
            }
            BOW bow = new BOW(toText(queryUrl));
            logger.info("size bow " + bow.size());
            logger.info("bow " + bow);
            writeReport(rank(bow, urlArr, bowArr, lSSimilarity));
        }
    }

    /**
     * Scores {@code query} against every non-empty concept and groups the concept URLs
     * by score, in ascending score order. URLs sharing a score keep their input order.
     */
    private Map<Float, List<String>> rank(BOW query, URL[] urlArr, BOW[] bowArr, LSSimilarity lSSimilarity) {
        // Natural Float ordering is a total order (NaN included). Each score maps to a
        // list so that pages with identical scores are all kept instead of overwritten.
        Map<Float, List<String>> ranking = new TreeMap<Float, List<String>>();
        for (int i = 0; i < bowArr.length; i++) {
            if (bowArr[i].size() > 0) {
                Float score = Float.valueOf(lSSimilarity.compare(query, bowArr[i]));
                List<String> urls = ranking.get(score);
                if (urls == null) {
                    urls = new ArrayList<String>();
                    ranking.put(score, urls);
                }
                urls.add(urlArr[i].toString());
            }
        }
        return ranking;
    }

    /**
     * Writes the ranking as an HTML table to {@code output.html} and as tab-separated
     * lines to standard output. Rows are numbered from 1.
     */
    private void writeReport(Map<Float, List<String>> ranking) throws IOException {
        int total = 0;
        for (List<String> urls : ranking.values()) {
            total += urls.size();
        }
        PrintWriter printWriter = new PrintWriter(new FileWriter("output.html"));
        try {
            printWriter.println("<html><body><table>");
            logger.info("map size " + total);
            int row = 0;
            for (Map.Entry<Float, List<String>> entry : ranking.entrySet()) {
                Float score = entry.getKey();
                for (String str : entry.getValue()) {
                    row++;
                    printWriter.println("<tr><td>" + row + "</td><td>" + score + "</td><td><a href=\"" + str + "\">" + str + "</a></td></tr>");
                    System.out.println(row + StringTable.HORIZONTAL_TABULATION + score + StringTable.HORIZONTAL_TABULATION + str);
                }
            }
            printWriter.println("</table></body></html>");
            printWriter.flush();
            // PrintWriter swallows IOExceptions; surface them through the log instead.
            if (printWriter.checkError()) {
                logger.error("error while writing output.html");
            }
        } finally {
            // Release the file handle even when building the report fails.
            printWriter.close();
        }
    }

    /**
     * Downloads {@code url} and returns the plain text of every node whose tag name
     * matches {@code WikipediaExampleExtractor.Example.CONTENT_FROM_PAGE}, one node per line.
     *
     * <p>Download and parse failures are logged, not thrown; whatever text was collected
     * before the failure (possibly the empty string) is returned.
     *
     * @param url page to download
     * @return extracted text, never {@code null}
     */
    public String toText(URL url) {
        StringBuilder sb = new StringBuilder();
        try {
            Parser parser = new Parser(url.openConnection());
            try {
                appendContent(parser, sb);
            } catch (EncodingChangeException e) {
                // The page declared a different charset part-way through. Discard the text
                // decoded with the wrong charset, rewind the parser and parse again.
                logger.warn("encoding changed while parsing " + url + ", re-parsing", e);
                sb.setLength(0);
                parser.reset();
                appendContent(parser, sb);
            }
        } catch (IOException e) {
            logger.error("cannot open " + url, e);
        } catch (ParserException e) {
            logger.error("cannot parse " + url, e);
        }
        logger.info(url + "\n" + sb.length());
        return sb.toString();
    }

    /** Appends the plain text of every matching node to {@code sb}, each followed by a newline. */
    private void appendContent(Parser parser, StringBuilder sb) throws ParserException {
        SimpleNodeIterator elements = parser.extractAllNodesThatMatch(new TagNameFilter(WikipediaExampleExtractor.Example.CONTENT_FROM_PAGE)).elements();
        while (elements.hasMoreNodes()) {
            sb.append(elements.nextNode().toPlainTextString());
            sb.append("\n");
        }
    }

    /**
     * Entry point. Expected arguments: {@code input threshold size dim idf page concepts+}.
     *
     * <p>{@code input} is the common prefix of the model files ({@code -Ut}, {@code -S},
     * {@code -row}, {@code -col}, {@code -df}); {@code page} is the reference page URL; each
     * concept is appended to {@code http://it.wikipedia.org/wiki/}. The log4j configuration
     * is read from the file named by the {@code log-config} system property, defaulting to
     * {@code log-config.txt}.
     *
     * @param strArr command-line arguments
     * @throws Exception if an argument cannot be parsed or the model cannot be loaded
     */
    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "log-config.txt";
        }
        PropertyConfigurator.configure(property);
        if (strArr.length <= 6) {
            System.out.println("Usage: java -mx512M org.fbk.cit.hlt.core.lsa.util.WebPageComparator input threshold size dim idf page concepts+");
            System.exit(1);
        }
        String prefix = strArr[0];
        File utFile = new File(prefix + "-Ut");
        File sFile = new File(prefix + "-S");
        File rowFile = new File(prefix + "-row");
        File colFile = new File(prefix + "-col");
        File dfFile = new File(prefix + "-df");
        // The threshold is only validated here (NumberFormatException on bad input);
        // its value is not used by this class.
        Double.parseDouble(strArr[1]);
        LSM lsm = new LSM(utFile, sFile, rowFile, colFile, dfFile, Integer.parseInt(strArr[3]), Boolean.parseBoolean(strArr[4]));
        LSSimilarity lSSimilarity = new LSSimilarity(lsm, Integer.parseInt(strArr[2]));
        URL url = new URL(strArr[5]);
        URL[] urlArr = new URL[strArr.length - 6];
        for (int i = 0; i < urlArr.length; i++) {
            urlArr[i] = new URL("http://it.wikipedia.org/wiki/" + strArr[i + 6]);
        }
        new WebPageComparator(url, urlArr, lSSimilarity);
    }
}
