package org.fbk.cit.hlt.core.lsa.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.cli.HelpFormatter;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.core.io.FolderScanner;
import org.fbk.cit.hlt.core.lsa.BOW;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.core.lsa.LSSimilarity;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/util/SnippetComparator.class */
/**
 * Groups the text snippets found in a folder by pairwise similarity and writes
 * the resulting groups to an HTML report and to an XML clustering file.
 *
 * <p>All of the work happens in the constructor: every file delivered by a
 * {@link FolderScanner} over the input folder is read and turned into a
 * {@link BOW}, every pair of vectors is compared with
 * {@link LSSimilarity#compare2}, and pairs whose first returned score is at
 * least the quality threshold are put into the same cluster.
 */
public class SnippetComparator {
    static Logger logger = Logger.getLogger(SnippetComparator.class.getName());

    /**
     * Directory that receives the XML clustering file, under a sub-directory
     * named after the threshold.
     * NOTE(review): this is a machine-specific absolute path carried over
     * unchanged; the FileWriter throws if the directory does not exist.
     */
    private static final String XML_OUTPUT_DIR = "/Users/giuliano/Public/jlsa/cos-tmp/output/";

    /** File name of the XML clustering file. */
    private static final String XML_FILE_NAME = "Claudio_Giuliano.xml";

    /** Minimum similarity for two snippets to be put into the same cluster. */
    private final double qualityThreshold;

    /** Not used anywhere in this class; kept to leave the field layout untouched. */
    private Random rnd;

    /** One vector per snippet, in reading order (parallel to textList and idList). */
    private final List<BOW> vectorList;

    /** Raw text of each snippet, in reading order. */
    private final List<String> textList;

    /** Source file of each snippet, in reading order. */
    private final List<File> idList;

    /** Maps a snippet index to the index in clusterList it was most recently assigned to. */
    private Map<Integer, Integer> clusterMap;

    /** The clusters; each one is a sorted set of snippet indexes. */
    public List<SortedSet<Integer>> clusterList;

    /**
     * Reads the snippets, clusters them and writes both reports.
     *
     * @param str          path of the folder to scan for snippet files; also the
     *                     prefix of the HTML report path
     * @param lSSimilarity similarity used to compare two snippet vectors
     * @param file         file whose bag-of-words size is logged before scanning
     * @param d            quality threshold; also part of both output paths
     * @throws IOException if a snippet cannot be read or an output file cannot be opened
     * @throws MalformedURLException declared for compatibility with existing callers
     */
    public SnippetComparator(String str, LSSimilarity lSSimilarity, File file, double d) throws IOException, MalformedURLException {
        this.qualityThreshold = d;
        logger.info("*** reading " + str + "...");
        long start = System.currentTimeMillis();
        this.textList = new ArrayList<String>();
        this.vectorList = new ArrayList<BOW>();
        this.idList = new ArrayList<File>();
        logger.info("size bow " + new BOW(getText(file)).size());

        loadSnippets(str);
        calculateMatrix(lSSimilarity);

        System.out.println("term similarity calculated in " + (System.currentTimeMillis() - start) + " ms");
        writeReports(str, d);
    }

    /** Reads every file delivered by the folder scanner into the three parallel lists. */
    private void loadSnippets(String directory) throws IOException {
        FolderScanner scanner = new FolderScanner(new File(directory));
        while (scanner.hasNext()) {
            Object[] batch = scanner.next();
            for (int k = 0; k < batch.length; k++) {
                logger.info(batch[k]);
                File snippet = (File) batch[k];
                String text = getText(snippet);
                this.idList.add(snippet);
                this.vectorList.add(new BOW(text));
                this.textList.add(text);
            }
        }
    }

    /** Writes the HTML table and the XML clustering file for the current clusterList. */
    private void writeReports(String prefix, double threshold) throws IOException {
        File[] ids = getID();
        String[] texts = getText();

        String htmlPath = prefix + HelpFormatter.DEFAULT_OPT_PREFIX + threshold + ".html";
        String xmlPath = XML_OUTPUT_DIR + threshold + "/" + XML_FILE_NAME;
        // Log the path that is really written (the old message named a different file).
        logger.info("writing " + htmlPath);

        PrintWriter html = new PrintWriter(new FileWriter(htmlPath));
        try {
            PrintWriter xml = new PrintWriter(new FileWriter(xmlPath));
            try {
                xml.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
                xml.println("<clustering>");
                html.println("<html><body><table border=\"1\">");
                for (int clusterId = 0; clusterId < this.clusterList.size(); clusterId++) {
                    xml.println("\t<entity id=\"" + clusterId + "\">");
                    // The row and its items used to be collected in a buffer that was
                    // never printed, so the HTML report had no content.
                    html.print("\t<tr><td>" + clusterId + "</td><td><ul>\n");
                    for (Integer member : this.clusterList.get(clusterId)) {
                        int index = member.intValue();
                        String name = ids[index].getName();
                        // Rank = part of the file name between the last LOW_LINE
                        // (presumably '_') and the first '.'.
                        String rank = name.substring(name.lastIndexOf(StringTable.LOW_LINE) + 1, name.indexOf("."));
                        xml.println("\t\t<doc rank=\"" + rank + "\"/>");
                        html.print("\t\t<li><font color=\"red\">" + ids[index] + "</font>" + filter(texts[index]) + "</li>\n");
                    }
                    // Always close what was opened so both documents stay well formed.
                    xml.println("\t</entity>");
                    html.println("\t</ul></td></tr>");
                }
                html.println("</table></body></html>");
                xml.print("\t</clustering>");
            } finally {
                xml.close();
            }
        } finally {
            html.close();
        }
    }

    /**
     * Escapes the characters that are special in HTML text content.
     *
     * @param str text to escape
     * @return {@code str} with {@code &}, {@code <} and {@code >} replaced by
     *         {@code &amp;}, {@code &lt;} and {@code &gt;}
     */
    public String filter(String str) {
        StringBuilder escaped = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            switch (c) {
                case '&':
                    escaped.append("&amp;");
                    break;
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * @return a new array holding the text of every snippet, in reading order
     */
    public String[] getText() {
        return this.textList.toArray(new String[0]);
    }

    /**
     * @return a new array holding the source file of every snippet, in reading order
     */
    public File[] getID() {
        return this.idList.toArray(new File[0]);
    }

    /**
     * Compares every pair of snippet vectors, builds clusterList / clusterMap
     * and logs min, max, mean, variance and standard deviation of the scores.
     *
     * <p>A pair (a, b) with a &lt; b whose score reaches the threshold puts b
     * into a's cluster, creating that cluster first if a has none. A snippet
     * that is still unassigned once all its pairs are processed gets a cluster
     * of its own. NOTE(review): b is not removed from a cluster it joined
     * earlier, so one snippet can be listed in several clusters.
     */
    private void calculateMatrix(LSSimilarity lSSimilarity) {
        double min = Double.POSITIVE_INFINITY;
        double max = Double.NEGATIVE_INFINITY;
        double sum = 0.0d;
        double sumOfSquares = 0.0d;
        int pairCount = 0;
        this.clusterList = new ArrayList<SortedSet<Integer>>();
        this.clusterMap = new HashMap<Integer, Integer>();
        int count = this.vectorList.size();
        logger.info("calculateMatrix: " + count);
        for (int first = 0; first < count; first++) {
            BOW firstVector = this.vectorList.get(first);
            for (int second = first + 1; second < count; second++) {
                double score = lSSimilarity.compare2(firstVector, this.vectorList.get(second))[0];
                sum += score;
                sumOfSquares += Math.pow(score, 2.0d);
                pairCount++;
                if (score >= max) {
                    max = score;
                }
                if (score <= min) {
                    min = score;
                }
                if (score >= this.qualityThreshold) {
                    Integer cluster = this.clusterMap.get(Integer.valueOf(first));
                    if (cluster == null) {
                        cluster = addCluster(first);
                    }
                    this.clusterList.get(cluster.intValue()).add(Integer.valueOf(second));
                    this.clusterMap.put(Integer.valueOf(second), cluster);
                }
            }
            if (this.clusterMap.get(Integer.valueOf(first)) == null) {
                addCluster(first);
            }
            // Progress indicator on stdout.
            System.out.print(first + " ");
        }
        System.out.print("\n");
        // With fewer than two snippets pairCount is 0 and the figures below are NaN.
        double mean = sum / pairCount;
        // Clamp at zero: rounding can make E[x^2] - E[x]^2 slightly negative,
        // which would turn the standard deviation into NaN.
        double variance = Math.max(0.0d, ((1.0d / pairCount) * sumOfSquares) - Math.pow(mean, 2.0d));
        logger.info("min: " + min + ", max: " + max + ", mean: " + mean + ", var: " + variance + ", std dev: " + Math.sqrt(variance));
    }

    /** Appends a new cluster holding only the given snippet and returns the cluster's index. */
    private Integer addCluster(int snippet) {
        SortedSet<Integer> members = new TreeSet<Integer>();
        members.add(Integer.valueOf(snippet));
        this.clusterList.add(members);
        Integer index = Integer.valueOf(this.clusterList.size() - 1);
        this.clusterMap.put(Integer.valueOf(snippet), index);
        return index;
    }

    /**
     * Reads a whole file into a string using the platform default charset
     * (as {@link FileReader} does).
     *
     * @param file file to read
     * @return the complete content of the file
     * @throws IOException if the file cannot be opened or read
     */
    private String getText(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            char[] buffer = new char[8192];
            int read;
            while ((read = reader.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } finally {
            // Close the reader even when reading fails.
            reader.close();
        }
        return content.toString();
    }

    /**
     * Command-line entry point. Expects five arguments: the model file prefix
     * (suffixes -Ut, -S, -row, -col and -df are appended), the dimension, the
     * snippet folder, the doc-id file and the quality threshold. Logging is
     * configured from the file named by the "log-config" system property, or
     * "log-config.txt" when the property is not set.
     *
     * @param strArr command-line arguments
     * @throws Exception if the model or the snippets cannot be loaded, or an argument is not a number
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        if (strArr.length != 5) {
            System.out.println("Usage: java -mx2G org.fbk.cit.hlt.core.lsa.util.SnippetComparator input dim snippet-dir doc-id qualityThreshold");
            System.exit(1);
        }
        String prefix = strArr[0];
        File ut = new File(prefix + "-Ut");
        File s = new File(prefix + "-S");
        File row = new File(prefix + "-row");
        File col = new File(prefix + "-col");
        File df = new File(prefix + "-df");
        int dim = Integer.parseInt(strArr[1]);
        LSM lsm = new LSM(ut, s, row, col, df, dim, false);
        new SnippetComparator(strArr[2], new LSSimilarity(lsm, 20), new File(strArr[3]), Double.parseDouble(strArr[4]));
    }
}
