package org.fbk.cit.hlt.core.lsa;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Iterator;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.core.math.DenseVector;
import org.fbk.cit.hlt.core.math.SparseVector;
import org.fbk.cit.hlt.core.math.Vector;
import org.fbk.cit.hlt.core.util.MultiSet;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/LSM.class */
/**
 * Maps terms and bags of words onto vectors using the model state inherited from
 * {@link AbstractLSI} ({@code termIndex}, {@code Uk} and {@code Iidf}), and offers a small
 * interactive console loop for comparing texts and listing similar terms.
 *
 * <p>NOTE(review): the field semantics are inferred from how they are used here
 * ({@code termIndex} resolves a term to a row of {@code Uk}; {@code Iidf} holds one weight per
 * term row) - confirm against {@code AbstractLSI}.
 */
public class LSM extends AbstractLSI {
    static Logger logger = Logger.getLogger(LSM.class.getName());

    /**
     * Delegates to the matching {@link AbstractLSI} constructor.
     *
     * @param str passed through to the superclass (presumably the model root path - confirm)
     * @param i   passed through to the superclass (presumably the number of dimensions - confirm)
     * @param z   passed through to the superclass (presumably the idf rescaling flag - confirm)
     * @throws IOException if the superclass fails to load the model
     */
    public LSM(String str, int i, boolean z) throws IOException {
        super(str, i, z);
    }

    /**
     * Delegates to the matching {@link AbstractLSI} constructor.
     *
     * @param str passed through to the superclass (presumably the model root path - confirm)
     * @param i   passed through to the superclass (presumably the number of dimensions - confirm)
     * @param z   passed through to the superclass
     * @param z2  passed through to the superclass
     * @throws IOException if the superclass fails to load the model
     */
    public LSM(String str, int i, boolean z, boolean z2) throws IOException {
        super(str, i, z, z2);
    }

    /**
     * Delegates to the matching {@link AbstractLSI} constructor. {@link #main(String[])} calls it
     * with the files suffixed {@code -Ut}, {@code -S}, {@code -row}, {@code -col} and {@code -df}
     * (in that order), followed by the {@code dim} and {@code idf} command-line arguments.
     *
     * @throws IOException if the superclass fails to load the model
     */
    public LSM(File file, File file2, File file3, File file4, File file5, int i, boolean z) throws IOException {
        super(file, file2, file3, file4, file5, i, z);
    }

    /**
     * Delegates to the matching {@link AbstractLSI} constructor; same file order as the
     * seven-argument overload, with one extra flag passed through to the superclass.
     *
     * @throws IOException if the superclass fails to load the model
     */
    public LSM(File file, File file2, File file3, File file4, File file5, int i, boolean z, boolean z2) throws IOException {
        super(file, file2, file3, file4, file5, i, z, z2);
    }

    /**
     * Returns the row of {@code Uk} associated with the given term, wrapped in a dense vector.
     *
     * @param str the term to look up
     * @return a {@link DenseVector} built on the term's row of {@code Uk}
     * @throws TermNotFoundException if {@code termIndex} reports {@code -1} for the term
     */
    public Vector mapTerm(String str) throws TermNotFoundException {
        int row = this.termIndex.get(str);
        if (row == -1) {
            throw new TermNotFoundException(str);
        }
        return new DenseVector(this.Uk[row]);
    }

    /**
     * Builds a sparse term vector for a bag of words, weighting each known term by
     * {@code log2} of its frequency and optionally by its {@code Iidf} entry.
     * Terms that {@code termIndex} does not know (index {@code -1}) are skipped.
     *
     * @param bow the bag of words to map
     * @param z   when {@code true}, each weight is additionally multiplied by {@code Iidf[index]}
     * @return the sparse vector indexed by term index
     */
    public Vector mapDocument(BOW bow, boolean z) {
        SparseVector weights = new SparseVector();
        for (String term : bow.termSet()) {
            int row = this.termIndex.get(term);
            if (row != -1) {
                // The term must be kept in a local: it is needed both for the index lookup
                // and for the frequency lookup.
                float weight = (float) log2(bow.getFrequency(term));
                if (z) {
                    weight *= this.Iidf[row];
                }
                weights.add(row, weight);
            }
        }
        return weights;
    }

    /**
     * Builds a sparse term vector for a bag of words, weighting each known term by
     * {@code bow.tf(term) * Iidf[index]}. Unknown terms (index {@code -1}) are skipped.
     *
     * @param bow the bag of words to map
     * @return the sparse vector indexed by term index
     */
    public Vector mapDocument(BOW bow) {
        SparseVector weights = new SparseVector();
        for (String term : bow.termSet()) {
            int row = this.termIndex.get(term);
            if (row != -1) {
                weights.add(row, ((float) bow.tf(term)) * this.Iidf[row]);
            }
        }
        return weights;
    }

    /**
     * Builds a sparse term vector for a multiset of terms, weighting each known term by
     * {@code log2(frequency) * Iidf[index]}. Unknown terms (index {@code -1}) are skipped.
     *
     * @param multiSet the terms to map, with their frequencies
     * @return the sparse vector indexed by term index
     */
    public Vector mapDocument(MultiSet<String> multiSet) {
        SparseVector weights = new SparseVector();
        Iterator<String> terms = multiSet.iterator();
        while (terms.hasNext()) {
            String term = terms.next();
            int row = this.termIndex.get(term);
            if (row != -1) {
                weights.add(row, ((float) log2(multiSet.getFrequency(term))) * this.Iidf[row]);
            }
        }
        return weights;
    }

    /**
     * Projects a term-indexed vector through {@code Uk}: component {@code d} of the result is the
     * sum over the vector's non-zero entries {@code t} of {@code Uk[t][d] * vector.get(t)}.
     *
     * @param vector a vector whose non-zero element indices are row indices into {@code Uk}
     * @return a {@link DenseVector} with {@code Uk[0].length} components
     */
    public Vector mapPseudoDocument(Vector vector) {
        int dimensions = this.Uk[0].length;
        float[] projected = new float[dimensions];
        // Walk the non-zero entries once instead of once per dimension. For every dimension the
        // contributions are still accumulated in iterator order, so the sums are unchanged.
        Iterator<Integer> nonZero = vector.nonZeroElements();
        while (nonZero.hasNext()) {
            int row = nonZero.next();
            float value = vector.get(row);
            for (int d = 0; d < dimensions; d++) {
                projected[d] += this.Uk[row][d] * value;
            }
        }
        return new DenseVector(projected);
    }

    /**
     * Runs a read-eval loop on standard input until end of input is reached.
     *
     * <p>A line containing a tab is treated as two texts to compare; any other line is treated
     * as a single term whose most similar terms are listed.
     *
     * @throws IOException if reading from standard input fails
     */
    public void interactive() throws IOException {
        // One reader for the whole session: a fresh BufferedReader per line could discard input
        // that the previous reader had already buffered.
        BufferedReader input = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            logger.info("\nPlease write a query and type <return> to continue (CTRL C to exit):");
            String line = input.readLine();
            if (line == null) {
                // End of input (e.g. CTRL D or an exhausted pipe): stop instead of failing.
                return;
            }
            if (line.contains(StringTable.HORIZONTAL_TABULATION)) {
                compareTextPair(line);
            } else {
                rankSimilarTerms(line);
            }
        }
    }

    /** Compares the first two tab-separated texts of {@code line} and logs both cosines. */
    private void compareTextPair(String line) {
        String[] split = line.split(StringTable.HORIZONTAL_TABULATION);
        if (split.length < 2) {
            // String.split drops trailing empty fields, so "text<TAB>" yields a single element.
            logger.error("expected two texts separated by a tab, got: \"" + line + "\"");
            return;
        }
        long bowStart = System.nanoTime();
        BOW bow = new BOW(split[0].toLowerCase());
        BOW bow2 = new BOW(split[1].toLowerCase());
        logger.info("time required " + df.format(System.nanoTime() - bowStart) + " ns");
        long mapStart = System.nanoTime();
        Vector termVector = mapDocument(bow);
        Vector termVector2 = mapDocument(bow2);
        Vector pseudoDocument = mapPseudoDocument(termVector);
        Vector pseudoDocument2 = mapPseudoDocument(termVector2);
        double termSpaceCosine = pairCosine(termVector, termVector2);
        double projectedCosine = pairCosine(pseudoDocument, pseudoDocument2);
        long mapEnd = System.nanoTime();
        logger.info("bow1:" + bow);
        logger.info("bow2:" + bow2);
        logger.info("time required " + df.format(mapEnd - mapStart) + " ns");
        logger.info("<\"" + split[0] + "\",\"" + split[1] + "\"> = " + projectedCosine + ParsedPageLink.START_SUFFIX_PATTERN + termSpaceCosine + ")");
    }

    /** Scores every term from {@code terms()} against the query term and logs the best ones. */
    private void rankSimilarTerms(String line) {
        try {
            String query = line.toLowerCase();
            logger.debug("query " + query);
            long start = System.nanoTime();
            ScoreTermMap scoreTermMap = new ScoreTermMap(query, 20);
            Vector queryVector = mapTerm(query);
            Iterator<String> terms = terms();
            while (terms.hasNext()) {
                String candidate = terms.next();
                scoreTermMap.put(pairCosine(queryVector, mapTerm(candidate)), candidate);
            }
            long end = System.nanoTime();
            logger.info(scoreTermMap.toString());
            logger.info("time required " + df.format(end - start) + " ns");
        } catch (TermNotFoundException e) {
            logger.error(e);
        }
    }

    /** Returns {@code a.b / sqrt((a.a) * (b.b))}; the result is NaN when either norm is zero. */
    private static double pairCosine(Vector a, Vector b) {
        return a.dotProduct(b) / Math.sqrt(a.dotProduct(a) * b.dotProduct(b));
    }

    /**
     * Command-line entry point: loads the model files derived from the {@code input} root and
     * starts the interactive loop. Prints the usage text and exits with status 1 unless exactly
     * five arguments are given.
     *
     * @param strArr {@code input threshold size dim idf}, as described by the usage text
     * @throws Exception if an argument cannot be parsed or the model cannot be loaded
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "log-config.txt";
        }
        long startMillis = System.currentTimeMillis();
        PropertyConfigurator.configure(logConfig);
        if (strArr.length != 5) {
            logger.info(getHelp());
            System.exit(1);
        }
        File file = new File(strArr[0] + "-Ut");
        File file2 = new File(strArr[0] + "-S");
        File file3 = new File(strArr[0] + "-row");
        File file4 = new File(strArr[0] + "-col");
        File file5 = new File(strArr[0] + "-df");
        // threshold and size are not used below; they are parsed only so that malformed values
        // are still rejected with a NumberFormatException.
        Double.parseDouble(strArr[1]);
        Integer.parseInt(strArr[2]);
        new LSM(file, file2, file3, file4, file5, Integer.parseInt(strArr[3]), Boolean.parseBoolean(strArr[4])).interactive();
        logger.info("term similarity calculated in " + (System.currentTimeMillis() - startMillis) + " ms");
    }

    /** Returns the command-line usage text. */
    private static String getHelp() {
        // StringBuilder suffices: the buffer never leaves this method.
        StringBuilder help = new StringBuilder();
        help.append("Usage: java -cp dist/jcore.jar -mx2G org.fbk.cit.hlt.core.lsa.LSM input threshold size dim idf\n\n");
        help.append("Arguments:\n");
        help.append("\tinput\t\t-> root of files from which to read the model\n");
        help.append("\tthreshold\t-> similarity threshold\n");
        help.append("\tsize\t\t-> number of similar terms to return\n");
        help.append("\tdim\t\t-> number of dimensions\n");
        help.append("\tidf\t\t-> if true rescale using the idf\n");
        return help.toString();
    }
}
