package org.fbk.cit.hlt.core.lsa;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DecimalFormat;
import java.util.Iterator;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.core.analysis.stemmer.Stemmer;
import org.fbk.cit.hlt.core.analysis.stemmer.StemmerFactory;
import org.fbk.cit.hlt.core.analysis.stemmer.StemmerNotFoundException;
import org.fbk.cit.hlt.core.analysis.tokenizer.HardTokenizer;
import org.fbk.cit.hlt.core.math.Vector;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/LSSimilarity.class */
/**
 * Cosine-similarity measures computed on top of an {@link LSM} model.
 *
 * <p>Terms are compared through the vectors returned by {@code LSM.mapTerm}; bags of
 * words are compared through the vectors returned by {@code LSM.mapDocument} and
 * {@code LSM.mapPseudoDocument}. The class also offers an interactive console loop
 * ({@link #interactive()}) and a command line entry point ({@link #main(String[])}).
 */
public class LSSimilarity implements TextSimilarity {
    /** Model used to map terms and documents to vectors. */
    private LSM lsm;
    /** Size handed to the {@code ScoreTermMap} built for single-term queries. */
    private int size;
    /** Optional stemmer applied to tokens; {@code null} means "no stemming". */
    Stemmer stemmer;
    static Logger logger = Logger.getLogger(LSSimilarity.class.getName());
    protected static DecimalFormat df = new DecimalFormat("000,000,000.#");

    /**
     * Creates a similarity object without a stemmer.
     *
     * @param lsm the model used to map terms and documents
     * @param i   the size passed to the score map of term queries
     */
    public LSSimilarity(LSM lsm, int i) {
        this(lsm, i, null);
    }

    /**
     * Creates a similarity object.
     *
     * @param lsm     the model used to map terms and documents
     * @param i       the size passed to the score map of term queries
     * @param stemmer the stemmer applied while tokenizing, or {@code null} for none
     */
    public LSSimilarity(LSM lsm, int i, Stemmer stemmer) {
        this.lsm = lsm;
        this.size = i;
        this.stemmer = stemmer;
    }

    /**
     * Cosine of the angle between two vectors: {@code a.b / sqrt((a.a) * (b.b))}.
     *
     * <p>No guard is applied for zero-length vectors; in that case the floating-point
     * division yields {@code NaN}, exactly as the inlined formula did.
     */
    private static float cosine(Vector a, Vector b) {
        return a.dotProduct(b) / ((float) Math.sqrt(a.dotProduct(a) * b.dotProduct(b)));
    }

    /**
     * Compares two terms by the cosine of their term vectors.
     *
     * @param str  the first term
     * @param str2 the second term
     * @return the cosine similarity of the two term vectors
     * @throws TermNotFoundException propagated from {@code LSM.mapTerm}
     */
    @Override // org.fbk.cit.hlt.core.lsa.TextSimilarity
    public float compare(String str, String str2) throws TermNotFoundException {
        Vector first = this.lsm.mapTerm(str);
        Vector second = this.lsm.mapTerm(str2);
        return cosine(first, second);
    }

    /**
     * Compares two bags of words in both vector spaces exposed by the model.
     *
     * @param bow  the first bag of words
     * @param bow2 the second bag of words
     * @return a two-element array: index 0 is the cosine of the {@code mapDocument}
     *         vectors, index 1 is the cosine of the {@code mapPseudoDocument} vectors
     */
    public float[] compare2(BOW bow, BOW bow2) {
        Vector doc1 = this.lsm.mapDocument(bow);
        Vector doc2 = this.lsm.mapDocument(bow2);
        float documentCosine = cosine(doc1, doc2);
        Vector pseudo1 = this.lsm.mapPseudoDocument(doc1);
        Vector pseudo2 = this.lsm.mapPseudoDocument(doc2);
        float pseudoCosine = cosine(pseudo1, pseudo2);
        return new float[]{documentCosine, pseudoCosine};
    }

    /**
     * Compares two bags of words by the cosine of their pseudo-document vectors.
     *
     * @param bow  the first bag of words
     * @param bow2 the second bag of words
     * @return the cosine similarity of the two pseudo-document vectors
     */
    @Override // org.fbk.cit.hlt.core.lsa.TextSimilarity
    public float compare(BOW bow, BOW bow2) {
        Vector pseudo1 = this.lsm.mapPseudoDocument(this.lsm.mapDocument(bow));
        Vector pseudo2 = this.lsm.mapPseudoDocument(this.lsm.mapDocument(bow2));
        return cosine(pseudo1, pseudo2);
    }

    /**
     * Splits a text into tokens and normalizes each of them.
     *
     * <p>Tokens are lower-cased and, when a stemmer is configured, stemmed. Without a
     * stemmer the lower-cased tokens are returned, so the result never contains
     * {@code null} entries.
     *
     * @param str the text to tokenize
     * @return one normalized entry per token produced by the tokenizer
     */
    private String[] tokenize(String str) {
        logger.debug(str);
        String[] tokens = HardTokenizer.getInstance().stringArray(str);
        String[] normalized = new String[tokens.length];
        logger.debug(this.stemmer);
        for (int i = 0; i < tokens.length; i++) {
            String lowerCased = tokens[i].toLowerCase();
            if (this.stemmer != null) {
                normalized[i] = this.stemmer.stem(lowerCased);
                logger.debug(tokens[i] + StringTable.HORIZONTAL_TABULATION + normalized[i]);
            } else {
                // No stemmer configured: keep the lower-cased token instead of null.
                normalized[i] = lowerCased;
            }
        }
        return normalized;
    }

    /**
     * Reads queries from standard input until the stream ends.
     *
     * <p>A line containing a tabulation is treated as a pair of texts: both sides are
     * tokenized and their similarity is logged. Any other line is treated as a single
     * term that is scored against every term returned by {@code LSM.terms()}.
     *
     * @throws IOException if reading from standard input fails
     */
    public void interactive() throws IOException {
        // One reader for the whole session: a fresh BufferedReader per line could
        // swallow input that the previous instance had already buffered.
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        while (true) {
            logger.info("\nPlease write a query and type <return> to continue (CTRL C to exit):");
            String str = reader.readLine();
            if (str == null) {
                // End of input (closed pipe or CTRL-D): stop instead of failing with an NPE.
                return;
            }
            if (str.contains(StringTable.HORIZONTAL_TABULATION)) {
                String[] split = str.split(StringTable.HORIZONTAL_TABULATION);
                if (split.length < 2) {
                    // split() drops trailing empty strings, e.g. "text<TAB>" has one part only.
                    logger.warn("two tab-separated texts are required");
                    continue;
                }
                long nanoTime = System.nanoTime();
                BOW bow = new BOW(tokenize(split[0]));
                BOW bow2 = new BOW(tokenize(split[1]));
                Vector mapDocument = this.lsm.mapDocument(bow);
                Vector mapDocument2 = this.lsm.mapDocument(bow2);
                Vector mapPseudoDocument = this.lsm.mapPseudoDocument(mapDocument);
                Vector mapPseudoDocument2 = this.lsm.mapPseudoDocument(mapDocument2);
                float dotProduct = cosine(mapDocument, mapDocument2);
                float dotProduct2 = cosine(mapPseudoDocument, mapPseudoDocument2);
                long nanoTime2 = System.nanoTime();
                logger.info("bow1:" + bow);
                logger.info("bow2:" + bow2);
                logger.info("time required " + df.format(nanoTime2 - nanoTime) + " ns");
                logger.info("<\"" + split[0] + "\",\"" + split[1] + "\"> = " + dotProduct2 + ParsedPageLink.START_SUFFIX_PATTERN + dotProduct + ")");
            } else {
                try {
                    String lowerCase = str.toLowerCase();
                    logger.debug("query " + lowerCase);
                    long nanoTime3 = System.nanoTime();
                    ScoreTermMap scoreTermMap = new ScoreTermMap(lowerCase, this.size);
                    Vector mapTerm = this.lsm.mapTerm(lowerCase);
                    Iterator<String> terms = this.lsm.terms();
                    while (terms.hasNext()) {
                        String next = terms.next();
                        scoreTermMap.put(cosine(mapTerm, this.lsm.mapTerm(next)), next);
                    }
                    long nanoTime4 = System.nanoTime();
                    logger.info(scoreTermMap.toString());
                    logger.info("time required " + df.format(nanoTime4 - nanoTime3) + " ns");
                } catch (TermNotFoundException e) {
                    logger.error(e);
                }
            }
        }
    }

    /**
     * Command line entry point: loads a model and starts the interactive loop.
     *
     * <p>Expected arguments are {@code input threshold size dim idf [lang]}; see
     * {@link #getHelp()}. The threshold is parsed (so a malformed value is rejected)
     * but is not used any further.
     *
     * @param strArr the command line arguments
     * @throws Exception if an argument cannot be parsed or the model cannot be loaded
     */
    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "log-config.txt";
        }
        long currentTimeMillis = System.currentTimeMillis();
        PropertyConfigurator.configure(property);
        if (strArr.length < 5) {
            logger.info(getHelp());
            System.exit(1);
        }
        File file = new File(strArr[0] + "-Ut");
        File file2 = new File(strArr[0] + "-S");
        File file3 = new File(strArr[0] + "-row");
        File file4 = new File(strArr[0] + "-col");
        File file5 = new File(strArr[0] + "-df");
        // Result intentionally discarded: the call only validates the threshold argument.
        Double.parseDouble(strArr[1]);
        int parseInt = Integer.parseInt(strArr[2]);
        int parseInt2 = Integer.parseInt(strArr[3]);
        boolean parseBoolean = Boolean.parseBoolean(strArr[4]);
        Stemmer stemmer = null;
        if (strArr.length == 6) {
            try {
                stemmer = StemmerFactory.getInstance(strArr[5]);
            } catch (StemmerNotFoundException e) {
                // Continue without stemming when the requested stemmer is unavailable.
                logger.error(e);
            }
        }
        LSSimilarity lSSimilarity = new LSSimilarity(new LSM(file, file2, file3, file4, file5, parseInt2, parseBoolean), parseInt, stemmer);
        lSSimilarity.interactive();
        try {
            lSSimilarity.compare("word", "term");
        } catch (TermNotFoundException e) {
            // The probe terms may be missing from the model; that must not abort the shutdown.
            logger.error(e);
        }
        logger.info("term similarity calculated in " + (System.currentTimeMillis() - currentTimeMillis) + " ms");
    }

    /**
     * Builds the usage message shown when too few arguments are supplied.
     *
     * @return the multi-line usage text
     */
    private static String getHelp() {
        StringBuilder help = new StringBuilder();
        help.append("Usage: java -cp dist/jcore.jar -mx2G org.fbk.cit.hlt.core.lsa.LSSimilarity input threshold size dim idf [lang]\n\n");
        help.append("Arguments:\n");
        help.append("\tinput\t\t-> root of files from which to read the model\n");
        help.append("\tthreshold\t-> similarity threshold\n");
        help.append("\tsize\t\t-> number of similar terms to return\n");
        help.append("\tdim\t\t-> number of dimensions\n");
        help.append("\tidf\t\t-> if true rescale using the idf\n");
        help.append("\tlang\t\t-> input language\n");
        return help.toString();
    }
}
