package org.fbk.cit.hlt.core.lsa.util;

import info.bliki.api.AbstractXMLParser;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.BreakIterator;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.core.io.FolderScanner;
import org.fbk.cit.hlt.core.lsa.BOW;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.core.math.SparseVector;
import org.fbk.cit.hlt.core.math.Vector;
import org.fbk.cit.hlt.thewikimachine.index.PageAbstractIndexer;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/util/AnvurDev.class */
public class AnvurDev {
    static Logger logger = Logger.getLogger(AnvurDev.class.getName());

    /**
     * Reads a UTF-8 text file and turns every line into an array of fields.
     *
     * <p>Each line is first HTML-unescaped with {@code StringEscapeUtils.unescapeHtml} and then
     * split on {@code StringTable.HORIZONTAL_TABULATION} (presumably the tab character; the
     * command-line usage text describes the input as TSV). Because {@link String#split(String)}
     * drops trailing empty strings, a row may contain fewer fields than the file has columns.
     *
     * @param file the file to read
     * @return one field array per line, in file order
     * @throws IOException if the file cannot be opened or read
     */
    static List<String[]> readText(File file) throws IOException {
        logger.debug("reading text: " + file + "...");
        List<String[]> rows = new ArrayList<String[]>();
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                rows.add(StringEscapeUtils.unescapeHtml(line).split(StringTable.HORIZONTAL_TABULATION));
            }
            logger.debug(rows.size() + " lines read from in " + file);
        } finally {
            // Close the reader on failure too, so a read error does not leak the file handle.
            reader.close();
        }
        return rows;
    }

    /**
     * Splits a string at word boundaries and re-joins the pieces with a leading space each.
     *
     * <p>Boundaries come from {@link BreakIterator#getWordInstance()} for the default locale.
     * Every segment between two consecutive boundaries is emitted, including segments that
     * consist of whitespace or punctuation, and each one is preceded by a single space. A
     * non-empty result therefore always starts with a space.
     *
     * @param str the text to segment
     * @return the space-prefixed segments concatenated, or an empty string for empty input
     */
    static String tokenize(String str) {
        BreakIterator boundaries = BreakIterator.getWordInstance();
        boundaries.setText(str);
        StringBuilder out = new StringBuilder();
        int start = boundaries.first();
        for (int end = boundaries.next(); end != BreakIterator.DONE; end = boundaries.next()) {
            out.append(' ').append(str, start, end);
            start = end;
        }
        return out.toString();
    }

    /**
     * Builds a name fragment from a selection of entries of {@code strArr}.
     *
     * <p>{@code str} is a comma-separated list of integer indexes into {@code strArr}. All
     * indexes are parsed before any entry is looked up. Each selected entry is then appended in
     * the given order, preceded by {@code HelpFormatter.DEFAULT_OPT_PREFIX}.
     *
     * @param strArr the candidate entries
     * @param str comma-separated indexes into {@code strArr}
     * @return the concatenation of prefix + entry for every index
     * @throws NumberFormatException if an index is not a parsable integer
     * @throws ArrayIndexOutOfBoundsException if an index is outside {@code strArr}
     */
    public static String buildName(String[] strArr, String str) {
        String[] tokens = str.split(",");
        int[] positions = new int[tokens.length];
        int slot = 0;
        for (String token : tokens) {
            positions[slot++] = Integer.parseInt(token);
        }
        StringBuilder name = new StringBuilder();
        for (int k = 0; k < positions.length; k++) {
            name.append(HelpFormatter.DEFAULT_OPT_PREFIX).append(strArr[positions[k]]);
        }
        return name.toString();
    }

    /**
     * Combines two vectors into a new {@link SparseVector} by placing the second after the first.
     *
     * <p>Every non-zero element of {@code vector} is added at its own index. Every non-zero
     * element of {@code vector2} is added at its index shifted by {@code vector.size()}. A
     * {@code null} iterator from {@code nonZeroElements()} is treated as "no elements".
     *
     * @param vector the vector that occupies the leading index range
     * @param vector2 the vector whose indexes are offset by the size of {@code vector}
     * @return a new sparse vector holding the elements of both inputs
     */
    public static Vector merge(Vector vector, Vector vector2) {
        SparseVector combined = new SparseVector();
        for (Iterator<Integer> it = vector.nonZeroElements(); it != null && it.hasNext(); ) {
            int index = it.next();
            combined.add(index, vector.get(index));
        }
        for (Iterator<Integer> it = vector2.nonZeroElements(); it != null && it.hasNext(); ) {
            int index = it.next();
            combined.add(index + vector.size(), vector2.get(index));
        }
        return combined;
    }

    /**
     * Joins a selection of entries of {@code strArr} into one space-separated string.
     *
     * <p>{@code str} is a comma-separated list of integer indexes into {@code strArr}. All
     * indexes are parsed before any entry is looked up. The selected entries are then joined in
     * the given order with a single space between them.
     *
     * @param strArr the candidate entries
     * @param str comma-separated indexes into {@code strArr}
     * @return the selected entries separated by single spaces
     * @throws NumberFormatException if an index is not a parsable integer
     * @throws ArrayIndexOutOfBoundsException if an index is outside {@code strArr}
     */
    public static String buildText(String[] strArr, String str) {
        String[] tokens = str.split(",");
        int[] positions = new int[tokens.length];
        int slot = 0;
        for (String token : tokens) {
            positions[slot++] = Integer.parseInt(token);
        }
        StringBuilder text = new StringBuilder();
        String separator = "";
        for (int position : positions) {
            text.append(separator).append(strArr[position]);
            // Only the first entry goes without a leading separator.
            separator = " ";
        }
        return text.toString();
    }

    /**
     * Maps every row of a tab-separated input file through two models and writes the results.
     *
     * <p>For the input path {@code str} and the name fragment built from {@code str2}, seven
     * files named {@code str + fragment + suffix} are written:
     * <ul>
     *   <li>{@code -bow-en-it.txt}, {@code -ls-en-it.txt}, {@code -bow+ls-en-it.txt}: a single
     *       header line with one tab-prefixed {@code index(firstField)} label per input row;</li>
     *   <li>{@code -bow-en-it.csv}: the row's fields, then the normalized document vectors of
     *       both models;</li>
     *   <li>{@code -ls-en-it.csv}: the row's fields, then the normalized pseudo-document
     *       vectors of both models;</li>
     *   <li>{@code -bow+ls-en-it.csv}: the row's fields, then the {@link #merge} of
     *       pseudo-document and document vector for each model;</li>
     *   <li>{@code -en-it.log}: every intermediate vector, one per line.</li>
     * </ul>
     *
     * <p>The input is read before any output file is opened, so an unreadable input does not
     * leave empty output files behind. All writers are closed even when processing fails.
     *
     * @param lsm the first model (its vectors are logged with the {@code en} prefix)
     * @param lsm2 the second model (its vectors are logged with the {@code it} prefix)
     * @param str path of the input file; also the prefix of every output file name
     * @param str2 comma-separated field indexes selecting the text to map and naming the outputs
     * @throws Exception if the input cannot be read, an output cannot be opened, or mapping fails
     */
    public static void run(LSM lsm, LSM lsm2, String str, String str2) throws Exception {
        String buildName = buildName(new String[]{"author_check", "authors", AbstractXMLParser.TITLE_ID, "year", "pubtype", "publisher", "journal", "volume", "number", "pages", PageAbstractIndexer.ABSTRACT_FIELD_NAME, "nauthors", "citedby"}, str2);
        List<String[]> rows = readText(new File(str));

        File bowTxtFile = new File(str + buildName + "-bow-en-it.txt");
        File bowCsvFile = new File(str + buildName + "-bow-en-it.csv");
        File lsTxtFile = new File(str + buildName + "-ls-en-it.txt");
        File lsCsvFile = new File(str + buildName + "-ls-en-it.csv");
        File mergedTxtFile = new File(str + buildName + "-bow+ls-en-it.txt");
        File mergedCsvFile = new File(str + buildName + "-bow+ls-en-it.csv");
        File logFile = new File(str + buildName + "-en-it.log");

        // Every successfully opened writer is registered here so the finally block can close
        // exactly those, even if opening a later file or processing a row fails.
        List<PrintWriter> opened = new ArrayList<PrintWriter>();
        try {
            PrintWriter bowTxt = openWriter(bowTxtFile, opened);
            PrintWriter bowCsv = openWriter(bowCsvFile, opened);
            PrintWriter lsTxt = openWriter(lsTxtFile, opened);
            PrintWriter lsCsv = openWriter(lsCsvFile, opened);
            PrintWriter mergedTxt = openWriter(mergedTxtFile, opened);
            PrintWriter mergedCsv = openWriter(mergedCsvFile, opened);
            PrintWriter log = openWriter(logFile, opened);

            // Header line of the three .txt files: one "index(firstField)" label per row.
            PrintWriter[] textWriters = {bowTxt, lsTxt, mergedTxt};
            for (int i = 0; i < rows.size(); i++) {
                String label = i + "(" + rows.get(i)[0] + ")";
                for (PrintWriter writer : textWriters) {
                    writer.print(StringTable.HORIZONTAL_TABULATION);
                    writer.print(label);
                }
            }
            for (PrintWriter writer : textWriters) {
                writer.print("\n");
            }

            PrintWriter[] csvWriters = {bowCsv, lsCsv, mergedCsv};
            for (int i = 0; i < rows.size(); i++) {
                String[] fields = rows.get(i);
                BOW bow = new BOW(buildText(fields, str2));

                Vector enDocument = lsm.mapDocument(bow);
                enDocument.normalize();
                log.println("enD1:" + enDocument);
                Vector enPseudoDocument = lsm.mapPseudoDocument(enDocument);
                enPseudoDocument.normalize();
                log.println("enPd1:" + enPseudoDocument);
                Vector enMerged = merge(enPseudoDocument, enDocument);
                log.println("enM1:" + enMerged);

                Vector itDocument = lsm2.mapDocument(bow);
                itDocument.normalize();
                log.println("itD1:" + itDocument);
                Vector itPseudoDocument = lsm2.mapPseudoDocument(itDocument);
                itPseudoDocument.normalize();
                log.println("itPd1:" + itPseudoDocument);
                Vector itMerged = merge(itPseudoDocument, itDocument);
                log.println("itM1:" + itMerged);

                // Each CSV line starts with the original fields, each followed by a separator.
                for (String field : fields) {
                    for (PrintWriter writer : csvWriters) {
                        writer.print(field);
                        writer.print(StringTable.HORIZONTAL_TABULATION);
                    }
                }
                bowCsv.print(enDocument);
                bowCsv.print(StringTable.HORIZONTAL_TABULATION);
                bowCsv.println(itDocument);
                lsCsv.print(enPseudoDocument);
                lsCsv.print(StringTable.HORIZONTAL_TABULATION);
                lsCsv.println(itPseudoDocument);
                mergedCsv.print(enMerged);
                mergedCsv.print(StringTable.HORIZONTAL_TABULATION);
                mergedCsv.println(itMerged);
            }

            logger.debug("wrote " + bowTxtFile);
            logger.debug("wrote " + bowCsvFile);
            logger.debug("wrote " + lsTxtFile);
            logger.debug("wrote " + lsCsvFile);
            logger.debug("wrote " + mergedTxtFile);
            logger.debug("wrote " + mergedCsvFile);
            logger.debug("wrote " + logFile);
        } finally {
            for (PrintWriter writer : opened) {
                writer.close();
            }
        }
    }

    /** Opens a buffered UTF-8 writer on {@code file} and registers it in {@code opened}. */
    private static PrintWriter openWriter(File file, List<PrintWriter> opened) throws IOException {
        PrintWriter writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8")));
        opened.add(writer);
        return writer;
    }

    /**
     * Command-line entry point.
     *
     * <p>Expects exactly eight arguments: {@code root-lsa-en root-lsa-it threshold-lsa size-lsa
     * dim-lsa idf-lsa in-file-tsv fields-tsv}. Logging is configured from the file named by the
     * {@code log-config} system property, defaulting to {@code log-config.txt}. Two models are
     * loaded from the two roots, then {@link #run} is invoked on the input file, or on every
     * file that a {@code FolderScanner} with a {@code TsvFilter} yields when the input is a
     * directory.
     *
     * <p>The process exits with status 1 when the argument count is wrong or when the input
     * path is neither a regular file nor a directory. The input check happens before the models
     * are loaded.
     *
     * @param strArr the command-line arguments
     * @throws Exception if an argument is malformed, a model cannot be loaded, or a run fails
     */
    public static void main(String[] strArr) throws Exception {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);
        if (strArr.length != 8) {
            System.out.println(strArr.length);
            System.out.println("Usage: java -mx2G org.fbk.cit.hlt.core.lsa.util.AnvurDev root-lsa-en root-lsa-it threshold-lsa size-lsa dim-lsa idf-lsa in-file-tsv fields-tsv\n\n");
            System.exit(1);
        }

        // threshold-lsa and size-lsa are not used by this tool. They are still parsed so that
        // a malformed value keeps failing with NumberFormatException.
        Double.parseDouble(strArr[2]);
        Integer.parseInt(strArr[3]);
        int dim = Integer.parseInt(strArr[4]);
        boolean idf = Boolean.parseBoolean(strArr[5]);

        File input = new File(strArr[6]);
        if (!input.isFile() && !input.isDirectory()) {
            // Fail before loading the models rather than finishing silently with no output.
            System.err.println("Input path is neither a file nor a directory: " + input);
            System.exit(1);
        }

        LSM lsm = loadModel(strArr[0], dim, idf);
        LSM lsm2 = loadModel(strArr[1], dim, idf);

        if (input.isFile()) {
            run(lsm, lsm2, strArr[6], strArr[7]);
        } else if (input.isDirectory()) {
            FolderScanner scanner = new FolderScanner(input);
            scanner.setFiler(new TsvFilter());
            int batch = 0;
            while (scanner.hasNext()) {
                Object[] files = scanner.next();
                System.out.println(batch + " : " + files.length);
                batch++;
                for (Object entry : files) {
                    String absolutePath = ((File) entry).getAbsolutePath();
                    System.out.println(absolutePath);
                    run(lsm, lsm2, absolutePath, strArr[7]);
                }
            }
        }
    }

    /** Creates a model from the files {@code root-Ut}, {@code root-S}, {@code root-row}, {@code root-col} and {@code root-df}. */
    private static LSM loadModel(String root, int dim, boolean idf) throws Exception {
        return new LSM(new File(root + "-Ut"), new File(root + "-S"), new File(root + "-row"), new File(root + "-col"), new File(root + "-df"), dim, idf);
    }
}
