package org.fbk.cit.hlt.core.lsa.util;

import java.io.File;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.DefaultConfigurationBuilder;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.analysis.stemmer.Stemmer;
import org.fbk.cit.hlt.core.analysis.stemmer.StemmerFactory;
import org.fbk.cit.hlt.core.analysis.stemmer.StemmerNotFoundException;
import org.fbk.cit.hlt.core.lsa.io.DenseText2DenseBinary;
import org.fbk.cit.hlt.core.lsa.io.FileFreqFilter;
import org.fbk.cit.hlt.core.lsa.io.TFIDF;
import org.fbk.cit.hlt.core.lsa.io.TermDocumentMatrixFileWriter;
import org.tartarus.snowball.SnowballStemmer;

/* loaded from: input_file:org/fbk/cit/hlt/core/lsa/util/CreateLsa.class */
/**
 * Command-line driver that runs an LSA model-building pipeline over a corpus file.
 *
 * <p>The constructor performs the whole pipeline in sequence: it writes a
 * term-by-document matrix (optionally after a document-frequency cutoff), computes
 * tf-idf, runs the SVD and finally converts the resulting {@code X-Ut} matrix from
 * text to binary. All artefacts are written below a directory named
 * {@code lsa-cutoff-<cutoff>-dim-<dim>-size-<n|all>} inside the model directory.
 */
public class CreateLsa {
    /** Configuration loaded from {@code configuration/config.xml} by the constructor. */
    Configuration config;

    /** Default document-frequency cutoff used by {@link #main(String[])}. */
    public static final int DEFAULT_CUTOFF = 5;

    /** Default number of documents; {@code Integer.MAX_VALUE} is rendered as "all". */
    public static final int DEFAULT_NUM_DOC = Integer.MAX_VALUE;

    /** Not used anywhere in this class; kept because it is package-visible. */
    SnowballStemmer snowballStemmer = null;

    static Logger logger = Logger.getLogger(CreateLsa.class.getName());

    /** Default model directory: the JVM's current working directory. */
    public static final String DEFAULT_MODEL_DIR = System.getProperty("user.dir");

    /** Default number of SVD dimensions used by {@link #main(String[])}. */
    private static final int DEFAULT_DIMENSION = 100;

    /**
     * Runs the full pipeline. Any exception raised along the way is logged and
     * swallowed, so callers are not notified of a failure other than through the log.
     *
     * @param file    corpus file handed to {@code TermDocumentMatrixFileWriter}
     * @param str     model directory; a path separator is appended when missing
     * @param i       number of SVD dimensions
     * @param i2      document-frequency cutoff; the df filtering pass only runs when {@code > 1}
     * @param i3      number of documents, {@code Integer.MAX_VALUE} meaning "all"
     * @param file2   stopword file ({@code main} passes {@code /dev/null} when none is given)
     * @param stemmer stemmer forwarded to the matrix writer, or {@code null} for none
     */
    public CreateLsa(File file, String str, int i, int i2, int i3, File file2, Stemmer stemmer) {
        try {
            logger.info("configuration file configuration/config.xml");
            DefaultConfigurationBuilder configBuilder = new DefaultConfigurationBuilder("configuration/config.xml");
            configBuilder.setBasePath(".");
            this.config = configBuilder.getConfiguration();

            String modelDir = modelDirectory(str, i, i2, i3);
            File dir = new File(modelDir);
            // mkdirs() also creates missing parents; a failure here would otherwise only
            // surface later as an obscure I/O error in one of the writers.
            if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
                throw new IllegalStateException("cannot create model directory " + dir);
            }

            // Every artefact of the pipeline shares this path prefix.
            String prefix = modelDir + "X";
            logger.debug(prefix);

            // NOTE(review): /dev/null is used as an "empty file" placeholder and only
            // exists on POSIX systems — confirm behaviour on other platforms.
            File nullFile = new File("/dev/null");
            if (i2 > 1) {
                // First pass (boolean flag false) is run only for the "-df" file it leaves
                // behind; presumably it skips writing the matrix itself — TODO confirm.
                logger.info("calculating df...");
                new TermDocumentMatrixFileWriter(file, prefix, file2, nullFile, i3, false, stemmer);
                File dfFile = new File(modelDir + "X-df");
                File dfCutoffFile = new File(modelDir + "X-df.cutoff-" + i2);
                logger.info("filtering " + (stemmer == null ? "terms" : "stems") + " with df < " + i2 + "...");
                new FileFreqFilter(dfFile, dfCutoffFile, i2);
                // Second pass swaps the arguments: no stopword file, but the filtered df
                // file in the fourth position (presumably a keep-list — TODO confirm).
                logger.info("creating term-by-document matrix...");
                new TermDocumentMatrixFileWriter(file, prefix, nullFile, dfCutoffFile, i3, true, stemmer);
            } else {
                logger.info("creating term-by-document matrix...");
                new TermDocumentMatrixFileWriter(file, prefix, file2, nullFile, i3, true, stemmer);
            }

            File matrixFile = new File(prefix + "-matrix");
            File tfIdfFile = new File(prefix + "-matrix-tf-idf");
            File rowFile = new File(prefix + "-row");
            File dfFile = new File(prefix + "-df");
            logger.info("calculating tfidf (" + matrixFile + ", " + tfIdfFile + ", " + rowFile + ", " + dfFile + ")...");
            new TFIDF(matrixFile, tfIdfFile, rowFile, dfFile);

            logger.info("running svd (" + tfIdfFile.getAbsolutePath() + ", " + prefix + ", " + i + ")...");
            new SVD(tfIdfFile.getAbsolutePath(), prefix, i);

            // The X-Ut file is moved aside and then rewritten in binary form under its
            // original name.
            File binaryUt = new File(prefix + "-Ut");
            File textUt = new File(prefix + "-Ut.tmp");
            if (!binaryUt.renameTo(textUt)) {
                throw new IllegalStateException("cannot rename " + binaryUt + " to " + textUt);
            }
            logger.info("converting matrix (" + textUt + ", " + binaryUt + ")...");
            DenseText2DenseBinary converter = new DenseText2DenseBinary(textUt, binaryUt);
            try {
                converter.read1();
            } finally {
                converter.close();
            }
        } catch (Exception e) {
            // Pass the throwable separately so the stack trace reaches the log.
            logger.error("LSA model creation failed", e);
        }
    }

    /**
     * Builds the path (with trailing separator) of the directory that receives all
     * model files.
     *
     * @param root      model directory given by the caller; {@code null} is treated as empty
     * @param dimension number of SVD dimensions
     * @param cutoff    document-frequency cutoff
     * @param maxDocs   number of documents, {@code DEFAULT_NUM_DOC} meaning "all"
     * @return {@code <root>/lsa-cutoff-<cutoff>-dim-<dimension>-size-<maxDocs|all>/}
     */
    private static String modelDirectory(String root, int dimension, int cutoff, int maxDocs) {
        String base = root == null ? "" : root;
        StringBuilder path = new StringBuilder(base);
        // Without this, a root such as the default user.dir (no trailing separator)
        // would be fused with the directory name instead of containing it.
        if (base.length() > 0 && !base.endsWith(File.separator) && !base.endsWith("/")) {
            path.append(File.separator);
        }
        path.append("lsa-cutoff-").append(cutoff).append("-dim-").append(dimension);
        path.append("-size-").append(maxDocs == DEFAULT_NUM_DOC ? "all" : String.valueOf(maxDocs));
        return path.append(File.separator).toString();
    }

    /** No-op; the language is selected through the stemmer passed to the constructor. */
    public void setLanguage(String str) {
    }

    /** No-op. */
    public void init() {
    }

    /**
     * Command-line entry point. Configures log4j from the file named by the
     * {@code log-config} system property (default {@code log-config.txt}), parses the
     * arguments and runs the pipeline. On a malformed command line the problem and the
     * usage text are printed to standard output.
     *
     * @param strArr command-line arguments
     */
    public static void main(String[] strArr) {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);

        Options options = buildOptions();
        try {
            CommandLine line = new PosixParser().parse(options, strArr);

            String modelDir = DEFAULT_MODEL_DIR;
            if (line.hasOption("model-dir")) {
                modelDir = line.getOptionValue("model-dir");
            }
            // The option is registered as "d"/"dimension"; querying any other name
            // silently ignores what the user passed.
            int dimension = intOption(line, "dimension", DEFAULT_DIMENSION);
            int cutoff = intOption(line, "cutoff", DEFAULT_CUTOFF);
            int docNum = intOption(line, "doc-num", DEFAULT_NUM_DOC);

            File stopwords = new File("/dev/null");
            if (line.hasOption("stopwords")) {
                stopwords = new File(line.getOptionValue("stopwords"));
            }

            Stemmer stemmer = null;
            if (line.hasOption("lang")) {
                try {
                    stemmer = StemmerFactory.getInstance(line.getOptionValue("lang"));
                } catch (StemmerNotFoundException e) {
                    // Best effort: the run continues without a stemmer.
                    logger.error("no stemmer for language " + line.getOptionValue("lang"), e);
                }
            }

            new CreateLsa(new File(line.getOptionValue("corpus-file")), modelDir, dimension, cutoff, docNum, stopwords, stemmer);
        } catch (ParseException e) {
            System.out.println("Parsing failed: " + e.getMessage() + "\n");
            printUsage(options);
        }
    }

    /**
     * Declares the command-line options. {@code OptionBuilder} accumulates its settings
     * in static state until {@code create} is called, hence the statement-per-setting
     * layout.
     */
    private static Options buildOptions() {
        OptionBuilder.withArgName("file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("File from which to read the corpus");
        OptionBuilder.isRequired();
        OptionBuilder.withLongOpt("corpus-file");
        Option corpusFile = OptionBuilder.create("f");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_STRING);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Directory in which to store the resulting model (default is " + DEFAULT_MODEL_DIR + ")");
        OptionBuilder.withLongOpt("model-dir");
        Option modelDir = OptionBuilder.create("o");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Desired SVD triples (default is 100)");
        OptionBuilder.withLongOpt("dimension");
        Option dimension = OptionBuilder.create("d");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Desired cutoff (default is 5)");
        OptionBuilder.withLongOpt("cutoff");
        // NOTE(review): the short name comes from an unrelated parser constant, which
        // looks like a decompiler constant-substitution artefact (presumably "c") — confirm.
        Option cutoff = OptionBuilder.create(AbstractBottomUpParser.COMPLETE);

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("Desired number of documents (default is all)");
        OptionBuilder.withLongOpt("doc-num");
        // Long option only: no short name is given.
        Option docNum = OptionBuilder.create();

        OptionBuilder.withArgName("file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("file from which to read the stopwords");
        OptionBuilder.withLongOpt("stopwords");
        Option stopwords = OptionBuilder.create("s");

        OptionBuilder.withArgName(SchemaSymbols.ATTVAL_STRING);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("if specified, use a language-specific stemmer");
        OptionBuilder.withLongOpt("lang");
        Option lang = OptionBuilder.create("l");

        Options options = new Options();
        // NOTE(review): -h and -v are declared but never queried by main.
        options.addOption("h", "help", false, "print this message");
        options.addOption("v", "version", false, "output version information and exit");
        options.addOption(corpusFile);
        options.addOption(modelDir);
        options.addOption(dimension);
        options.addOption(cutoff);
        options.addOption(stopwords);
        options.addOption(docNum);
        options.addOption(lang);
        return options;
    }

    /**
     * Reads an integer-valued option.
     *
     * @param line     parsed command line
     * @param name     long option name
     * @param fallback value returned when the option is absent
     * @return the parsed value, or {@code fallback}
     * @throws ParseException if the option value is not a valid integer
     */
    private static int intOption(CommandLine line, String name, int fallback) throws ParseException {
        if (!line.hasOption(name)) {
            return fallback;
        }
        String raw = line.getOptionValue(name);
        try {
            return Integer.parseInt(raw);
        } catch (NumberFormatException e) {
            // Reported as a parse error so the user gets the usage text, not a stack trace.
            throw new ParseException("option --" + name + " expects an integer, got '" + raw + "'");
        }
    }

    /** Prints the usage text for {@code options} to standard output. */
    private static void printUsage(Options options) {
        new HelpFormatter().printHelp(400, "java -cp dist/jcore.jar org.fbk.cit.hlt.core.lsa.util.CreateLsa", "\n", options, "\n", true);
    }
}
