package org.fbk.cit.hlt.thewikimachine;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileAttribute;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile;
import opennlp.tools.parser.AbstractBottomUpParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.fbk.cit.hlt.core.lsa.LSM;
import org.fbk.cit.hlt.thewikimachine.csv.FirstNameExtractor;
import org.fbk.cit.hlt.thewikimachine.csv.OneExamplePerSenseExtractor;
import org.fbk.cit.hlt.thewikimachine.csv.PageAllCategoryExtractor;
import org.fbk.cit.hlt.thewikimachine.csv.PageNGramExtractor;
import org.fbk.cit.hlt.thewikimachine.csv.PagePerCategoryCounter;
import org.fbk.cit.hlt.thewikimachine.csv.UnigramExtractor;
import org.fbk.cit.hlt.thewikimachine.csv.UnixSortWrapper;
import org.fbk.cit.hlt.thewikimachine.csv.VectorExtractor;
import org.fbk.cit.hlt.thewikimachine.index.CategoryPageIndexer;
import org.fbk.cit.hlt.thewikimachine.index.CategorySubCategoryIndexer;
import org.fbk.cit.hlt.thewikimachine.index.CategorySuperCategoryIndexer;
import org.fbk.cit.hlt.thewikimachine.index.FirstNameIndexer;
import org.fbk.cit.hlt.thewikimachine.index.FormPageIndexer;
import org.fbk.cit.hlt.thewikimachine.index.NGramIndexer;
import org.fbk.cit.hlt.thewikimachine.index.OneExamplePerSenseIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageAbstractIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageAirpediaClassIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageAllCategoryIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageCategoryIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageFileIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageFormIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageFreqIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageIncomingOutgoingWeightedIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageNavigationTemplateIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PagePortalIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageTextIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PageVectorIndexer;
import org.fbk.cit.hlt.thewikimachine.index.PersonInfoIndexer;
import org.fbk.cit.hlt.thewikimachine.index.TypeIndexer;
import org.fbk.cit.hlt.thewikimachine.util.FreqSet;
import org.fbk.cit.hlt.thewikimachine.util.GenericFileUtils;
import org.fbk.cit.hlt.thewikimachine.util.GenericValuesIndexer;
import org.fbk.cit.hlt.thewikimachine.util.PageCategoryTokenizer;
import org.fbk.cit.hlt.thewikimachine.util.SectionTitleTokenizer;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;
import org.fbk.cit.hlt.thewikimachine.util.UnixRsyncWrapper;
import org.fbk.cit.hlt.thewikimachine.xmldump.DBpediaClassExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaAbstractExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaExampleExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaFileSourceExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaIncomingOutgoingLinkExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaPageNavigationTemplateExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaPagePortalExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaPreprocessing;
import org.fbk.cit.hlt.thewikimachine.xmldump.WikipediaTextExtractor;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.CollectGoodTemplates;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.ParsedPageLink;
import org.fbk.cit.hlt.thewikimachine.xmldump.util.PruneTemplates;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/ModelExtractor.class */
public class ModelExtractor {
    /** Log4j logger used by the static methods of this class; it is named after the class. */
    static Logger logger = Logger.getLogger(ModelExtractor.class.getName());
    /**
     * NOTE(review): presumably the default for the "overwrite" command-line switch; no use of
     * this constant is visible in this part of the file -- confirm against the rest of the class.
     */
    public static final boolean DEFAULT_OVERWRITE = false;

    /**
     * Writes a plain-text log file describing how the extractor was invoked.
     *
     * <p>The file contains, in order: the current date, the reconstructed command line (a fixed
     * launcher prefix followed by every element of {@code strArr}), the string form of
     * {@code extractorParameters}, and one {@code name='value'} line for each option returned by
     * {@code commandLine.iterator()}.
     *
     * @param extractorParameters parameters whose string form is written to the log
     * @param commandLine parsed command line whose options are listed
     * @param strArr raw program arguments, echoed space-separated after the launcher prefix
     * @param str path of the log file; it is opened with {@link FileOutputStream} (an existing
     *     file is truncated) and written as UTF-8
     * @throws IOException if the log file cannot be opened for writing
     */
    public static void writeLog(ExtractorParameters extractorParameters, CommandLine commandLine, String[] strArr, String str) throws IOException {
        logger.debug("writing log (" + str + ")...");
        // try-with-resources releases the file handle even when formatting one of the entries throws.
        try (PrintWriter printWriter = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str), "UTF-8")))) {
            printWriter.println("date='" + new Date() + StringTable.APOSTROPHE);
            printWriter.print("command='java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.ModelExtractor");
            for (String argument : strArr) {
                printWriter.print(" ");
                printWriter.print(argument);
            }
            printWriter.print("'\n");
            printWriter.println(extractorParameters);
            // NOTE(review): getArgName() is the argument placeholder (the values given to
            // OptionBuilder.withArgName in main, e.g. "file" or "dir"), not the option's own name;
            // getLongOpt() may have been intended -- confirm before changing the log format.
            Iterator<?> it = commandLine.iterator();
            while (it.hasNext()) {
                Option option = (Option) it.next();
                printWriter.println(option.getArgName() + "='" + option.getValue() + StringTable.APOSTROPHE);
            }
        }
    }

    /**
     * Creates a symbolic link at {@code path} whose target is {@code path2}.
     *
     * <p>This is a best-effort operation: an {@link IOException} or an
     * {@link UnsupportedOperationException} raised by
     * {@link Files#createSymbolicLink(Path, Path, java.nio.file.attribute.FileAttribute[])} is
     * logged at error level and not propagated, so the method returns normally in that case.
     *
     * @param path location of the link to create
     * @param path2 target the link points to
     */
    public static void createSymbolicLink(Path path, Path path2) {
        logger.debug("index: " + path + StringTable.HORIZONTAL_TABULATION + path2);
        try {
            Files.createSymbolicLink(path, path2);
        } catch (IOException | UnsupportedOperationException e) {
            // Pass the exception as the throwable argument so the stack trace is logged,
            // and name both paths so the failing link can be identified from the log alone.
            logger.error("cannot create symbolic link " + path + " -> " + path2, e);
        }
    }

    /**
     * Removes the file or directory located at the given path.
     *
     * <p>A directory is removed through {@code FileUtils.deleteDirectory}; a plain file is
     * removed with {@link File#delete()}. Each outcome is traced at debug level.
     *
     * @param str path of the file or directory to remove
     * @return {@code false} when nothing exists at the path; for a plain file, the result of
     *     {@link File#delete()}; {@code true} once a directory has been removed
     * @throws IOException if the directory removal fails
     */
    public static boolean delete(String str) throws IOException {
        final File target = new File(str);
        if (target.exists()) {
            if (target.isDirectory()) {
                logger.debug("removing directory " + target + "...");
                FileUtils.deleteDirectory(target);
                return true;
            }
            logger.debug("removing file " + target + "...");
            return target.delete();
        }
        logger.debug(target + " doesn't exist");
        return false;
    }

    public static void main(String[] strArr) throws Exception {
        String property = System.getProperty("log-config");
        if (property == null) {
            property = "log-config.txt";
        }
        Options options = new Options();
        try {
            OptionBuilder.withArgName("file");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("file from which to read the Wikipedia XML dump");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("wikipedia-dump");
            Option create = OptionBuilder.create("d");
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("output directory in which to store output files");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("output-dir");
            Option create2 = OptionBuilder.create("o");
            OptionBuilder.withDescription("extract examples");
            OptionBuilder.withLongOpt("example");
            Option create3 = OptionBuilder.create("e");
            OptionBuilder.withDescription("execute pre-processing");
            OptionBuilder.withLongOpt("pre-processing");
            Option create4 = OptionBuilder.create(PrincetonRandomAccessDictionaryFile.READ_ONLY);
            OptionBuilder.withDescription("set compression to true (default is false)");
            OptionBuilder.withLongOpt("compress");
            Option create5 = OptionBuilder.create(AbstractBottomUpParser.COMPLETE);
            OptionBuilder.withDescription("extract vectors");
            OptionBuilder.withLongOpt("vectors");
            Option create6 = OptionBuilder.create();
            OptionBuilder.withDescription("if set overwrite the existing files and directories");
            OptionBuilder.withLongOpt("overwrite");
            Option create7 = OptionBuilder.create();
            OptionBuilder.withDescription("extract incoming/outgoing links");
            OptionBuilder.withLongOpt("incoming-outgoing");
            Option create8 = OptionBuilder.create("g");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("number of threads (default 1)");
            OptionBuilder.withLongOpt("num-threads");
            Option create9 = OptionBuilder.create("t");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("minimum frequency of wanted forms (default is 0)");
            OptionBuilder.withLongOpt("min-freq");
            Option create10 = OptionBuilder.create("f");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("maximum number of pages to process (default is all)");
            OptionBuilder.withLongOpt("num-pages");
            Option create11 = OptionBuilder.create("p");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("receive notification every n pages (default is 10000)");
            OptionBuilder.withLongOpt("notification-point");
            Option create12 = OptionBuilder.create("b");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("maximum frequency of wanted forms (default is 1000)");
            OptionBuilder.withLongOpt("max-freq");
            Option create13 = OptionBuilder.create("m");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("maximum number of lines to load in memory when sorting (default is 20000000)");
            OptionBuilder.withLongOpt("sort-size");
            Option create14 = OptionBuilder.create("z");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("n-grams length");
            OptionBuilder.withLongOpt("n-gram");
            Option create15 = OptionBuilder.create("n");
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("lsa dimension (default is 100)");
            OptionBuilder.withLongOpt("lsa-dim");
            Option create16 = OptionBuilder.create();
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("lsa dir");
            OptionBuilder.withLongOpt("lsm-dir");
            Option create17 = OptionBuilder.create("l");
            OptionBuilder.withDescription("if set, use the output folder as base dir");
            OptionBuilder.withLongOpt("base-dir");
            Option create18 = OptionBuilder.create();
            OptionBuilder.hasArgs();
            OptionBuilder.withDescription("if set, it extracts good templates and creates indexes after pre-processing");
            OptionBuilder.withLongOpt("templates");
            Option create19 = OptionBuilder.create();
            OptionBuilder.hasArgs();
            OptionBuilder.withDescription("if set, it creates categories indexes after pre-processing");
            OptionBuilder.withLongOpt("categories");
            Option create20 = OptionBuilder.create();
            OptionBuilder.hasArgs();
            OptionBuilder.withDescription("if set, it creates section indexes after pre-processing");
            OptionBuilder.withLongOpt("sections");
            Option create21 = OptionBuilder.create();
            OptionBuilder.withDescription("extract abstracts");
            OptionBuilder.withLongOpt(PageAbstractIndexer.ABSTRACT_FIELD_NAME);
            Option create22 = OptionBuilder.create();
            OptionBuilder.withArgName("folder");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("stopwords folder");
            OptionBuilder.withLongOpt("stopwords");
            Option create23 = OptionBuilder.create();
            OptionBuilder.withArgName("folder");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("configuration folder (default configuration)");
            OptionBuilder.withLongOpt("configuration");
            Option create24 = OptionBuilder.create();
            OptionBuilder.withDescription("skips unigrams");
            OptionBuilder.withLongOpt("skip-unigrams");
            Option create25 = OptionBuilder.create();
            OptionBuilder.withDescription("extract one example per sense (--lsm-dir is required, --lsm-dim is optional)");
            OptionBuilder.withLongOpt("one-example-per-sense");
            Option create26 = OptionBuilder.create();
            OptionBuilder.withDescription("extract all file and calculate the md5");
            OptionBuilder.withLongOpt("file");
            Option create27 = OptionBuilder.create();
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("extract airpedia classes (the directory from which to read the mapping between pages and classes is required)");
            OptionBuilder.withLongOpt("airpedia-class-dir");
            Option create28 = OptionBuilder.create();
            OptionBuilder.withDescription("extract person information");
            OptionBuilder.withLongOpt("person-info");
            Option create29 = OptionBuilder.create();
            OptionBuilder.withArgName("mappingfile-ontologyfile");
            OptionBuilder.hasArgs();
            OptionBuilder.withDescription("DBpedia mappings and ontology");
            OptionBuilder.withLongOpt("dbpedia-pars");
            Option create30 = OptionBuilder.create();
            OptionBuilder.withDescription("normalize vectors (default is false)");
            OptionBuilder.withLongOpt("normalized");
            Option create31 = OptionBuilder.create();
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("maximum number of forms to process (default is all)");
            OptionBuilder.withLongOpt("num-forms");
            Option create32 = OptionBuilder.create();
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("root directory from which to read cross language links (csv and index)");
            OptionBuilder.withLongOpt("cross-language-dir");
            Option create33 = OptionBuilder.create();
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("Topic folder for all languages");
            OptionBuilder.withLongOpt("topic-dir");
            Option create34 = OptionBuilder.create();
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("Nam-nom folder for the language");
            OptionBuilder.withLongOpt("namnom-dir");
            Option create35 = OptionBuilder.create();
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("Airpedia2 folder for the language");
            OptionBuilder.withLongOpt("airpedia2-dir");
            Option create36 = OptionBuilder.create();
            OptionBuilder.withArgName(SchemaSymbols.ATTVAL_INT);
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("recursion maximum category depth (default is 7)");
            OptionBuilder.withLongOpt("max-depth");
            Option create37 = OptionBuilder.create();
            OptionBuilder.withDescription("extract page similarity based on categories");
            OptionBuilder.withLongOpt("category-similarity");
            Option create38 = OptionBuilder.create();
            OptionBuilder.withArgName("user@address");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("remote server in which to write the models");
            OptionBuilder.withLongOpt("remote-server");
            Option create39 = OptionBuilder.create();
            create19.setOptionalArg(true);
            create19.setArgName("range min threshold");
            create20.setOptionalArg(true);
            create20.setArgName("tokens labels");
            create21.setOptionalArg(true);
            create21.setArgName("tokens labels");
            options.addOption("h", "help", false, "print this message");
            options.addOption("v", "version", false, "output version information and exit");
            OptionBuilder.withDescription("trace mode");
            OptionBuilder.withLongOpt("trace");
            options.addOption(OptionBuilder.create());
            OptionBuilder.withDescription("debug mode");
            OptionBuilder.withLongOpt("debug");
            options.addOption(OptionBuilder.create());
            options.addOption(create);
            options.addOption(create2);
            options.addOption(create9);
            options.addOption(create3);
            options.addOption(create5);
            options.addOption(create4);
            options.addOption(create6);
            options.addOption(create28);
            options.addOption(create11);
            options.addOption(create12);
            options.addOption(create10);
            options.addOption(create13);
            options.addOption(create14);
            options.addOption(create15);
            options.addOption(create7);
            options.addOption(create8);
            options.addOption(create18);
            options.addOption(create19);
            options.addOption(create20);
            options.addOption(create21);
            options.addOption(create23);
            options.addOption(create24);
            options.addOption(create31);
            options.addOption(create32);
            options.addOption(create26);
            options.addOption(create17);
            options.addOption(create16);
            options.addOption(create25);
            options.addOption(create27);
            options.addOption(create22);
            options.addOption(create29);
            options.addOption(create33);
            options.addOption(create39);
            options.addOption(create34);
            options.addOption(create30);
            options.addOption(create35);
            options.addOption(create36);
            options.addOption(create38);
            options.addOption(create37);
            CommandLine parse = new PosixParser().parse(options, strArr);
            if (parse.hasOption("help") || parse.hasOption("version")) {
                throw new ParseException("");
            }
            String str = "configuration/";
            if (parse.hasOption("configuration")) {
                str = parse.getOptionValue("configuration");
                if (!str.endsWith(File.separator)) {
                    str = str + File.separator;
                }
            }
            Properties properties = new Properties();
            try {
                properties.load(new InputStreamReader(new FileInputStream(property), "UTF-8"));
            } catch (Exception e) {
                properties.setProperty("log4j.appender.stdout", "org.apache.log4j.ConsoleAppender");
                properties.setProperty("log4j.appender.stdout.layout.ConversionPattern", "[%t] %-5p (%F:%L) - %m %n");
                properties.setProperty("log4j.appender.stdout.layout", "org.apache.log4j.PatternLayout");
                properties.setProperty("log4j.appender.stdout.Encoding", "UTF-8");
            }
            if (parse.hasOption("trace")) {
                properties.setProperty("log4j.rootLogger", "trace,stdout");
            } else if (parse.hasOption("debug")) {
                properties.setProperty("log4j.rootLogger", "debug,stdout");
            } else if (properties.getProperty("log4j.rootLogger") == null) {
                properties.setProperty("log4j.rootLogger", "info,stdout");
            }
            PropertyConfigurator.configure(properties);
            logger.info("Configuration folder: " + str);
            String str2 = null;
            String str3 = null;
            String[] optionValues = parse.getOptionValues("dbpedia-pars");
            if (optionValues != null && optionValues.length == 2) {
                str2 = optionValues[0];
                str3 = optionValues[1];
            }
            int i = 1;
            if (parse.hasOption("num-threads")) {
                i = Integer.parseInt(parse.getOptionValue("num-threads"));
            }
            int i2 = Integer.MAX_VALUE;
            if (parse.hasOption("num-pages")) {
                i2 = Integer.parseInt(parse.getOptionValue("num-pages"));
            }
            int i3 = 10000;
            if (parse.hasOption("notification-point")) {
                i3 = Integer.parseInt(parse.getOptionValue("notification-point"));
            }
            boolean z = false;
            if (parse.hasOption("compress")) {
                z = true;
            }
            boolean z2 = false;
            if (parse.hasOption("normalized")) {
                z2 = true;
            }
            int i4 = 20000000;
            if (parse.hasOption("sort-size")) {
                i4 = Integer.parseInt(parse.getOptionValue("sort-size"));
            }
            ExtractorParameters extractorParameters = parse.hasOption("base-dir") ? new ExtractorParameters(parse.getOptionValue("wikipedia-dump"), parse.getOptionValue("output-dir"), true) : new ExtractorParameters(parse.getOptionValue("wikipedia-dump"), parse.getOptionValue("output-dir"));
            new File(extractorParameters.getExtractionOutputDirName()).mkdirs();
            logger.debug(extractorParameters);
            String str4 = null;
            if (parse.hasOption("stopwords")) {
                String optionValue = parse.getOptionValue("stopwords");
                if (!optionValue.endsWith(File.separator)) {
                    optionValue = optionValue + File.separator;
                }
                File file = new File(optionValue);
                if (!file.exists() || !file.isDirectory()) {
                    optionValue = null;
                }
                logger.info("Checking stopwords file...");
                if (optionValue != null) {
                    str4 = optionValue + "stopwords_" + extractorParameters.getLang() + ".txt";
                    if (new File(str4).exists()) {
                        logger.info("Stopwords file ok");
                    } else {
                        str4 = null;
                        logger.info("Stopwords file does not exist, it won't be used");
                    }
                }
            }
            if (parse.hasOption("pre-processing")) {
                logger.info("pre-processing...");
                writeLog(extractorParameters, parse, strArr, extractorParameters.getPreprocessingLogFileName());
                WikipediaPreprocessing wikipediaPreprocessing = new WikipediaPreprocessing(i, i2, extractorParameters.getLocale(), str);
                wikipediaPreprocessing.setNotificationPoint(i3);
                wikipediaPreprocessing.start(extractorParameters);
                logger.info("Portals and navigation templates...");
                WikipediaPageNavigationTemplateExtractor wikipediaPageNavigationTemplateExtractor = new WikipediaPageNavigationTemplateExtractor(i, i2, extractorParameters.getLocale());
                wikipediaPageNavigationTemplateExtractor.setNotificationPoint(i3);
                wikipediaPageNavigationTemplateExtractor.start(extractorParameters);
                new WikipediaPagePortalExtractor(i, i2, extractorParameters.getLocale()).start(extractorParameters);
                new PagePortalIndexer(extractorParameters.getWikipediaPagePortalIndexName()).index(extractorParameters.getWikipediaPagePortalFileName(), z);
                new PageNavigationTemplateIndexer(extractorParameters.getWikipediaPageNavigationTemplateIndexName()).index(extractorParameters.getWikipediaPageNavigationTemplateFileName(), z);
                logger.info("extracting text from " + extractorParameters.getWikipediaXmlFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaTextFileName() + ")...");
                WikipediaTextExtractor wikipediaTextExtractor = new WikipediaTextExtractor(i, i2, extractorParameters.getLocale());
                wikipediaTextExtractor.setNotificationPoint(i3);
                wikipediaTextExtractor.start(extractorParameters);
                logger.info("extracting unigrams from " + extractorParameters.getWikipediaTextFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaUnigramFileName() + ")...");
                UnigramExtractor unigramExtractor = new UnigramExtractor(i, i2);
                unigramExtractor.setNotificationPoint(i3);
                unigramExtractor.start(extractorParameters);
                delete(extractorParameters.getWikipediaTextIndexName());
                logger.info("indexing text (" + extractorParameters.getWikipediaTextIndexName() + ")...");
                PageTextIndexer pageTextIndexer = new PageTextIndexer(extractorParameters.getWikipediaTextIndexName());
                pageTextIndexer.index(extractorParameters.getWikipediaTextFileName(), z);
                pageTextIndexer.close();
                delete(extractorParameters.getWikipediaPageCategoryIndexName());
                logger.info("indexing page categories (" + extractorParameters.getWikipediaPageCategoryIndexName() + ")...");
                PageCategoryIndexer pageCategoryIndexer = new PageCategoryIndexer(extractorParameters.getWikipediaPageCategoryIndexName());
                pageCategoryIndexer.index(extractorParameters.getWikipediaPageCategoryFileName(), z);
                pageCategoryIndexer.close();
                delete(extractorParameters.getWikipediaCategorySuperCategoryIndexName());
                logger.info("indexing category super categories (" + extractorParameters.getWikipediaCategorySuperCategoryIndexName() + ")...");
                CategorySuperCategoryIndexer categorySuperCategoryIndexer = new CategorySuperCategoryIndexer(extractorParameters.getWikipediaCategorySuperCategoryIndexName());
                categorySuperCategoryIndexer.index(extractorParameters.getWikipediaCategorySuperCategoryFileName(), z);
                categorySuperCategoryIndexer.close();
                logger.info("sorting " + extractorParameters.getWikipediaCategorySuperCategoryFileName() + "...");
                UnixSortWrapper.sort(extractorParameters.getWikipediaCategorySuperCategoryFileName(), extractorParameters.getWikipediaCategorySubCategoryFileName(), 1, i);
                delete(extractorParameters.getWikipediaCategorySubCategoryIndexName());
                logger.info("indexing category sub categories (" + extractorParameters.getWikipediaCategorySubCategoryIndexName() + ")...");
                CategorySubCategoryIndexer categorySubCategoryIndexer = new CategorySubCategoryIndexer(extractorParameters.getWikipediaCategorySubCategoryIndexName());
                categorySubCategoryIndexer.index(extractorParameters.getWikipediaCategorySubCategoryFileName(), z);
                categorySubCategoryIndexer.close();
                UnixSortWrapper.sort(extractorParameters.getWikipediaPageCategoryFileName(), extractorParameters.getWikipediaCategoryPageFileName(), 1, i);
                delete(extractorParameters.getWikipediaCategoryPageIndexName());
                logger.info("indexing category page (" + extractorParameters.getWikipediaCategoryPageIndexName() + ")...");
                CategoryPageIndexer categoryPageIndexer = new CategoryPageIndexer(extractorParameters.getWikipediaCategoryPageIndexName());
                categoryPageIndexer.index(extractorParameters.getWikipediaCategoryPageFileName(), z);
                categoryPageIndexer.close();
            }
            if (str2 != null) {
                logger.info("DBpedia mappings");
                new DBpediaClassExtractor(i, extractorParameters.getLocale()).start(extractorParameters.getWikipediaXmlFileName(), extractorParameters.getWikipediaDBPediaClassesIndexName(), str2, str3);
            }
            if (parse.hasOption("templates")) {
                String[] optionValues2 = parse.getOptionValues("templates");
                String str5 = extractorParameters.getWikipediaTemplateFileNames().get("map-rep");
                String str6 = extractorParameters.getWikipediaTemplateFileNames().get("complete");
                String str7 = extractorParameters.getWikipediaTemplateFileNames().get("good");
                String str8 = extractorParameters.getWikipediaTemplateFileNames().get("pruned");
                String str9 = extractorParameters.getWikipediaTemplateFileNames().get("pruned-s-page");
                String str10 = extractorParameters.getWikipediaTemplateFileNames().get("pruned-s-tpl");
                String str11 = extractorParameters.getWikipediaTemplateFileNames().get("index-p2t");
                String str12 = extractorParameters.getWikipediaTemplateFileNames().get("index-t2p");
                String str13 = extractorParameters.getWikipediaTemplateFileNames().get("index-id");
                String str14 = extractorParameters.getWikipediaTemplateFileNames().get("infoboxes");
                double d = 1.01d;
                int i5 = 50;
                int i6 = 2;
                if (optionValues2 != null) {
                    if (optionValues2.length > 0) {
                        d = Double.parseDouble(optionValues2[0]);
                    }
                    if (optionValues2.length > 1) {
                        i5 = Integer.parseInt(optionValues2[1]);
                    }
                    if (optionValues2.length > 2) {
                        i6 = Integer.parseInt(optionValues2[2]);
                    }
                }
                logger.info("Working on templates [min=" + i5 + " range=" + d + " threshold=" + i6 + "]...");
                logger.debug("min " + i5);
                logger.debug("range " + d);
                logger.debug("threshold " + i6);
                logger.info("Pruning templates...");
                new PruneTemplates(str5, str6, str7, d, i5);
                logger.info("Collecting templates...");
                new CollectGoodTemplates(str5, str7, str8, i6, i, str14);
                logger.info("Sorting template files...");
                org.fbk.cit.hlt.thewikimachine.csv.FileUtils.sort(str8, str9, 0, i4, z);
                org.fbk.cit.hlt.thewikimachine.csv.FileUtils.sort(str8, str10, 1, i4, z);
                logger.info("Indexing templates...");
                String checkWriteableFolder = GenericFileUtils.checkWriteableFolder(str11, true);
                String checkWriteableFolder2 = GenericFileUtils.checkWriteableFolder(str12, true);
                String checkWriteableFolder3 = GenericFileUtils.checkWriteableFolder(str13, true);
                if (checkWriteableFolder2 != null && checkWriteableFolder != null && checkWriteableFolder3 != null) {
                    new GenericValuesIndexer(str9, str10, checkWriteableFolder, checkWriteableFolder2, checkWriteableFolder3);
                }
            }
            if (parse.hasOption("categories")) {
                logger.info("Working on categories...");
                String[] optionValues3 = parse.getOptionValues("categories");
                ArrayList arrayList = optionValues3 != null ? new ArrayList(Arrays.asList(optionValues3)) : new ArrayList();
                String wikipediaPageCategoryFileName = extractorParameters.getWikipediaPageCategoryFileName();
                if (arrayList.size() == 0 || arrayList.contains("labels")) {
                    String str15 = extractorParameters.getWikipediaCategoryFileNames().get("s-cat");
                    String str16 = extractorParameters.getWikipediaCategoryFileNames().get("index-p2c");
                    String str17 = extractorParameters.getWikipediaCategoryFileNames().get("index-c2p");
                    String str18 = extractorParameters.getWikipediaCategoryFileNames().get("index-id");
                    logger.info("Sorting category files...");
                    org.fbk.cit.hlt.thewikimachine.csv.FileUtils.sort(wikipediaPageCategoryFileName, str15, 1, i4, z);
                    logger.info("Indexing categories...");
                    String checkWriteableFolder4 = GenericFileUtils.checkWriteableFolder(str16, true);
                    String checkWriteableFolder5 = GenericFileUtils.checkWriteableFolder(str17, true);
                    String checkWriteableFolder6 = GenericFileUtils.checkWriteableFolder(str18, true);
                    if (checkWriteableFolder5 != null && checkWriteableFolder4 != null && checkWriteableFolder6 != null) {
                        new GenericValuesIndexer(wikipediaPageCategoryFileName, str15, checkWriteableFolder4, checkWriteableFolder5, checkWriteableFolder6);
                    }
                }
                if (arrayList.size() == 0 || arrayList.contains("tokens")) {
                    String str19 = extractorParameters.getWikipediaCategoryFileNames().get("tokens");
                    String str20 = extractorParameters.getWikipediaCategoryFileNames().get("tokens-s-tok");
                    String str21 = extractorParameters.getWikipediaCategoryFileNames().get("tokens-index-p2k");
                    String str22 = extractorParameters.getWikipediaCategoryFileNames().get("tokens-index-k2p");
                    String str23 = extractorParameters.getWikipediaCategoryFileNames().get("tokens-index-id");
                    logger.info("Tokenizing categories...");
                    new PageCategoryTokenizer(wikipediaPageCategoryFileName, str19, str4);
                    logger.info("Sorting category-token files...");
                    org.fbk.cit.hlt.thewikimachine.csv.FileUtils.sort(str19, str20, 1, i4, z);
                    logger.info("Indexing category-tokens...");
                    String checkWriteableFolder7 = GenericFileUtils.checkWriteableFolder(str21, true);
                    String checkWriteableFolder8 = GenericFileUtils.checkWriteableFolder(str22, true);
                    String checkWriteableFolder9 = GenericFileUtils.checkWriteableFolder(str23, true);
                    if (checkWriteableFolder7 != null && checkWriteableFolder8 != null && checkWriteableFolder9 != null) {
                        new GenericValuesIndexer(str19, str20, checkWriteableFolder7, checkWriteableFolder8, checkWriteableFolder9);
                    }
                }
            }
            if (parse.hasOption("sections")) {
                logger.info("Working on sections...");
                String[] optionValues4 = parse.getOptionValues("categories");
                ArrayList arrayList2 = optionValues4 != null ? new ArrayList(Arrays.asList(optionValues4)) : new ArrayList();
                String wikipediaSectionTitleFileName = extractorParameters.getWikipediaSectionTitleFileName();
                if (arrayList2.size() == 0 || arrayList2.contains("labels")) {
                    String str24 = extractorParameters.getWikipediaSectionTitleFileNames().get("s-sec");
                    String str25 = extractorParameters.getWikipediaSectionTitleFileNames().get("index-p2s");
                    String str26 = extractorParameters.getWikipediaSectionTitleFileNames().get("index-s2p");
                    String str27 = extractorParameters.getWikipediaSectionTitleFileNames().get("index-id");
                    logger.info("Sorting section files...");
                    org.fbk.cit.hlt.thewikimachine.csv.FileUtils.sort(wikipediaSectionTitleFileName, str24, 1, i4, z);
                    logger.info("Indexing sections...");
                    String checkWriteableFolder10 = GenericFileUtils.checkWriteableFolder(str25, true);
                    String checkWriteableFolder11 = GenericFileUtils.checkWriteableFolder(str26, true);
                    String checkWriteableFolder12 = GenericFileUtils.checkWriteableFolder(str27, true);
                    if (checkWriteableFolder11 != null && checkWriteableFolder10 != null && checkWriteableFolder12 != null) {
                        new GenericValuesIndexer(wikipediaSectionTitleFileName, str24, checkWriteableFolder10, checkWriteableFolder11, checkWriteableFolder12);
                    }
                }
                if (arrayList2.size() == 0 || arrayList2.contains("tokens")) {
                    String str28 = extractorParameters.getWikipediaSectionTitleFileNames().get("tokens");
                    String str29 = extractorParameters.getWikipediaSectionTitleFileNames().get("tokens-s-tok");
                    String str30 = extractorParameters.getWikipediaSectionTitleFileNames().get("tokens-index-p2k");
                    String str31 = extractorParameters.getWikipediaSectionTitleFileNames().get("tokens-index-k2p");
                    String str32 = extractorParameters.getWikipediaSectionTitleFileNames().get("tokens-index-id");
                    logger.info("Tokenizing sections...");
                    new SectionTitleTokenizer(wikipediaSectionTitleFileName, str28, str4);
                    logger.info("Sorting category-token files...");
                    org.fbk.cit.hlt.thewikimachine.csv.FileUtils.sort(str28, str29, 1, i4, z);
                    logger.info("Indexing category-tokens...");
                    String checkWriteableFolder13 = GenericFileUtils.checkWriteableFolder(str30, true);
                    String checkWriteableFolder14 = GenericFileUtils.checkWriteableFolder(str31, true);
                    String checkWriteableFolder15 = GenericFileUtils.checkWriteableFolder(str32, true);
                    if (checkWriteableFolder13 != null && checkWriteableFolder14 != null && checkWriteableFolder15 != null) {
                        new GenericValuesIndexer(str28, str29, checkWriteableFolder13, checkWriteableFolder14, checkWriteableFolder15);
                    }
                }
            }
            if (parse.hasOption("file")) {
                logger.debug("extracting files from page (" + extractorParameters.getWikipediaFileSourceName() + ")...");
                WikipediaFileSourceExtractor wikipediaFileSourceExtractor = new WikipediaFileSourceExtractor(i, i2, extractorParameters.getLocale());
                wikipediaFileSourceExtractor.setNotificationPoint(i3);
                wikipediaFileSourceExtractor.start(extractorParameters);
                delete(extractorParameters.getWikipediaFileSourceIndexName());
                logger.info("indexing files from " + extractorParameters.getWikipediaFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaFileSourceIndexName() + ")...");
                PageFileIndexer pageFileIndexer = new PageFileIndexer(extractorParameters.getWikipediaFileSourceIndexName());
                pageFileIndexer.index(extractorParameters.getWikipediaFileSourceName(), z);
                pageFileIndexer.close();
            }
            if (parse.hasOption("airpedia-class-dir")) {
                logger.info("extracting airpedia...");
                delete(extractorParameters.getWikipediaPageAirPediaClassIndexName());
                String optionValue2 = parse.getOptionValue("airpedia-class-dir");
                if (!optionValue2.endsWith(File.separator)) {
                    optionValue2 = optionValue2 + File.separator;
                }
                String str33 = optionValue2 + extractorParameters.getLang() + ".csv";
                logger.info("indexing airpedia from " + str33 + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaPageAirPediaClassIndexName() + ")...");
                try {
                    PageAirpediaClassIndexer pageAirpediaClassIndexer = new PageAirpediaClassIndexer(extractorParameters.getWikipediaPageAirPediaClassIndexName());
                    pageAirpediaClassIndexer.index(str33, z);
                    pageAirpediaClassIndexer.close();
                } catch (Exception e2) {
                    logger.error(e2.getMessage());
                }
            }
            if (parse.hasOption("person-info")) {
                logger.debug("extracting first name " + extractorParameters.getWikipediaFirstNameFileName() + "...");
                FirstNameExtractor firstNameExtractor = new FirstNameExtractor(i);
                firstNameExtractor.setNotificationPoint(i3);
                firstNameExtractor.start(extractorParameters);
                delete(extractorParameters.getWikipediaFirstNameIndexName());
                logger.info("indexing first name from " + extractorParameters.getWikipediaFirstNameFileName() + "(" + extractorParameters.getWikipediaFirstNameIndexName() + ")...");
                FirstNameIndexer firstNameIndexer = new FirstNameIndexer(extractorParameters.getWikipediaFirstNameIndexName());
                firstNameIndexer.index(extractorParameters.getWikipediaFirstNameFileName());
                firstNameIndexer.close();
                delete(extractorParameters.getWikipediaPersonInfoIndexName());
                logger.info("indexing person information from " + extractorParameters.getWikipediaPersonInfoFileName() + "...");
                PersonInfoIndexer personInfoIndexer = new PersonInfoIndexer(extractorParameters.getWikipediaPersonInfoIndexName());
                personInfoIndexer.index(extractorParameters.getWikipediaPersonInfoFileName());
                personInfoIndexer.close();
            }
            if (parse.hasOption(PageAbstractIndexer.ABSTRACT_FIELD_NAME)) {
                logger.debug("extracting abstracts (" + extractorParameters.getWikipediaAbstractFileName() + ")...");
                WikipediaAbstractExtractor wikipediaAbstractExtractor = new WikipediaAbstractExtractor(i, i2, extractorParameters.getLocale());
                wikipediaAbstractExtractor.setNotificationPoint(i3);
                wikipediaAbstractExtractor.start(extractorParameters);
                delete(extractorParameters.getWikipediaAbstractIndexName());
                logger.info("indexing abstract from " + extractorParameters.getWikipediaAbstractFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaAbstractIndexName() + ")...");
                PageAbstractIndexer pageAbstractIndexer = new PageAbstractIndexer(extractorParameters.getWikipediaAbstractIndexName());
                pageAbstractIndexer.index(extractorParameters.getWikipediaAbstractFileName());
                pageAbstractIndexer.close();
            }
            if (parse.hasOption("outgoing")) {
            }
            if (parse.hasOption("vectors")) {
                if (parse.hasOption("lsm-dir")) {
                    writeLog(extractorParameters, parse, strArr, extractorParameters.getVectorLogFileName());
                    int i7 = 100;
                    if (parse.hasOption("lsa-dim")) {
                        i7 = Integer.parseInt(parse.getOptionValue("lsa-dim"));
                    }
                    logger.info("extracting vectors from " + extractorParameters.getWikipediaTextFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaVectorFileName() + ")...");
                    String str34 = parse.getOptionValue("lsm-dir") + File.separator + extractorParameters.getLang() + File.separator + "current";
                    logger.debug("lsaDir " + str34);
                    VectorExtractor vectorExtractor = new VectorExtractor(i, i2, str34, i7, z2);
                    vectorExtractor.setNotificationPoint(i3);
                    vectorExtractor.start(extractorParameters);
                    delete(extractorParameters.getWikipediaVectorIndexName());
                    logger.info("indexing vectors from " + extractorParameters.getWikipediaVectorFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaVectorIndexName() + ")...");
                    PageVectorIndexer pageVectorIndexer = new PageVectorIndexer(extractorParameters.getWikipediaVectorIndexName());
                    pageVectorIndexer.index(extractorParameters.getWikipediaVectorFileName());
                    pageVectorIndexer.close();
                } else {
                    logger.error("This extractor requires lsm-dir");
                }
            }
            if (parse.hasOption("example")) {
                writeLog(extractorParameters, parse, strArr, extractorParameters.getExtractionLogFileName());
                int i8 = 1000;
                if (parse.hasOption("max-freq")) {
                    i8 = Integer.parseInt(parse.getOptionValue("max-freq"));
                }
                logger.info("filtering examples with frequency higher than " + i8 + "...");
                logger.info("extracting examples (" + extractorParameters.getWikipediaExampleFileName() + ")...");
                WikipediaExampleExtractor wikipediaExampleExtractor = new WikipediaExampleExtractor(i, i2, extractorParameters.getLocale());
                wikipediaExampleExtractor.setCompress(z);
                wikipediaExampleExtractor.setNotificationPoint(i3);
                wikipediaExampleExtractor.setMaximumNumberOfExamplesPerPage(i8);
                wikipediaExampleExtractor.start(extractorParameters);
                int i9 = 0;
                if (parse.hasOption("min-freq")) {
                    i9 = Integer.parseInt(parse.getOptionValue("min-freq"));
                }
                if (i9 > 1) {
                    logger.info("filtering examples with frequency lower than " + i9 + "...");
                    FreqSet freqSet = new FreqSet();
                    freqSet.read(new BufferedReader(new FileReader(extractorParameters.getWikipediaFormFreqFileName())));
                    org.fbk.cit.hlt.thewikimachine.csv.FileUtils.filter(extractorParameters.getWikipediaExampleFileName(), extractorParameters.getWikipediaFilteredExampleFileName(), freqSet, 0, i9, z);
                    logger.info("sorting " + extractorParameters.getWikipediaExampleFileName() + ParsedPageLink.START_SUFFIX_PATTERN + i4 + ")...");
                    UnixSortWrapper.sort(extractorParameters.getWikipediaFilteredExampleFileName(), extractorParameters.getWikipediaSortedFormFileName(), 1, i);
                    UnixSortWrapper.sort(extractorParameters.getWikipediaFilteredExampleFileName(), extractorParameters.getWikipediaSortedPageFileName(), "1,2,3,4", 2, i);
                } else {
                    logger.info("sorting " + extractorParameters.getWikipediaExampleFileName() + "...");
                    UnixSortWrapper.sort(extractorParameters.getWikipediaExampleFileName(), extractorParameters.getWikipediaSortedFormFileName(), 1, i);
                    UnixSortWrapper.sort(extractorParameters.getWikipediaExampleFileName(), extractorParameters.getWikipediaSortedPageFileName(), "1,2,3,4", 2, i);
                }
                logger.info("indexing page/form pairs (" + extractorParameters.getWikipediaSortedPageFileName() + ")...");
                PageFormIndexer pageFormIndexer = new PageFormIndexer(extractorParameters.getWikipediaPageFormIndexName());
                pageFormIndexer.index(extractorParameters.getWikipediaSortedPageFileName(), z);
                pageFormIndexer.close();
                logger.info("indexing form/page pairs (" + extractorParameters.getWikipediaSortedFormFileName() + ")...");
                FormPageIndexer formPageIndexer = new FormPageIndexer(extractorParameters.getWikipediaFormPageIndexName());
                formPageIndexer.index(extractorParameters.getWikipediaSortedFormFileName(), z);
                formPageIndexer.close();
                logger.info("indexing types (" + extractorParameters.getWikipediaTypeIndexName() + ")...");
                TypeIndexer typeIndexer = new TypeIndexer(extractorParameters.getWikipediaTypeIndexName());
                typeIndexer.index(extractorParameters.getWikipediaSortedPageFileName());
                typeIndexer.close();
                logger.info("indexing page-freq (" + extractorParameters.getWikipediaPageFreqIndexName() + ")...");
                PageFreqIndexer pageFreqIndexer = new PageFreqIndexer(extractorParameters.getWikipediaPageFreqIndexName());
                pageFreqIndexer.index(extractorParameters.getWikipediaPageFreqFileName());
                pageFreqIndexer.close();
                int i10 = 10;
                if (parse.hasOption("n-gram")) {
                    i10 = Integer.parseInt(parse.getOptionValue("n-gram"));
                }
                logger.info("extracting " + i10 + "-grams from " + extractorParameters.getWikipediaTextFileName() + ParsedPageLink.START_SUFFIX_PATTERN + extractorParameters.getWikipediaNGramFileName() + ")...");
                new PageNGramExtractor(i, i2, i10).start(extractorParameters);
                logger.info("indexing n-grams (" + extractorParameters.getWikipediaNGramFileName() + ")...");
                NGramIndexer nGramIndexer = new NGramIndexer(extractorParameters.getWikipediaNGramIndexName());
                nGramIndexer.index(extractorParameters.getWikipediaNGramFileName());
                nGramIndexer.close();
            }
            if (parse.hasOption("incoming-outgoing")) {
                logger.info("extracting incoming/outgoing links...");
                writeLog(extractorParameters, parse, strArr, extractorParameters.getIncomingOutgoingLogFileName());
                WikipediaIncomingOutgoingLinkExtractor wikipediaIncomingOutgoingLinkExtractor = new WikipediaIncomingOutgoingLinkExtractor(i, i2, extractorParameters.getLocale());
                wikipediaIncomingOutgoingLinkExtractor.setNotificationPoint(i3);
                wikipediaIncomingOutgoingLinkExtractor.start(extractorParameters);
                logger.info("sorting incoming/outgoing links...");
                UnixSortWrapper.sort(extractorParameters.getWikipediaIncomingOutgoingFileName(), extractorParameters.getWikipediaSortedIncomingOutgoingFileName(), 1, i);
                delete(extractorParameters.getWikipediaIncomingOutgoingWeightedIndexName());
                logger.info("indexing incoming/outgoing weighted links...");
                PageIncomingOutgoingWeightedIndexer pageIncomingOutgoingWeightedIndexer = new PageIncomingOutgoingWeightedIndexer(extractorParameters.getWikipediaIncomingOutgoingWeightedIndexName(), extractorParameters.getWikipediaPageFreqFileName());
                pageIncomingOutgoingWeightedIndexer.index(extractorParameters.getWikipediaSortedIncomingOutgoingFileName(), z);
                pageIncomingOutgoingWeightedIndexer.close();
            }
            if (parse.hasOption("category-similarity")) {
                logger.info("extracting weighted categories...");
                PagePerCategoryCounter pagePerCategoryCounter = new PagePerCategoryCounter(i);
                if (parse.hasOption("max-depth")) {
                    pagePerCategoryCounter.setMaxDepth(Integer.parseInt(parse.getOptionValue("max-depth")));
                }
                pagePerCategoryCounter.setNotificationPoint(i3);
                pagePerCategoryCounter.start(extractorParameters);
                logger.info("sorting weighted categories...");
                UnixSortWrapper.sort(extractorParameters.getWikipediaPagePerCategoryCountFileName(), extractorParameters.getWikipediaSortedPagePerCategoryCountFileName(), 2, "-nr", i);
                logger.info("extracting page/weighted categories...");
                writeLog(extractorParameters, parse, strArr, extractorParameters.getWikipediaPageAllCategoryFileName());
                PageAllCategoryExtractor pageAllCategoryExtractor = new PageAllCategoryExtractor(i);
                if (parse.hasOption("max-depth")) {
                    pageAllCategoryExtractor.setMaxDepth(Integer.parseInt(parse.getOptionValue("max-depth")));
                }
                pageAllCategoryExtractor.setNotificationPoint(i3);
                pageAllCategoryExtractor.start(extractorParameters);
                delete(extractorParameters.getWikipediaPageAllCategoryIndexName());
                logger.info("indexing page/weighted categories...");
                PageAllCategoryIndexer pageAllCategoryIndexer = new PageAllCategoryIndexer(extractorParameters.getWikipediaPageAllCategoryIndexName());
                pageAllCategoryIndexer.index(extractorParameters.getWikipediaPageAllCategoryFileName());
                pageAllCategoryIndexer.close();
            }
            if (parse.hasOption("one-example-per-sense")) {
                if (parse.hasOption("lsm-dir")) {
                    String optionValue3 = parse.getOptionValue("lsm-dir");
                    if (!optionValue3.endsWith(File.separator)) {
                        optionValue3 = optionValue3 + File.separator;
                    }
                    String str35 = optionValue3 + extractorParameters.getLang() + File.separator + "current" + File.separator;
                    logger.debug("lsaDir " + str35);
                    File file2 = new File(str35 + "X-Ut");
                    File file3 = new File(str35 + "X-S");
                    File file4 = new File(str35 + "X-row");
                    File file5 = new File(str35 + "X-col");
                    File file6 = new File(str35 + "X-df");
                    int i11 = 100;
                    if (parse.hasOption("lsa-dim")) {
                        i11 = Integer.parseInt(parse.getOptionValue("dim"));
                    }
                    LSM lsm = new LSM(file2, file3, file4, file5, file6, i11, true, z2);
                    int i12 = Integer.MAX_VALUE;
                    if (parse.hasOption("num-forms")) {
                        i12 = Integer.parseInt(parse.getOptionValue("num-forms"));
                    }
                    logger.info("extracting one example per sense (" + extractorParameters.getOneExamplePerSenseFileName() + ")...");
                    OneExamplePerSenseExtractor oneExamplePerSenseExtractor = new OneExamplePerSenseExtractor(lsm, extractorParameters.getOneExamplePerSenseFileName(), i);
                    oneExamplePerSenseExtractor.setNormalized(z2);
                    oneExamplePerSenseExtractor.setNotificationPoint(i3);
                    oneExamplePerSenseExtractor.setNumForms(i12);
                    oneExamplePerSenseExtractor.extract(extractorParameters.getWikipediaSortedFormFileName());
                    if (delete(extractorParameters.getOneExamplePerSenseIndexName())) {
                        logger.warn("overwriting an existing index (" + extractorParameters.getOneExamplePerSenseIndexName() + ")...");
                    } else {
                        logger.info("indexing (" + extractorParameters.getOneExamplePerSenseIndexName() + ")...");
                    }
                    OneExamplePerSenseIndexer oneExamplePerSenseIndexer = new OneExamplePerSenseIndexer(extractorParameters.getOneExamplePerSenseIndexName());
                    oneExamplePerSenseIndexer.index(extractorParameters.getOneExamplePerSenseFileName(), z);
                    oneExamplePerSenseIndexer.close();
                } else {
                    logger.error("This extractor requires lsm-dir");
                }
            }
            if (parse.hasOption("cross-language-dir")) {
                String optionValue4 = parse.getOptionValue("cross-language-dir");
                logger.debug(String.format("Linking cross-language to %s", optionValue4));
                Path path = Paths.get(extractorParameters.getWikipediaCrossLanguageLinkFileName(), new String[0]);
                Path path2 = Paths.get(extractorParameters.getWikipediaCrossLanguageLinkIndexName(), new String[0]);
                Path path3 = FileSystems.getDefault().getPath(optionValue4, extractorParameters.getLang());
                createSymbolicLink(path, FileSystems.getDefault().getPath(optionValue4, extractorParameters.getLang() + ".csv"));
                createSymbolicLink(path2, path3);
            }
            if (parse.hasOption("topic-dir")) {
                String optionValue5 = parse.getOptionValue("topic-dir");
                logger.debug(String.format("Linking topics to %s", optionValue5));
                Path path4 = Paths.get(extractorParameters.getWikipediaPageTopicsFileName(), new String[0]);
                Path path5 = Paths.get(extractorParameters.getWikipediaPageTopicsIndexName(), new String[0]);
                Path path6 = FileSystems.getDefault().getPath(optionValue5, extractorParameters.getLang() + ".csv");
                Path path7 = FileSystems.getDefault().getPath(optionValue5, extractorParameters.getLang());
                createSymbolicLink(path4, path6);
                createSymbolicLink(path5, path7);
            }
            if (parse.hasOption("namnom-dir")) {
                String optionValue6 = parse.getOptionValue("namnom-dir");
                logger.debug(String.format("Linking NAM-NOM to %s", optionValue6));
                createSymbolicLink(Paths.get(extractorParameters.getWikipediaNamNomIndexName(), new String[0]), FileSystems.getDefault().getPath(optionValue6, extractorParameters.getLang()));
            }
            if (parse.hasOption("airpedia2-dir")) {
                String optionValue7 = parse.getOptionValue("airpedia2-dir");
                logger.debug(String.format("Linking Airpedia2 to %s", optionValue7));
                createSymbolicLink(Paths.get(extractorParameters.getWikipediaAirpedia2IndexName(), new String[0]), FileSystems.getDefault().getPath(optionValue7, extractorParameters.getLang()));
            }
            if (parse.hasOption("remote-server")) {
                String extractionOutputDirName = extractorParameters.getExtractionOutputDirName();
                String optionValue8 = parse.getOptionValue("remote-server");
                logger.info("synchronizing " + extractionOutputDirName + " with " + optionValue8 + ":" + extractionOutputDirName + "...");
                Map<String, String> searchForFilesInTheSameFolder = GenericFileUtils.searchForFilesInTheSameFolder(extractionOutputDirName, "type-index", "page-form-index", "form-page-index", "ngram-index", "page-freq.csv", "form-freq", "cross-lang-index", "ngram.csv", "unigram", "one-example-per-sense-index", "page-file-source-index", "first-name-index", "person-info-index", "airpedia-class-index", "abstract-index", "page-category-index", "category-super-category-index", "incoming-outgoing-weighted-index", "cross-lang.csv", "page-all-category-index");
                Iterator<String> it = searchForFilesInTheSameFolder.keySet().iterator();
                int i13 = 0;
                while (it.hasNext()) {
                    String str36 = searchForFilesInTheSameFolder.get(it.next());
                    logger.debug(i13 + StringTable.HORIZONTAL_TABULATION + optionValue8 + ":" + str36);
                    int rsync = UnixRsyncWrapper.rsync(str36, optionValue8 + ":" + extractionOutputDirName);
                    if (rsync != 0) {
                        logger.error(str36 + " cannot be synchronized (error " + rsync + ")");
                    }
                    i13++;
                }
            }
            logger.info("extraction ended " + new Date());
        } catch (ParseException e3) {
            if (e3.getMessage().length() > 0) {
                System.out.println("Parsing failed: " + e3.getMessage() + "\n");
            }
            new HelpFormatter().printHelp(400, "java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.ModelExtractor", "\n", options, "\n", true);
        }
    }
}
