package org.fbk.cit.hlt.thewikimachine.experiments.mt;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.Date;
import java.util.regex.Pattern;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;
import org.fbk.cit.hlt.thewikimachine.util.StringTable;

/* loaded from: input_file:org/fbk/cit/hlt/thewikimachine/experiments/mt/Cleaner.class */
/**
 * Command-line utility that reads UTF-8 text files, replaces every
 * {@code &...;} token with a single placeholder character, and logs the
 * result at debug level.
 *
 * <p>Constructing a {@code Cleaner} processes one file. {@link #main(String[])}
 * processes either a single file or every {@code *.txt} file directly inside a
 * directory.
 */
public class Cleaner {
    static Logger logger = Logger.getLogger(Cleaner.class.getName());
    private static Pattern tabPattern = Pattern.compile(StringTable.HORIZONTAL_TABULATION);
    private static Pattern spacePattern = Pattern.compile(" ");
    static DecimalFormat df = new DecimalFormat("###,###,###,###");

    /** Matches an ampersand, one or more non-semicolon characters, then a semicolon. */
    private static final Pattern ENTITY_PATTERN = Pattern.compile("&([^;]+);");

    /**
     * Text substituted for every {@link #ENTITY_PATTERN} match: the single
     * character U+0001.
     * NOTE(review): the pattern captures a group that is never referenced;
     * possibly a back-reference such as "$1" was intended -- confirm.
     */
    private static final String ENTITY_REPLACEMENT = "\u0001";

    /**
     * Reads the given file, replaces every {@code &...;} token with
     * {@link #ENTITY_REPLACEMENT}, and logs the resulting text at debug level.
     * The cleaned text is not written to disk by this constructor.
     *
     * @param str path of the UTF-8 text file to clean
     * @throws IOException if the file cannot be read
     */
    public Cleaner(String str) throws IOException {
        String cleaned = ENTITY_PATTERN.matcher(read(str)).replaceAll(ENTITY_REPLACEMENT);
        logger.debug(cleaned);
    }

    /**
     * Reads a whole file as UTF-8 text.
     *
     * @param str path of the file to read
     * @return the file content, with every line terminated by {@code '\n'}
     *         (so a non-empty result always ends with a newline)
     * @throws IOException if the file cannot be opened or read
     */
    public String read(String str) throws IOException {
        logger.info("reading " + str + "...");
        StringBuilder content = new StringBuilder();
        // try-with-resources guarantees the file handle is released even when reading fails.
        try (FileInputStream in = new FileInputStream(str);
             LineNumberReader reader = new LineNumberReader(new InputStreamReader(in, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Writes text to a file as UTF-8, replacing any existing content.
     *
     * @param str  path of the file to write
     * @param str2 text to write
     * @throws IOException if the file cannot be opened or the write fails
     */
    public void write(String str, String str2) throws IOException {
        logger.info("writing " + str + "...");
        try (PrintWriter out = new PrintWriter(
                new BufferedWriter(new OutputStreamWriter(new FileOutputStream(str), "UTF-8")))) {
            out.print(str2);
            // PrintWriter swallows IOExceptions; checkError() flushes and reports them.
            if (out.checkError()) {
                throw new IOException("failed to write " + str);
            }
        }
    }

    /**
     * Entry point. Configures log4j from the {@code log-config} system property
     * (default {@code configuration/log-config.txt}), parses the command line
     * and cleans the file, or the {@code *.txt} files of the directory, given
     * with {@code -d/--dir}.
     *
     * @param strArr command-line arguments
     * @throws IOException if the single file given with {@code --dir} cannot be read
     */
    public static void main(String[] strArr) throws IOException {
        String logConfig = System.getProperty("log-config");
        if (logConfig == null) {
            logConfig = "configuration/log-config.txt";
        }
        PropertyConfigurator.configure(logConfig);

        Options options = new Options();
        try {
            // The OptionBuilder calls configure the option returned by create().
            OptionBuilder.withArgName("dir");
            OptionBuilder.hasArg();
            OptionBuilder.withDescription("directory from which to read the text files");
            OptionBuilder.isRequired();
            OptionBuilder.withLongOpt("dir");
            Option dirOption = OptionBuilder.create("d");

            options.addOption("h", "help", false, "print this message");
            options.addOption("v", "version", false, "output version information and exit");
            options.addOption(dirOption);

            CommandLine commandLine = new PosixParser().parse(options, strArr);
            logger.debug(commandLine);

            // Help and version requests are routed through the usage output below.
            if (commandLine.hasOption("help") || commandLine.hasOption("version")) {
                throw new ParseException("");
            }

            String path = commandLine.getOptionValue("dir");
            File input = new File(path);
            if (input.isFile()) {
                new Cleaner(path);
                return;
            }

            File[] textFiles = input.listFiles(new FilenameFilter() {
                @Override
                public boolean accept(File dir, String name) {
                    return name.endsWith(".txt");
                }
            });
            // listFiles returns null when the path is not a directory or cannot be listed.
            if (textFiles == null) {
                logger.error("Cannot list files in " + input);
                return;
            }

            for (File textFile : textFiles) {
                try {
                    new Cleaner(textFile.getAbsolutePath());
                } catch (Exception e) {
                    // Best effort: a failure on one file must not stop the remaining ones.
                    logger.error("Error at " + textFile, e);
                }
            }
        } catch (ParseException e) {
            logger.error("Parsing failed: " + e.getMessage() + "\n");
            new HelpFormatter().printHelp(200, "java -cp dist/thewikimachine.jar org.fbk.cit.hlt.thewikimachine.experiments.mt.Cleaner", "\n", options, "\n", true);
        } finally {
            logger.info("extraction ended " + new Date());
        }
    }
}
