package org.languagetool.dev.dumpcheck;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.MultiThreadedJLanguageTool;
import org.languagetool.rules.CategoryId;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;

/* loaded from: input_file:org/languagetool/dev/dumpcheck/SentenceSourceChecker.class */
/**
 * Command-line tool that reads sentences from one or more input files, checks
 * each sentence with LanguageTool and hands the rule matches to a result
 * handler (a database handler if {@code --db-properties} is given, otherwise a
 * STDOUT handler).
 *
 * <p>See {@link #ensureCorrectUsageOrExit(String[])} for the supported options.
 */
public class SentenceSourceChecker {

    private SentenceSourceChecker() {
        // only instantiated from main()
    }

    /**
     * Parses the command line, loads the optional rule-deactivation properties
     * and starts the check.
     *
     * @param args command-line arguments
     * @throws IOException if a given properties file is missing/not a regular file
     *                     or cannot be read, or if the check itself fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        SentenceSourceChecker checker = new SentenceSourceChecker();
        CommandLine commandLine = ensureCorrectUsageOrExit(args);

        File dbPropertiesFile = null;
        if (commandLine.hasOption('d')) {
            dbPropertiesFile = requireRegularFile(commandLine.getOptionValue('d'));
        }

        String languageCode = commandLine.getOptionValue('l');
        Set<String> disabledRuleIds = new HashSet<>();
        if (commandLine.hasOption("rule-properties")) {
            File rulePropertiesFile = requireRegularFile(commandLine.getOptionValue("rule-properties"));
            Properties ruleProperties = new Properties();
            try (FileInputStream in = new FileInputStream(rulePropertiesFile)) {
                ruleProperties.load(in);
            }
            // rules disabled for every language first, then the language-specific ones
            addDisabledRules("all", disabledRuleIds, ruleProperties);
            addDisabledRules(languageCode, disabledRuleIds, ruleProperties);
        }

        int maxSentences = Integer.parseInt(commandLine.getOptionValue("max-sentences", "0"));
        int maxErrors = Integer.parseInt(commandLine.getOptionValue("max-errors", "0"));
        List<String> fileNames = Arrays.asList(commandLine.getOptionValues('f'));
        String[] ruleIds = commandLine.hasOption('r')
                ? commandLine.getOptionValue('r').split(",") : null;
        String[] additionalCategoryIds = commandLine.hasOption("also-enable-categories")
                ? commandLine.getOptionValue("also-enable-categories").split(",") : null;
        File languageModelDir = optionalFile(commandLine, "languagemodel");
        File word2vecModelDir = optionalFile(commandLine, "word2vecmodel");
        File neuralNetworkModelDir = optionalFile(commandLine, "neuralnetworkmodel");
        Pattern filter = commandLine.hasOption("filter")
                ? Pattern.compile(commandLine.getOptionValue("filter")) : null;

        checker.run(dbPropertiesFile, disabledRuleIds, languageCode, fileNames, ruleIds,
                additionalCategoryIds, maxSentences, maxErrors, languageModelDir,
                word2vecModelDir, neuralNetworkModelDir, filter);
    }

    /** Returns a {@link File} for {@code path}, failing if it does not exist or is a directory. */
    private static File requireRegularFile(String path) throws IOException {
        File candidate = new File(path);
        if (!candidate.exists() || candidate.isDirectory()) {
            throw new IOException("File not found or isn't a file: " + candidate.getAbsolutePath());
        }
        return candidate;
    }

    /** Returns the value of the long option {@code name} as a {@link File}, or {@code null} if it is not set. */
    private static File optionalFile(CommandLine commandLine, String name) {
        return commandLine.hasOption(name) ? new File(commandLine.getOptionValue(name)) : null;
    }

    /**
     * Adds the comma-separated rule ids stored under {@code languageCode} in
     * {@code disabledRulesProps} to {@code disabledRuleIds}; does nothing if the key is absent.
     */
    private static void addDisabledRules(String languageCode, Set<String> disabledRuleIds, Properties disabledRulesProps) {
        String ruleIdList = disabledRulesProps.getProperty(languageCode);
        if (ruleIdList != null) {
            disabledRuleIds.addAll(Arrays.asList(ruleIdList.split(",")));
        }
    }

    /**
     * Registers a single-argument option.
     *
     * @param shortName one-letter option name, or {@code null} for a long-only option
     * @param required  whether the parser must reject a command line without this option
     */
    private static void addOption(Options options, String shortName, String longName, String argName,
                                  boolean required, String description) {
        // OptionBuilder is a static, stateful builder: the settings below are consumed (and reset) by create()
        OptionBuilder.withLongOpt(longName);
        OptionBuilder.withArgName(argName);
        OptionBuilder.hasArg();
        OptionBuilder.withDescription(description);
        if (required) {
            OptionBuilder.isRequired();
        }
        options.addOption(shortName != null ? OptionBuilder.create(shortName) : OptionBuilder.create());
    }

    /**
     * Parses {@code args}; on a usage error prints the error and the help text to the
     * console and terminates the JVM with exit code 1.
     *
     * @return the parsed command line (never {@code null})
     */
    private static CommandLine ensureCorrectUsageOrExit(String[] args) {
        Options options = new Options();
        addOption(options, "l", "language", "code", true,
                "language code like 'en' or 'de'");
        addOption(options, "d", "db-properties", "file", false,
                "A file to set database access properties. If not set, the output will be written to STDOUT. The file needs to set the properties dbUrl ('jdbc:...'), dbUser, and dbPassword. It can optionally define the batchSize for insert statements, which defaults to 1.");
        addOption(options, null, "rule-properties", "file", false,
                "A file to set rules which should be disabled per language (e.g. en=RULE1,RULE2 or all=RULE3,RULE4)");
        addOption(options, "r", "rule-ids", "id", false,
                "comma-separated list of rule-ids to activate");
        addOption(options, null, "also-enable-categories", "categories", false,
                "comma-separated list of categories to activate, additionally to rules activated anyway");
        addOption(options, "f", "file", "file", true,
                "an unpacked Wikipedia XML dump; (must be named *.xml, dumps are available from http://dumps.wikimedia.org/backup-index.html) or a Tatoeba CSV file filtered to contain only one language (must be named tatoeba-*). You can specify this option more than once.");
        addOption(options, null, "max-sentences", "number", false,
                "maximum number of sentences to check");
        addOption(options, null, "max-errors", "number", false,
                "maximum number of errors, stop when finding more");
        addOption(options, null, "languagemodel", "indexDir", false,
                "directory with a '3grams' sub directory that contains an ngram index");
        // main() reads this option, so it has to be registered for the parser to accept it
        addOption(options, null, "word2vecmodel", "modelDir", false,
                "directory with word2vec model files");
        addOption(options, null, "neuralnetworkmodel", "baseDir", false,
                "base directory for saved neural network models");
        addOption(options, null, "filter", "regex", false,
                "Consider only sentences that contain this regular expression (for speed up)");
        try {
            return new GnuParser().parse(options, args);
        } catch (ParseException e) {
            System.err.println("Error: " + e.getMessage());
            HelpFormatter formatter = new HelpFormatter();
            formatter.setWidth(80);
            formatter.setSyntaxPrefix("Usage: ");
            formatter.printHelp(SentenceSourceChecker.class.getSimpleName() + " [OPTION]... --file <file> --language <code>", options);
            System.exit(1);
            // not reached, but the compiler needs a terminating statement
            throw new IllegalStateException();
        }
    }

    /**
     * Checks all sentences of the given files and passes the matches to the result handler.
     *
     * @param propFile              database properties; if {@code null}, results go to STDOUT
     * @param disabledRules         rule ids to disable (only applied when {@code ruleIds} is {@code null})
     * @param langCode              short code of the language to check
     * @param fileNames             input files
     * @param ruleIds               if not {@code null}, only these rules are enabled
     * @param additionalCategoryIds categories whose rules are enabled additionally, may be {@code null}
     * @param maxSentences          sentence limit passed to the result handler, {@code 0} is reported as "no limit"
     * @param maxErrors             error limit passed to the result handler, {@code 0} is reported as "no limit"
     * @param languageModelDir      ngram index directory, or {@code null}
     * @param word2vecModelDir      word2vec model directory, or {@code null}
     * @param neuralNetworkModelDir neural network model directory, or {@code null}
     * @param filter                if not {@code null}, passed to the sentence source to restrict the sentences
     */
    private void run(File propFile, Set<String> disabledRules, String langCode, List<String> fileNames,
                     String[] ruleIds, String[] additionalCategoryIds, int maxSentences, int maxErrors,
                     File languageModelDir, File word2vecModelDir, File neuralNetworkModelDir,
                     Pattern filter) throws IOException {
        Language lang = Languages.getLanguageForShortCode(langCode);
        MultiThreadedJLanguageTool lt = new MultiThreadedJLanguageTool(lang);
        lt.setCleanOverlappingMatches(false);
        if (languageModelDir != null) {
            lt.activateLanguageModelRules(languageModelDir);
        }
        if (word2vecModelDir != null) {
            lt.activateWord2VecModelRules(word2vecModelDir);
        }
        if (neuralNetworkModelDir != null) {
            lt.activateNeuralNetworkRules(neuralNetworkModelDir);
        }
        if (ruleIds != null) {
            enableOnlySpecifiedRules(ruleIds, lt);
        } else {
            applyRuleDeactivation(lt, disabledRules);
        }
        if (filter != null) {
            System.out.println("*** NOTE: only sentences that match regular expression '" + filter + "' will be checked");
        }
        activateAdditionalCategories(additionalCategoryIds, lt);
        disableSpellingRules(lt);
        System.out.println("Working on: " + StringUtils.join(fileNames, ", "));
        System.out.println("Sentence limit: " + describeLimit(maxSentences));
        System.out.println("Error limit: " + describeLimit(maxErrors));

        ResultHandler resultHandler = null;
        int ruleMatchCount = 0;
        int sentenceCount = 0;
        try {
            resultHandler = propFile != null
                    ? new DatabaseHandler(propFile, maxSentences, maxErrors)
                    : new StdoutHandler(maxSentences, maxErrors);
            MixingSentenceSource mixingSource = MixingSentenceSource.create(fileNames, lang, filter);
            while (mixingSource.hasNext()) {
                Sentence sentence = mixingSource.next();
                try {
                    List<RuleMatch> matches = lt.check(sentence.getText());
                    resultHandler.handleResult(sentence, matches, lang);
                    sentenceCount++;
                    if (sentenceCount % 5000 == 0) {
                        System.err.printf("%s sentences checked...\n", NumberFormat.getNumberInstance(Locale.US).format(sentenceCount));
                    }
                    ruleMatchCount += matches.size();
                } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
                    // limits are a regular way to stop; handled by the outer catch
                    throw e;
                } catch (Exception e) {
                    throw new RuntimeException("Check failed on sentence: " + StringUtils.abbreviate(sentence.getText(), 250), e);
                }
            }
        } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
            System.out.println(getClass().getSimpleName() + ": " + e);
        } finally {
            // Runs on success, on a reached limit and on failure, so the handler is always closed
            lt.shutdown();
            if (resultHandler != null) {
                // float division; guard against "no sentence checked" to avoid dividing by zero
                float matchesPerSentence = sentenceCount > 0 ? (float) ruleMatchCount / sentenceCount : 0f;
                System.out.printf("%s: %d total matches\n", lang, ruleMatchCount);
                System.out.printf("%s: ø%.2f rule matches per sentence\n", lang, matchesPerSentence);
                try {
                    resultHandler.close();
                } catch (Exception e) {
                    // best effort: a failing close must not hide an exception from the check itself
                    e.printStackTrace();
                }
            }
        }
    }

    /** Formats a limit for the console: the number itself if positive, otherwise "no limit". */
    private static String describeLimit(int limit) {
        return limit > 0 ? String.valueOf(limit) : "no limit";
    }

    /** Disables every rule, then enables exactly {@code ruleIds} and warns about unknown ids. */
    private void enableOnlySpecifiedRules(String[] ruleIds, JLanguageTool lt) {
        for (Rule rule : lt.getAllRules()) {
            lt.disableRule(rule.getId());
        }
        for (String ruleId : ruleIds) {
            lt.enableRule(ruleId);
        }
        warnOnNonExistingRuleIds(ruleIds, lt);
        System.out.println("Only these rules are enabled: " + Arrays.toString(ruleIds));
    }

    /** Prints a warning for each id in {@code ruleIds} that is not the id of any rule of {@code lt}. */
    private void warnOnNonExistingRuleIds(String[] ruleIds, JLanguageTool lt) {
        // collect the known ids once instead of scanning all rules for every requested id
        Set<String> knownRuleIds = new HashSet<>();
        for (Rule rule : lt.getAllRules()) {
            knownRuleIds.add(rule.getId());
        }
        for (String ruleId : ruleIds) {
            if (!knownRuleIds.contains(ruleId)) {
                System.out.println("WARNING: Could not find rule '" + ruleId + "'");
            }
        }
    }

    /** Disables all rules in {@code disabledRules} and prints the resulting list of disabled rules. */
    private void applyRuleDeactivation(JLanguageTool lt, Set<String> disabledRules) {
        for (String ruleId : disabledRules) {
            lt.disableRule(ruleId);
        }
        System.out.println("These rules are disabled: " + lt.getDisabledRules());
    }

    /**
     * Enables every rule whose category id equals one of {@code additionalCategoryIds};
     * does nothing if the array is {@code null}.
     */
    private void activateAdditionalCategories(String[] additionalCategoryIds, JLanguageTool lt) {
        if (additionalCategoryIds == null) {
            return;
        }
        List<Rule> allRules = lt.getAllRules();
        for (String categoryId : additionalCategoryIds) {
            for (Rule rule : allRules) {
                CategoryId id = rule.getCategory().getId();
                if (id != null && id.toString().equals(categoryId)) {
                    System.out.println("Activating " + rule.getId() + " in category " + categoryId);
                    lt.enableRule(rule.getId());
                }
            }
        }
    }

    /** Disables all currently active dictionary-based spelling rules. */
    private void disableSpellingRules(JLanguageTool lt) {
        for (Rule rule : lt.getAllActiveRules()) {
            if (rule.isDictionaryBasedSpellingRule()) {
                lt.disableRule(rule.getId());
            }
        }
        System.out.println("All spelling rules are disabled");
    }
}
