package org.languagetool.dev.dumpcheck;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.GlobalConfig;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.Languages;
import org.languagetool.MultiThreadedJLanguageTool;
import org.languagetool.RuleMatchListener;
import org.languagetool.UserConfig;
import org.languagetool.markup.AnnotatedTextBuilder;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.patterns.AbstractPatternRule;

/**
 * Command-line tool that checks sentences from Wikipedia XML dumps or Tatoeba
 * files with LanguageTool and reports the rule matches on STDOUT, as CSV, or
 * via a database handler, depending on the options given.
 */
public class SentenceSourceChecker {

    /** A progress line is printed to STDERR every this many sentences. */
    private static final int PROGRESS_INTERVAL = 5000;

    // Limits quoted in the "input lines ignored" summary line.
    // NOTE(review): presumably these mirror the filter limits applied by the sentence
    // sources; they are plain literals here, so confirm they are still in sync.
    private static final int MIN_SENTENCE_SIZE = 10;
    private static final int MAX_SENTENCE_LENGTH = 300;
    private static final int MIN_SENTENCE_TOKEN_COUNT = 4;

    private SentenceSourceChecker() {
    }

    /**
     * Parses the command line, loads the optional list of disabled rules and starts the check.
     *
     * @param args command-line arguments, see {@link #ensureCorrectUsageOrExit(String[])}
     * @throws IOException if one of the given property files is missing or cannot be read,
     *                     or if the check itself fails with an I/O error
     */
    public static void main(String[] args) throws IOException {
        SentenceSourceChecker checker = new SentenceSourceChecker();
        CommandLine commandLine = ensureCorrectUsageOrExit(args);
        File dbPropertiesFile = null;
        if (commandLine.hasOption('d')) {
            dbPropertiesFile = requireRegularFile(commandLine.getOptionValue('d'));
        }
        String languageCode = commandLine.getOptionValue('l');
        Set<String> disabledRuleIds = new HashSet<>();
        if (commandLine.hasOption("rule-properties")) {
            File rulePropertiesFile = requireRegularFile(commandLine.getOptionValue("rule-properties"));
            Properties ruleProperties = new Properties();
            // try-with-resources closes the stream on both the success and the failure path
            try (FileInputStream in = new FileInputStream(rulePropertiesFile)) {
                ruleProperties.load(in);
            }
            addDisabledRules("all", disabledRuleIds, ruleProperties);
            addDisabledRules(languageCode, disabledRuleIds, ruleProperties);
        }
        String motherTongueCode = commandLine.getOptionValue('m');
        int maxSentences = Integer.parseInt(commandLine.getOptionValue("max-sentences", "0"));
        int maxErrors = Integer.parseInt(commandLine.getOptionValue("max-errors", "0"));
        int contextSize = Integer.parseInt(commandLine.getOptionValue("context-size", "50"));
        checker.run(dbPropertiesFile, disabledRuleIds, languageCode, motherTongueCode,
                maxSentences, maxErrors, contextSize, commandLine);
    }

    /**
     * Returns the file for the given path, insisting that it exists and is not a directory.
     *
     * @param path path as given on the command line
     * @return the corresponding file
     * @throws IOException if the path does not exist or denotes a directory
     */
    private static File requireRegularFile(String path) throws IOException {
        File file = new File(path);
        if (!file.exists() || file.isDirectory()) {
            throw new IOException("File not found or isn't a file: " + file.getAbsolutePath());
        }
        return file;
    }

    /**
     * Adds the comma-separated rule ids stored under {@code key} to {@code disabledRuleIds}.
     * Whitespace around an id is removed and empty entries are skipped, so a value such as
     * {@code "RULE1, RULE2,"} yields exactly {@code RULE1} and {@code RULE2}.
     *
     * @param key             property key, i.e. a language code or {@code "all"}
     * @param disabledRuleIds set that receives the ids
     * @param properties      properties to read from; a missing key is ignored
     */
    private static void addDisabledRules(String key, Set<String> disabledRuleIds, Properties properties) {
        String ruleList = properties.getProperty(key);
        if (ruleList == null) {
            return;
        }
        for (String entry : ruleList.split(",")) {
            String ruleId = entry.trim();
            if (!ruleId.isEmpty()) {
                disabledRuleIds.add(ruleId);
            }
        }
    }

    /**
     * Defines the supported options and parses the command line. On a parse error the error
     * and the usage help are printed and the JVM exits with status 1.
     *
     * @param args raw command-line arguments
     * @return the parsed command line
     */
    private static CommandLine ensureCorrectUsageOrExit(String[] args) {
        Options options = new Options();
        options.addOption(Option.builder("l").longOpt("language").argName("code").hasArg().desc("language code like 'en' or 'de'").required().build());
        options.addOption(Option.builder("m").longOpt("mother-tongue").argName("code").hasArg().desc("language code like 'en' or 'de'").build());
        options.addOption(Option.builder("d").longOpt("db-properties").argName("file").hasArg().desc("A file to set database access properties. If not set, the output will be written to STDOUT. The file needs to set the properties dbUrl ('jdbc:...'), dbUser, and dbPassword. It can optionally define the batchSize for insert statements, which defaults to 1.").build());
        options.addOption(Option.builder().longOpt("rule-properties").argName("file").hasArg().desc("A file to set rules which should be disabled per language (e.g. en=RULE1,RULE2 or all=RULE3,RULE4)").build());
        options.addOption(Option.builder("r").longOpt("rule-ids").argName("id").hasArg().desc("comma-separated list of rule-ids to activate").build());
        options.addOption(Option.builder().longOpt("also-enable-categories").argName("categories").hasArg().desc("comma-separated list of categories to activate, additionally to rules activated anyway").build());
        options.addOption(Option.builder("f").longOpt("file").argName("file").hasArg().desc("an unpacked Wikipedia XML dump; (must be named *.xml, dumps are available from http://dumps.wikimedia.org/backup-index.html) or a Tatoeba CSV file filtered to contain only one language (must be named tatoeba-*). You can specify this option more than once.").required().build());
        options.addOption(Option.builder().longOpt("csv").desc("print matches in a simple CSV format, marking matches like '__matched words__'").build());
        options.addOption(Option.builder().longOpt("max-sentences").argName("number").hasArg().desc("maximum number of sentences to check").build());
        options.addOption(Option.builder().longOpt("max-errors").argName("number").hasArg().desc("maximum number of errors, stop when finding more").build());
        options.addOption(Option.builder().longOpt("context-size").argName("number").hasArg().desc("context size per error, in characters").build());
        options.addOption(Option.builder().longOpt("languagemodel").argName("indexDir").hasArg().desc("directory with a '3grams' sub directory that contains an ngram index").build());
        options.addOption(Option.builder().longOpt("remoterules").argName("configFile").hasArg().desc("JSON file with configuration of remote rules").build());
        options.addOption(Option.builder().longOpt("filter").argName("regex").hasArg().desc("Consider only sentences that contain this regular expression (for speed up)").build());
        options.addOption(Option.builder().longOpt("spelling").desc("Don't skip spell checking rules").build());
        options.addOption(Option.builder().longOpt("rulesource").hasArg().desc("Activate only rules from this XML file (e.g. 'grammar.xml')").build());
        options.addOption(Option.builder().longOpt("skip").hasArg().desc("Skip this many sentences from input before actually checking sentences").build());
        options.addOption(Option.builder().longOpt("print-duration").desc("Print the duration of analysis in milliseconds").build());
        options.addOption(Option.builder().longOpt("nerUrl").argName("url").hasArg().desc("URL of a named entity recognition service").build());
        options.addOption(Option.builder().longOpt("skip-exceptions").desc("Whether internal Java exceptions should only be printed instead of stopping this script").build());
        try {
            return new DefaultParser().parse(options, args);
        } catch (ParseException e) {
            System.err.println("Error: " + e.getMessage());
            HelpFormatter helpFormatter = new HelpFormatter();
            helpFormatter.setWidth(80);
            helpFormatter.setSyntaxPrefix("Usage: ");
            helpFormatter.printHelp(SentenceSourceChecker.class.getSimpleName() + " [OPTION]... --file <file> --language <code>", options);
            System.exit(1);
            // only here to satisfy the compiler, which does not know that exit() never returns
            throw new IllegalStateException();
        }
    }

    /**
     * Configures LanguageTool according to the options, checks all sentences of the input
     * files and hands the matches to the selected result handler. LanguageTool is always shut
     * down at the end, and once a result handler has been created the summary is printed and
     * the handler is closed - also when a sentence or error limit stops the run early or when
     * the check fails with an exception.
     *
     * @param propFile         database properties file, or {@code null} to not use the database handler
     * @param disabledRules    ids of rules to disable; only applied if neither {@code --rulesource}
     *                         nor {@code --rule-ids} is given
     * @param langCode         code of the language to check
     * @param motherTongueCode code of the user's mother tongue, or {@code null}
     * @param maxSentences     sentence limit passed to the result handler; values &lt;= 0 are
     *                         reported as "no limit"
     * @param maxErrors        error limit passed to the result handler; values &lt;= 0 are
     *                         reported as "no limit"
     * @param contextSize      context size passed to the STDOUT handler
     * @param options          the parsed command line
     * @throws IOException if reading the input fails
     */
    private void run(File propFile, Set<String> disabledRules, String langCode, String motherTongueCode,
                     int maxSentences, int maxErrors, int contextSize, CommandLine options) throws IOException {
        long startTime = System.currentTimeMillis();
        String[] ruleIds = options.hasOption('r') ? options.getOptionValue('r').split(",") : null;
        String[] additionalCategoryIds = options.hasOption("also-enable-categories")
                ? options.getOptionValue("also-enable-categories").split(",") : null;
        String[] fileNames = options.getOptionValues('f');
        File languageModelDir = options.hasOption("languagemodel") ? new File(options.getOptionValue("languagemodel")) : null;
        File remoteRules = options.hasOption("remoterules") ? new File(options.getOptionValue("remoterules")) : null;
        Pattern filter = options.hasOption("filter") ? Pattern.compile(options.getOptionValue("filter")) : null;
        String ruleSource = options.hasOption("rulesource") ? options.getOptionValue("rulesource") : null;
        int skip = options.hasOption("skip") ? Integer.parseInt(options.getOptionValue("skip")) : 0;
        Language lang = Languages.getLanguageForShortCode(langCode);
        Language motherTongue = motherTongueCode != null ? Languages.getLanguageForShortCode(motherTongueCode) : null;
        GlobalConfig globalConfig = new GlobalConfig();
        if (options.hasOption("nerUrl")) {
            System.out.println("Using NER service: " + options.getOptionValue("nerUrl"));
            globalConfig.setNERUrl(options.getOptionValue("nerUrl"));
        }
        MultiThreadedJLanguageTool lt = new MultiThreadedJLanguageTool(lang, motherTongue, -1, globalConfig, (UserConfig) null);
        lt.setCleanOverlappingMatches(false);
        if (languageModelDir != null) {
            lt.activateLanguageModelRules(languageModelDir);
        }
        int activatedBySource = enableTempOffAndRuleSourceRules(lt, ruleSource);
        lt.activateRemoteRules(remoteRules);
        if (ruleSource != null) {
            System.out.println("Activated " + activatedBySource + " rules from " + ruleSource);
        } else if (ruleIds != null) {
            enableOnlySpecifiedRules(ruleIds, lt);
        } else {
            applyRuleDeactivation(lt, disabledRules);
        }
        if (filter != null) {
            System.out.println("*** NOTE: only sentences that match regular expression '" + filter + "' will be checked");
        }
        activateAdditionalCategories(additionalCategoryIds, lt);
        if (options.hasOption("spelling")) {
            System.out.println("Spelling rules active: yes (only if you're using a language code like en-US which comes with spelling)");
        } else if (ruleIds == null) {
            disableSpellingRules(lt);
            System.out.println("Spelling rules active: no");
        }
        System.out.println("Working on: " + StringUtils.join(fileNames, ", "));
        System.out.println("Sentence limit: " + describeLimit(maxSentences));
        System.out.println("Context size: " + contextSize);
        System.out.println("Error limit: " + describeLimit(maxErrors));
        System.out.println("Skip: " + skip);

        ResultHandler resultHandler = null;
        int ruleMatchCount = 0;
        int sentenceCount = 0;
        int skipCount = 0;
        // only known once the input has been read completely, stays 0 if the run stops early
        int ignoredCount = 0;
        boolean skipMessageShown = false;
        try {
            if (options.hasOption("csv")) {
                resultHandler = new CSVHandler(maxSentences, maxErrors);
            } else if (propFile != null) {
                resultHandler = new DatabaseHandler(propFile, maxSentences, maxErrors);
            } else {
                resultHandler = new StdoutHandler(maxSentences, maxErrors, contextSize);
            }
            MixingSentenceSource mixingSource = MixingSentenceSource.create(Arrays.asList(fileNames), lang, filter);
            while (mixingSource.hasNext()) {
                Sentence sentence = mixingSource.next();
                if (skipCount < skip) {
                    if (skipCount % PROGRESS_INTERVAL == 0) {
                        System.err.printf("%s sentences skipped...\n", NumberFormat.getNumberInstance(Locale.US).format(skipCount));
                    }
                    skipCount++;
                    continue;
                }
                if (skip > 0 && !skipMessageShown) {
                    System.err.println("Done skipping " + skip + " sentences.");
                    skipMessageShown = true;
                }
                try {
                    List<RuleMatch> matches = lt.check(new AnnotatedTextBuilder().addText(sentence.getText()).build(), true,
                            JLanguageTool.ParagraphHandling.NORMAL, (RuleMatchListener) null,
                            JLanguageTool.Mode.ALL, JLanguageTool.Level.PICKY);
                    resultHandler.handleResult(sentence, matches, lang);
                    sentenceCount++;
                    if (sentenceCount % PROGRESS_INTERVAL == 0) {
                        System.err.printf("%s sentences checked...\n", NumberFormat.getNumberInstance(Locale.US).format(sentenceCount));
                    }
                    ruleMatchCount += matches.size();
                } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
                    // limits are a regular way to end the run, let the outer handler deal with them
                    throw e;
                } catch (Exception e) {
                    if (!options.hasOption("skip-exceptions")) {
                        throw new RuntimeException("Check failed on sentence: " + StringUtils.abbreviate(sentence.getText(), 250), e);
                    }
                    // --skip-exceptions: report the problem and go on with the next sentence
                    e.printStackTrace();
                }
            }
            ignoredCount = mixingSource.getIgnoredCount();
        } catch (DocumentLimitReachedException | ErrorLimitReachedException e) {
            System.out.println(getClass().getSimpleName() + ": " + e);
        } finally {
            lt.shutdown();
            if (resultHandler != null) {
                printSummary(String.valueOf(lang), ruleMatchCount, sentenceCount, ignoredCount);
                if (options.hasOption("print-duration")) {
                    System.out.println("The analysis took " + (System.currentTimeMillis() - startTime) + "ms");
                }
                try {
                    resultHandler.close();
                } catch (Exception e) {
                    // closing is best effort, a failure here must not hide the actual result
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Enables all rules that are {@code temp_off} by default. If {@code ruleSource} is given,
     * additionally enables exactly those pattern rules that come from that file and are not
     * off by default, and disables every other rule.
     *
     * @param lt         the LanguageTool instance to configure
     * @param ruleSource name of the rule file to restrict the check to, or {@code null}
     * @return number of rules that were enabled because they come from {@code ruleSource}
     */
    private static int enableTempOffAndRuleSourceRules(JLanguageTool lt, String ruleSource) {
        int activatedBySource = 0;
        for (Rule rule : lt.getAllRules()) {
            if (rule.isDefaultTempOff()) {
                System.out.println("Activating " + rule.getFullId() + ", which is default='temp_off'");
                lt.enableRule(rule.getId());
            }
            if (ruleSource == null) {
                continue;
            }
            boolean fromRuleSource = false;
            if (rule instanceof AbstractPatternRule) {
                String sourceFile = rule.getSourceFile();
                fromRuleSource = sourceFile != null && sourceFile.endsWith("/" + ruleSource) && !rule.isDefaultOff();
            }
            if (fromRuleSource) {
                activatedBySource++;
                lt.enableRule(rule.getId());
            } else {
                lt.disableRule(rule.getId());
            }
        }
        return activatedBySource;
    }

    /**
     * Formats a limit for the start-up messages.
     *
     * @param limit the configured limit
     * @return the limit as text, or {@code "no limit"} for values &lt;= 0
     */
    private static String describeLimit(int limit) {
        return limit > 0 ? String.valueOf(limit) : "no limit";
    }

    /**
     * Prints the match statistics of a run to STDOUT.
     *
     * @param langName       text form of the checked language, used as line prefix
     * @param ruleMatchCount total number of rule matches
     * @param sentenceCount  number of sentences that were checked
     * @param ignoredCount   number of input lines ignored by the sentence source
     */
    private static void printSummary(String langName, int ruleMatchCount, int sentenceCount, int ignoredCount) {
        // float division on purpose; report 0 instead of dividing by zero if nothing was checked
        float matchesPerSentence = sentenceCount > 0 ? (float) ruleMatchCount / sentenceCount : 0f;
        // the language name is passed as an argument so that a '%' in it cannot break the format
        System.out.printf("%s: %d total matches\n", langName, ruleMatchCount);
        System.out.printf("%s: %d total sentences considered\n", langName, sentenceCount);
        System.out.printf(Locale.ENGLISH, "%s: ø%.2f rule matches per sentence\n", langName, matchesPerSentence);
        System.out.printf(Locale.ENGLISH, "%s: %d input lines ignored (e.g. not between %d and %d chars or at least %d tokens)\n",
                langName, ignoredCount, MIN_SENTENCE_SIZE, MAX_SENTENCE_LENGTH, MIN_SENTENCE_TOKEN_COUNT);
    }

    /**
     * Disables all rules and then enables just the given ones, warning about unknown ids.
     *
     * @param ruleIds ids of the rules to enable
     * @param lt      the LanguageTool instance to configure
     */
    private static void enableOnlySpecifiedRules(String[] ruleIds, JLanguageTool lt) {
        for (Rule rule : lt.getAllRules()) {
            lt.disableRule(rule.getId());
        }
        for (String ruleId : ruleIds) {
            lt.enableRule(ruleId);
        }
        warnOnNonExistingRuleIds(ruleIds, lt);
        System.out.println("Only these rules are enabled: " + Arrays.toString(ruleIds));
    }

    /**
     * Prints a warning for each given id that does not belong to any rule.
     *
     * @param ruleIds ids to look up
     * @param lt      the LanguageTool instance whose rules are consulted
     */
    private static void warnOnNonExistingRuleIds(String[] ruleIds, JLanguageTool lt) {
        // collect the known ids once instead of scanning all rules for every requested id
        Set<String> knownRuleIds = new HashSet<>();
        for (Rule rule : lt.getAllRules()) {
            knownRuleIds.add(rule.getId());
        }
        for (String ruleId : ruleIds) {
            if (!knownRuleIds.contains(ruleId)) {
                System.out.println("WARNING: Could not find rule '" + ruleId + "'");
            }
        }
    }

    /**
     * Disables the given rules and prints the resulting list of disabled rules.
     *
     * @param lt              the LanguageTool instance to configure
     * @param disabledRuleIds ids of the rules to disable
     */
    private static void applyRuleDeactivation(JLanguageTool lt, Set<String> disabledRuleIds) {
        for (String ruleId : disabledRuleIds) {
            lt.disableRule(ruleId);
        }
        System.out.println("These rules are disabled: " + lt.getDisabledRules());
    }

    /**
     * Enables every rule whose category id equals one of the given ids.
     *
     * @param categoryIds category ids to activate, or {@code null} to do nothing
     * @param lt          the LanguageTool instance to configure
     */
    private static void activateAdditionalCategories(String[] categoryIds, JLanguageTool lt) {
        if (categoryIds == null) {
            return;
        }
        for (String categoryId : categoryIds) {
            for (Rule rule : lt.getAllRules()) {
                if (rule.getCategory().getId().toString().equals(categoryId)) {
                    System.out.println("Activating " + rule.getId() + " in category " + categoryId);
                    lt.enableRule(rule.getId());
                }
            }
        }
    }

    /**
     * Disables all currently active rules that are dictionary-based spelling rules.
     *
     * @param lt the LanguageTool instance to configure
     */
    private static void disableSpellingRules(JLanguageTool lt) {
        for (Rule rule : lt.getAllActiveRules()) {
            if (rule.isDictionaryBasedSpellingRule()) {
                lt.disableRule(rule.getId());
            }
        }
    }
}
