package org.apache.tika.eval.core.langid;

import java.io.File;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import org.apache.tika.eval.core.tokens.CommonTokenCountManager;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/tika/eval/core/langid/LangIdTest.class */
/**
 * Consistency checks between the languages reported by {@link LanguageIDWrapper}
 * and the common-token lists available through {@link CommonTokenCountManager}.
 */
public class LangIdTest {

    /** Smallest size accepted for a non-empty common-token list. */
    private static final int MIN_COMMON_TOKENS = 250;

    /**
     * Verifies common-token coverage in both directions.
     *
     * <ol>
     *   <li>For every supported language: an empty token set is only reported on
     *       stdout, while a non-empty set smaller than {@link #MIN_COMMON_TOKENS}
     *       fails the test.</li>
     *   <li>Every entry in the {@code /common_tokens} classpath directory must be
     *       named after a supported language; any other entry fails the test.</li>
     * </ol>
     *
     * @throws Exception if the helpers cannot be created or the resource URL
     *                   cannot be converted to a file-system path
     */
    @Disabled("make sure to run this when updating common tokens or the language model "
            + "but there's no reason to test this for every build.")
    @Test
    public void testCommonTokensCoverage() throws Exception {
        LanguageIDWrapper languageIDWrapper = new LanguageIDWrapper();
        // NOTE(review): a null path presumably makes the manager fall back to its
        // bundled token lists - confirm against CommonTokenCountManager.
        CommonTokenCountManager commonTokenCountManager =
                new CommonTokenCountManager((Path) null, "eng");

        String[] supportedLanguages = languageIDWrapper.getSupportedLanguages();
        // Sorted so that the console report comes out in a stable order.
        Arrays.sort(supportedLanguages);

        for (String lang : supportedLanguages) {
            Set<?> tokens = commonTokenCountManager.getTokens(lang);
            if (tokens.isEmpty()) {
                // A missing list is only reported, not treated as a failure.
                System.out.printf(Locale.US, "missing common tokens for: %s%n", lang);
            } else if (tokens.size() < MIN_COMMON_TOKENS) {
                Assertions.fail(String.format(Locale.US,
                        "common tokens too small (%s) for: %s", tokens.size(), lang));
            }
        }

        URL commonTokensUrl = getClass().getResource("/common_tokens");
        Assertions.assertNotNull(commonTokensUrl,
                "could not find /common_tokens on the classpath");
        Path commonTokensDir = Paths.get(commonTokensUrl.toURI());

        // listFiles() returns null if the path is not a directory or cannot be read.
        File[] tokenFiles = commonTokensDir.toFile().listFiles();
        if (tokenFiles == null) {
            Assertions.fail("could not list files in: " + commonTokensDir);
        }

        Set<String> supported = new HashSet<>(Arrays.asList(supportedLanguages));
        for (File tokenFile : tokenFiles) {
            if (!supported.contains(tokenFile.getName())) {
                Assertions.fail("extra common tokens for: " + tokenFile.getName());
            }
        }
    }
}
