package org.apache.tika.eval.core.tokens;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/tika/eval/core/tokens/TokenCounterTest.class */
/**
 * Tests for {@link TokenCounter}: cross-checks its statistics against
 * {@link LuceneTokenCounter} and exercises the common-tokens analyzer.
 */
public class TokenCounterTest {
    private static final String FIELD = "f";
    private static AnalyzerManager analyzerManager;

    /**
     * Builds the analyzer manager shared by every test in this class.
     *
     * @throws IOException if the analyzer manager cannot be created
     */
    @BeforeAll
    public static void setUp() throws IOException {
        analyzerManager = AnalyzerManager.newInstance(100000);
    }

    /**
     * Feeds one fixed string to both counters and requires equal token statistics.
     *
     * @throws Exception if either counter fails while analyzing the text
     */
    @Test
    public void testBasic() throws Exception {
        String text = " bde cde def abc efg f f f f ghijklmnop a a a a a a a a a a a a a a a a a b b b b b b b b b b b b b";

        TokenCounter tokenCounter = new TokenCounter(analyzerManager.getGeneralAnalyzer());
        tokenCounter.add(FIELD, text);
        TokenStatistics expected = tokenCounter.getTokenStatistics(FIELD);

        LuceneTokenCounter luceneTokenCounter = new LuceneTokenCounter(analyzerManager.getGeneralAnalyzer());
        luceneTokenCounter.add(FIELD, text);

        Assertions.assertEquals(expected, luceneTokenCounter.getTokenStatistics(FIELD));
    }

    /**
     * Repeats the counter comparison on 100 randomly generated strings.
     * The generated input is used as the assertion message so that a failing
     * case can be reproduced from the test report.
     *
     * @throws Exception if either counter fails while analyzing the text
     */
    @Test
    public void testRandom() throws Exception {
        for (int i = 0; i < 100; i++) {
            String text = generateString();

            TokenCounter tokenCounter = new TokenCounter(analyzerManager.getGeneralAnalyzer());
            tokenCounter.add(FIELD, text);
            TokenStatistics expected = tokenCounter.getTokenStatistics(FIELD);

            LuceneTokenCounter luceneTokenCounter = new LuceneTokenCounter(analyzerManager.getGeneralAnalyzer());
            luceneTokenCounter.add(FIELD, text);

            Assertions.assertEquals(expected, luceneTokenCounter.getTokenStatistics(FIELD), text);
        }
    }

    /**
     * Checks that the common-tokens analyzer reports two {@code ___url___}
     * tokens and one {@code ___email___} token for the sample text.
     *
     * @throws Exception if the counter fails while analyzing the text
     */
    @Test
    public void testCommonTokens() throws Exception {
        TokenCounter tokenCounter = new TokenCounter(analyzerManager.getCommonTokensAnalyzer());
        tokenCounter.add(FIELD, "the http://www.cnn.com and blahdeblah@apache.org are in valuable www.sites.org 普林斯顿大学");
        Map<String, MutableInt> tokens = tokenCounter.getTokens(FIELD);
        Assertions.assertEquals(new MutableInt(2), tokens.get("___url___"));
        Assertions.assertEquals(new MutableInt(1), tokens.get("___email___"));
    }

    /**
     * Tallies the terms emitted by the common-tokens analyzer for a mixed
     * Latin/CJK string and expects 7 distinct terms, with the two-character
     * term {@code 林斯} seen exactly once.
     *
     * @throws Exception if the token stream cannot be consumed
     */
    @Test
    public void testCJKFilter() throws Exception {
        Map<String, Integer> counts = new HashMap<>();
        // try-with-resources guarantees close() even when reset()/incrementToken() throws.
        try (TokenStream tokenStream = analyzerManager.getCommonTokensAnalyzer()
                .tokenStream(FIELD, "then quickbrownfoxjumpedoverthelazy dogss d 2000 普林斯顿大学")) {
            CharTermAttribute termAttribute = tokenStream.getAttribute(CharTermAttribute.class);
            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                counts.merge(termAttribute.toString(), 1, Integer::sum);
            }
            tokenStream.end();
        }
        Assertions.assertEquals(7, counts.size());
        Assertions.assertEquals(1, counts.get("林斯"));
    }

    /**
     * Produces a random test input: between 0 and 999 tokens, each a number in
     * the range 100000..109999 followed by a single space.
     *
     * @return the generated text, empty when zero tokens were drawn
     */
    private String generateString() {
        Random random = new Random();
        int tokenCount = random.nextInt(1000);
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokenCount; i++) {
            sb.append(random.nextInt(10000) + 100000).append(" ");
        }
        return sb.toString();
    }
}
