package fr.pilato.elasticsearch.crawler.fs.test.integration.elasticsearch;

import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Predicate;
import fr.pilato.elasticsearch.crawler.fs.FsCrawlerImpl;
import fr.pilato.elasticsearch.crawler.fs.client.ESBoolQuery;
import fr.pilato.elasticsearch.crawler.fs.client.ESMatchQuery;
import fr.pilato.elasticsearch.crawler.fs.client.ESSearchHit;
import fr.pilato.elasticsearch.crawler.fs.client.ESSearchRequest;
import fr.pilato.elasticsearch.crawler.fs.client.ESSearchResponse;
import fr.pilato.elasticsearch.crawler.fs.client.ESTermQuery;
import fr.pilato.elasticsearch.crawler.fs.framework.JsonUtil;
import fr.pilato.elasticsearch.crawler.fs.framework.TimeValue;
import fr.pilato.elasticsearch.crawler.fs.settings.Fs;
import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.test.integration.AbstractFsCrawlerITCase;
import java.io.IOException;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.Iterator;
import java.util.stream.Stream;
import org.apache.tika.parser.external.ExternalParser;
import org.hamcrest.MatcherAssert;
import org.hamcrest.Matchers;
import org.junit.AfterClass;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;

/* loaded from: input_file:fr/pilato/elasticsearch/crawler/fs/test/integration/elasticsearch/FsCrawlerImplAllDocumentsIT.class */
public class FsCrawlerImplAllDocumentsIT extends AbstractFsCrawlerITCase {
    private static FsCrawlerImpl crawler = null;

    @BeforeClass
    public static void startCrawling() throws Exception {
        Path resolve = rootTmpDir.resolve("resources").resolve("documents");
        if (Files.notExists(resolve, new LinkOption[0])) {
            copyResourcesToTestDir();
        }
        try {
            Files.walk(resolve, new FileVisitOption[0]).filter(path -> {
                return Files.isRegularFile(path, new LinkOption[0]);
            }).forEach(path2 -> {
                staticLogger.debug("    - [{}]", path2);
            });
            long count = Files.list(resolve).count();
            staticLogger.info(" -> Removing existing index [fscrawler_test_all_documents*]");
            managementService.getClient().deleteIndex("fscrawler_test_all_documents*");
            staticLogger.info("  --> starting crawler in [{}] which contains [{}] files", resolve, Long.valueOf(count));
            crawler = new FsCrawlerImpl(metadataDir, FsSettings.builder("fscrawler_test_all_documents").setElasticsearch(generateElasticsearchConfig("fscrawler_test_all_documents", "fscrawler_test_all_documents_folder", 5, TimeValue.timeValueSeconds(1L), null)).setFs(Fs.builder().setUrl(resolve.toString()).setLangDetect(true).build()).build(), -1, false);
            crawler.start();
            countTestHelper(new ESSearchRequest().withIndex("fscrawler_test_all_documents"), Long.valueOf(count), null, TimeValue.timeValueMinutes(1L));
        } catch (NoSuchFileException e) {
            staticLogger.error("directory [{}] should exist before we can start tests.", resolve);
            throw new RuntimeException(resolve + " doesn't seem to exist. Check your JUnit tests.");
        }
    }

    @AfterClass
    public static void stopCrawling() throws Exception {
        if (crawler != null) {
            staticLogger.info("  --> Stopping crawler");
            crawler.close();
            crawler = null;
        }
    }

    @Test
    public void testXmlIssue163() throws IOException {
        runSearch("issue-163.xml");
    }

    @Test
    public void testJson() throws IOException {
        runSearch("test.json", "json");
    }

    @Test
    public void testExtractFromDoc() throws IOException {
        runSearch("test.doc", "sample");
    }

    @Test
    public void testExtractFromDocx() throws IOException {
        Iterator it = runSearch("test.docx", "sample").getHits().iterator();
        while (it.hasNext()) {
            Object parseJson = JsonUtil.parseJson(((ESSearchHit) it.next()).getSourceAsString());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.filename", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.content_type", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.url", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.filesize", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.indexing_date", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.created", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.last_modified", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.file.last_accessed", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.meta.title", new Predicate[0]), Matchers.notNullValue());
            MatcherAssert.assertThat(JsonPath.read(parseJson, "$.meta.keywords", new Predicate[0]), Matchers.notNullValue());
        }
    }

    @Test
    public void testExtractFromHtml() throws IOException {
        runSearch("test.html", "sample");
    }

    @Test
    public void testExtractFromMp3() throws IOException {
        runSearch("test.mp3", "tika");
    }

    @Test
    public void testExtractFromOdt() throws IOException {
        runSearch("test.odt", "sample");
    }

    @Test
    public void testExtractFromPdf() throws IOException {
        runSearch("test.pdf", "sample");
    }

    @Test
    public void testExtractFromRtf() throws IOException {
        runSearch("test.rtf", "sample");
    }

    @Test
    public void testExtractFromTxt() throws IOException {
        runSearch("test.txt", "contains");
    }

    @Test
    public void testExtractFromWav() throws IOException {
        runSearch("test.wav");
    }

    @Test
    public void testProtectedDocument229() throws IOException {
        runSearch("test-protected.docx");
    }

    @Test
    public void testProtectedDocument221() throws IOException {
        runSearch("issue-221-doc1.pdf", "coucou");
        runSearch("issue-221-doc2.pdf", "FORMATIONS");
    }

    @Test
    public void testLanguageDetection() throws IOException {
        Iterator it = runSearch("test-fr.txt", "fichier").getHits().iterator();
        while (it.hasNext()) {
            MatcherAssert.assertThat((String) JsonPath.read(((ESSearchHit) it.next()).getSourceAsString(), "$.meta.language", new Predicate[0]), Matchers.is("fr"));
        }
        Iterator it2 = runSearch("test-de.txt", "Datei").getHits().iterator();
        while (it2.hasNext()) {
            MatcherAssert.assertThat((String) JsonPath.read(((ESSearchHit) it2.next()).getSourceAsString(), "$.meta.language", new Predicate[0]), Matchers.is("de"));
        }
        Iterator it3 = runSearch("test.txt", "contains").getHits().iterator();
        while (it3.hasNext()) {
            MatcherAssert.assertThat((String) JsonPath.read(((ESSearchHit) it3.next()).getSourceAsString(), "$.meta.language", new Predicate[0]), Matchers.is("en"));
        }
    }

    @Test
    public void testChineseContent369() throws IOException {
        runSearch("issue-369.txt", "今天天气晴好");
    }

    @Test
    public void testOcr() throws IOException {
        Assume.assumeTrue("Tesseract is not installed so we are skipping this test", ExternalParser.check("tesseract", new int[0]));
        runSearch("test-ocr.png", "words");
        runSearch("test-ocr.pdf", "words");
    }

    @Test
    public void testShiftJisEncoding() throws IOException {
        runSearch("issue-400-shiftjis.txt", "elasticsearch");
    }

    @Test
    public void testNonUtf8Filename418() throws IOException {
        runSearch("issue-418-中文名称.txt");
    }

    private ESSearchResponse runSearch(String str) throws IOException {
        return runSearch(str, null);
    }

    private ESSearchResponse runSearch(String str, String str2) throws IOException {
        this.logger.info(" -> Testing if file [{}] has been indexed correctly{}.", str, str2 == null ? "" : " and contains [" + str2 + "]");
        ESBoolQuery addMust = new ESBoolQuery().addMust(new ESTermQuery("file.filename", str));
        if (str2 != null) {
            addMust.addMust(new ESMatchQuery("content", str2));
        }
        ESSearchResponse search = documentService.search(new ESSearchRequest().withIndex("fscrawler_test_all_documents").withESQuery(addMust));
        MatcherAssert.assertThat(Long.valueOf(search.getTotalHits()), Matchers.is(1L));
        return search;
    }
}
