package fr.pilato.elasticsearch.crawler.fs;

import fr.pilato.elasticsearch.crawler.fs.beans.Attributes;
import fr.pilato.elasticsearch.crawler.fs.beans.Doc;
import fr.pilato.elasticsearch.crawler.fs.beans.DocParser;
import fr.pilato.elasticsearch.crawler.fs.beans.FsJob;
import fr.pilato.elasticsearch.crawler.fs.beans.FsJobFileHandler;
import fr.pilato.elasticsearch.crawler.fs.beans.PathParser;
import fr.pilato.elasticsearch.crawler.fs.beans.ScanStatistic;
import fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClientManager;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractModel;
import fr.pilato.elasticsearch.crawler.fs.crawler.FileAbstractor;
import fr.pilato.elasticsearch.crawler.fs.framework.ByteSizeValue;
import fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil;
import fr.pilato.elasticsearch.crawler.fs.framework.SignTool;
import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings;
import fr.pilato.elasticsearch.crawler.fs.tika.TikaDocParser;
import fr.pilato.elasticsearch.crawler.fs.tika.XmlDocParser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.time.LocalDateTime;
import java.time.temporal.ChronoUnit;
import java.time.temporal.TemporalUnit;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import org.apache.http.Header;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.bulk.BulkProcessor;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.document.DocumentField;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;

/* loaded from: input_file:fr/pilato/elasticsearch/crawler/fs/FsParser.class */
public abstract class FsParser implements Runnable {
    /** Name of the field queried with a signed directory path to find the documents indexed for that directory. */
    private static final String PATH_ROOT = "path.root";

    /** Name of the stored field read back from search hits to list the file names already indexed. */
    private static final String FILE_FILENAME = "file.filename";

    /** Number of hits requested per search when listing indexed files or folders. */
    private static final int REQUEST_SIZE = 10000;

    /** Settings of the crawler job (package-private). */
    final FsSettings fsSettings;

    /** Reads and writes the job status (last run date, counters) between runs. */
    private final FsJobFileHandler fsJobFileHandler;

    /** Gives access to the elasticsearch client and to the bulk processors. */
    private final ElasticsearchClientManager esClientManager;

    /** Number of runs after which {@link #run()} stops; a value of 0 or less never triggers that stop. */
    private final Integer loop;

    /** Digest for the configured checksum algorithm, or {@code null} when no checksum is configured. */
    private final MessageDigest messageDigest;

    /** Default type name reported by the elasticsearch client; used for every index and delete request. */
    private final String typeName;

    /** Statistics of the current run; replaced at the beginning of every run. */
    private ScanStatistic stats;

    /** Number of runs started so far. */
    private final AtomicInteger runNumber = new AtomicInteger(0);

    /**
     * Stop flag polled by the crawl loop. It is written through the public {@link #setClosed(boolean)},
     * which may be called from a thread other than the one executing {@link #run()}, hence volatile.
     */
    private volatile boolean closed;

    private static final Logger logger = LogManager.getLogger(FsParser.class);

    /** Monitor the crawl loop waits on between two runs; static, so shared by every parser instance. */
    private static final Object semaphore = new Object();

    public FsParser(FsSettings fsSettings, Path path, ElasticsearchClientManager elasticsearchClientManager, Integer num) {
        this.fsSettings = fsSettings;
        this.fsJobFileHandler = new FsJobFileHandler(path);
        this.esClientManager = elasticsearchClientManager;
        this.loop = num;
        logger.debug("creating fs crawler thread [{}] for [{}] every [{}]", fsSettings.getName(), fsSettings.getFs().getUrl(), fsSettings.getFs().getUpdateRate());
        if (fsSettings.getFs().getChecksum() != null) {
            try {
                this.messageDigest = MessageDigest.getInstance(fsSettings.getFs().getChecksum());
            } catch (NoSuchAlgorithmException e) {
                throw new RuntimeException("This should never happen as we checked that previously");
            }
        } else {
            this.messageDigest = null;
        }
        this.typeName = elasticsearchClientManager.client().getDefaultTypeName();
    }

    /**
     * Builds the file abstractor used for one crawl run. {@link #run()} calls this once per run,
     * then opens the returned instance and closes it when the run ends.
     *
     * @return a new or reusable file abstractor; {@code run()} dereferences it immediately, so it must not be {@code null}
     */
    protected abstract FileAbstractor buildFileAbstractor();

    /**
     * Exposes the monitor the crawl loop waits on between two runs.
     *
     * @return the static monitor object shared by every parser instance
     */
    public Object getSemaphore() {
        return FsParser.semaphore;
    }

    /**
     * Crawls the configured URL over and over until the parser is closed or the configured number
     * of runs has been reached.
     *
     * <p>Each run opens a fresh file abstractor, checks that the root URL exists, indexes the root
     * folder on the very first run (when folder indexing is enabled), walks the tree and finally
     * stores the job status. Any {@link Exception} raised by a run is logged and does not stop the
     * loop; the file abstractor is closed in every case. Between two runs the thread waits on the
     * shared semaphore for the configured update rate.
     */
    @Override
    public void run() {
        logger.info("FS crawler started for [{}] for [{}] every [{}]", this.fsSettings.getName(), this.fsSettings.getFs().getUrl(), this.fsSettings.getFs().getUpdateRate());
        this.closed = false;
        while (!this.closed) {
            int run = this.runNumber.incrementAndGet();
            FileAbstractor<?> fileAbstractor = null;
            try {
                logger.debug("Fs crawler thread [{}] is now running. Run #{}...", this.fsSettings.getName(), run);
                this.stats = new ScanStatistic(this.fsSettings.getFs().getUrl());
                fileAbstractor = buildFileAbstractor();
                fileAbstractor.open();

                if (!fileAbstractor.exists(this.fsSettings.getFs().getUrl())) {
                    throw new RuntimeException(this.fsSettings.getFs().getUrl() + " doesn't exists.");
                }
                this.stats.setRootPathId(SignTool.sign(this.fsSettings.getFs().getUrl()));

                // Taken before the scan starts so that files changed while scanning are picked up next time.
                LocalDateTime scanStart = LocalDateTime.now();
                LocalDateTime lastScan = getLastDateFromMeta(this.fsSettings.getName());

                // No stored date means this is the first run: that is the only time the root folder is indexed.
                if (lastScan == null && this.fsSettings.getFs().isIndexFolders()) {
                    indexDirectory(this.fsSettings.getFs().getUrl());
                }
                if (lastScan == null) {
                    lastScan = LocalDateTime.MIN;
                }

                addFilesRecursively(fileAbstractor, this.fsSettings.getFs().getUrl(), lastScan);
                updateFsJob(this.fsSettings.getName(), scanStart);
            } catch (Exception e) {
                logger.warn("Error while crawling {}: {}", this.fsSettings.getFs().getUrl(), e.getMessage());
                if (logger.isDebugEnabled()) {
                    logger.warn("Full stacktrace", e);
                }
            } finally {
                if (fileAbstractor != null) {
                    try {
                        fileAbstractor.close();
                    } catch (Exception e) {
                        // The throwable goes last so the logger attaches its stack trace.
                        logger.warn("Error while closing the connection: {}", e.getMessage(), e);
                    }
                }
            }

            if (this.loop > 0 && run >= this.loop) {
                logger.info("FS crawler is stopping after {} run{}", run, run > 1 ? "s" : "");
                this.closed = true;
                return;
            }

            try {
                logger.debug("Fs crawler is going to sleep for {}", this.fsSettings.getFs().getUpdateRate());
                synchronized (semaphore) {
                    semaphore.wait(this.fsSettings.getFs().getUpdateRate().millis());
                    logger.debug("Fs crawler is now waking up again...");
                }
            } catch (InterruptedException e) {
                logger.debug("Fs crawler thread has been interrupted: [{}]", e.getMessage());
                // NOTE(review): the interrupt flag is restored but the loop only stops on `closed`;
                // if the interrupter does not also close the parser, the next wait() returns
                // immediately and runs follow each other without pause. Confirm that is intended.
                Thread.currentThread().interrupt();
            }
        }
        logger.debug("FS crawler thread [{}] is now marked as closed...", this.fsSettings.getName());
    }

    /**
     * Looks up the date stored for the last run of a job.
     *
     * @param str name of the job
     * @return the stored last run date, or {@code null} when no job file exists
     * @throws IOException if the job file cannot be read for another reason
     */
    private LocalDateTime getLastDateFromMeta(String str) throws IOException {
        LocalDateTime lastRun = null;
        try {
            lastRun = fsJobFileHandler.read(str).getLastrun();
        } catch (NoSuchFileException ignored) {
            // No job file: the null default is returned.
        }
        return lastRun;
    }

    /**
     * Persists the status of the job after a run: its name, the run date and the counters of the
     * current statistics.
     *
     * @param str           name of the job
     * @param localDateTime date of the run; it is stored two seconds earlier
     * @throws Exception if the job file cannot be written
     */
    private void updateFsJob(String str, LocalDateTime localDateTime) throws Exception {
        // presumably a safety margin so that files modified right at scan time are not missed -- TODO confirm
        LocalDateTime storedLastRun = localDateTime.minus(2L, ChronoUnit.SECONDS);
        fsJobFileHandler.write(
                str,
                FsJob.builder()
                        .setName(str)
                        .setLastrun(storedLastRun)
                        .setIndexed(stats.getNbDocScan())
                        .setDeleted(stats.getNbDocDeleted())
                        .build());
    }

    /**
     * Indexes the content of one directory and recurses into its sub-directories, then, when
     * removal of deleted entries is enabled, deletes from elasticsearch the files and folders that
     * are indexed for this directory but were not seen during the listing.
     *
     * @param fileAbstractor opened abstractor used to list files and open their content
     * @param str            path of the directory to scan
     * @param localDateTime  date of the previous scan; only files modified or created after it are indexed
     * @throws Exception if listing, reading or an elasticsearch query fails
     */
    private void addFilesRecursively(FileAbstractor<?> fileAbstractor, String str, LocalDateTime localDateTime) throws Exception {
        logger.debug("indexing [{}] content", str);
        Collection<FileAbstractModel> children = fileAbstractor.getFiles(str);
        Collection<String> seenFiles = new ArrayList<>();
        Collection<String> seenFolders = new ArrayList<>();

        if (children != null) {
            for (FileAbstractModel child : children) {
                String childName = child.getName();
                String virtualPath = FsCrawlerUtil.computeVirtualPathName(stats.getRootPath(), new File(str, childName).toString());
                boolean indexable = FsCrawlerUtil.isIndexable(child.isDirectory(), virtualPath, fsSettings.getFs().getIncludes(), fsSettings.getFs().getExcludes());
                logger.debug("[{}] can be indexed: [{}]", virtualPath, indexable);

                if (!indexable) {
                    logger.debug("  - ignored file/dir: {}", childName);
                    continue;
                }

                if (child.isFile()) {
                    logger.debug("  - file: {}", virtualPath);
                    // Recorded even when the file is unchanged or skipped, so it is not treated as deleted below.
                    seenFiles.add(childName);
                    indexFileIfChanged(fileAbstractor, child, childName, str, localDateTime);
                    continue;
                }

                if (child.isDirectory()) {
                    logger.debug("  - folder: {}", childName);
                    if (fsSettings.getFs().isIndexFolders()) {
                        seenFolders.add(child.getFullpath());
                        indexDirectory(child.getFullpath());
                    }
                    addFilesRecursively(fileAbstractor, child.getFullpath(), localDateTime);
                    continue;
                }

                logger.debug("  - other: {}", childName);
                logger.debug("Not a file nor a dir. Skipping {}", child.getFullpath());
            }
        }

        if (!fsSettings.getFs().isRemoveDeleted()) {
            return;
        }
        removeDeletedFiles(str, seenFiles);
        if (fsSettings.getFs().isIndexFolders()) {
            removeDeletedFolders(str, seenFolders);
        }
    }

    /** Indexes one file when it changed since the last scan and its size is under the configured limit. */
    private void indexFileIfChanged(FileAbstractor<?> fileAbstractor, FileAbstractModel file, String name, String dir, LocalDateTime lastScan) throws Exception {
        boolean changed = file.getLastModifiedDate().isAfter(lastScan)
                || (file.getCreationDate() != null && file.getCreationDate().isAfter(lastScan));
        if (!changed) {
            logger.debug("    - not modified: creation date {} , file date {}, last scan date {}", file.getCreationDate(), file.getLastModifiedDate(), lastScan);
            return;
        }

        if (!FsCrawlerUtil.isFileSizeUnderLimit(fsSettings.getFs().getIgnoreAbove(), file.getSize())) {
            logger.debug("file [{}] has a size [{}] above the limit [{}]. We skip it.", name, new ByteSizeValue(file.getSize()), fsSettings.getFs().getIgnoreAbove());
            return;
        }

        try {
            // The content is only opened when something will actually consume it.
            boolean contentNeeded = fsSettings.getFs().isIndexContent() || fsSettings.getFs().isStoreSource();
            InputStream content = contentNeeded ? fileAbstractor.getInputStream(file) : null;
            indexFile(file, stats, dir, content, file.getSize());
            stats.addFile();
        } catch (FileNotFoundException e) {
            if (!fsSettings.getFs().isContinueOnError()) {
                throw e;
            }
            logger.warn("Unable to open Input Stream for {}, skipping...: {}", name, e.getMessage());
        }
    }

    /** Deletes from elasticsearch the files indexed for {@code dir} that were not seen in the listing. */
    private void removeDeletedFiles(String dir, Collection<String> seenFiles) throws Exception {
        logger.debug("Looking for removed files in [{}]...", dir);
        for (String indexedFile : getFileDirectory(dir)) {
            logger.trace("Checking file [{}]", indexedFile);
            String virtualPath = FsCrawlerUtil.computeVirtualPathName(stats.getRootPath(), new File(dir, indexedFile).toString());
            boolean indexable = FsCrawlerUtil.isIndexable(false, virtualPath, fsSettings.getFs().getIncludes(), fsSettings.getFs().getExcludes());
            if (!indexable || seenFiles.contains(indexedFile)) {
                continue;
            }
            logger.trace("Removing file [{}] in elasticsearch", indexedFile);
            esDelete(fsSettings.getElasticsearch().getIndex(), generateIdFromFilename(indexedFile, dir));
            stats.removeFile();
        }
    }

    /** Deletes from elasticsearch the folders indexed under {@code dir} that were not seen in the listing. */
    private void removeDeletedFolders(String dir, Collection<String> seenFolders) throws Exception {
        logger.debug("Looking for removed directories in [{}]...", dir);
        for (String indexedFolder : getFolderDirectory(dir)) {
            String virtualPath = FsCrawlerUtil.computeVirtualPathName(stats.getRootPath(), new File(dir, indexedFolder).toString());
            if (!FsCrawlerUtil.isIndexable(true, virtualPath, fsSettings.getFs().getIncludes(), fsSettings.getFs().getExcludes())) {
                continue;
            }
            logger.trace("Checking directory [{}]", indexedFolder);
            if (!seenFolders.contains(indexedFolder)) {
                logger.trace("Removing recursively directory [{}] in elasticsearch", indexedFolder);
                removeEsDirectoryRecursively(indexedFolder);
            }
        }
    }

    /**
     * Lists the file names elasticsearch holds for one directory.
     *
     * <p>When the client reports ingest support, a search on the signed directory path is run and
     * the stored file name field is read from every hit; otherwise the lookup is delegated to the
     * client's {@code getFromStoredFieldsV2}.
     *
     * @param str path of the directory
     * @return the indexed file names, or an empty list when the parser is closed
     * @throws RuntimeException if a hit does not carry the stored file name field
     * @throws Exception        if the elasticsearch request fails
     */
    private Collection<String> getFileDirectory(String str) throws Exception {
        if (closed) {
            return Collections.emptyList();
        }

        String signedDir = SignTool.sign(str);
        logger.trace("Querying elasticsearch for files in dir [{}:{}]", PATH_ROOT, signedDir);

        Collection<String> filenames;
        if (!esClientManager.client().isIngestSupported()) {
            filenames = esClientManager.client().getFromStoredFieldsV2(
                    fsSettings.getElasticsearch().getIndex(), REQUEST_SIZE, FILE_FILENAME, str,
                    QueryBuilders.termQuery(PATH_ROOT, signedDir));
        } else {
            filenames = new ArrayList<>();
            SearchSourceBuilder source = new SearchSourceBuilder()
                    .size(REQUEST_SIZE)
                    .storedField(FILE_FILENAME)
                    .query(QueryBuilders.termQuery(PATH_ROOT, signedDir));
            SearchRequest request = new SearchRequest(new String[]{fsSettings.getElasticsearch().getIndex()}).source(source);
            SearchResponse response = esClientManager.client().search(request, new Header[0]);
            logger.trace("Response [{}]", response.toString());

            if (response.getHits() != null && response.getHits().getHits() != null) {
                for (SearchHit hit : response.getHits().getHits()) {
                    DocumentField filenameField = hit.getFields() == null ? null : hit.getFields().get(FILE_FILENAME);
                    if (filenameField == null) {
                        logger.warn("Can't find stored field name to check existing filenames in path [{}]. Please set store: true on field [{}]", str, FILE_FILENAME);
                        throw new RuntimeException("Mapping is incorrect: please set stored: true on field [file.filename].");
                    }
                    filenames.add((String) filenameField.getValue());
                }
            }
        }

        logger.trace("We found: {}", filenames);
        return filenames;
    }

    /**
     * Lists the folders elasticsearch holds under one directory, by searching the folder index for
     * entries whose {@code root} equals the signed directory path.
     *
     * @param str path of the parent directory
     * @return the {@code real} value of every matching hit; empty when the parser is closed
     * @throws Exception if the elasticsearch request fails
     */
    private Collection<String> getFolderDirectory(String str) throws Exception {
        Collection<String> folders = new ArrayList<>();
        if (closed) {
            return folders;
        }

        SearchSourceBuilder source = new SearchSourceBuilder()
                .size(REQUEST_SIZE)
                .query(QueryBuilders.termQuery("root", SignTool.sign(str)));
        SearchRequest request = new SearchRequest(new String[]{fsSettings.getElasticsearch().getIndexFolder()}).source(source);
        SearchResponse response = esClientManager.client().search(request, new Header[0]);

        if (response.getHits() == null || response.getHits().getHits() == null) {
            return folders;
        }
        for (SearchHit hit : response.getHits().getHits()) {
            folders.add(hit.getSourceAsMap().get("real").toString());
        }
        return folders;
    }

    /**
     * Sends one file to the document bulk processor.
     *
     * <p>Without JSON or XML support, or when the content must be added as an inner object, a
     * {@link Doc} is built from the file metadata and its content, and indexed if the content
     * passes the configured filters. Otherwise the JSON text, or the JSON generated from the XML,
     * is indexed as is. The input stream is always closed before returning.
     *
     * @param fileAbstractModel description of the file (name, dates, size, owner...)
     * @param scanStatistic     statistics of the current run, read for the root path
     * @param str               path of the directory containing the file
     * @param inputStream       content of the file, or {@code null} when the caller did not open it
     * @param j                 file size handed to the Tika parser
     * @throws Exception if the content cannot be read or parsed
     */
    private void indexFile(FileAbstractModel fileAbstractModel, ScanStatistic scanStatistic, String str, InputStream inputStream, long j) throws Exception {
        final String filename = fileAbstractModel.getName();
        final LocalDateTime created = fileAbstractModel.getCreationDate();
        final LocalDateTime lastModified = fileAbstractModel.getLastModifiedDate();
        final LocalDateTime lastAccessed = fileAbstractModel.getAccessDate();
        final String extension = fileAbstractModel.getExtension();
        final long size = fileAbstractModel.getSize();

        logger.debug("fetching content from [{}],[{}]", str, filename);

        try {
            boolean wrapInDoc = fsSettings.getFs().isAddAsInnerObject()
                    || (!fsSettings.getFs().isJsonSupport() && !fsSettings.getFs().isXmlSupport());

            if (!wrapInDoc) {
                // Raw mode: the file content itself becomes the indexed document.
                if (fsSettings.getFs().isJsonSupport()) {
                    esIndex(esClientManager.bulkProcessorDoc(), fsSettings.getElasticsearch().getIndex(),
                            generateIdFromFilename(filename, str), read(inputStream),
                            fsSettings.getElasticsearch().getPipeline());
                } else if (fsSettings.getFs().isXmlSupport()) {
                    esIndex(esClientManager.bulkProcessorDoc(), fsSettings.getElasticsearch().getIndex(),
                            generateIdFromFilename(filename, str), XmlDocParser.generate(inputStream),
                            fsSettings.getElasticsearch().getPipeline());
                }
            } else {
                String fullPath = new File(str, filename).toString();
                Doc doc = new Doc();

                // File metadata
                doc.getFile().setFilename(filename);
                doc.getFile().setCreated(FsCrawlerUtil.localDateTimeToDate(created));
                doc.getFile().setLastModified(FsCrawlerUtil.localDateTimeToDate(lastModified));
                doc.getFile().setLastAccessed(FsCrawlerUtil.localDateTimeToDate(lastAccessed));
                doc.getFile().setIndexingDate(FsCrawlerUtil.localDateTimeToDate(LocalDateTime.now()));
                doc.getFile().setUrl("file://" + fullPath);
                doc.getFile().setExtension(extension);
                if (fsSettings.getFs().isAddFilesize()) {
                    doc.getFile().setFilesize(Long.valueOf(size));
                }

                // Path metadata
                doc.getPath().setRoot(SignTool.sign(str));
                doc.getPath().setVirtual(FsCrawlerUtil.computeVirtualPathName(scanStatistic.getRootPath(), fullPath));
                doc.getPath().setReal(fullPath);

                // Ownership and permissions, on demand
                if (fsSettings.getFs().isAttributesSupport()) {
                    doc.setAttributes(new Attributes());
                    doc.getAttributes().setOwner(fileAbstractModel.getOwner());
                    doc.getAttributes().setGroup(fileAbstractModel.getGroup());
                    if (fileAbstractModel.getPermissions() >= 0) {
                        doc.getAttributes().setPermissions(fileAbstractModel.getPermissions());
                    }
                }

                // Content
                if (fsSettings.getFs().isJsonSupport()) {
                    doc.setObject(DocParser.asMap(read(inputStream)));
                } else if (fsSettings.getFs().isXmlSupport()) {
                    doc.setObject(XmlDocParser.generateMap(inputStream));
                } else {
                    TikaDocParser.generate(fsSettings, inputStream, filename, doc, messageDigest, j);
                }

                if (!FsCrawlerUtil.isIndexable(doc.getContent(), fsSettings.getFs().getFilters())) {
                    logger.debug("We ignore file [{}] because it does not match all the patterns {}", filename, fsSettings.getFs().getFilters());
                } else {
                    esIndex(esClientManager.bulkProcessorDoc(), fsSettings.getElasticsearch().getIndex(),
                            generateIdFromFilename(filename, str), DocParser.toJson(doc),
                            fsSettings.getElasticsearch().getPipeline());
                }
            }
        } finally {
            if (inputStream != null) {
                inputStream.close();
            }
        }
    }

    /**
     * Computes the elasticsearch document id of a file.
     *
     * @param str  name of the file
     * @param str2 path of the directory containing the file
     * @return the file name itself when file names are used as ids, otherwise the signature of the full path
     * @throws NoSuchAlgorithmException if the signature cannot be computed
     */
    private String generateIdFromFilename(String str, String str2) throws NoSuchAlgorithmException {
        if (fsSettings.getFs().isFilenameAsId()) {
            return str;
        }
        String fullPath = new File(str2, str).toString();
        return SignTool.sign(fullPath);
    }

    /**
     * Reads a whole stream as UTF-8 text.
     *
     * <p>Lines are joined with {@code \n}, so the original line terminators are normalized and a
     * trailing line break is dropped. The reader, and with it the given stream, is closed before
     * returning; if both reading and closing fail, the read failure is thrown and the close
     * failure is attached to it as a suppressed exception.
     *
     * @param inputStream stream to read; must not be {@code null}
     * @return the content of the stream
     * @throws IOException if the reader cannot be created or closed
     */
    private String read(InputStream inputStream) throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))) {
            return reader.lines().collect(Collectors.joining("\n"));
        }
    }

    /**
     * Sends a folder document to the folder bulk processor, without any ingest pipeline.
     *
     * @param str  id of the folder document
     * @param path folder description, serialized to JSON
     * @throws Exception if the serialization fails
     */
    private void indexDirectory(String str, fr.pilato.elasticsearch.crawler.fs.beans.Path path) throws Exception {
        BulkProcessor folderProcessor = esClientManager.bulkProcessorFolder();
        String folderIndex = fsSettings.getElasticsearch().getIndexFolder();
        String folderJson = PathParser.toJson(path);
        esIndex(folderProcessor, folderIndex, str, folderJson, null);
    }

    /**
     * Indexes one folder: builds its path description (real path, signed parent path, virtual
     * path) and sends it under the signature of its real path.
     *
     * @param str real path of the folder
     * @throws Exception if signing or serialization fails
     */
    private void indexDirectory(String str) throws Exception {
        // A path without any platform separator has no parent part; use an empty parent,
        // which is also what a top-level path such as "/dir" yields, instead of letting
        // substring(0, -1) throw StringIndexOutOfBoundsException.
        int lastSeparator = str.lastIndexOf(File.separator);
        String parent = lastSeparator >= 0 ? str.substring(0, lastSeparator) : "";

        fr.pilato.elasticsearch.crawler.fs.beans.Path path = new fr.pilato.elasticsearch.crawler.fs.beans.Path();
        path.setReal(str);
        path.setRoot(SignTool.sign(parent));
        path.setVirtual(FsCrawlerUtil.computeVirtualPathName(this.stats.getRootPath(), str));
        indexDirectory(SignTool.sign(str), path);
    }

    /**
     * Deletes a folder from elasticsearch: first the files indexed for it, then its sub-folders
     * recursively, finally the folder document itself.
     *
     * @param str real path of the folder to delete
     * @throws Exception if an elasticsearch query or an id computation fails
     */
    private void removeEsDirectoryRecursively(String str) throws Exception {
        logger.debug("Delete folder [{}]", str);

        for (String filename : getFileDirectory(str)) {
            // Use the same id computation as indexing so that the delete hits the indexed
            // document, including when file names are used as ids.
            esDelete(this.fsSettings.getElasticsearch().getIndex(), generateIdFromFilename(filename, str));
        }

        for (String subFolder : getFolderDirectory(str)) {
            removeEsDirectoryRecursively(subFolder);
        }

        esDelete(this.fsSettings.getElasticsearch().getIndexFolder(), SignTool.sign(str));
    }

    /**
     * Queues an index request on a bulk processor, unless the parser is closed, in which case the
     * document is dropped with a warning.
     *
     * @param bulkProcessor processor the request is added to
     * @param str           name of the index
     * @param str2          id of the document
     * @param str3          JSON source of the document
     * @param str4          ingest pipeline set on the request; callers in this class may pass {@code null}
     */
    void esIndex(BulkProcessor bulkProcessor, String str, String str2, String str3, String str4) {
        logger.debug("Indexing {}/{}/{}?pipeline={}", str, typeName, str2, str4);
        logger.trace("JSon indexed : {}", str3);

        if (closed) {
            logger.warn("trying to add new file while closing crawler. Document [{}]/[{}]/[{}] has been ignored", str, typeName, str2);
            return;
        }

        IndexRequest request = new IndexRequest(str, typeName, str2)
                .source(str3, XContentType.JSON)
                .setPipeline(str4);
        bulkProcessor.add(request);
    }

    /**
     * Queues a delete request on the document bulk processor, unless the parser is closed, in
     * which case the request is dropped with a warning.
     *
     * @param str  name of the index
     * @param str2 id of the document to delete
     */
    void esDelete(String str, String str2) {
        logger.debug("Deleting {}/{}/{}", str, typeName, str2);

        if (closed) {
            logger.warn("trying to remove a file while closing crawler. Document [{}]/[{}]/[{}] has been ignored", str, typeName, str2);
            return;
        }

        DeleteRequest request = new DeleteRequest(str, typeName, str2);
        esClientManager.bulkProcessorDoc().add(request);
    }

    /**
     * Reports how many runs have been started.
     *
     * @return the current value of the run counter
     */
    public int getRunNumber() {
        return runNumber.intValue();
    }

    /**
     * Sets the stop flag checked by the crawl loop and by the index, delete and lookup methods.
     *
     * @param z {@code true} to mark the parser as closed
     */
    public void setClosed(boolean z) {
        closed = z;
    }

    /**
     * Tells whether the parser is marked as closed.
     *
     * @return the current value of the stop flag
     */
    public boolean isClosed() {
        return closed;
    }
}
