package com.bytegriffin.get4j.store;

import com.bytegriffin.get4j.conf.DefaultConfig;
import com.bytegriffin.get4j.conf.Seed;
import com.bytegriffin.get4j.core.ExceptionCatcher;
import com.bytegriffin.get4j.core.Globals;
import com.bytegriffin.get4j.core.Page;
import com.bytegriffin.get4j.core.Process;
import com.bytegriffin.get4j.download.DiskDownloader;
import com.bytegriffin.get4j.send.EmailSender;
import com.bytegriffin.get4j.util.DateUtil;
import com.bytegriffin.get4j.util.FileUtil;
import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

/* loaded from: input_file:com/bytegriffin/get4j/store/LuceneIndexStorage.class */
/**
 * {@link Process} component that writes every crawled {@link Page} into a
 * per-seed Lucene index.
 *
 * <p>{@link #init(Seed)} resolves the index directory for the seed and opens an
 * {@link IndexWriter} that is published through {@code Globals.INDEX_WRITER_CACHE};
 * {@link #execute(Page)} then upserts one document per page, keyed by the page URL.
 */
public class LuceneIndexStorage implements Process {
    // Log under this class's own name (previously DiskDownloader's category was used by mistake).
    private static final Logger logger = LogManager.getLogger(LuceneIndexStorage.class);
    private static final Analyzer analyzer = new SmartChineseAnalyzer();
    /** Name of the field whose term identifies a document; used for update-by-term. */
    private static final String unique_term = "url";

    /**
     * Resolves the Lucene index directory for the seed, creates it on disk and
     * opens the seed's {@link IndexWriter}.
     *
     * <p>When the configured value equals {@code DefaultConfig.default_value}
     * (case-insensitively) the default per-seed path is used. A value that is
     * missing, or that contains neither a file separator nor a colon, is reported
     * as a configuration error and the JVM exits with status 1. Otherwise the seed
     * name is appended as a sub-directory unless the path already contains it.
     *
     * @param seed the seed whose storage settings are read
     */
    @Override
    public void init(Seed seed) {
        String seedName = seed.getSeedName();
        String indexPath = seed.getStoreLuceneIndex();
        if (DefaultConfig.default_value.equalsIgnoreCase(indexPath)) {
            indexPath = DefaultConfig.getLuceneIndexPath(seedName);
        } else if (indexPath == null
                || (!indexPath.contains(File.separator) && !indexPath.contains(":"))) {
            // A null value is routed here too, instead of failing with a NullPointerException.
            logger.error("Lucene索引文件夹[" + indexPath + "]配置出错，请重新检查。");
            System.exit(1);
        } else if (!indexPath.contains(seedName)) {
            indexPath = indexPath + File.separator + seedName;
        }
        Globals.LUCENE_INDEX_DIR_CACHE.put(seedName, FileUtil.makeDiskDir(indexPath));
        initParams(seedName);
        logger.info("种子[" + seedName + "]的组件LuceneStorage的初始化完成。");
    }

    /**
     * Opens an {@link IndexWriter} on the directory cached for the seed and stores
     * it in {@code Globals.INDEX_WRITER_CACHE}.
     *
     * <p>Failures are logged, mailed and recorded via {@link ExceptionCatcher};
     * in that case no writer is cached for the seed.
     *
     * @param seedName name of the seed, used as the key in both caches
     */
    private void initParams(String seedName) {
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
        mergePolicy.setMergeFactor(20);
        config.setMergePolicy(mergePolicy);
        config.setRAMBufferSizeMB(100.0d);
        config.setUseCompoundFile(false);
        try {
            String indexDir = Globals.LUCENE_INDEX_DIR_CACHE.get(seedName);
            IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(indexDir)), config);
            Globals.INDEX_WRITER_CACHE.put(seedName, writer);
        } catch (Exception e) {
            // Pass the exception to the logger so the stack trace is not lost.
            logger.error("系统初始化种子[" + seedName + "]的Lucene索引时出错。", e);
            EmailSender.sendMail(e);
            ExceptionCatcher.addException(seedName, e);
        }
    }

    /**
     * Builds a Lucene document from the page and upserts it into the seed's index,
     * replacing any existing document with the same URL.
     *
     * <p>Custom page fields are stored as {@code FIELD1}, {@code FIELD2}, ... in
     * iteration order; only {@code String} and {@code Integer} values are indexed,
     * and only those advance the counter. The success message is logged only after
     * the commit succeeded.
     *
     * @param page the crawled page to index
     */
    @Override
    public void execute(Page page) {
        String seedName = page.getSeedName();
        String url = page.getUrl();

        Document document = new Document();
        document.add(new StringField("seed_name", seedName, Field.Store.YES));
        document.add(new StringField(unique_term, url, Field.Store.YES));
        document.add(new StringField("site_host", page.getHost(), Field.Store.YES));
        document.add(new StringField("fetch_time", page.getFetchTime(), Field.Store.YES));
        document.add(new StringField("create_time", DateUtil.getCurrentDate(), Field.Store.YES));
        // Lucene field constructors reject null values, so a missing title/content is indexed as "".
        document.add(new TextField("title", nullToEmpty(page.getTitle()), Field.Store.YES));
        document.add(new TextField("content", nullToEmpty(page.getContent()), Field.Store.NO));

        int fieldNo = 1;
        for (Object value : page.getFields().values()) {
            if (value instanceof String) {
                document.add(new TextField("FIELD" + fieldNo++, (String) value, Field.Store.YES));
            } else if (value instanceof Integer) {
                document.add(new StoredField("FIELD" + fieldNo++, ((Integer) value).intValue()));
            }
        }

        IndexWriter indexWriter = Globals.INDEX_WRITER_CACHE.get(seedName);
        if (indexWriter == null) {
            // initParams() swallows its own failures, so the writer may never have been cached.
            logger.error("种子[" + seedName + "]的Lucene IndexWriter未初始化，无法保存url为[" + url + "]的页面。");
            return;
        }

        try {
            indexWriter.updateDocument(new Term(unique_term, url), document);
            // NOTE(review): forcing a merge of deletes after every single document is
            // likely expensive; kept to preserve the existing on-disk behaviour —
            // consider removing or batching it.
            indexWriter.forceMergeDeletes();
            indexWriter.commit();
            logger.info("线程[" + Thread.currentThread().getName() + "]保存种子[" + seedName + "]url为[" + url + "]到Lucene索引中。");
        } catch (IOException e) {
            EmailSender.sendMail(e);
            ExceptionCatcher.addException(seedName, e);
            logger.error("线程[" + Thread.currentThread().getName() + "]保存种子[" + seedName + "]url为[" + url + "]到Lucene索引中是出错。", e);
        }
    }

    /** Returns the given string, or the empty string when it is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }
}
