package org.codelibs.elasticsearch.web.river;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.lang.time.DateUtils;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.NTCredentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.impl.auth.AuthSchemeBase;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.auth.DigestScheme;
import org.apache.http.impl.auth.NTLMScheme;
import org.codelibs.elasticsearch.quartz.service.ScheduleService;
import org.codelibs.elasticsearch.util.lang.StringUtils;
import org.codelibs.elasticsearch.util.settings.SettingsUtils;
import org.codelibs.elasticsearch.web.config.RiverConfig;
import org.codelibs.elasticsearch.web.robot.service.EsDataService;
import org.codelibs.elasticsearch.web.robot.service.EsUrlFilterService;
import org.codelibs.elasticsearch.web.robot.service.EsUrlQueueService;
import org.elasticsearch.action.admin.indices.mapping.delete.DeleteMappingResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.river.AbstractRiverComponent;
import org.elasticsearch.river.River;
import org.elasticsearch.river.RiverName;
import org.elasticsearch.river.RiverSettings;
import org.elasticsearch.script.ScriptService;
import org.quartz.CronScheduleBuilder;
import org.quartz.Job;
import org.quartz.JobBuilder;
import org.quartz.JobDataMap;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.quartz.JobKey;
import org.quartz.TriggerBuilder;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.container.factory.SingletonS2ContainerFactory;
import org.seasar.framework.util.StringUtil;
import org.seasar.robot.S2Robot;
import org.seasar.robot.S2RobotContext;
import org.seasar.robot.client.http.Authentication;
import org.seasar.robot.client.http.RequestHeader;
import org.seasar.robot.client.http.impl.AuthenticationImpl;
import org.seasar.robot.client.http.ntlm.JcifsEngine;

/* loaded from: input_file:org/codelibs/elasticsearch/web/river/WebRiver.class */
public class WebRiver extends AbstractRiverComponent implements River {
    /** Logger shared by the river and its nested {@link CrawlJob}. */
    private static final ESLogger logger = Loggers.getLogger(WebRiver.class);

    // Keys under which start() stores values in the Quartz JobDataMap and
    // under which CrawlJob.execute() reads them back. RIVER_NAME and SETTINGS
    // are also used as variable names in the map handed to scripts.
    private static final String RIVER_NAME = "riverName";
    private static final String SETTINGS = "settings";
    private static final String RUNNING_JOB = "runningJob";
    private static final String SCRIPT_SERVICE = "scriptService";

    // Suffixes appended to the river id to build the Quartz trigger and job identities.
    private static final String TRIGGER_ID_SUFFIX = "Trigger";
    private static final String JOB_ID_SUFFIX = "Job";

    /** JobDataMap key of the Elasticsearch client. */
    private static final String ES_CLIENT = "esClient";

    /** User agent used when the crawl settings do not provide "userAgent". */
    private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Elasticsearch River Web/1.1.2)";

    // Authentication scheme names; the configured scheme is upper-cased before comparison.
    private static final String NTLM_SCHEME = "NTLM";
    private static final String DIGEST_SCHEME = "DIGEST";
    private static final String BASIC_SCHEME = "BASIC";

    /** JobDataMap flag set by start() when no cron expression is configured. */
    private static final String ONE_TIME = "oneTime";
    private static final String EMPTY_STRING = "";

    private final Client client;
    /** Scheduler used to register the crawl job; start() and close() do nothing when it is null. */
    private final ScheduleService scheduleService;
    /** Quartz group of the job and trigger: the river type, or "web" when the type is null. */
    private String groupId;
    /** River name; prefix of the Quartz job and trigger names. */
    private String id;
    /** Holds the CrawlJob currently executing, or null; used to skip overlapping runs and to stop a crawl on close(). */
    private AtomicReference<CrawlJob> runningJob;
    private ScriptService scriptService;

    /* loaded from: input_file:org/codelibs/elasticsearch/web/river/WebRiver$CrawlJob.class */
    public static class CrawlJob implements Job {
        private S2Robot s2Robot;

        public void execute(JobExecutionContext jobExecutionContext) throws JobExecutionException {
            Object obj;
            JobDataMap mergedJobDataMap = jobExecutionContext.getMergedJobDataMap();
            AtomicReference atomicReference = (AtomicReference) mergedJobDataMap.get(WebRiver.RUNNING_JOB);
            ScriptService scriptService = (ScriptService) mergedJobDataMap.get(WebRiver.SCRIPT_SERVICE);
            if (!atomicReference.compareAndSet(null, this)) {
                WebRiver.logger.info(jobExecutionContext.getJobDetail().getKey() + " is running.", new Object[0]);
                return;
            }
            RiverName riverName = (RiverName) mergedJobDataMap.get(WebRiver.RIVER_NAME);
            String uuid = UUID.randomUUID().toString();
            Client client = getClient(mergedJobDataMap);
            HashMap hashMap = new HashMap();
            hashMap.put(WebRiver.RIVER_NAME, riverName);
            hashMap.put("sessionId", uuid);
            hashMap.put("client", client);
            RiverConfig riverConfig = null;
            Map map = ((RiverSettings) mergedJobDataMap.get(WebRiver.SETTINGS)).settings();
            try {
                WebRiver.executeScript(scriptService, map, hashMap, "execute");
                Map map2 = (Map) map.get("crawl");
                if (map2 == null) {
                    WebRiver.logger.warn("No settings for crawling.", new Object[0]);
                    WebRiver.executeScript(scriptService, map, hashMap, "finish");
                    atomicReference.set(null);
                    if (0 != 0) {
                        riverConfig.cleanup(uuid);
                    }
                    try {
                        ((EsUrlQueueService) SingletonS2Container.getComponent(EsUrlQueueService.class)).delete(uuid);
                    } catch (Exception e) {
                        WebRiver.logger.warn("Failed to delete ", e, new Object[0]);
                    }
                    ((EsDataService) SingletonS2Container.getComponent(EsDataService.class)).delete(uuid);
                    ((EsUrlFilterService) SingletonS2Container.getComponent(EsUrlFilterService.class)).delete(uuid);
                    if (mergedJobDataMap.get(WebRiver.ONE_TIME) == null || client == null) {
                        return;
                    }
                    DeleteMappingResponse deleteMappingResponse = (DeleteMappingResponse) client.admin().indices().prepareDeleteMapping(new String[]{"_river"}).setType(new String[]{riverName.name()}).execute().actionGet();
                    if (deleteMappingResponse.isAcknowledged()) {
                        WebRiver.logger.info("Deleted one time river: " + riverName.name(), new Object[0]);
                        return;
                    } else {
                        WebRiver.logger.warn("Failed to delete " + riverName.name() + ". Resposne: " + deleteMappingResponse.toString(), new Object[0]);
                        return;
                    }
                }
                List<Map> list = (List) map2.get("target");
                if (list == null || list.isEmpty()) {
                    WebRiver.logger.warn("No targets for crawling.", new Object[0]);
                    WebRiver.executeScript(scriptService, map, hashMap, "finish");
                    atomicReference.set(null);
                    if (0 != 0) {
                        riverConfig.cleanup(uuid);
                    }
                    try {
                        ((EsUrlQueueService) SingletonS2Container.getComponent(EsUrlQueueService.class)).delete(uuid);
                    } catch (Exception e2) {
                        WebRiver.logger.warn("Failed to delete ", e2, new Object[0]);
                    }
                    ((EsDataService) SingletonS2Container.getComponent(EsDataService.class)).delete(uuid);
                    ((EsUrlFilterService) SingletonS2Container.getComponent(EsUrlFilterService.class)).delete(uuid);
                    if (mergedJobDataMap.get(WebRiver.ONE_TIME) == null || client == null) {
                        return;
                    }
                    DeleteMappingResponse deleteMappingResponse2 = (DeleteMappingResponse) client.admin().indices().prepareDeleteMapping(new String[]{"_river"}).setType(new String[]{riverName.name()}).execute().actionGet();
                    if (deleteMappingResponse2.isAcknowledged()) {
                        WebRiver.logger.info("Deleted one time river: " + riverName.name(), new Object[0]);
                        return;
                    } else {
                        WebRiver.logger.warn("Failed to delete " + riverName.name() + ". Resposne: " + deleteMappingResponse2.toString(), new Object[0]);
                        return;
                    }
                }
                this.s2Robot = (S2Robot) SingletonS2Container.getComponent(S2Robot.class);
                this.s2Robot.setSessionId(uuid);
                HashMap hashMap2 = new HashMap();
                this.s2Robot.getClientFactory().setInitParameterMap(hashMap2);
                String str = (String) SettingsUtils.get(map2, "userAgent", WebRiver.DEFAULT_USER_AGENT);
                if (StringUtil.isNotBlank(str)) {
                    hashMap2.put("userAgent", str);
                }
                hashMap2.put("robotsTxtEnabled", (Boolean) SettingsUtils.get(map2, "robotsTxt", Boolean.TRUE));
                Map map3 = (Map) SettingsUtils.get(map2, "proxy", (Object) null);
                if (map3 != null && (obj = map3.get("host")) != null) {
                    hashMap2.put("proxyHost", obj);
                    Object obj2 = map3.get("port");
                    if (obj2 instanceof Integer) {
                        hashMap2.put("proxyPort", obj2);
                    } else {
                        hashMap2.put("proxyPort", 8080);
                    }
                }
                List<Map> list2 = (List) SettingsUtils.get(map2, "authentications", (Object) null);
                if (list2 != null && !list2.isEmpty()) {
                    ArrayList arrayList = new ArrayList();
                    for (Map map4 : list2) {
                        String upperCase = ((String) SettingsUtils.get((Map) map4.get("scope"), "scheme", WebRiver.EMPTY_STRING)).toUpperCase(Locale.ENGLISH);
                        if (StringUtil.isBlank(upperCase)) {
                            WebRiver.logger.warn("Invalid authentication: " + map4, new Object[0]);
                        } else {
                            Map map5 = (Map) map4.get("credentials");
                            String str2 = (String) SettingsUtils.get(map5, "username", (Object) null);
                            if (StringUtil.isBlank(str2)) {
                                WebRiver.logger.warn("Invalid authentication: " + map4, new Object[0]);
                            } else {
                                String str3 = (String) SettingsUtils.get(map4, "host", AuthScope.ANY_HOST);
                                int intValue = ((Integer) SettingsUtils.get(map4, "port", -1)).intValue();
                                String str4 = (String) SettingsUtils.get(map4, "realm", AuthScope.ANY_REALM);
                                String str5 = (String) SettingsUtils.get(map5, "password", (Object) null);
                                AuthSchemeBase authSchemeBase = null;
                                Credentials credentials = null;
                                if (WebRiver.BASIC_SCHEME.equalsIgnoreCase(upperCase)) {
                                    authSchemeBase = new BasicScheme();
                                    credentials = new UsernamePasswordCredentials(str2, str5);
                                } else if (WebRiver.DIGEST_SCHEME.equals(upperCase)) {
                                    authSchemeBase = new DigestScheme();
                                    credentials = new UsernamePasswordCredentials(str2, str5);
                                } else if (WebRiver.NTLM_SCHEME.equals(upperCase)) {
                                    authSchemeBase = new NTLMScheme(new JcifsEngine());
                                    upperCase = AuthScope.ANY_SCHEME;
                                    String str6 = (String) SettingsUtils.get(map5, "workstation", (Object) null);
                                    String str7 = (String) SettingsUtils.get(map5, "domain", (Object) null);
                                    credentials = new NTCredentials(str2, str5, str6 == null ? WebRiver.EMPTY_STRING : str6, str7 == null ? WebRiver.EMPTY_STRING : str7);
                                }
                                arrayList.add(new AuthenticationImpl(new AuthScope(str3, intValue, str4, upperCase), credentials, authSchemeBase));
                            }
                        }
                    }
                    hashMap2.put("basicAuthentications", arrayList.toArray(new Authentication[arrayList.size()]));
                }
                List<Map> list3 = (List) SettingsUtils.get(map2, "headers", (Object) null);
                if (list3 != null && !list3.isEmpty()) {
                    ArrayList arrayList2 = new ArrayList();
                    for (Map map6 : list3) {
                        String str8 = (String) SettingsUtils.get(map6, "name", (Object) null);
                        String str9 = (String) SettingsUtils.get(map6, "value", (Object) null);
                        if (str8 != null && str9 != null) {
                            arrayList2.add(new RequestHeader(str8, str9));
                        }
                    }
                    hashMap2.put("requestHeaders", arrayList2.toArray(new RequestHeader[arrayList2.size()]));
                }
                List list4 = (List) map2.get("url");
                if (list4 == null || list4.isEmpty()) {
                    WebRiver.logger.warn("No url for crawling.", new Object[0]);
                    WebRiver.executeScript(scriptService, map, hashMap, "finish");
                    atomicReference.set(null);
                    if (0 != 0) {
                        riverConfig.cleanup(uuid);
                    }
                    try {
                        ((EsUrlQueueService) SingletonS2Container.getComponent(EsUrlQueueService.class)).delete(uuid);
                    } catch (Exception e3) {
                        WebRiver.logger.warn("Failed to delete ", e3, new Object[0]);
                    }
                    ((EsDataService) SingletonS2Container.getComponent(EsDataService.class)).delete(uuid);
                    ((EsUrlFilterService) SingletonS2Container.getComponent(EsUrlFilterService.class)).delete(uuid);
                    if (mergedJobDataMap.get(WebRiver.ONE_TIME) == null || client == null) {
                        return;
                    }
                    DeleteMappingResponse deleteMappingResponse3 = (DeleteMappingResponse) client.admin().indices().prepareDeleteMapping(new String[]{"_river"}).setType(new String[]{riverName.name()}).execute().actionGet();
                    if (deleteMappingResponse3.isAcknowledged()) {
                        WebRiver.logger.info("Deleted one time river: " + riverName.name(), new Object[0]);
                        return;
                    } else {
                        WebRiver.logger.warn("Failed to delete " + riverName.name() + ". Resposne: " + deleteMappingResponse3.toString(), new Object[0]);
                        return;
                    }
                }
                Iterator it = list4.iterator();
                while (it.hasNext()) {
                    this.s2Robot.addUrl((String) it.next());
                }
                List list5 = (List) map2.get("includeFilter");
                if (list5 != null) {
                    Iterator it2 = list5.iterator();
                    while (it2.hasNext()) {
                        this.s2Robot.addIncludeFilter((String) it2.next());
                    }
                }
                List list6 = (List) map2.get("excludeFilter");
                if (list6 != null) {
                    Iterator it3 = list6.iterator();
                    while (it3.hasNext()) {
                        this.s2Robot.addExcludeFilter((String) it3.next());
                    }
                }
                S2RobotContext robotContext = this.s2Robot.getRobotContext();
                robotContext.setMaxDepth(((Integer) SettingsUtils.get(map2, "maxDepth", -1)).intValue());
                robotContext.setMaxAccessCount(((Integer) SettingsUtils.get(map2, "maxAccessCount", 100)).intValue());
                robotContext.setNumOfThread(((Integer) SettingsUtils.get(map2, "numOfThread", 5)).intValue());
                this.s2Robot.getIntervalController().setDelayMillisForWaitingNewUrl(((Integer) SettingsUtils.get(map2, "interval", 1000)).intValue());
                HashMap hashMap3 = new HashMap();
                hashMap3.put("index", SettingsUtils.get(map2, "index", "web"));
                hashMap3.put("type", SettingsUtils.get(map2, "type", riverName.getName()));
                hashMap3.put("overwrite", SettingsUtils.get(map2, "overwrite", Boolean.FALSE));
                hashMap3.put("incremental", SettingsUtils.get(map2, "incremental", Boolean.FALSE));
                RiverConfig riverConfig2 = (RiverConfig) SingletonS2Container.getComponent(RiverConfig.class);
                riverConfig2.createLock(uuid);
                riverConfig2.addRiverParams(uuid, hashMap3);
                for (Map map7 : list) {
                    Map<String, Object> map8 = (Map) map7.get("pattern");
                    Map<String, Map<String, Object>> map9 = (Map) map7.get("properties");
                    if (map8 == null || map9 == null) {
                        WebRiver.logger.warn("Invalid pattern or target: patternMap: " + map8 + ", propMap: " + map9, new Object[0]);
                    } else {
                        if (WebRiver.logger.isDebugEnabled()) {
                            WebRiver.logger.debug("patternMap: " + map8, new Object[0]);
                            WebRiver.logger.debug("propMap: " + map9, new Object[0]);
                        }
                        riverConfig2.addScrapingRule(uuid, (Map) map7.get(WebRiver.SETTINGS), map8, map9);
                    }
                }
                this.s2Robot.execute();
                this.s2Robot.stop();
                WebRiver.executeScript(scriptService, map, hashMap, "finish");
                atomicReference.set(null);
                if (riverConfig2 != null) {
                    riverConfig2.cleanup(uuid);
                }
                try {
                    ((EsUrlQueueService) SingletonS2Container.getComponent(EsUrlQueueService.class)).delete(uuid);
                } catch (Exception e4) {
                    WebRiver.logger.warn("Failed to delete ", e4, new Object[0]);
                }
                ((EsDataService) SingletonS2Container.getComponent(EsDataService.class)).delete(uuid);
                ((EsUrlFilterService) SingletonS2Container.getComponent(EsUrlFilterService.class)).delete(uuid);
                if (mergedJobDataMap.get(WebRiver.ONE_TIME) == null || client == null) {
                    return;
                }
                DeleteMappingResponse deleteMappingResponse4 = (DeleteMappingResponse) client.admin().indices().prepareDeleteMapping(new String[]{"_river"}).setType(new String[]{riverName.name()}).execute().actionGet();
                if (deleteMappingResponse4.isAcknowledged()) {
                    WebRiver.logger.info("Deleted one time river: " + riverName.name(), new Object[0]);
                } else {
                    WebRiver.logger.warn("Failed to delete " + riverName.name() + ". Resposne: " + deleteMappingResponse4.toString(), new Object[0]);
                }
            } catch (Throwable th) {
                WebRiver.executeScript(scriptService, map, hashMap, "finish");
                atomicReference.set(null);
                if (0 != 0) {
                    riverConfig.cleanup(uuid);
                }
                try {
                    ((EsUrlQueueService) SingletonS2Container.getComponent(EsUrlQueueService.class)).delete(uuid);
                } catch (Exception e5) {
                    WebRiver.logger.warn("Failed to delete ", e5, new Object[0]);
                }
                ((EsDataService) SingletonS2Container.getComponent(EsDataService.class)).delete(uuid);
                ((EsUrlFilterService) SingletonS2Container.getComponent(EsUrlFilterService.class)).delete(uuid);
                if (mergedJobDataMap.get(WebRiver.ONE_TIME) != null && client != null) {
                    DeleteMappingResponse deleteMappingResponse5 = (DeleteMappingResponse) client.admin().indices().prepareDeleteMapping(new String[]{"_river"}).setType(new String[]{riverName.name()}).execute().actionGet();
                    if (deleteMappingResponse5.isAcknowledged()) {
                        WebRiver.logger.info("Deleted one time river: " + riverName.name(), new Object[0]);
                    } else {
                        WebRiver.logger.warn("Failed to delete " + riverName.name() + ". Resposne: " + deleteMappingResponse5.toString(), new Object[0]);
                    }
                }
                throw th;
            }
        }

        private Client getClient(JobDataMap jobDataMap) {
            Object obj = jobDataMap.get(WebRiver.ES_CLIENT);
            if (obj instanceof Client) {
                return (Client) obj;
            }
            return null;
        }

        public void stop() {
            if (this.s2Robot != null) {
                this.s2Robot.stop();
            }
        }
    }

    @Inject
    public WebRiver(RiverName riverName, RiverSettings riverSettings, Client client, ScheduleService scheduleService, ScriptService scriptService) {
        super(riverName, riverSettings);
        this.runningJob = new AtomicReference<>();
        this.client = client;
        this.scheduleService = scheduleService;
        this.scriptService = scriptService;
        this.groupId = riverName.type() == null ? "web" : riverName.type();
        this.id = riverName.name();
        logger.info("Creating WebRiver: " + this.id, new Object[0]);
    }

    /**
     * Schedules the {@link CrawlJob} for this river.
     *
     * <p>When the settings contain {@code schedule.cron}, that cron expression is
     * used. Otherwise the job is scheduled once, 60 seconds from now, and flagged
     * as one-time in the job data. The "start" script hook is invoked before the
     * job is handed to the scheduler. Nothing is scheduled when no
     * {@link ScheduleService} is available.
     */
    public void start() {
        logger.info("Scheduling CrawlJob...");
        if (this.scheduleService == null) {
            logger.warn("Elasticsearch River Web plugin depends on Elasticsearch Quartz plugin, but it's not found. River Web plugin does not start.");
            return;
        }

        final JobDataMap jobDataMap = new JobDataMap();
        String cronExpression = null;
        final Map<?, ?> scheduleSettings = (Map<?, ?>) this.settings.settings().get("schedule");
        if (scheduleSettings != null) {
            cronExpression = (String) scheduleSettings.get("cron");
        }
        if (cronExpression == null) {
            // DateUtils.addSeconds returns a NEW Date and leaves its argument
            // untouched, so the result must be used; otherwise the expression
            // describes the current instant instead of one minute ahead.
            final Date fireTime = DateUtils.addSeconds(new Date(), 60);
            // Fixed locale so the cron fields are always rendered with ASCII digits.
            cronExpression = new SimpleDateFormat("s m H d M ? yyyy", Locale.ENGLISH).format(fireTime);
            jobDataMap.put(ONE_TIME, Boolean.TRUE);
        }
        jobDataMap.put(RIVER_NAME, this.riverName);
        jobDataMap.put(SETTINGS, this.settings);
        jobDataMap.put(ES_CLIENT, this.client);
        jobDataMap.put(RUNNING_JOB, this.runningJob);
        jobDataMap.put(SCRIPT_SERVICE, this.scriptService);

        final Map<String, Object> scriptVars = new HashMap<>();
        scriptVars.put(RIVER_NAME, this.riverName);
        scriptVars.put("client", this.client);
        executeScript(this.scriptService, this.settings.settings(), scriptVars, "start");

        this.scheduleService.scheduleJob(
                JobBuilder.newJob(CrawlJob.class)
                        .withIdentity(this.id + JOB_ID_SUFFIX, this.groupId)
                        .usingJobData(jobDataMap)
                        .build(),
                TriggerBuilder.newTrigger()
                        .withIdentity(this.id + TRIGGER_ID_SUFFIX, this.groupId)
                        .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression))
                        .startNow()
                        .build());
    }

    /**
     * Shuts the river down: invokes the "close" script hook, stops the crawl
     * that is currently running (if any) and removes the job from the scheduler.
     * Does nothing when no {@link ScheduleService} is available.
     */
    public void close() {
        if (this.scheduleService != null) {
            final Map<String, Object> scriptVars = new HashMap<>();
            scriptVars.put(RIVER_NAME, this.riverName);
            scriptVars.put("client", this.client);
            executeScript(this.scriptService, this.settings.settings(), scriptVars, "close");

            logger.info("Unscheduling  CrawlJob...");

            final CrawlJob activeJob = this.runningJob.get();
            if (activeJob != null) {
                activeJob.stop();
            }

            final JobKey jobKey = JobKey.jobKey(this.id + JOB_ID_SUFFIX, this.groupId);
            this.scheduleService.deleteJob(jobKey);
        }
    }

    /**
     * Runs the script hook named {@code str} if one is configured under
     * {@code crawl.script} in the river settings.
     *
     * <p>The script language comes from {@code lang} (default "groovy") and the
     * script type from {@code script_type} (default "inline"; "file" and
     * "indexed" are matched case-insensitively). The script receives a copy of
     * {@code map2} extended with the "container" and "settings" variables. A
     * failing script is logged as a warning and does not propagate.
     *
     * @param scriptService service used to compile and execute the script
     * @param map           river settings
     * @param map2          variables exposed to the script
     * @param str           name of the hook, i.e. the key looked up in the script settings
     */
    @SuppressWarnings("unchecked")
    protected static void executeScript(ScriptService scriptService, Map<String, Object> map, Map<String, Object> map2, String str) {
        final Map<String, Object> crawlSettings = (Map<String, Object>) SettingsUtils.get(map, "crawl");
        final Map<String, Object> scriptSettings = (Map<String, Object>) SettingsUtils.get(crawlSettings, "script");
        final String script = (String) SettingsUtils.get(scriptSettings, str);
        final String lang = (String) SettingsUtils.get(scriptSettings, "lang", "groovy");
        final String typeName = (String) SettingsUtils.get(scriptSettings, "script_type", "inline");

        final ScriptService.ScriptType scriptType;
        if (ScriptService.ScriptType.FILE.toString().equalsIgnoreCase(typeName)) {
            scriptType = ScriptService.ScriptType.FILE;
        } else if (ScriptService.ScriptType.INDEXED.toString().equalsIgnoreCase(typeName)) {
            scriptType = ScriptService.ScriptType.INDEXED;
        } else {
            scriptType = ScriptService.ScriptType.INLINE;
        }

        if (!StringUtils.isNotBlank(script)) {
            return;
        }

        final Map<String, Object> scriptVars = new HashMap<>(map2);
        scriptVars.put("container", SingletonS2ContainerFactory.getContainer());
        scriptVars.put(SETTINGS, map);
        try {
            final Object result = scriptService.execute(scriptService.compile(lang, script, scriptType), scriptVars);
            logger.info("[{}] \"{}\" => {}", str, script, result);
        } catch (Exception e) {
            logger.warn("Failed to execute script: {}", e, script);
        }
    }
}
