package org.codelibs.elasticsearch.web.robot.transformer;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.codelibs.elasticsearch.util.settings.SettingsUtils;
import org.codelibs.elasticsearch.web.config.RiverConfig;
import org.codelibs.elasticsearch.web.config.ScrapingRule;
import org.codelibs.robot.RobotCrawlAccessException;
import org.codelibs.robot.RobotSystemException;
import org.codelibs.robot.builder.RequestDataBuilder;
import org.codelibs.robot.entity.AccessResultData;
import org.codelibs.robot.entity.RequestData;
import org.codelibs.robot.entity.ResponseData;
import org.codelibs.robot.entity.ResultData;
import org.codelibs.robot.helper.EncodingHelper;
import org.codelibs.robot.transformer.impl.HtmlTransformer;
import org.codelibs.robot.util.StreamUtil;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.script.ScriptService;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.seasar.framework.beans.BeanDesc;
import org.seasar.framework.beans.factory.BeanDescFactory;
import org.seasar.framework.beans.util.Beans;
import org.seasar.framework.container.SingletonS2Container;
import org.seasar.framework.container.annotation.tiger.InitMethod;
import org.seasar.framework.container.factory.SingletonS2ContainerFactory;
import org.seasar.framework.util.Base64Util;
import org.seasar.framework.util.FileUtil;
import org.seasar.framework.util.MethodUtil;
import org.seasar.framework.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/codelibs/elasticsearch/web/robot/transformer/ScrapingTransformer.class */
public class ScrapingTransformer extends HtmlTransformer {
    /** Default for the per-rule "maxFileSize" setting used when embedding the fetched file as Base64. */
    private static final long DEFAULT_MAX_ATTACHMENT_SIZE = 1000000;
    /** Rule key holding a fixed value (String or List) that is used instead of querying the document. */
    private static final String VALUE_QUERY_TYPE = "value";
    /** Rule key holding the rule type; "data" and "attachment" select the Base64 attachment path. */
    private static final String TYPE_QUERY_TYPE = "type";
    /** Rule key holding the script definition (String, List of fragments, or Map). */
    private static final String SCRIPT_QUERY_TYPE = "script";
    /** Rule key holding the argument list passed to the reflective query method. */
    private static final String ARGS_QUERY_TYPE = "args";
    /** Rule key: when true the property is emitted as a list instead of a space-joined string. */
    private static final String IS_ARRAY_PROP_NAME = "isArray";
    /** Rule key: when true the resulting value(s) are also collected as child URLs. */
    private static final String IS_CHILD_URL_PROP_NAME = "isChildUrl";
    /** Rule key: when true whitespace runs in extracted values are collapsed and trimmed. */
    private static final String TRIM_SPACES_PROP_NAME = "trimSpaces";
    /** Field added to every indexed document with the indexing time. */
    private static final String TIMESTAMP_FIELD = "@timestamp";
    /** Field added to each document produced from array properties, holding its index. */
    private static final String POSITION_FIELD = "position";
    /** Top-level property name under which array-expanded properties are collected. */
    private static final String ARRAY_PROPERTY_PREFIX = "[]";
    private static final Logger logger = LoggerFactory.getLogger(ScrapingTransformer.class);
    /**
     * Rule keys probed, in this order, by processCssQuery; the matching key is also used as the
     * name of the method invoked reflectively on each selected element.
     */
    private static final String[] queryTypes = {"className", "data", "html", "id", "ownText", "tagName", "text", "val", "nodeName", "outerHtml", "attr", "baseUri", "absUrl"};
    /** River configuration; assigned in init(). */
    protected RiverConfig riverConfig;
    /**
     * Names of the ResponseData properties copied into every indexed document.
     * (The identifier contains a typo, "Resonse"; it is public and therefore left unchanged.)
     */
    public String[] copiedResonseDataFields = {"url", "parentUrl", "httpStatusCode", "method", "charSet", "contentLength", "mimeType", "executionTime", "lastModified"};
    /** Per-thread set of child URLs gathered in processData, read in storeChildUrls and cleared in transform. */
    protected ThreadLocal<Set<String>> childUrlSetLocal = new ThreadLocal<>();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/codelibs/elasticsearch/web/robot/transformer/ScrapingTransformer$ScriptInfo.class */
    /**
     * Immutable description of a script attached to a scraping rule: its source (or name),
     * the script language and the script type ("inline", "file" or "indexed").
     */
    public static class ScriptInfo {
        private final String script;
        private final String lang;
        private final String scriptType;

        /**
         * Creates an inline groovy script.
         *
         * @param script the script source
         */
        ScriptInfo(String script) {
            this(script, "groovy", "inline");
        }

        /**
         * @param script     the script source or name
         * @param lang       the script language
         * @param scriptType the script type name
         */
        ScriptInfo(String script, String lang, String scriptType) {
            this.script = script;
            this.lang = lang;
            this.scriptType = scriptType;
        }

        /** @return the script source or name */
        public String getScript() {
            return this.script;
        }

        /** @return the script language */
        public String getLang() {
            return this.lang;
        }

        /** @return the script type name */
        public String getScriptType() {
            return this.scriptType;
        }
    }

    /**
     * Resolves the {@link RiverConfig} component from the singleton S2 container and stores it
     * in {@link #riverConfig}.
     */
    @InitMethod
    public void init() {
        final RiverConfig config = (RiverConfig) SingletonS2Container.getComponent(RiverConfig.class);
        this.riverConfig = config;
    }

    /**
     * Delegates to the superclass transformation and always clears the per-thread child URL set
     * afterwards, whether the delegation returns or throws.
     *
     * @param responseData the fetched response
     * @return the result produced by the superclass
     */
    public ResultData transform(ResponseData responseData) {
        final ResultData result;
        try {
            result = super.transform(responseData);
        } finally {
            // Child URLs are collected per call; never let them leak to the next response on this thread.
            this.childUrlSetLocal.remove();
        }
        return result;
    }

    /**
     * Determines the character set of the response and stores it on {@code responseData}.
     * <p>
     * The number of bytes inspected defaults to {@code preloadSizeForCharset} and can be overridden
     * by a positive "preloadSizeForCharset" setting on the matching scraping rule. If no charset is
     * detected, "UTF-8" is used when no default encoding is configured; otherwise the default
     * encoding is applied only if the response has no charset yet. An unsupported charset is
     * finally replaced by "UTF-8".
     *
     * @param responseData the response whose charset is updated in place
     */
    protected void updateCharset(ResponseData responseData) {
        int preloadSize = this.preloadSizeForCharset;
        final ScrapingRule rule = this.riverConfig.getScrapingRule(responseData);
        if (rule != null) {
            final Integer configured = (Integer) rule.getSetting("preloadSizeForCharset", 0);
            if (configured.intValue() > 0) {
                preloadSize = configured.intValue();
            }
        }

        final String detected = loadCharset(responseData.getResponseBody(), preloadSize);
        if (detected == null) {
            if (this.defaultEncoding == null) {
                responseData.setCharSet("UTF-8");
            } else if (responseData.getCharSet() == null) {
                responseData.setCharSet(this.defaultEncoding);
            }
        } else {
            responseData.setCharSet(detected.trim());
        }

        if (!isSupportedCharset(responseData.getCharSet())) {
            responseData.setCharSet("UTF-8");
        }
    }

    /**
     * Reads up to {@code i} leading bytes of the stream, extracts a charset declaration from them
     * via {@code parseCharset} and normalises the result with the {@link EncodingHelper} component.
     * <p>
     * The bytes are decoded as ISO-8859-1 (a lossless byte-to-char mapping) so that detection does
     * not depend on the platform default charset, and the buffer is filled with repeated reads so
     * a short first read does not truncate the inspected prefix.
     * <p>
     * NOTE(review): the stream is left open, as in the previous implementation; confirm that the
     * caller owns and closes it.
     *
     * @param inputStream the response body
     * @param i           the maximum number of bytes to inspect
     * @return the normalised charset name, or whatever the helper returns for an undetected
     *         charset; the un-normalised value if normalisation fails
     * @throws RobotCrawlAccessException if the stream cannot be read
     */
    protected String loadCharset(InputStream inputStream, int i) {
        String charset = null;
        try {
            final byte[] head = new byte[i];
            final BufferedInputStream in = new BufferedInputStream(inputStream);
            int total = in.read(head);
            if (total != -1) {
                // A single read may return fewer bytes than requested; keep reading until the
                // buffer is full or the stream ends.
                while (total < head.length) {
                    final int count = in.read(head, total, head.length - total);
                    if (count <= 0) {
                        break;
                    }
                    total += count;
                }
                charset = parseCharset(new String(head, 0, total, java.nio.charset.StandardCharsets.ISO_8859_1));
            }
        } catch (IOException e) {
            throw new RobotCrawlAccessException("Could not load a content.", e);
        }
        try {
            charset = ((EncodingHelper) SingletonS2Container.getComponent(EncodingHelper.class)).normalize(charset);
        } catch (Exception e) {
            // Normalisation is best-effort: keep the detected value but leave a trace.
            logger.debug("Could not normalize the charset: " + charset, e);
        }
        return charset;
    }

    /**
     * Spools the response body into a temporary file and hands it to
     * {@link #processData(ScrapingRule, File, ResponseData, ResultData)}. Nothing is done when no
     * scraping rule matches the response.
     * <p>
     * The temporary file is removed in a single {@code finally} block, so cleanup runs exactly once
     * on both the normal and the exceptional path.
     *
     * @param responseData the fetched response
     * @param resultData   the result being populated
     * @throws RobotSystemException if creating or writing the temporary file fails with an I/O error
     */
    protected void storeData(ResponseData responseData, ResultData resultData) {
        final ScrapingRule scrapingRule = this.riverConfig.getScrapingRule(responseData);
        if (scrapingRule == null) {
            logger.info("No scraping rule.");
            return;
        }
        File file = null;
        try {
            file = File.createTempFile("river-web-", ".tmp");
            StreamUtil.drain(responseData.getResponseBody(), file);
            processData(scrapingRule, file, responseData, resultData);
        } catch (IOException e) {
            throw new RobotSystemException("Failed to create a temp file.", e);
        } finally {
            if (file != null && !file.delete()) {
                logger.warn("Failed to delete " + file.getAbsolutePath());
            }
        }
    }

    /**
     * Applies every rule of {@code scrapingRule} to the spooled response and indexes the result.
     * <p>
     * For each rule the raw values come from, in order of precedence: a fixed "value" setting, the
     * Base64-encoded file (type "data"/"attachment", subject to "maxFileSize"), or a CSS query on
     * the parsed document. The values are then optionally passed through the rule's script, stored
     * under the rule's property name and, when "isChildUrl" is set, recorded as child URLs.
     *
     * @param scrapingRule the rule set matching the response
     * @param file         the temporary file holding the response body
     * @param responseData the fetched response
     * @param resultData   the result being populated (exposed to scripts as "result")
     * @throws RobotCrawlAccessException if the file cannot be parsed as HTML
     */
    protected void processData(ScrapingRule scrapingRule, File file, ResponseData responseData, ResultData resultData) {
        final Map<String, Map<String, Object>> ruleMap = scrapingRule.getRuleMap();
        String charSet = responseData.getCharSet();
        if (charSet == null) {
            charSet = "UTF-8";
        }
        Document document = null;
        if (((Boolean) scrapingRule.getSetting("html", Boolean.TRUE)).booleanValue()) {
            document = parseScrapedDocument(file, charSet, responseData.getUrl());
        }

        final Map<String, Object> dataMap = new LinkedHashMap<>();
        Beans.copy(responseData, dataMap).includes(this.copiedResonseDataFields).excludesNull().excludesWhitespace().execute();
        if (logger.isDebugEnabled()) {
            logger.debug("ruleMap: " + ruleMap);
            logger.debug("dataMap: " + dataMap);
        }

        for (final Map.Entry<String, Map<String, Object>> entry : ruleMap.entrySet()) {
            final String property = entry.getKey();
            final Map<String, Object> params = entry.getValue();
            final boolean trim = ((Boolean) SettingsUtils.get(params, TRIM_SPACES_PROP_NAME, Boolean.FALSE)).booleanValue();
            boolean isArray = ((Boolean) SettingsUtils.get(params, IS_ARRAY_PROP_NAME, Boolean.FALSE)).booleanValue();
            boolean isChildUrl = ((Boolean) SettingsUtils.get(params, IS_CHILD_URL_PROP_NAME, Boolean.FALSE)).booleanValue();

            final List<String> values = new ArrayList<>();
            final Object fixedValue = SettingsUtils.get(params, VALUE_QUERY_TYPE, (Object) null);
            final String type = (String) SettingsUtils.get(params, TYPE_QUERY_TYPE, (Object) null);
            if (fixedValue != null) {
                if (fixedValue instanceof String) {
                    values.add(trimSpaces(fixedValue.toString(), trim));
                } else if (fixedValue instanceof List) {
                    for (final Object item : (List<?>) fixedValue) {
                        // A null entry is kept as null instead of failing on toString().
                        values.add(trimSpaces(item == null ? null : item.toString(), trim));
                    }
                }
            } else if ("data".equals(type) || "attachment".equals(type)) {
                final long maxFileSize = resolveMaxAttachmentSize(params);
                final long fileSize = file.length();
                if (fileSize <= maxFileSize) {
                    values.add(Base64Util.encode(FileUtil.getBytes(file)));
                    // An attachment is always a single scalar and never a link source.
                    isArray = false;
                    isChildUrl = false;
                } else {
                    logger.info("The max file size(" + fileSize + "/" + maxFileSize + " is exceeded: " + responseData.getUrl());
                }
            } else if (document != null) {
                processCssQuery(document, property, params, trim, values);
            }

            final ScriptInfo scriptInfo = getScriptValue(params);
            final Object propertyValue;
            if (scriptInfo == null) {
                propertyValue = isArray ? values : StringUtils.join(values, " ");
            } else {
                propertyValue = evaluateRuleScript(scriptInfo, property, params, isArray, values, responseData, resultData);
            }

            addPropertyData(dataMap, property, propertyValue);
            if (isChildUrl) {
                registerChildUrls(propertyValue);
            }
        }
        storeIndex(responseData, dataMap);
    }

    /** Parses the spooled file as HTML with the given charset and base URL, always closing the stream. */
    private Document parseScrapedDocument(File file, String charSet, String baseUrl) {
        BufferedInputStream in = null;
        try {
            in = new BufferedInputStream(new FileInputStream(file));
            return Jsoup.parse(in, charSet, baseUrl);
        } catch (IOException e) {
            throw new RobotCrawlAccessException("Could not parse " + baseUrl, e);
        } finally {
            IOUtils.closeQuietly(in);
        }
    }

    /** Reads the "maxFileSize" rule setting as a long, accepting any numeric type and falling back to the default. */
    private long resolveMaxAttachmentSize(Map<String, Object> params) {
        final Object configured = SettingsUtils.get(params, "maxFileSize", (Object) Long.valueOf(DEFAULT_MAX_ATTACHMENT_SIZE));
        if (configured instanceof Number) {
            // Settings parsed from JSON may arrive as Integer rather than Long.
            return ((Number) configured).longValue();
        }
        logger.warn("Ignoring non-numeric maxFileSize: " + configured);
        return DEFAULT_MAX_ATTACHMENT_SIZE;
    }

    /** Runs the rule's script once (scalar) or once per list index (array) and returns the script output. */
    private Object evaluateRuleScript(ScriptInfo scriptInfo, String property, Map<String, Object> params, boolean isArray,
            List<String> values, ResponseData responseData, ResultData resultData) {
        final Map<String, Object> vars = new HashMap<>();
        vars.put("container", SingletonS2ContainerFactory.getContainer());
        vars.put("client", this.riverConfig.getClient());
        vars.put("data", responseData);
        vars.put("result", resultData);
        vars.put("property", property);
        vars.put("parameters", params);
        vars.put("array", Boolean.valueOf(isArray));
        vars.put("list", values);

        final String joined = StringUtils.join(values, " ");
        if (!isArray) {
            vars.put(VALUE_QUERY_TYPE, joined);
            return executeScript(scriptInfo.getLang(), scriptInfo.getScript(), scriptInfo.getScriptType(), vars);
        }

        // NOTE(review): every per-index invocation receives the whole joined list as "value", not
        // the element at "index"; kept as-is because existing scripts may rely on it.
        final List<Object> results = new ArrayList<>(values.size());
        for (int index = 0; index < values.size(); index++) {
            final Map<String, Object> localVars = new HashMap<>(vars);
            localVars.put("index", Integer.valueOf(index));
            localVars.put(VALUE_QUERY_TYPE, joined);
            results.add(executeScript(scriptInfo.getLang(), scriptInfo.getScript(), scriptInfo.getScriptType(), localVars));
        }
        return results;
    }

    /** Adds the non-blank string(s) in {@code propertyValue} to the per-thread child URL set, creating it if needed. */
    private void registerChildUrls(Object propertyValue) {
        Set<String> childUrls = this.childUrlSetLocal.get();
        if (childUrls == null) {
            childUrls = new HashSet<>();
            this.childUrlSetLocal.set(childUrls);
        }
        if (propertyValue instanceof String) {
            final String url = (String) propertyValue;
            if (StringUtils.isNotBlank(url)) {
                childUrls.add(url);
            }
        } else if (propertyValue instanceof List) {
            for (final Object item : (List<?>) propertyValue) {
                if (item == null) {
                    // CSS queries insert null placeholders for missing elements; skip them.
                    continue;
                }
                final String url = item.toString();
                if (StringUtils.isNotBlank(url)) {
                    childUrls.add(url);
                }
            }
        }
    }

    /**
     * Compiles and runs a script through the river's {@link ScriptService}.
     *
     * @param lang           the script language
     * @param script         the script source or name
     * @param scriptTypeName "file" or "indexed" (case-insensitive); anything else means inline
     * @param vars           the variables exposed to the script
     * @return the value returned by the script
     */
    private Object executeScript(String lang, String script, String scriptTypeName, Map<String, Object> vars) {
        final ScriptService.ScriptType scriptType;
        if (ScriptService.ScriptType.FILE.toString().equalsIgnoreCase(scriptTypeName)) {
            scriptType = ScriptService.ScriptType.FILE;
        } else if (ScriptService.ScriptType.INDEXED.toString().equalsIgnoreCase(scriptTypeName)) {
            scriptType = ScriptService.ScriptType.INDEXED;
        } else {
            scriptType = ScriptService.ScriptType.INLINE;
        }
        final ScriptService service = this.riverConfig.getScriptService();
        return service.executable(service.compile(lang, script, scriptType), vars).run();
    }

    /**
     * Builds the script description from a rule's "script" setting.
     * <ul>
     * <li>String: an inline groovy script with that source;</li>
     * <li>List: the fragments concatenated without separator, as an inline groovy script;</li>
     * <li>Map: "script" gives the source, "lang" (default "groovy") and "script_type"
     * (default "inline") refine it.</li>
     * </ul>
     *
     * @param map the rule parameters
     * @return the script description, or {@code null} when no usable script is configured
     */
    protected ScriptInfo getScriptValue(Map<String, Object> map) {
        final Object setting = SettingsUtils.get(map, SCRIPT_QUERY_TYPE, (Object) null);
        ScriptInfo info = null;
        if (setting instanceof String) {
            info = new ScriptInfo(setting.toString());
        } else if (setting instanceof List) {
            info = new ScriptInfo(StringUtils.join((List) setting, ""));
        } else if (setting instanceof Map) {
            final Map scriptSettings = (Map) setting;
            final String source = (String) SettingsUtils.get(scriptSettings, SCRIPT_QUERY_TYPE);
            if (source != null) {
                info = new ScriptInfo(source, (String) SettingsUtils.get(scriptSettings, "lang", "groovy"), (String) SettingsUtils.get(scriptSettings, "script_type", "inline"));
            }
        }
        return info;
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Evaluates the first query type configured on the rule and appends one value per selected
     * element to {@code list}.
     * <p>
     * The query types are probed in the order of {@code queryTypes}; the first one whose setting is
     * a String or a List selects the elements, and the method of the same name is then invoked
     * reflectively on each element with the rule's "args". A {@code null} element, or an invocation
     * that fails, contributes a {@code null} entry.
     *
     * @param document the parsed document
     * @param str      the property name; a leading "[]" enables null placeholders for list queries
     * @param map      the rule parameters
     * @param z        whether extracted values are whitespace-normalised
     * @param list     the output list, appended to in place
     */
    protected void processCssQuery(Document document, String str, Map<String, Object> map, boolean z, List<String> list) {
        for (final String queryType : queryTypes) {
            final Object query = SettingsUtils.get(map, queryType, (Object) null);
            final Element[] selected;
            if (query instanceof String) {
                selected = getElements(new Element[]{document}, query.toString());
            } else if (query instanceof List) {
                selected = getElements(new Element[]{document}, (List) query, str.startsWith(ARRAY_PROPERTY_PREFIX));
            } else {
                selected = null;
            }
            if (selected == null) {
                continue;
            }

            for (final Element element : selected) {
                if (element == null) {
                    list.add(null);
                    continue;
                }
                final List<Object> args = (List) SettingsUtils.get(map, ARGS_QUERY_TYPE, Collections.emptyList());
                try {
                    final Method accessor = getQueryMethod(element, queryType, args);
                    final Object[] argArray = args.toArray(new Object[args.size()]);
                    list.add(trimSpaces((String) MethodUtil.invoke(accessor, element, argArray), z));
                } catch (Exception e) {
                    logger.warn("Could not invoke " + queryType + " on " + element, e);
                    list.add(null);
                }
            }
            // Only the first configured query type is honoured.
            return;
        }
    }

    /**
     * Looks up the method named {@code str} on the element's class. Without arguments the
     * no-arg method is resolved; otherwise one taking as many {@code String} parameters as there
     * are arguments.
     *
     * @param element the element whose class is inspected
     * @param str     the method name
     * @param list    the arguments that will be passed; may be {@code null} or empty
     * @return the resolved method
     */
    protected Method getQueryMethod(Element element, String str, List<Object> list) {
        final BeanDesc beanDesc = BeanDescFactory.getBeanDesc(element.getClass());
        final int argCount = list == null ? 0 : list.size();
        if (argCount == 0) {
            return beanDesc.getMethod(str);
        }
        final Class[] paramTypes = new Class[argCount];
        java.util.Arrays.fill(paramTypes, String.class);
        return beanDesc.getMethod(str, paramTypes);
    }

    /**
     * Applies a chain of queries: each query is evaluated against every element produced by the
     * previous one. {@code null} elements are carried through unchanged, and when {@code z} is set
     * an element that yields no match contributes a single {@code null} placeholder.
     *
     * @param elementArr the starting elements
     * @param list       the queries, applied in order
     * @param z          whether to insert a {@code null} placeholder for an empty match
     * @return the elements selected by the last query
     */
    protected Element[] getElements(Element[] elementArr, List<String> list, boolean z) {
        Element[] current = elementArr;
        for (final String query : list) {
            final List<Element> next = new ArrayList<>();
            for (final Element parent : current) {
                if (parent == null) {
                    next.add(null);
                    continue;
                }
                final Element[] found = getElements(new Element[]{parent}, query);
                if (z && found.length == 0) {
                    next.add(null);
                } else {
                    Collections.addAll(next, found);
                }
            }
            current = next.toArray(new Element[next.size()]);
        }
        return current;
    }

    /** Matches the positional pseudo-selectors handled locally; exactly one of the three groups holds the number. */
    private static final Pattern INDEX_PSEUDO_PATTERN = Pattern.compile(":eq\\(([0-9]+)\\)|:lt\\(([0-9]+)\\)|:gt\\(([0-9]+)\\)");

    /**
     * Selects elements with a CSS query, handling {@code :eq(n)}, {@code :lt(n)} and {@code :gt(n)}
     * locally when they directly follow a selector (no preceding space): the selector before the
     * pseudo-selector is evaluated and the position is applied to its result list. A
     * pseudo-selector at the very start of the query, after a space, or with an unparsable number
     * is left in the query and passed on to jsoup.
     * <p>
     * The number is taken from whichever alternative matched, so {@code :lt} and {@code :gt} are
     * handled as well as {@code :eq}, and a query starting with a pseudo-selector no longer fails
     * on the empty prefix. {@code null} elements are carried through as {@code null}.
     *
     * @param elementArr the elements to query
     * @param str        the CSS query
     * @return the selected elements
     */
    protected Element[] getElements(Element[] elementArr, String str) {
        Element[] targets = elementArr;
        final Matcher matcher = INDEX_PSEUDO_PATTERN.matcher(str);
        // Matcher.appendReplacement requires a StringBuffer on older Java versions.
        final StringBuffer buf = new StringBuffer();
        while (matcher.find()) {
            final String pseudo = matcher.group();
            matcher.appendReplacement(buf, "");
            final boolean attachedToSelector = buf.length() > 0 && buf.charAt(buf.length() - 1) != ' ';
            if (!attachedToSelector) {
                buf.append(pseudo);
                continue;
            }

            // Only the group of the alternative that matched is non-null.
            String digits = matcher.group(1);
            if (digits == null) {
                digits = matcher.group(2);
            }
            if (digits == null) {
                digits = matcher.group(3);
            }
            int position;
            try {
                position = Integer.parseInt(digits);
            } catch (NumberFormatException e) {
                logger.warn("Invalid number: " + str, e);
                buf.append(pseudo);
                continue;
            }

            final List<Element> picked = new ArrayList<>();
            final String selector = buf.toString();
            for (final Element element : targets) {
                if (element == null) {
                    picked.add(null);
                    continue;
                }
                final Elements matches = element.select(selector);
                if (pseudo.startsWith(":eq")) {
                    if (position < matches.size()) {
                        picked.add(matches.get(position));
                    }
                } else if (pseudo.startsWith(":lt")) {
                    for (int i = 0; i < matches.size() && i < position; i++) {
                        picked.add(matches.get(i));
                    }
                } else if (pseudo.startsWith(":gt")) {
                    for (int i = position + 1; i < matches.size(); i++) {
                        picked.add(matches.get(i));
                    }
                }
            }
            targets = picked.toArray(new Element[picked.size()]);
            buf.setLength(0);
        }
        matcher.appendTail(buf);

        final String remaining = buf.toString();
        if (StringUtil.isNotBlank(remaining)) {
            final List<Element> selected = new ArrayList<>();
            for (final Element element : targets) {
                if (element == null) {
                    selected.add(null);
                } else {
                    final Elements matches = element.select(remaining);
                    for (int i = 0; i < matches.size(); i++) {
                        selected.add(matches.get(i));
                    }
                }
            }
            targets = selected.toArray(new Element[selected.size()]);
        }
        return targets;
    }

    /**
     * Optionally normalises whitespace: every whitespace run becomes a single space and the
     * result is trimmed.
     *
     * @param str the text; may be {@code null}
     * @param z   whether to normalise; when {@code false} the text is returned unchanged
     * @return the processed text, or {@code null} if {@code str} is {@code null}
     */
    protected String trimSpaces(String str, boolean z) {
        if (str == null) {
            return null;
        }
        if (!z) {
            return str;
        }
        final String collapsed = str.replaceAll("\\s+", " ");
        return collapsed.trim();
    }

    /**
     * Stores {@code obj} in a nested map structure addressed by a dot-separated path. Missing
     * intermediate maps are created as {@link LinkedHashMap}s; the last path segment is the key
     * under which the value is put.
     *
     * @param map the root map, modified in place
     * @param str the dot-separated property path
     * @param obj the value to store
     */
    protected void addPropertyData(Map<String, Object> map, String str, Object obj) {
        final String[] path = str.split("\\.");
        final int leafIndex = path.length - 1;
        Map<String, Object> cursor = map;
        int depth = 0;
        while (depth < leafIndex) {
            final String segment = path[depth++];
            @SuppressWarnings("unchecked")
            Map<String, Object> child = (Map<String, Object>) cursor.get(segment);
            if (child == null) {
                child = new LinkedHashMap<>();
                cursor.put(segment, child);
            }
            cursor = child;
        }
        cursor.put(path[leafIndex], obj);
    }

    /**
     * Writes the scraped data for a response into the session's index.
     * <p>
     * When overwrite is enabled for the session, documents with the same "url" are deleted and the
     * index refreshed first. If the data has no "[]" entry it is stored as one document. Otherwise
     * the "[]" sub-map is flattened and one document is stored per list position (up to the longest
     * list): each gets a "position" field, a copy of the remaining data, the list items at that
     * position, and — for position 0 only — the non-list values.
     *
     * @param responseData the fetched response
     * @param map          the scraped data; its "[]" entry is removed
     */
    protected void storeIndex(ResponseData responseData, Map<String, Object> map) {
        final String sessionId = responseData.getSessionId();
        final String indexName = this.riverConfig.getIndexName(sessionId);
        final String typeName = this.riverConfig.getTypeName(sessionId);
        final boolean overwrite = this.riverConfig.isOverwrite(sessionId);
        final Client client = this.riverConfig.getClient();
        if (logger.isDebugEnabled()) {
            logger.debug("Index: " + indexName + ", sessionId: " + sessionId + ", Data: " + map);
        }

        if (overwrite) {
            client.prepareDeleteByQuery(new String[]{indexName}).setQuery(QueryBuilders.termQuery("url", responseData.getUrl())).execute().actionGet();
            client.admin().indices().prepareRefresh(new String[]{indexName}).execute().actionGet();
        }

        @SuppressWarnings("unchecked")
        final Map<String, Object> arrayProps = (Map<String, Object>) map.remove(ARRAY_PROPERTY_PREFIX);
        if (arrayProps == null) {
            storeIndex(client, indexName, typeName, map);
            return;
        }

        final Map<String, Object> flatArrayProps = new LinkedHashMap<>();
        convertFlatMap("", arrayProps, flatArrayProps);

        // The number of documents equals the length of the longest list.
        int docCount = 0;
        for (final Object candidate : flatArrayProps.values()) {
            if (candidate instanceof List) {
                docCount = Math.max(docCount, ((List<?>) candidate).size());
            }
        }

        for (int position = 0; position < docCount; position++) {
            final Map<String, Object> doc = new LinkedHashMap<>();
            doc.put(POSITION_FIELD, Integer.valueOf(position));
            deepCopy(map, doc);
            for (final Map.Entry<String, Object> prop : flatArrayProps.entrySet()) {
                final Object propValue = prop.getValue();
                if (propValue instanceof List) {
                    final List<?> items = (List<?>) propValue;
                    if (position < items.size()) {
                        addPropertyData(doc, prop.getKey(), items.get(position));
                    }
                } else if (position == 0) {
                    addPropertyData(doc, prop.getKey(), propValue);
                }
            }
            storeIndex(client, indexName, typeName, doc);
        }
    }

    /**
     * Stamps the document with the current time under "@timestamp" and indexes it with refresh
     * enabled. A failure to index is logged as a warning and not propagated.
     *
     * @param client the Elasticsearch client
     * @param str    the index name
     * @param str2   the type name
     * @param map    the document source; the timestamp is added in place
     */
    protected void storeIndex(Client client, String str, String str2, Map<String, Object> map) {
        final Date indexedAt = new Date();
        map.put(TIMESTAMP_FIELD, indexedAt);
        if (logger.isDebugEnabled()) {
            final String location = str + "/" + str2;
            logger.debug(location + " : dataMap" + map);
        }
        try {
            client.prepareIndex(str, str2)
                    .setRefresh(true)
                    .setSource(XContentFactory.jsonBuilder().value(map))
                    .execute()
                    .actionGet();
        } catch (Exception e) {
            logger.warn("Could not write a content into index.", e);
        }
    }

    /**
     * Copies {@code map} into {@code map2} by flattening it to dotted paths and re-inserting each
     * path, so nested maps are rebuilt as new instances. Non-map values (including lists) are
     * inserted by reference.
     *
     * @param map  the source map
     * @param map2 the destination map, modified in place
     */
    protected void deepCopy(Map<String, Object> map, Map<String, Object> map2) {
        final Map<String, Object> flattened = new LinkedHashMap<>();
        convertFlatMap("", map, flattened);
        final Iterator<Map.Entry<String, Object>> entries = flattened.entrySet().iterator();
        while (entries.hasNext()) {
            final Map.Entry<String, Object> leaf = entries.next();
            addPropertyData(map2, leaf.getKey(), leaf.getValue());
        }
    }

    /**
     * Flattens a nested map into dotted-path keys. Map values are descended into recursively;
     * every other value is put into {@code map2} under the prefix plus its key.
     *
     * @param str  the key prefix for this level ("" at the root, otherwise ending with ".")
     * @param map  the map to flatten
     * @param map2 the flat output map, modified in place
     */
    protected void convertFlatMap(String str, Map<String, Object> map, Map<String, Object> map2) {
        for (final Map.Entry<String, Object> entry : map.entrySet()) {
            final String path = str + entry.getKey();
            final Object value = entry.getValue();
            if (!(value instanceof Map)) {
                map2.put(path, value);
                continue;
            }
            @SuppressWarnings("unchecked")
            final Map<String, Object> nested = (Map<String, Object>) value;
            convertFlatMap(path + ".", nested, map2);
        }
    }

    /**
     * Registers the child URLs of a response. When scraping rules collected child URLs for the
     * current thread, those are turned into GET requests and added to the result; otherwise the
     * superclass behaviour is used. In the former case the request's own URL and its duplicate
     * form are removed from the result.
     *
     * @param responseData the fetched response
     * @param resultData   the result receiving the child URLs
     */
    protected void storeChildUrls(ResponseData responseData, ResultData resultData) {
        final Set<String> childUrls = this.childUrlSetLocal.get();
        if (childUrls == null) {
            super.storeChildUrls(responseData, resultData);
            return;
        }

        final List<RequestData> requests = new ArrayList<>();
        for (final String url : childUrls) {
            requests.add(RequestDataBuilder.newRequestData().get().url(url).build());
        }
        resultData.addAllUrl(convertChildUrlList(requests));

        final RequestData current = responseData.getRequestData();
        resultData.removeUrl(current);
        resultData.removeUrl(getDuplicateUrl(current));
    }

    /**
     * Always returns {@code null}, regardless of the argument.
     *
     * @param accessResultData ignored
     * @return {@code null}
     */
    public Object getData(AccessResultData accessResultData) {
        return null;
    }
}
