package itez.plat.site.service.impl;

import com.beust.jcommander.internal.Lists;
import com.beust.jcommander.internal.Sets;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import itez.core.runtime.service.Define;
import itez.core.runtime.service.EModelService;
import itez.core.wrapper.dbo.model.Query;
import itez.core.wrapper.dbo.model.Querys;
import itez.kit.EArr;
import itez.kit.EClean;
import itez.kit.EDate;
import itez.kit.EHttp;
import itez.kit.EProp;
import itez.kit.ERegex;
import itez.kit.EStr;
import itez.kit.EUid;
import itez.kit.fileup.EFileKit;
import itez.kit.restful.EMap;
import itez.plat.main.service.impl.ImportSeviceImpl;
import itez.plat.site.ModuleConfig;
import itez.plat.site.model.Channel;
import itez.plat.site.model.CollectorItem;
import itez.plat.site.model.CollectorSrc;
import itez.plat.site.model.CollectorTask;
import itez.plat.site.model.Content;
import itez.plat.site.service.ChannelService;
import itez.plat.site.service.CollectorItemService;
import itez.plat.site.service.CollectorSrcService;
import itez.plat.site.service.CollectorTaskService;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import javax.imageio.ImageIO;
import org.apache.commons.lang3.ArrayUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.select.Elements;

@Singleton
@Define
/* loaded from: input_file:itez/plat/site/service/impl/CollectorTaskServiceImpl.class */
public class CollectorTaskServiceImpl extends EModelService<CollectorTask> implements CollectorTaskService {

    /** Channel service; {@code collItem} uses it to look up the task's channel by its code. */
    @Inject
    ChannelService chnSer;

    /** Collector source service; supplies the {@code CollectorSrc} (selectors and charsets) referenced by a task. */
    @Inject
    CollectorSrcService srcSer;

    /** Collector item service; supplies the stored URL hashes and the items that belong to a task. */
    @Inject
    CollectorItemService itemSer;
    /**
     * HTML whitelist handed to {@code EClean.clean} when the collected article body is sanitized
     * in {@code collItemDetail}. It starts from jsoup's {@code Whitelist.basic()}, adds structural
     * tags (div, span, headings, img, table family, ...) and allows {@code style}/{@code class} on
     * the listed tags plus a few tag-specific attributes ({@code href}, {@code src},
     * {@code colspan}, ...).
     * NOTE(review): {@code ImportSeviceImpl.COLS_KEY_TYPE} is presumably the string "type" (the
     * list-style attribute of ul/ol) that the decompiler folded into an unrelated constant — confirm.
     */
    private Whitelist whitelist = Whitelist.basic().addTags(new String[]{"a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "i", "img", "li", "ol", "p", "pre", "small", "span", "strike", "strong", "sub", "sup", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul"}).addAttributes("div", new String[]{"style", "class"}).addAttributes("p", new String[]{"style", "class"}).addAttributes("span", new String[]{"style", "class"}).addAttributes("i", new String[]{"style", "class"}).addAttributes("a", new String[]{"href", "title", "style", "class"}).addAttributes("img", new String[]{"src", "title", "style", "class"}).addAttributes("ul", new String[]{ImportSeviceImpl.COLS_KEY_TYPE, "style", "class"}).addAttributes("ol", new String[]{"start", ImportSeviceImpl.COLS_KEY_TYPE, "style", "class"}).addAttributes("table", new String[]{"summary", "style", "class"}).addAttributes("td", new String[]{"abbr", "axis", "colspan", "rowspan", "style", "class"}).addAttributes("th", new String[]{"abbr", "axis", "colspan", "rowspan", "scope", "style", "class"});

    /**
     * Lists the collector tasks attached to one channel.
     *
     * @param str channel code compared for equality against the {@code channelCode} column
     * @return whatever {@code select} yields for that single-condition query
     */
    @Override // itez.plat.site.service.CollectorTaskService
    public List<CollectorTask> getTasks(String str) {
        final String channelColumn = "channelCode";
        return select(
                Querys.and(
                        Query.eq(channelColumn, str)));
    }

    /**
     * Finds the first task that matches a channel, a source and a start URL at the same time.
     *
     * @param str  channel code ({@code channelCode} column)
     * @param str2 source id ({@code srcId} column)
     * @param str3 list URL ({@code url} column)
     * @return the first row matching all three equality conditions, as returned by {@code selectFirst}
     */
    @Override // itez.plat.site.service.CollectorTaskService
    public CollectorTask getTask(String str, String str2, String str3) {
        final String channelColumn = "channelCode";
        final String sourceColumn = "srcId";
        final String urlColumn = "url";
        return selectFirst(
                Querys.and(Query.eq(channelColumn, str))
                        .add(Query.eq(sourceColumn, str2))
                        .add(Query.eq(urlColumn, str3)));
    }

    /**
     * Crawls the list pages of a task and stores every article link that is not known yet.
     *
     * <p>The hashes of already stored item URLs are read as one comma-separated string from
     * {@code itemSer.getItemsHash}; links whose URL hash is in that set are skipped by
     * {@code collListPage}. New items are saved in a single transaction, in batches of 50.
     *
     * @param collectorTask task providing the id, the source id and the start URL
     * @return number of newly discovered items handed to the batch save
     * @throws IllegalStateException if the task references a source that cannot be found
     * @throws NumberFormatException if the stored hash list contains a non-numeric token
     */
    @Override // itez.plat.site.service.CollectorTaskService
    public int collList(CollectorTask collectorTask) {
        // Fail fast with a readable message instead of an NPE after the first HTTP fetch.
        CollectorSrc source = this.srcSer.mo124findById(collectorTask.getSrcId());
        if (source == null) {
            throw new IllegalStateException("采集源不存在：" + collectorTask.getSrcId());
        }

        String itemsHash = this.itemSer.getItemsHash(collectorTask.getId());
        Integer[] knownHashes = new Integer[0];
        if (EStr.notEmpty(itemsHash)) {
            // Tolerate padding and empty tokens ("1, 2,,3") rather than aborting the whole crawl.
            knownHashes = Arrays.stream(itemsHash.split(","))
                    .map(String::trim)
                    .filter(token -> !token.isEmpty())
                    .map(Integer::valueOf)
                    .toArray(Integer[]::new);
        }

        List<CollectorItem> found = Lists.newArrayList();
        // Direction 0: start page, follow both the "previous" and the "next" pager links.
        collListPage(collectorTask, source, Sets.newHashSet(), knownHashes, found, collectorTask.getUrl(), 0);
        if (!found.isEmpty()) {
            dbo().tx(() -> EArr.vali(new int[]{dbo().batchSave(found, 50)}));
        }
        return found.size();
    }

    /**
     * Collects the article links of one list page and then follows its pager links recursively.
     *
     * <p>Fixes over the previous version: a failed download (null body) ends this branch instead
     * of raising a NullPointerException, the charset comparison tolerates a null charset, and a
     * URL that was already queued earlier in the same crawl is not queued a second time.
     *
     * @param collectorTask task the new items are attached to
     * @param collectorSrc  source definition holding the list/link/pager selectors and the charset
     * @param set           hashes of the page URLs visited so far; updated by this method
     * @param numArr        URL hashes of items that are already stored; such links are skipped
     * @param list          output list that receives the newly created items
     * @param str           URL of the page to load (a {@code #fragment} is ignored)
     * @param i             paging direction: 0 follows both pager links, -1 only the previous
     *                      link, 1 only the next link
     */
    private void collListPage(CollectorTask collectorTask, CollectorSrc collectorSrc, Set<Integer> set, Integer[] numArr, List<CollectorItem> list, String str, int i) {
        String domain = $domain();
        String pageUrl = getUrlContent(str);
        // Set.add reports false for a page that was visited before, which ends pager loops.
        if (!set.add(Integer.valueOf(pageUrl.hashCode()))) {
            return;
        }
        byte[] body = EHttp.me.getByte(pageUrl);
        if (body == null) {
            // Download failed: nothing to parse on this page, keep what was collected so far.
            return;
        }
        Document page = Jsoup.parse(new String(body, "UTF8".equals(collectorSrc.getCharsetList()) ? EStr.UTF_8 : EStr.GBK), pageUrl);

        Elements listAreas = page.select(collectorSrc.getElList());
        if (listAreas.size() > 0) {
            for (Element link : listAreas.select(collectorSrc.getElLink())) {
                String itemUrl = getUrlContent(link.absUrl("href"));
                String caption = link.text();
                if (!EStr.notEmpty(caption) || !EStr.notEmpty(itemUrl) || itemUrl.startsWith("javascript")) {
                    continue;
                }
                int urlHash = itemUrl.hashCode();
                if (ArrayUtils.contains(numArr, Integer.valueOf(urlHash))) {
                    continue; // stored by an earlier crawl
                }
                if (collListHasUrl(list, itemUrl)) {
                    continue; // already queued in this crawl, e.g. a pinned entry repeated on every page
                }
                CollectorItem item = new CollectorItem();
                item.setId(EUid.generator()).setDomain(domain).setTaskId(collectorTask.getId());
                item.setUrl(itemUrl).setUrlHash(Integer.valueOf(urlHash)).setCaption(caption);
                item.setState(false).setCdate(EDate.getDate());
                list.add(item);
            }
        }

        if (i == 0 || i == -1) {
            String previous = collPagerUrl(page, collectorSrc.getElPrev());
            if (EStr.notEmpty(previous)) {
                collListPage(collectorTask, collectorSrc, set, numArr, list, previous, -1);
            }
        }
        if (i == 0 || i == 1) {
            String next = collPagerUrl(page, collectorSrc.getElNext());
            if (EStr.notEmpty(next)) {
                collListPage(collectorTask, collectorSrc, set, numArr, list, next, 1);
            }
        }
    }

    /** Tells whether an item with exactly this URL is already present in the pending list. */
    private boolean collListHasUrl(List<CollectorItem> items, String url) {
        for (CollectorItem existing : items) {
            if (url.equals(existing.getUrl())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Resolves the target of a pager link: the fragment-free absolute href of the first element
     * matched by {@code selector}, or "" when the selector is empty, matches nothing, or points
     * to a javascript pseudo URL.
     */
    private String collPagerUrl(Document page, String selector) {
        if (!EStr.notEmpty(selector)) {
            return "";
        }
        Elements matches = page.select(selector);
        if (matches.size() == 0) {
            return "";
        }
        String target = getUrlContent(matches.get(0).absUrl("href"));
        return target.startsWith("javascript") ? "" : target;
    }

    /**
     * Downloads the article behind each item of a task and stores the resulting contents.
     *
     * <p>Items whose page cannot be fetched or matched are skipped silently
     * ({@code collItemDetail} is called in non-strict mode). The new contents and the items that
     * were actually collected are written in one transaction, in batches of 50. Items that failed
     * are left untouched, so they are no longer part of the batch update.
     *
     * @param collectorTask task whose items are processed
     * @param bool          {@code true} to process every item of the task (a {@code null} state
     *                      filter is passed to {@code getItems}), {@code false} to pass
     *                      {@code false} as the state filter
     * @param bool2         forwarded to {@code collItemDetail}: whether the date found on the page
     *                      is used as the content's creation/modification date
     * @return number of contents that were collected
     */
    @Override // itez.plat.site.service.CollectorTaskService
    public int collItem(CollectorTask collectorTask, Boolean bool, Boolean bool2) {
        Channel channel = (Channel) this.chnSer.findByCode(collectorTask.getChannelCode());
        String domain = $domain();
        String author = $comp().getCaption();
        CollectorSrc source = this.srcSer.mo124findById(collectorTask.getSrcId());

        Boolean stateFilter = bool.booleanValue() ? null : Boolean.FALSE;
        List<Content> contents = Lists.newArrayList();
        List<CollectorItem> collected = Lists.newArrayList();
        for (CollectorItem item : this.itemSer.getItems(collectorTask.getId(), stateFilter)) {
            Content content = collItemDetail(domain, channel, source, bool2, item, false);
            if (content == null) {
                continue;
            }
            content.setAuthor(author);
            contents.add(content);
            // collItemDetail only modifies the item when it succeeds, so only these need an update.
            collected.add(item);
        }

        if (!contents.isEmpty()) {
            dbo().tx(() -> EArr.vali(new int[]{dbo().batchSave(contents, 50), dbo().batchUpdate(collected, 50)}));
        }
        return contents.size();
    }

    /**
     * Downloads one article page and converts it into a {@code Content}.
     *
     * <p>Steps, in order: fetch and parse the page, locate the caption and body areas, find a
     * publish date, copy the body images to local storage (rewriting their {@code src}),
     * sanitize the body HTML with the class whitelist, read the optional sub caption, summary and
     * cover, then fill the content and mark the item as collected. The item is only modified when
     * a content is returned.
     *
     * @param str           domain written to the content and used as the upload folder prefix
     * @param channel       target channel; its id and caption are copied to the content
     * @param collectorSrc  source definition holding the selectors and the page charset
     * @param bool          {@code true} to use the date found on the page as creation and
     *                      modification date, {@code false} to use the current date
     * @param collectorItem item to collect; updated with content id, caption, state and dates
     * @param z             strict mode: {@code true} throws on failure, {@code false} returns null
     * @return the new content, or {@code null} when the page cannot be fetched or matched and
     *         {@code z} is {@code false}
     * @throws RuntimeException in strict mode when the download fails or the caption/body
     *                          selector matches nothing
     */
    @Override // itez.plat.site.service.CollectorTaskService
    public Content collItemDetail(String str, Channel channel, CollectorSrc collectorSrc, Boolean bool, CollectorItem collectorItem, boolean z) {
        String url = collectorItem.getUrl();
        byte[] body = EHttp.me.getByte(url);
        if (body == null) {
            return collDetailAbort(z, "访问URL失败！");
        }
        // Constant-first equals: a source without a configured charset falls back to GBK instead of an NPE.
        Document page = Jsoup.parse(new String(body, "UTF8".equals(collectorSrc.getCharsetItem()) ? EStr.UTF_8 : EStr.GBK), url);
        Elements captionEls = page.select(collectorSrc.getElCaption());
        Elements contentEls = page.select(collectorSrc.getElContent());
        if (captionEls.size() == 0) {
            return collDetailAbort(z, "无法匹配文章标题区域！");
        }
        if (contentEls.size() == 0) {
            return collDetailAbort(z, "无法匹配文章正文区域！");
        }

        Date now = EDate.getDate();
        Date published = collDetailDate(page, collectorSrc.getElDate());
        if (published == null) {
            published = now;
        }

        String caption = captionEls.text();
        // Images are localized before sanitizing so the cleaned HTML already carries the new src values.
        String firstImage = collDetailImages(str, contentEls);
        formatContent(contentEls);
        String html = EClean.clean(contentEls.outerHtml(), this.whitelist);

        String subCaption = collDetailText(page, collectorSrc.getElSubCaption());
        String summary = collDetailText(page, collectorSrc.getElSummary());
        String cover = collDetailCover(str, page, collectorSrc.getElCover());
        if (EStr.isEmpty(cover) && EStr.notEmpty(firstImage)) {
            // No usable cover: fall back to the first body image that was stored.
            cover = firstImage;
        }

        Date stamp = bool.booleanValue() ? published : now;
        String contentId = EUid.generator();
        Content content = new Content();
        content.setId(contentId).setDomain(str).setChannelId(channel.getId()).setChannelCaption(channel.getCaption());
        content.setCaption(caption).setContent(html);
        content.setSubCaption(subCaption).setSummary(summary).setPic(cover).setThum(cover);
        content.setCdate(stamp).setMdate(stamp);
        content.setCaptionColor("").setLink("").setSort(0).setUsed(1);
        collectorItem.setContId(contentId).setCaption(caption).setState(true).setOdate(published).setMdate(now);
        return content;
    }

    /** Ends a collection attempt: throws in strict mode, otherwise yields {@code null}. */
    private Content collDetailAbort(boolean strict, String message) {
        if (strict) {
            throw new RuntimeException(message);
        }
        return null;
    }

    /** Returns the first date recognised in the elements matched by {@code selector}, or null. */
    private Date collDetailDate(Document page, String selector) {
        if (!EStr.notEmpty(selector)) {
            return null;
        }
        for (Element candidate : page.select(selector)) {
            String text = candidate.text();
            if (EStr.isEmpty(text)) {
                continue;
            }
            Date parsed = matchDate(text);
            if (parsed != null) {
                return parsed;
            }
        }
        return null;
    }

    /**
     * Stores every http(s) image of the body locally and points its {@code src} at the stored
     * copy; images that cannot be stored keep their original attributes.
     *
     * @return the stored URL of the first image that succeeded, or "" when there is none
     */
    private String collDetailImages(String domain, Elements content) {
        String first = "";
        for (Element img : content.select("img")) {
            String remote = EStr.findUseful(img.absUrl("src"), img.absUrl("data-src"));
            if (EStr.isEmpty(remote) || !remote.startsWith("http")) {
                continue;
            }
            String stored = collImg(domain, remote);
            if (EStr.isEmpty(stored)) {
                continue;
            }
            if (EStr.isEmpty(first)) {
                first = stored;
            }
            img.attr("src", stored);
        }
        return first;
    }

    /** Returns the combined text of the elements matched by {@code selector}; null if unset or unmatched. */
    private String collDetailText(Document page, String selector) {
        if (!EStr.notEmpty(selector)) {
            return null;
        }
        Elements matches = page.select(selector);
        return matches.size() > 0 ? matches.text() : null;
    }

    /**
     * Resolves the cover image from the first element matched by {@code selector} and stores it
     * locally. Returns null when the selector is unset, unmatched, or the image cannot be stored.
     */
    private String collDetailCover(String domain, Document page, String selector) {
        if (!EStr.notEmpty(selector)) {
            return null;
        }
        Elements matches = page.select(selector);
        if (matches.size() == 0) {
            return null;
        }
        Element first = matches.get(0);
        String remote = EStr.findUseful(first.absUrl("src"), first.absUrl("data-src"));
        return EStr.notEmpty(remote) ? collImg(domain, remote) : remote;
    }

    /**
     * Makes the matched body elements visible by rewriting a {@code visibility: hidden}
     * declaration in their inline {@code style} to {@code visibility: visible}.
     *
     * <p>Only the elements in {@code elements} themselves are touched, not their descendants.
     * The match ignores case and whitespace around the colon ({@code visibility:hidden},
     * {@code VISIBILITY : hidden}), and no longer rewrites longer property names that merely end
     * in "visibility", such as {@code backface-visibility}.
     *
     * @param elements elements whose inline style is adjusted in place
     */
    private void formatContent(Elements elements) {
        for (Element element : elements) {
            String style = element.attr("style");
            if (EStr.notEmpty(style)) {
                // (?<![\w-]) keeps "backface-visibility: hidden" and similar properties intact.
                element.attr("style", style.replaceAll("(?i)(?<![\\w-])visibility\\s*:\\s*hidden", "visibility: visible"));
            }
        }
    }

    /**
     * Downloads a remote image, re-encodes it into a temporary file and uploads that file.
     *
     * <p>Fixes over the previous version:
     * <ul>
     *   <li>each call uses its own temporary file, so concurrent collections on this singleton
     *       no longer overwrite each other's {@code thu.<ext>} file;</li>
     *   <li>an undecodable image ({@code ImageIO.read} returns null) and a failed encode
     *       ({@code ImageIO.write} returns false when no writer fits) now yield {@code null}
     *       instead of uploading whatever an earlier call left in the temporary file;</li>
     *   <li>the temporary file is removed if the upload left it behind.</li>
     * </ul>
     *
     * @param str  domain, used as the first segment of the upload folder
     * @param str2 absolute URL of the image
     * @return the value returned by {@code EFileKit.upload}, or {@code null} when the image
     *         cannot be downloaded, decoded, written or uploaded
     */
    private String collImg(String str, String str2) {
        byte[] data = EHttp.me.getByte(str2);
        if (data == null) {
            return null;
        }
        // Image format is taken from the URL (".jpg", "=png", ...); jpg when nothing matches.
        String format = ERegex.findFirst(str2, "[\\.\\=](jpg|png|gif|bmp)");
        if (EStr.isEmpty(format)) {
            format = "jpg";
        }
        String targetDir = String.join(EStr.FileSep, str, ModuleConfig.MODULE_CODE);
        File tempFile = new File(EProp.FileUploadTemp.concat("thu-").concat(EUid.generator()).concat(".").concat(format));
        try (ByteArrayInputStream in = new ByteArrayInputStream(data)) {
            BufferedImage image = ImageIO.read(in);
            if (image == null || !ImageIO.write(image, format, tempFile)) {
                return null;
            }
            return EFileKit.upload(tempFile, targetDir);
        } catch (Exception e) {
            // Best effort: a broken image must not abort the article; details only in dev mode.
            if (EProp.DevMode.booleanValue()) {
                e.printStackTrace();
            }
            return null;
        } finally {
            // No-op when the upload moved the file; cleans up when it copied it or never ran.
            if (tempFile.exists()) {
                tempFile.delete();
            }
        }
    }

    /**
     * Extracts a calendar date such as {@code 2020-01-05}, {@code 2020/1/5} or
     * {@code 2020年1月5日} from free text.
     *
     * <p>The search pattern also accepts mixed separators ({@code 2020-01/05}) and a missing
     * trailing {@code 日}, which the parse formats below do not. The matched text is therefore
     * rebuilt from its three numeric fields with uniform separators before it is parsed. Text
     * that was already uniform is passed on unchanged.
     *
     * @param str text that may contain a date
     * @return the parsed date, or {@code null} when {@code str} is empty or contains no date
     */
    private Date matchDate(String str) {
        if (EStr.isEmpty(str)) {
            return null;
        }
        String find = ERegex.find(str, "\\d{4}[-/年]\\d{1,2}[-/月]\\d{1,2}日?");
        if (EStr.isEmpty(find)) {
            return null;
        }
        // Year, month and day digits; a trailing 日 produces no extra field.
        String[] fields = find.split("\\D+");
        // Defensive: if the helper returned something other than one plain match, keep it as is.
        boolean wellFormed = fields.length == 3 && !fields[0].isEmpty();
        if (find.indexOf("-") > 0) {
            return EDate.parse(wellFormed ? String.join("-", fields) : find);
        }
        if (find.indexOf("/") > 0) {
            return EDate.parse(wellFormed ? String.join("/", fields) : find, "yyyy/MM/dd");
        }
        if (find.indexOf("年") > 0) {
            String normalized = wellFormed ? fields[0] + "年" + fields[1] + "月" + fields[2] + "日" : find;
            return EDate.parse(normalized, "yyyy年MM月dd日");
        }
        return null;
    }

    /**
     * Deletes the collected items of a task, optionally together with the contents created from
     * them, and resets the task's counters.
     *
     * <p>The content delete runs before the item delete and both statements receive the same
     * {@code taskId} parameter. A task row that no longer exists is tolerated: the deletes still
     * run and the counter reset is skipped, where the previous version raised a
     * NullPointerException after deleting. A {@code null} flag is treated as {@code false}.
     *
     * @param str  id of the task
     * @param bool {@code true} to also delete the collected contents and reset the item counters
     */
    @Override // itez.plat.site.service.CollectorTaskService
    public void delItem(String str, Boolean bool) {
        boolean withContent = Boolean.TRUE.equals(bool);
        EMap params = EMap.by("taskId", str);
        if (withContent) {
            dbo().update(dbo().getSqlPara("site.DelCollectorContent", params));
        }
        dbo().update(dbo().getSqlPara("site.DelCollectorItem", params));

        CollectorTask task = mo124findById(str);
        if (task == null) {
            // Task already removed: nothing left to reset.
            return;
        }
        task.setListState(0).setListSize(0);
        if (withContent) {
            task.setItemState(0).setItemSize(0);
        }
        update(task);
    }

    /**
     * Strips the fragment part from a URL.
     *
     * @param str URL, possibly empty
     * @return "" for an empty input, otherwise everything before the first {@code #}
     *         (the whole input when it contains no {@code #})
     */
    private String getUrlContent(String str) {
        if (EStr.isEmpty(str)) {
            return "";
        }
        int fragmentStart = str.indexOf("#");
        if (fragmentStart < 0) {
            return str;
        }
        return str.substring(0, fragmentStart);
    }
}
