package com.cloudburo.grab.webcontent;

import de.l3s.boilerpipe.BoilerpipeProcessingException;
import de.l3s.boilerpipe.extractors.CommonExtractors;
import de.l3s.boilerpipe.sax.HTMLHighlighter;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;

/* loaded from: input_file:com/cloudburo/grab/webcontent/Grabber.class */
/**
 * Extracts content from web pages by handing a URL and one of boilerpipe's
 * {@link CommonExtractors} to an {@link HTMLHighlighter}.
 *
 * <p>Before a URL is processed, links containing {@code www.google.com/url} are
 * unwrapped: the value of their {@code url} query parameter is used as the real
 * target.
 */
public class Grabber {
    final HTMLHighlighter hh = HTMLHighlighter.newExtractingInstance();
    static final Logger logger = LoggerFactory.getLogger(Grabber.class);

    /** Substring that marks a link as a Google redirect link. */
    private static final String GOOGLE_REDIRECT_MARKER = "www.google.com/url";

    /** Query parameter of a Google redirect link that carries the real target. */
    private static final String REDIRECT_TARGET_PARAM = "url";

    /**
     * Processes the page with {@link CommonExtractors#ARTICLE_EXTRACTOR}.
     *
     * @param str the page URL, or a Google redirect link pointing to it
     * @param z   when {@code true}, the highlighter output is parsed with Jsoup and
     *            replaced by the text of its {@code BODY} element; when {@code false}
     *            the highlighter output is returned unchanged
     * @return a record holding the resolved URL and the extracted content
     * @throws IOException                   if the URL is malformed (including a redirect
     *                                       link without a usable {@code url} parameter)
     *                                       or processing fails with an I/O error
     * @throws BoilerpipeProcessingException if boilerpipe fails to process the page
     * @throws SAXException                  if the underlying parser reports an error
     */
    public GrabberRecord extractArticle(String str, boolean z) throws IOException, BoilerpipeProcessingException, SAXException {
        URL url = new URL(resolveGoogleRedirect(str));
        String content = this.hh.process(url, CommonExtractors.ARTICLE_EXTRACTOR);
        if (z) {
            content = Jsoup.parse(content).select("BODY").first().text();
        }
        GrabberRecord grabberRecord = new GrabberRecord();
        grabberRecord.url = url;
        grabberRecord.content = content;
        return grabberRecord;
    }

    /**
     * Processes the page with {@link CommonExtractors#DEFAULT_EXTRACTOR}.
     *
     * @param str the page URL, or a Google redirect link pointing to it
     * @return the highlighter output for the page
     * @throws IOException                   if the URL is malformed or processing fails with an I/O error
     * @throws BoilerpipeProcessingException if boilerpipe fails to process the page
     * @throws SAXException                  if the underlying parser reports an error
     */
    public String extractDefault(String str) throws IOException, BoilerpipeProcessingException, SAXException {
        return this.hh.process(new URL(resolveGoogleRedirect(str)), CommonExtractors.DEFAULT_EXTRACTOR);
    }

    /**
     * Processes the page with {@link CommonExtractors#CANOLA_EXTRACTOR}.
     *
     * <p>The method name is a misspelling of "Canola"; it is kept as-is because it is
     * part of the public interface.
     *
     * @param str the page URL, or a Google redirect link pointing to it
     * @return the highlighter output for the page
     * @throws IOException                   if the URL is malformed or processing fails with an I/O error
     * @throws BoilerpipeProcessingException if boilerpipe fails to process the page
     * @throws SAXException                  if the underlying parser reports an error
     */
    public String extractCanloa(String str) throws IOException, BoilerpipeProcessingException, SAXException {
        return this.hh.process(new URL(resolveGoogleRedirect(str)), CommonExtractors.CANOLA_EXTRACTOR);
    }

    /**
     * Processes the page with {@link CommonExtractors#LARGEST_CONTENT_EXTRACTOR}.
     *
     * @param str the page URL, or a Google redirect link pointing to it
     * @return the highlighter output for the page
     * @throws IOException                   if the URL is malformed or processing fails with an I/O error
     * @throws BoilerpipeProcessingException if boilerpipe fails to process the page
     * @throws SAXException                  if the underlying parser reports an error
     */
    public String extractLargestContent(String str) throws IOException, BoilerpipeProcessingException, SAXException {
        return this.hh.process(new URL(resolveGoogleRedirect(str)), CommonExtractors.LARGEST_CONTENT_EXTRACTOR);
    }

    /**
     * Returns the URL to process: the decoded {@code url} query parameter when
     * {@code str} is a Google redirect link, otherwise {@code str} itself.
     *
     * @param str the URL as supplied by the caller
     * @return the URL that should actually be processed
     * @throws MalformedURLException if {@code str} is {@code null}, is a redirect link
     *                               that cannot be parsed, or is a redirect link without
     *                               a non-empty {@code url} parameter
     */
    private String resolveGoogleRedirect(String str) throws MalformedURLException {
        if (str == null) {
            throw new MalformedURLException("URL must not be null");
        }
        String resolved = str;
        if (str.contains(GOOGLE_REDIRECT_MARKER)) {
            resolved = getQueryMap(new URL(str)).get(REDIRECT_TARGET_PARAM);
            if (resolved == null || resolved.isEmpty()) {
                // Fail here with a clear message instead of handing null to new URL(...).
                throw new MalformedURLException(
                        "Google redirect link has no '" + REDIRECT_TARGET_PARAM + "' parameter: " + str);
            }
        }
        logger.debug("Fetching with URL: {}", resolved);
        return resolved;
    }

    /**
     * Parses the query string of {@code url} into a map of decoded names to decoded values.
     *
     * <p>A URL without a query yields an empty map. Empty segments ({@code a=1&&b=2}) are
     * skipped, a parameter without {@code '='} maps to the empty string, and only the first
     * {@code '='} separates name from value. When a name occurs more than once the last
     * occurrence wins.
     *
     * @param url the URL whose query string is parsed
     * @return a mutable map of query parameters, never {@code null}
     * @throws MalformedURLException if a name or value contains an invalid percent escape
     */
    private Map<String, String> getQueryMap(URL url) throws MalformedURLException {
        Map<String, String> params = new HashMap<String, String>();
        String query = url.getQuery();
        if (query == null || query.isEmpty()) {
            return params;
        }
        for (String pair : query.split("&")) {
            if (pair.isEmpty()) {
                continue;
            }
            int separator = pair.indexOf('=');
            String rawName = separator < 0 ? pair : pair.substring(0, separator);
            String rawValue = separator < 0 ? "" : pair.substring(separator + 1);
            params.put(decodeQueryComponent(rawName), decodeQueryComponent(rawValue));
        }
        return params;
    }

    /**
     * Percent-decodes a single query-string component as UTF-8.
     *
     * @param component the raw (still encoded) name or value
     * @return the decoded text
     * @throws MalformedURLException if {@code component} contains an invalid percent escape
     */
    private static String decodeQueryComponent(String component) throws MalformedURLException {
        try {
            return URLDecoder.decode(component, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        } catch (IllegalArgumentException e) {
            MalformedURLException malformed =
                    new MalformedURLException("Invalid percent-encoding in query component: " + component);
            malformed.initCause(e);
            throw malformed;
        }
    }
}
