package org.archive.crawler.extractor;

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.CrawlController;
import org.archive.io.ReplayCharSequence;
import org.archive.io.warc.WARCConstants;
import org.archive.util.DevUtils;
import org.archive.util.TextUtils;
import org.archive.util.iterator.RegexpLineIterator;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/extractor/ExtractorCSS.class */
/**
 * Processor that pulls URIs out of CSS content.
 *
 * <p>A fetched document is examined when its content type contains
 * {@code "css"} or its URI ends in {@code ".css"} (both compared in lower
 * case). Every {@code @import} / {@code url(...)} reference matched by
 * {@link #CSS_URI_EXTRACTOR} is added to the {@link CrawlURI} as a link
 * relative to its base.
 *
 * <p>NOTE(review): the two counters are plain {@code long} fields updated
 * without synchronization; confirm whether {@link #extract(CrawlURI)} can be
 * invoked from several threads at once.
 */
public class ExtractorCSS extends Extractor implements CoreAttributeConstants {
    private static final long serialVersionUID = -1540252885329424902L;

    private static final Logger logger = Logger.getLogger("org.archive.crawler.extractor.ExtractorCSS");

    /**
     * Text replaced by a bare {@code &} in every extracted URI.
     *
     * <p>NOTE(review): there is no trailing semicolon, so an input containing
     * {@code &amp;} ends up as {@code &;}. Left as-is because it may be
     * deliberate; confirm the intended value.
     */
    private static final String ESCAPED_AMP = "&amp";

    /**
     * Regex matching a backslash followed by one of: comma, single quote,
     * double quote, parenthesis or whitespace. Group 1 captures the character
     * that follows the backslash.
     */
    static final String CSS_BACKSLASH_ESCAPE = "\\\\([,'\"\\(\\)\\s])";

    /**
     * Case-insensitive regex for CSS URI references: {@code @import }
     * (optionally followed by {@code url(}) or a plain {@code url(}, optional
     * whitespace, an optional quote (group 1), the URI itself (group 2: one
     * non-quote character plus at most 2083 further characters, matched
     * reluctantly), the same quote again, optional whitespace and finally
     * {@code )} or {@code ;}.
     */
    static final String CSS_URI_EXTRACTOR = "(?i)(?:@import (?:url[(]|)|url[(])\\s*([\\\"']?)([^\\\"'].{0,2083}?)\\1\\s*[);]";

    /** Number of CrawlURIs that passed the CSS check in {@link #extract(CrawlURI)}. */
    private long numberOfCURIsHandled;

    /** Running total of the values returned by {@link #processStyleCode}. */
    private long numberOfLinksExtracted;

    /**
     * Creates the extractor with a fixed description.
     *
     * @param str name passed through to the {@code Extractor} constructor
     */
    public ExtractorCSS(String str) {
        super(str, "CSS Extractor. Extracts links from Cascading Style Sheets (.css).");
        this.numberOfCURIsHandled = 0L;
        this.numberOfLinksExtracted = 0L;
    }

    /**
     * Extracts links from {@code crawlURI} if it looks like a style sheet.
     *
     * <p>Nothing happens when the URI has no HTTP content to process, when it
     * has no content type, or when neither the content type nor the URI
     * suffix indicates CSS. Otherwise the recorded content is scanned, the
     * counters are updated and the URI is marked as finished for link
     * extraction. The replay sequence is always closed afterwards.
     *
     * @param crawlURI the URI whose recorded content should be scanned
     */
    @Override // org.archive.crawler.extractor.Extractor
    public void extract(CrawlURI crawlURI) {
        if (!isHttpTransactionContentToProcess(crawlURI)) {
            return;
        }
        String contentType = crawlURI.getContentType();
        if (contentType == null) {
            return;
        }
        boolean cssByType = contentType.toLowerCase().indexOf("css") >= 0;
        if (!cssByType && !crawlURI.toString().toLowerCase().endsWith(".css")) {
            return;
        }

        this.numberOfCURIsHandled++;

        ReplayCharSequence replayCharSequence = null;
        try {
            replayCharSequence = crawlURI.getHttpRecorder().getReplayCharSequence();
        } catch (IOException e) {
            // Attach the exception itself so the stack trace is not lost.
            logger.log(Level.SEVERE, "Failed getting ReplayCharSequence: " + e.getMessage(), e);
        }
        if (replayCharSequence == null) {
            logger.warning("Failed getting ReplayCharSequence: " + crawlURI.toString());
            return;
        }

        try {
            this.numberOfLinksExtracted += processStyleCode(crawlURI, replayCharSequence, getController());
            crawlURI.linkExtractorFinished();
        } finally {
            // Single close path for both normal completion and failure; a
            // failing close is only logged so it cannot hide an exception
            // thrown by the extraction above.
            try {
                replayCharSequence.close();
            } catch (IOException e) {
                logger.warning(TextUtils.exceptionToString("Failed close of ReplayCharSequence.", e));
            }
        }
    }

    /**
     * Scans {@code charSequence} for CSS URI references and adds each one to
     * {@code crawlURI} as a link relative to its base.
     *
     * <p>Each match is counted even when creating the link fails with a
     * {@link URIException}; such failures are reported through
     * {@code crawlController} when it is non-null and logged at INFO level
     * otherwise. A {@link StackOverflowError} raised while matching is handed
     * to {@link DevUtils#warnHandle} and the count gathered so far is
     * returned.
     *
     * @param crawlURI        URI that receives the discovered links
     * @param charSequence    style code to scan
     * @param crawlController controller used for URI error logging; may be
     *                        {@code null}
     * @return number of URI references matched
     */
    public static long processStyleCode(CrawlURI crawlURI, CharSequence charSequence, CrawlController crawlController) {
        long foundLinks = 0;
        Matcher uris = null;
        try {
            uris = TextUtils.getMatcher(CSS_URI_EXTRACTOR, charSequence);
            while (uris.find()) {
                // First turn ESCAPED_AMP into '&', then strip CSS backslash
                // escapes.
                // NOTE(review): the replacement comes from
                // RegexpLineIterator.ENTRY, a constant of an unrelated class;
                // presumably a group-1 back-reference ("$1") so only the
                // backslash is dropped - confirm.
                String cssUri = TextUtils.replaceAll(
                        CSS_BACKSLASH_ESCAPE,
                        TextUtils.replaceAll(ESCAPED_AMP, uris.group(2), "&"),
                        RegexpLineIterator.ENTRY);
                foundLinks++;
                try {
                    // NOTE(review): 'E' is presumably the embed hop type - confirm.
                    crawlURI.createAndAddLinkRelativeToBase(cssUri, Link.EMBED_MISC, 'E');
                } catch (URIException e) {
                    if (crawlController != null) {
                        crawlController.logUriError(e, crawlURI.getUURI(), cssUri);
                    } else {
                        logger.info(crawlURI + ", " + cssUri + WARCConstants.COLON_SPACE + e.getMessage());
                    }
                }
            }
        } catch (StackOverflowError e) {
            DevUtils.warnHandle(e, "ExtractorCSS StackOverflowError");
        } finally {
            // getMatcher may have thrown before a matcher was obtained; do
            // not hand null to recycleMatcher, which could mask that failure.
            if (uris != null) {
                TextUtils.recycleMatcher(uris);
            }
        }
        return foundLinks;
    }

    /**
     * Builds a short plain-text report containing the processor name, its
     * function and the two counters.
     *
     * @return the report text
     */
    @Override // org.archive.crawler.framework.Processor
    public String report() {
        // Method-local buffer, so the unsynchronized StringBuilder suffices.
        StringBuilder report = new StringBuilder();
        report.append("Processor: org.archive.crawler.extractor.ExtractorCSS\n");
        report.append("  Function:          Link extraction on Cascading Style Sheets (.css)\n");
        report.append("  CrawlURIs handled: ").append(this.numberOfCURIsHandled).append("\n");
        report.append("  Links extracted:   ").append(this.numberOfLinksExtracted).append("\n\n");
        return report.toString();
    }
}
