package org.archive.crawler.extractor;

import java.util.logging.Logger;
import org.archive.crawler.datamodel.CrawlURI;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/extractor/AggressiveExtractorHTML.class */
/**
 * HTML extractor that additionally treats the contents of script blocks as
 * if they were HTML.
 *
 * <p>Everything else is inherited from {@link ExtractorHTML}; only script
 * handling and the textual report are overridden here. As the processor
 * description passed to the superclass notes, re-reading script code as HTML
 * may cause many false positives.
 */
public class AggressiveExtractorHTML extends ExtractorHTML {
    private static final long serialVersionUID = 3586060081186247087L;
    static Logger logger = Logger.getLogger(AggressiveExtractorHTML.class.getName());

    /**
     * Creates the extractor, handing the superclass a fixed description of
     * what this processor does.
     *
     * @param str first argument forwarded unchanged to the
     *            {@link ExtractorHTML} constructor (presumably the processor
     *            name -- confirm against the superclass)
     */
    public AggressiveExtractorHTML(String str) {
        super(str, "Aggressive HTML extractor. Subclasses ExtractorHTML  so does all that it does, except in regard to javascript  blocks.  Here  it first processes as JS as its parent does, but then it  reruns through the JS treating it as HTML (May cause many  false positives). It finishes by applying heuristics  against script code looking for possible URIs. ");
    }

    /**
     * Processes a script block twice: first through the inherited script
     * handling, then again through the general-tag handling.
     *
     * <p>NOTE(review): visibility is {@code public} here because the
     * decompiler widened it from {@code protected}; it is left untouched so
     * it stays compatible with whatever the (not visible) superclass declares.
     *
     * @param crawlURI     URI being processed; forwarded to both handlers
     * @param charSequence text of the matched script block
     * @param i            offset into {@code charSequence} at which the part
     *                     handed to the general-tag pass begins (presumably
     *                     the end of the opening tag -- confirm against
     *                     {@code ExtractorHTML})
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // org.archive.crawler.extractor.ExtractorHTML
    public void processScript(CrawlURI crawlURI, CharSequence charSequence, int i) {
        // Regular script handling, exactly as the parent does it.
        super.processScript(crawlURI, charSequence, i);

        // Second pass: feed the same block to the general-tag handler. The
        // first six characters are passed as the first argument (presumably
        // the element name "script" -- confirm against the caller), and
        // everything from offset i onwards as the second.
        CharSequence leadingSixChars = charSequence.subSequence(0, 6);
        CharSequence remainder = charSequence.subSequence(i, charSequence.length());
        processGeneralTag(crawlURI, leadingSixChars, remainder);
    }

    /**
     * Builds a short plain-text summary of this processor.
     *
     * <p>The header line names this class. It previously named
     * {@code ExtractorHTML2}, which mislabelled the counters in the report.
     *
     * @return report containing the processor name, a one-line description
     *         and the inherited handled-URI and extracted-link counters
     */
    @Override // org.archive.crawler.extractor.ExtractorHTML, org.archive.crawler.framework.Processor
    public String report() {
        // Local buffer only, so the unsynchronized StringBuilder is sufficient.
        StringBuilder summary = new StringBuilder(256);
        summary.append("Processor: ").append(AggressiveExtractorHTML.class.getName()).append("\n");
        summary.append("  Function:          Link extraction on HTML documents (including embedded CSS)\n");
        summary.append("  CrawlURIs handled: ").append(this.numberOfCURIsHandled).append("\n");
        summary.append("  Links extracted:   ").append(this.numberOfLinksExtracted).append("\n\n");
        return summary.toString();
    }
}
