package org.archive.crawler.extractor;

import java.util.logging.Logger;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.Processor;
import org.archive.io.warc.WARCConstants;
import org.mortbay.http.HttpFields;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/extractor/ExtractorHTTP.class */
/**
 * Processor that turns selected HTTP response headers into links on the
 * {@link CrawlURI} being processed.
 *
 * <p>For every handled URI, the response headers named by
 * {@code HttpFields.__Location} and {@code HttpFields.__ContentLocation} are
 * looked up on the stored HTTP transaction; each header that is present is
 * handed to {@link #addHeaderLink(CrawlURI, Header)}.
 *
 * <p>Two running counters are kept and surfaced through {@link #report()}.
 * NOTE(review): the counters are plain {@code long} fields with no
 * synchronization in this class — confirm how the framework invokes
 * processors before relying on exact totals.
 */
public class ExtractorHTTP extends Processor implements CoreAttributeConstants {
    private static final long serialVersionUID = 8499072198570554647L;
    private static final Logger LOGGER = Logger.getLogger(ExtractorHTTP.class.getName());

    /** Number of CrawlURIs that passed the guard in {@link #innerProcess(CrawlURI)}. */
    protected long numberOfCURIsHandled = 0L;

    /** Number of header values successfully added as links. */
    protected long numberOfLinksExtracted = 0L;

    /**
     * Creates the processor under the given name with a fixed description.
     *
     * @param str name passed through to the {@link Processor} constructor
     */
    public ExtractorHTTP(String str) {
        super(str, "HTTP extractor. Extracts URIs from HTTP response headers.");
    }

    /**
     * Inspects the HTTP transaction attached to {@code crawlURI} and offers its
     * two location-style response headers for link extraction.
     *
     * <p>Nothing happens unless the URI reports an HTTP transaction and has a
     * fetch status greater than zero.
     *
     * @param crawlURI the URI whose response headers are examined
     */
    @Override
    public void innerProcess(CrawlURI crawlURI) {
        if (crawlURI.isHttpTransaction() && crawlURI.getFetchStatus() > 0) {
            this.numberOfCURIsHandled++;

            final HttpMethod transaction =
                    (HttpMethod) crawlURI.getObject(CoreAttributeConstants.A_HTTP_TRANSACTION);

            final Header location = transaction.getResponseHeader(HttpFields.__Location);
            addHeaderLink(crawlURI, location);

            final Header contentLocation =
                    transaction.getResponseHeader(HttpFields.__ContentLocation);
            addHeaderLink(crawlURI, contentLocation);
        }
    }

    /**
     * Adds the value of {@code header} to {@code crawlURI} as a link, using the
     * header name followed by a colon as the link context and {@code 'R'} as
     * the hop type.
     *
     * <p>A {@code null} header is ignored. If the value cannot be turned into
     * a URI, the failure is reported through the controller when one is
     * available, otherwise through this class's logger; the extracted-link
     * counter is only advanced on success.
     *
     * @param crawlURI the URI that receives the link
     * @param header   the response header carrying the target, or {@code null}
     */
    protected void addHeaderLink(CrawlURI crawlURI, Header header) {
        if (header != null) {
            final String target = header.getValue();
            try {
                final String context = header.getName() + ":";
                crawlURI.createAndAddLink(target, context, 'R');
                this.numberOfLinksExtracted++;
            } catch (URIException ex) {
                if (getController() == null) {
                    // No controller to report to: fall back to the class logger.
                    LOGGER.info(crawlURI + ", " + target + WARCConstants.COLON_SPACE
                            + ex.getMessage());
                } else {
                    getController().logUriError(ex, crawlURI.getUURI(), target);
                }
            }
        }
    }

    /**
     * Builds a short plain-text summary of this processor's counters.
     *
     * @return a multi-line report ending with a blank line
     */
    @Override
    public String report() {
        return new StringBuilder()
                .append("Processor: org.archive.crawler.extractor.ExtractorHTTP\n")
                .append("  Function:          Extracts URIs from HTTP response headers\n")
                .append("  CrawlURIs handled: ").append(this.numberOfCURIsHandled).append("\n")
                .append("  Links extracted:   ").append(this.numberOfLinksExtracted).append("\n\n")
                .toString();
    }
}
