package org.archive.crawler.deciderules;

import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.HttpMethod;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.settings.SimpleType;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/deciderules/NotExceedsDocumentLengthTresholdDecideRule.class */
/**
 * Predicated decide rule whose predicate is true when a document's content
 * length is strictly below a configurable threshold.
 *
 * <p>Depending on the {@value #ATTR_USE_AS_MIDFETCH} setting, the length is
 * taken either from the HTTP {@code content-length} response header or from
 * the content size recorded on the {@link CrawlURI}.
 *
 * <p>Lengths are handed to {@link #makeDecision(int, Object)} as {@code int}.
 * Values outside the {@code int} range are saturated rather than wrapped, so
 * a length of {@link Integer#MAX_VALUE} or more is reported as
 * {@code Integer.MAX_VALUE}.
 */
public class NotExceedsDocumentLengthTresholdDecideRule extends PredicatedDecideRule implements CoreAttributeConstants {
    private static final long serialVersionUID = -8774160016195991876L;

    /** Settings key of the maximum content length. */
    public static final String ATTR_CONTENT_LENGTH_TRESHOLD = "content-length-treshold";

    /** Settings key selecting header-based (midfetch) versus downloaded-size evaluation. */
    public static final String ATTR_USE_AS_MIDFETCH = "use-as-midfetch-filter";

    /** Sentinel meaning that no usable content length could be read from the header. */
    public static final int HEADER_PREDICTS_MISSING = -1;

    private static final Logger logger = Logger.getLogger(NotExceedsDocumentLengthTresholdDecideRule.class.getName());

    /** Default threshold; {@code -1} is mapped to "no limit" by {@link #getContentLengthTreshold(Object)}. */
    static final Integer DEFAULT_CONTENT_LENGTH_TRESHOLD = -1;

    /** Default evaluation mode: use the HTTP header. */
    static final Boolean DEFAULT_USE_AS_MIDFETCH = Boolean.TRUE;

    /**
     * Creates the rule and registers its description and its two settings.
     *
     * @param str name passed through to the superclass constructor
     */
    public NotExceedsDocumentLengthTresholdDecideRule(String str) {
        super(str);
        setDescription("NotExceedsDocumentLengthTresholdDecideRule. REJECTs URIs with content length exceeding a given treshold. Either examines HTTP header content length or actual downloaded content length and returns false for documents exceeding a given length treshold.");
        addElementToDefinition(new SimpleType(ATTR_USE_AS_MIDFETCH, "Shall this rule be used as a midfetch rule? If true, this rule will determine content length based on HTTP header information, otherwise the size of the already downloaded content will be used.", DEFAULT_USE_AS_MIDFETCH));
        addElementToDefinition(new SimpleType(ATTR_CONTENT_LENGTH_TRESHOLD, "Max content-length this filter will allow to pass through. If -1, then no limit.", DEFAULT_CONTENT_LENGTH_TRESHOLD));
    }

    /**
     * Evaluates the length predicate for the given object.
     *
     * @param obj the object under evaluation; expected to be a {@link CrawlURI}
     * @return the result of {@link #makeDecision(int, Object)} for the
     *         determined content length; {@code false} when {@code obj} (or a
     *         value read along the way) has an unexpected type, when the HTTP
     *         transaction is absent in midfetch mode, or when the header
     *         yields no usable length
     */
    @Override // org.archive.crawler.deciderules.PredicatedDecideRule
    protected boolean evaluate(Object obj) {
        try {
            CrawlURI crawlURI = (CrawlURI) obj;
            int contentLength;
            if (getIsMidfetchRule(obj).booleanValue()) {
                if (!crawlURI.containsKey(CoreAttributeConstants.A_HTTP_TRANSACTION)) {
                    if (logger.isLoggable(Level.INFO)) {
                        logger.info("Error: Missing HttpMethod object in CrawlURI. " + crawlURI.toString());
                    }
                    return false;
                }
                HttpMethod httpMethod = (HttpMethod) crawlURI.getObject(CoreAttributeConstants.A_HTTP_TRANSACTION);
                String headerValue = null;
                if (httpMethod.getResponseHeader("content-length") != null) {
                    headerValue = httpMethod.getResponseHeader("content-length").getValue();
                }
                contentLength = parseContentLength(headerValue);
                if (contentLength == HEADER_PREDICTS_MISSING) {
                    return false;
                }
            } else {
                // Saturate instead of a plain (int) cast: a size above
                // Integer.MAX_VALUE would otherwise wrap around and could
                // compare as smaller than the threshold.
                contentLength = saturateToInt(crawlURI.getContentSize());
            }
            return makeDecision(contentLength, obj).booleanValue();
        } catch (ClassCastException e) {
            // Kept deliberately: any unexpected type (the evaluated object,
            // the stored HTTP transaction, or a settings value) makes the
            // predicate false rather than failing the caller.
            return false;
        }
    }

    /**
     * Compares a content length against the configured threshold.
     *
     * @param i   the content length to test
     * @param obj context object used to look up the threshold setting
     * @return {@code Boolean.TRUE} if {@code i} is strictly less than the
     *         threshold, otherwise {@code Boolean.FALSE}
     */
    protected Boolean makeDecision(int i, Object obj) {
        return Boolean.valueOf(i < getContentLengthTreshold(obj));
    }

    /**
     * Reads the configured threshold.
     *
     * @param obj context object used for the settings lookup
     * @return the configured value, or {@link Integer#MAX_VALUE} when the
     *         setting is {@code -1}
     */
    protected int getContentLengthTreshold(Object obj) {
        int configured = ((Integer) getUncheckedAttribute(obj, ATTR_CONTENT_LENGTH_TRESHOLD)).intValue();
        if (configured == -1) {
            return Integer.MAX_VALUE;
        }
        return configured;
    }

    /**
     * Reads the midfetch setting.
     *
     * @param obj context object used for the settings lookup
     * @return the configured {@link #ATTR_USE_AS_MIDFETCH} value
     */
    private Boolean getIsMidfetchRule(Object obj) {
        return (Boolean) getUncheckedAttribute(obj, ATTR_USE_AS_MIDFETCH);
    }

    /**
     * Parses a {@code content-length} header value.
     *
     * <p>The value is parsed as a {@code long} so that lengths above
     * {@link Integer#MAX_VALUE} are recognised as very large instead of being
     * rejected as malformed. Values that do not fit a {@code long} either are
     * still treated as unparsable.
     *
     * @param headerValue raw header value, may be {@code null}
     * @return the length saturated to the {@code int} range, or
     *         {@link #HEADER_PREDICTS_MISSING} when the value is absent,
     *         blank or not a number
     */
    private static int parseContentLength(String headerValue) {
        if (headerValue == null) {
            return HEADER_PREDICTS_MISSING;
        }
        String trimmed = headerValue.trim();
        if (trimmed.length() == 0) {
            return HEADER_PREDICTS_MISSING;
        }
        try {
            return saturateToInt(Long.parseLong(trimmed));
        } catch (NumberFormatException ignored) {
            // Best effort: a malformed header is handled like a missing one.
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Ignoring unparsable content-length header value: " + headerValue);
            }
            return HEADER_PREDICTS_MISSING;
        }
    }

    /**
     * Converts a {@code long} to an {@code int}, clamping to the {@code int}
     * range instead of wrapping.
     *
     * @param value the value to convert
     * @return {@code value} if it fits, otherwise the nearest {@code int} bound
     */
    private static int saturateToInt(long value) {
        if (value > Integer.MAX_VALUE) {
            return Integer.MAX_VALUE;
        }
        if (value < Integer.MIN_VALUE) {
            return Integer.MIN_VALUE;
        }
        return (int) value;
    }
}
