package org.archive.crawler.prefetch;

import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.management.AttributeNotFoundException;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlHost;
import org.archive.crawler.datamodel.CrawlServer;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.CredentialStore;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.datamodel.credential.Credential;
import org.archive.crawler.datamodel.credential.CredentialAvatar;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.settings.SimpleType;
import org.archive.net.UURI;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/prefetch/PreconditionEnforcer.class */
/**
 * Processor that checks whether the preconditions for fetching a URI are met
 * and, when they are not, either defers the URI behind a prerequisite URI or
 * marks it as failed and skips it to the postprocessor chain.
 *
 * <p>Three kinds of precondition are considered, in this order: DNS
 * resolution of the host, robots.txt availability/permission, and login
 * credentials. Robots and credential checks are only applied to
 * {@code http} and {@code https} URIs.
 */
public class PreconditionEnforcer extends Processor implements CoreAttributeConstants, FetchStatusCodes {
    private static final long serialVersionUID = 4636474153589079615L;

    /** Setting name: minimum number of seconds a DNS record is considered valid. */
    public static final String ATTR_IP_VALIDITY_DURATION = "ip-validity-duration-seconds";

    /** Setting name: number of seconds fetched robots.txt information is considered valid. */
    public static final String ATTR_ROBOTS_VALIDITY_DURATION = "robot-validity-duration-seconds";

    /** Setting name: whether robots exclusions are only annotated rather than enforced. */
    public static final String ATTR_CALCULATE_ROBOTS_ONLY = "calculate-robots-only";

    private static final Logger logger = Logger.getLogger(PreconditionEnforcer.class.getName());

    /** Default DNS validity: 21600 seconds (6 hours). */
    private static final Integer DEFAULT_IP_VALIDITY_DURATION = Integer.valueOf(21600);

    /** Default robots.txt validity: 86400 seconds (24 hours). */
    private static final Integer DEFAULT_ROBOTS_VALIDITY_DURATION = Integer.valueOf(86400);

    /** By default robots exclusions are enforced, not merely annotated. */
    public static final Boolean DEFAULT_CALCULATE_ROBOTS_ONLY = Boolean.FALSE;

    /**
     * Creates the processor and registers its three expert settings.
     *
     * @param str name of this processor instance, passed to the superclass
     */
    public PreconditionEnforcer(String str) {
        super(str, "Precondition enforcer");
        addElementToDefinition(new SimpleType(ATTR_IP_VALIDITY_DURATION, "The minimum interval for which a dns-record will be considered valid (in seconds). If the record's DNS TTL is larger, that will be used instead.", DEFAULT_IP_VALIDITY_DURATION)).setExpertSetting(true);
        addElementToDefinition(new SimpleType(ATTR_ROBOTS_VALIDITY_DURATION, "The time in seconds that fetched robots.txt information is considered to be valid. If the value is set to '0', then the robots.txt information will never expire.", DEFAULT_ROBOTS_VALIDITY_DURATION)).setExpertSetting(true);
        addElementToDefinition(new SimpleType(ATTR_CALCULATE_ROBOTS_ONLY, "Whether to only calculate the robots status of an URI, without actually applying any exclusions found. If true, exlcuded URIs will only be annotated in the crawl.log, but still fetched. Default is false. ", DEFAULT_CALCULATE_ROBOTS_ONLY)).setExpertSetting(true);
    }

    /**
     * Runs the precondition checks for one URI, stopping at the first check
     * that defers or fails it.
     *
     * @param crawlURI the URI being processed
     */
    @Override // org.archive.crawler.framework.Processor
    protected void innerProcess(CrawlURI crawlURI) {
        if (considerDnsPreconditions(crawlURI)) {
            return;
        }

        // Robots and credential checks only make sense for http(s).
        String scheme = crawlURI.getUURI().getScheme().toLowerCase();
        if (!scheme.equals("http") && !scheme.equals("https")) {
            logger.fine("PolitenessEnforcer doesn't understand uri's of type " + scheme + " (ignoring)");
            return;
        }

        if (considerRobotsPreconditions(crawlURI)) {
            return;
        }

        // A URI that is itself a prerequisite is let through without a
        // credential check; the check's result is not needed here because
        // nothing follows it.
        if (!crawlURI.isPrerequisite()) {
            credentialPrecondition(crawlURI);
        }
    }

    /**
     * Checks the robots.txt precondition for a URI.
     *
     * @param crawlURI the URI being processed
     * @return {@code true} if processing of the URI should stop here (it was
     *         deferred behind a robots.txt fetch, failed its robots
     *         prerequisite, or is precluded by robots rules); {@code false}
     *         if processing may continue
     * @throws RuntimeException wrapping a {@link URIException} if the
     *         robots.txt URI cannot be resolved against the URI
     */
    private boolean considerRobotsPreconditions(CrawlURI crawlURI) {
        // A /robots.txt fetch is itself the prerequisite: flag it and let it through.
        UURI uuri = crawlURI.getUURI();
        if (uuri != null) {
            try {
                if ("/robots.txt".equals(uuri.getPath())) {
                    crawlURI.setPrerequisite(true);
                    return false;
                }
            } catch (URIException e) {
                logger.severe("Failed get of path for " + crawlURI);
            }
        }

        // No robots information yet, or it has expired: defer behind a robots.txt fetch.
        if (isRobotsExpired(crawlURI)) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("No valid robots for " + getController().getServerCache().getServerFor(crawlURI) + "; deferring " + crawlURI);
            }
            try {
                String robotsUri = crawlURI.getUURI().resolve("/robots.txt").toString();
                crawlURI.markPrerequisite(robotsUri, getController().getPostprocessorChain());
            } catch (URIException e) {
                logger.severe("Failed resolve using " + crawlURI);
                throw new RuntimeException(e);
            }
            return true;
        }

        CrawlServer server = getController().getServerCache().getServerFor(crawlURI);
        if (!server.isValidRobots()) {
            // Robots were fetched and are unexpired but not valid: fail the URI.
            crawlURI.skipToProcessorChain(getController().getPostprocessorChain());
            // NOTE(review): -61 is presumably FetchStatusCodes.S_ROBOTS_PREREQUISITE_FAILURE -- confirm.
            crawlURI.setFetchStatus(-61);
            crawlURI.putString("error", "robots.txt prerequisite failed");
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("robots.txt prerequisite failed " + crawlURI);
            }
            return true;
        }

        String userAgent = getController().getOrder().getUserAgent(crawlURI);
        if (!server.getRobots().disallows(crawlURI, userAgent)) {
            return false;
        }

        // Disallowed by robots. In calculate-only mode, annotate and carry on.
        boolean calculateOnly = ((Boolean) getUncheckedAttribute(crawlURI, ATTR_CALCULATE_ROBOTS_ONLY)).booleanValue();
        if (calculateOnly) {
            crawlURI.addAnnotation("robotExcluded");
            return false;
        }

        crawlURI.setFetchStatus(FetchStatusCodes.S_ROBOTS_PRECLUDED);
        crawlURI.putString("error", "robots.txt exclusion");
        logger.fine("robots.txt precluded " + crawlURI);
        return true;
    }

    /**
     * Checks the DNS precondition for a URI.
     *
     * @param crawlURI the URI being processed
     * @return {@code true} if processing of the URI should stop here (no
     *         server or host is available, the host failed to resolve, or the
     *         URI was deferred behind a DNS lookup); {@code false} if
     *         processing may continue
     * @throws RuntimeException wrapping a {@link URIException} if the dns
     *         prerequisite URI cannot be set
     */
    private boolean considerDnsPreconditions(CrawlURI crawlURI) {
        // A dns: URI is itself the prerequisite and never needs one of its own.
        if (crawlURI.getUURI().getScheme().equals("dns")) {
            crawlURI.setPrerequisite(true);
            return false;
        }

        if (getController().getServerCache().getServerFor(crawlURI) == null) {
            // NOTE(review): -7 is presumably FetchStatusCodes.S_UNFETCHABLE_URI -- confirm.
            crawlURI.setFetchStatus(-7);
            crawlURI.skipToProcessorChain(getController().getPostprocessorChain());
            return true;
        }

        // No host record, or a lookup was done and yielded no IP: give up on the URI.
        CrawlHost host = getController().getServerCache().getHostFor(crawlURI);
        if (host == null || (host.hasBeenLookedUp() && host.getIP() == null)) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("no dns for " + host + " cancelling processing for CrawlURI " + crawlURI.toString());
            }
            // NOTE(review): -6 is presumably FetchStatusCodes.S_DOMAIN_PREREQUISITE_FAILURE -- confirm.
            crawlURI.setFetchStatus(-6);
            crawlURI.skipToProcessorChain(getController().getPostprocessorChain());
            return true;
        }

        // dns: URIs already returned above, so no scheme re-check is needed here.
        if (!isIpExpired(crawlURI)) {
            return false;
        }

        logger.fine("Deferring processing of CrawlURI " + crawlURI.toString() + " for dns lookup.");
        try {
            crawlURI.markPrerequisite("dns:" + host.getHostName(), getController().getPostprocessorChain());
        } catch (URIException e) {
            throw new RuntimeException(e);
        }
        return true;
    }

    /**
     * Returns the configured minimum DNS validity for the given URI.
     *
     * @param crawlURI the URI whose settings context is used
     * @return the validity in seconds; the default of 21600 if the setting
     *         cannot be found
     */
    public long getIPValidityDuration(CrawlURI crawlURI) {
        Integer seconds;
        try {
            seconds = (Integer) getAttribute(ATTR_IP_VALIDITY_DURATION, crawlURI);
        } catch (AttributeNotFoundException e) {
            // Log like getRobotsValidityDuration does instead of swallowing silently.
            logger.severe(e.getLocalizedMessage());
            seconds = DEFAULT_IP_VALIDITY_DURATION;
        }
        return seconds.longValue();
    }

    /**
     * Decides whether the host of the given URI needs a (new) DNS lookup.
     *
     * @param crawlURI the URI whose host is examined
     * @return {@code true} if the host has never been looked up or its record
     *         is older than the larger of the configured validity and the
     *         record's TTL; {@code false} if the record is still valid, the
     *         TTL is -1, or the configured validity is 0
     */
    public boolean isIpExpired(CrawlURI crawlURI) {
        CrawlHost host = getController().getServerCache().getHostFor(crawlURI);
        if (!host.hasBeenLookedUp()) {
            return true;
        }

        // NOTE(review): a TTL of -1 presumably means "never expires" -- confirm against CrawlHost.
        long ttlSeconds = host.getIpTTL();
        if (ttlSeconds == -1) {
            return false;
        }

        long validitySeconds = getIPValidityDuration(crawlURI);
        if (validitySeconds == 0) {
            // A configured duration of zero means the record never expires.
            return false;
        }
        if (validitySeconds < 0) {
            // Negative settings fall back to the default minimum.
            validitySeconds = DEFAULT_IP_VALIDITY_DURATION.intValue();
        }

        // Use the larger of the configured minimum and the record's TTL,
        // then convert seconds to milliseconds.
        long validityMillis = Math.max(validitySeconds, ttlSeconds) * 1000;
        return validityMillis + host.getIpFetched() < System.currentTimeMillis();
    }

    /**
     * Returns the configured robots.txt validity for the given URI.
     *
     * @param crawlURI the URI whose settings context is used
     * @return the validity in milliseconds; based on the default of 86400
     *         seconds if the setting cannot be found
     */
    public long getRobotsValidityDuration(CrawlURI crawlURI) {
        Integer seconds;
        try {
            seconds = (Integer) getAttribute(ATTR_ROBOTS_VALIDITY_DURATION, crawlURI);
        } catch (AttributeNotFoundException e) {
            logger.severe(e.getLocalizedMessage());
            seconds = DEFAULT_ROBOTS_VALIDITY_DURATION;
        }
        return seconds.longValue() * 1000;
    }

    /**
     * Decides whether robots.txt must be (re)fetched for the server of the
     * given URI.
     *
     * @param crawlURI the URI whose server is examined
     * @return {@code true} if the server's robots fetch time is -1 or the
     *         robots information is older than the configured validity;
     *         {@code false} otherwise, including when the validity is 0
     */
    public boolean isRobotsExpired(CrawlURI crawlURI) {
        long fetchedAt = getController().getServerCache().getServerFor(crawlURI).getRobotsFetchedTime();
        if (fetchedAt == -1) {
            return true;
        }
        long validityMillis = getRobotsValidityDuration(crawlURI);
        if (validityMillis == 0) {
            // Zero validity: robots information never expires.
            return false;
        }
        return fetchedAt + validityMillis < System.currentTimeMillis();
    }

    /**
     * Checks whether the URI needs a credential prerequisite (for example a
     * login) to be run first.
     *
     * @param crawlURI the URI being processed
     * @return {@code true} if a credential prerequisite was queued for the
     *         URI; {@code false} otherwise
     */
    private boolean credentialPrecondition(CrawlURI crawlURI) {
        CredentialStore credentialStore = CredentialStore.getCredentialStore(getSettingsHandler());
        if (credentialStore == null) {
            logger.severe("No credential store for " + crawlURI);
            return false;
        }

        Iterator<?> credentials = credentialStore.iterator(crawlURI);
        if (credentials == null) {
            return false;
        }

        while (credentials.hasNext()) {
            Credential credential = (Credential) credentials.next();

            // This URI is the credential's own prerequisite: attach the
            // credential and let the URI through.
            if (credential.isPrerequisite(crawlURI)) {
                credential.attach(crawlURI);
                crawlURI.setPost(credential.isPost(crawlURI));
                return false;
            }

            boolean needsLogin = credential.rootUriMatch(getController(), crawlURI)
                    && credential.hasPrerequisite(crawlURI)
                    && !authenticated(credential, crawlURI);
            if (!needsLogin) {
                continue;
            }

            String prerequisite = credential.getPrerequisite(crawlURI);
            if (prerequisite == null || prerequisite.length() <= 0) {
                logger.severe(getController().getServerCache().getServerFor(crawlURI).getName() + " has  credential(s) of type " + credential + " but prereq is null.");
                continue;
            }

            try {
                crawlURI.markPrerequisite(prerequisite, getController().getPostprocessorChain());
            } catch (URIException e) {
                logger.severe("unable to set credentials prerequisite " + prerequisite);
                getController().logUriError(e, crawlURI.getUURI(), prerequisite);
                return false;
            }
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Queueing prereq " + prerequisite + " of type " + credential + " for " + crawlURI);
            }
            // Stop at the first queued prerequisite: continuing could let a
            // later credential queue another prerequisite or attach itself
            // to a URI that is already being deferred.
            return true;
        }
        return false;
    }

    /**
     * Checks whether the server of the given URI already holds a credential
     * avatar matching the given credential.
     *
     * @param credential the credential to look for
     * @param crawlURI the URI whose server is examined
     * @return {@code true} if at least one of the server's avatars matches
     *         the credential's class and key; {@code false} otherwise
     */
    private boolean authenticated(Credential credential, CrawlURI crawlURI) {
        CrawlServer server = getController().getServerCache().getServerFor(crawlURI);
        if (!server.hasCredentialAvatars()) {
            return false;
        }

        boolean matched = false;
        Iterator<?> avatars = server.getCredentialAvatars().iterator();
        while (avatars.hasNext()) {
            CredentialAvatar avatar = (CredentialAvatar) avatars.next();
            try {
                if (avatar.match(credential.getClass(), credential.getKey(crawlURI))) {
                    matched = true;
                }
            } catch (AttributeNotFoundException e) {
                logger.severe("Failed getting key for " + credential + " for " + crawlURI);
            }
        }
        return matched;
    }
}
