package org.archive.crawler.prefetch;

import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.crawler.admin.CrawlJob;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.settings.SimpleType;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/prefetch/RuntimeLimitEnforcer.class */
/**
 * Processor that acts once the crawl has been running longer than a
 * configured number of seconds.
 *
 * <p>Two settings are registered in the constructor:
 * <ul>
 *   <li>{@link #ATTR_RUNTIME_SECONDS} - the allowed runtime, in seconds;</li>
 *   <li>{@link #ATTR_END_OPERATION} - what to do once that runtime has been
 *       exceeded: pause the job, terminate the job, or block the individual
 *       URI being processed.</li>
 * </ul>
 *
 * <p>The check is performed each time a URI passes through
 * {@link #innerProcess(CrawlURI)}.
 */
public class RuntimeLimitEnforcer extends Processor implements FetchStatusCodes {
    private static final long serialVersionUID = 1;

    /** Factor used to convert the configured seconds into milliseconds. */
    private static final long MILLIS_PER_SECOND = 1000L;

    /** Per-instance logger, named after this class. */
    protected Logger logger;

    /** Default allowed runtime in seconds (86400 s = 24 h). */
    protected static final long DEFAULT_RUNTIME_SECONDS = 86400;

    /** Name of the setting holding the allowed runtime in seconds. */
    public static final String ATTR_RUNTIME_SECONDS = "runtime-sec".intern();

    /** Name of the setting selecting the action taken once the runtime is exceeded. */
    public static final String ATTR_END_OPERATION = "end-operation".intern();

    /** End operation: request a crawl pause. */
    protected static final String OP_PAUSE = "Pause job".intern();

    /** End operation: request a crawl stop. */
    protected static final String OP_TERMINATE = "Terminate job".intern();

    /** End operation: mark the current URI as blocked and skip it to post-processing. */
    protected static final String OP_BLOCK_URIS = "Block URIs".intern();

    /** End operation used when none is configured. */
    protected static final String DEFAULT_END_OPERATION = OP_PAUSE;

    /** The legal values offered for {@link #ATTR_END_OPERATION}. */
    protected static final String[] AVAILABLE_END_OPERATIONS = {OP_PAUSE, OP_TERMINATE, OP_BLOCK_URIS};

    /**
     * Creates the processor and registers its two settings.
     *
     * @param str the name passed to the {@link Processor} superclass
     */
    public RuntimeLimitEnforcer(String str) {
        // The description strings below are kept verbatim.
        super(str, "A processor that halts further progress once a fixed amount of time has elapsed since the start of a crawl. It is possible to configure this processor per host, but it should be noted that Heritrix does not track runtime per host seperately. Especially when using facilities like the BdbFrontier's hold-queues, the actual amount of time spent crawling a host may have little relevance to total elapsed time. Note however that using overrides and/or refinements only makes sense when using the 'Block URIs' end operation. The pause and terminate operations have global impact once encountered.");
        this.logger = Logger.getLogger(RuntimeLimitEnforcer.class.getName());
        addElementToDefinition(new SimpleType(ATTR_RUNTIME_SECONDS, "The amount of time, in seconds, that the crawl will be allowed to run before this processor performs it's 'end operation.'", Long.valueOf(DEFAULT_RUNTIME_SECONDS)));
        addElementToDefinition(new SimpleType(ATTR_END_OPERATION, "The action that the processor takes once the runtime has elapsed.\n Operation: Pause job - Pauses the crawl. A change (increase) to the runtime duration will make it pausible to resume the crawl. Attempts to resume the crawl without modifying the run time will cause it to be immediately paused again.\n Operation: Terminate job - Terminates the job. Equivalent to using the max-time setting on the CrawlController.\n Operation: Block URIs - Blocks each URI with an -5002 (blocked by custom processor) fetch status code. This will cause all the URIs queued to wind up in the crawl.log.", DEFAULT_END_OPERATION, AVAILABLE_END_OPERATIONS));
    }

    /**
     * Compares the crawl duration reported by the controller's statistics
     * with the allowed runtime for this URI and, if it has been exceeded,
     * carries out the configured end operation.
     *
     * @param crawlURI the URI currently being processed
     * @throws InterruptedException declared by the overridden method
     */
    @Override // org.archive.crawler.framework.Processor
    protected void innerProcess(CrawlURI crawlURI) throws InterruptedException {
        long runtime = getRuntime(crawlURI);
        // NOTE(review): crawlDuration() is presumably in milliseconds, matching
        // the millisecond value returned by getRuntime() - confirm.
        if (getController().getStatistics().crawlDuration() <= runtime) {
            return;
        }

        String operation = (String) getUncheckedAttribute(crawlURI, ATTR_END_OPERATION);
        if (operation == null) {
            this.logger.log(Level.SEVERE, "Null value for " + ATTR_END_OPERATION + " when processing " + crawlURI.toString());
            return;
        }

        if (operation.equals(OP_PAUSE)) {
            getController().requestCrawlPause();
        } else if (operation.equals(OP_TERMINATE)) {
            getController().requestCrawlStop(CrawlJob.STATUS_FINISHED_TIME_LIMIT);
        } else if (operation.equals(OP_BLOCK_URIS)) {
            // Only this URI is affected: flag it, annotate it, and send it
            // straight to the post-processor chain.
            crawlURI.setFetchStatus(FetchStatusCodes.S_BLOCKED_BY_RUNTIME_LIMIT);
            crawlURI.addAnnotation("Runtime exceeded " + runtime + "ms");
            crawlURI.skipToProcessorChain(getController().getPostprocessorChain());
        } else {
            // Previously an unrecognised value was ignored without a trace,
            // leaving the limit silently unenforced.
            this.logger.log(Level.WARNING, "Unrecognized value '" + operation + "' for " + ATTR_END_OPERATION + " when processing " + crawlURI.toString());
        }
    }

    /**
     * Returns the allowed runtime for the given URI in milliseconds.
     *
     * <p>The configured value is read in seconds and converted with a
     * saturating multiplication, so a very large setting yields
     * {@link Long#MAX_VALUE} instead of wrapping around to a small or
     * negative number (which would make the limit appear exceeded at once).
     *
     * @param crawlURI the URI whose (possibly overridden) setting is read
     * @return the allowed runtime in milliseconds, or {@link Long#MAX_VALUE}
     *         when the setting is {@code null}
     */
    protected long getRuntime(CrawlURI crawlURI) {
        Object uncheckedAttribute = getUncheckedAttribute(crawlURI, ATTR_RUNTIME_SECONDS);
        if (uncheckedAttribute != null) {
            return secondsToMillisSaturated(((Long) uncheckedAttribute).longValue());
        }
        this.logger.log(Level.SEVERE, "Null value for " + ATTR_RUNTIME_SECONDS + " when processing " + crawlURI.toString());
        return Long.MAX_VALUE;
    }

    /** Converts seconds to milliseconds, clamping to the {@code long} range on overflow. */
    private static long secondsToMillisSaturated(long seconds) {
        if (seconds > Long.MAX_VALUE / MILLIS_PER_SECOND) {
            return Long.MAX_VALUE;
        }
        if (seconds < Long.MIN_VALUE / MILLIS_PER_SECOND) {
            return Long.MIN_VALUE;
        }
        return seconds * MILLIS_PER_SECOND;
    }
}
