package org.archive.crawler.prefetch;

import javax.management.AttributeNotFoundException;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.framework.Scoper;
import org.archive.crawler.settings.SimpleType;
import org.archive.util.TextUtils;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/prefetch/Preselector.class */
/**
 * Processor that performs a last round of checks on a URI before it is
 * fetched.
 *
 * <p>Four expert settings are consulted, in this order: {@code block-all},
 * {@code allow-by-regexp}, {@code block-by-regexp} and {@code recheck-scope}.
 * The first check that rejects the URI sets its fetch status and redirects it
 * to the controller's post-processor chain; no further checks are run for
 * that URI.
 */
public class Preselector extends Scoper implements FetchStatusCodes {
    private static final long serialVersionUID = 3738560264369561017L;

    /** Boolean setting: re-evaluate the URI against the scope. */
    public static final String ATTR_RECHECK_SCOPE = "recheck-scope";

    /** Boolean setting: reject every URI. */
    public static final String ATTR_BLOCK_ALL = "block-all";

    /** String setting: reject URIs that match this regular expression. */
    public static final String ATTR_BLOCK_BY_REGEXP = "block-by-regexp";

    /** String setting: reject URIs that do NOT match this regular expression. */
    public static final String ATTR_ALLOW_BY_REGEXP = "allow-by-regexp";

    /**
     * Creates the processor and registers its four expert settings. The
     * boolean settings default to {@code false}; the regexp settings default
     * to the empty string, which {@link #innerProcess(CrawlURI)} treats as
     * "not configured".
     *
     * @param str name passed on to the superclass constructor
     */
    public Preselector(String str) {
        super(str, "Preselector. Does one last bit of checking to make sure that the current URI should be fetched.");
        // Boolean.FALSE replaces the deprecated "new Boolean(false)" boxing constructor.
        addElementToDefinition(new SimpleType(ATTR_RECHECK_SCOPE, "Recheck if uri is in scope. This is meaningful if the scope is altered during a crawl. URIs are checked against the scope when they are added to queues. Setting this value to true forces the URI to be checked against the scope when it is comming out of the queue, possibly after the scope is altered.", Boolean.FALSE)).setExpertSetting(true);
        addElementToDefinition(new SimpleType(ATTR_BLOCK_ALL, "Block all URIs from being processed. This is most likely to be used in overrides to easily reject certain hosts from being processed.", Boolean.FALSE)).setExpertSetting(true);
        addElementToDefinition(new SimpleType(ATTR_BLOCK_BY_REGEXP, "Block all URIs matching the regular expression from being processed.", "")).setExpertSetting(true);
        addElementToDefinition(new SimpleType(ATTR_ALLOW_BY_REGEXP, "Allow only URIs matching the regular expression to be processed.", "")).setExpertSetting(true);
    }

    /**
     * Applies the block/allow/scope checks to the given URI.
     *
     * <p>Processing stops at the first check that rejects the URI. Without
     * the early returns a URI already marked {@code S_BLOCKED_BY_USER} could
     * have its status overwritten with {@code S_OUT_OF_SCOPE} by the scope
     * recheck, and regexp matching and scope evaluation would run for a URI
     * that is already rejected.
     *
     * @param crawlURI the URI about to be fetched
     */
    @Override // org.archive.crawler.framework.Processor
    protected void innerProcess(CrawlURI crawlURI) {
        if (readBooleanSetting(ATTR_BLOCK_ALL, crawlURI)) {
            rejectWithStatus(crawlURI, FetchStatusCodes.S_BLOCKED_BY_USER);
            return;
        }

        // An allow-regexp, when configured, must match for the URI to pass.
        String allowRegexp = readRegexpSetting(ATTR_ALLOW_BY_REGEXP, crawlURI);
        if (allowRegexp != null && !TextUtils.matches(allowRegexp, crawlURI.toString())) {
            rejectWithStatus(crawlURI, FetchStatusCodes.S_BLOCKED_BY_USER);
            return;
        }

        // A block-regexp, when configured, must NOT match for the URI to pass.
        String blockRegexp = readRegexpSetting(ATTR_BLOCK_BY_REGEXP, crawlURI);
        if (blockRegexp != null && TextUtils.matches(blockRegexp, crawlURI.toString())) {
            rejectWithStatus(crawlURI, FetchStatusCodes.S_BLOCKED_BY_USER);
            return;
        }

        if (readBooleanSetting(ATTR_RECHECK_SCOPE, crawlURI) && !isInScope(crawlURI)) {
            rejectWithStatus(crawlURI, FetchStatusCodes.S_OUT_OF_SCOPE);
        }
    }

    /**
     * Reads a boolean setting in the context of the given URI.
     *
     * @return the setting's value, or {@code false} when the attribute is
     *         missing or its value is {@code null}
     */
    private boolean readBooleanSetting(String attributeName, CrawlURI crawlURI) {
        try {
            Boolean value = (Boolean) getAttribute(attributeName, crawlURI);
            return value != null && value.booleanValue();
        } catch (AttributeNotFoundException ignored) {
            // Deliberate: a missing attribute is treated as "false".
            return false;
        }
    }

    /**
     * Reads a regexp setting in the context of the given URI.
     *
     * @return the configured expression, or {@code null} when the attribute
     *         is missing, {@code null} or the empty string
     */
    private String readRegexpSetting(String attributeName, CrawlURI crawlURI) {
        try {
            String value = (String) getAttribute(attributeName, crawlURI);
            if (value == null || value.length() == 0) {
                return null;
            }
            return value;
        } catch (AttributeNotFoundException ignored) {
            // Deliberate: a missing attribute is treated as "no regexp configured".
            return null;
        }
    }

    /**
     * Records the rejection status on the URI and sends it to the
     * controller's post-processor chain.
     */
    private void rejectWithStatus(CrawlURI crawlURI, int fetchStatus) {
        crawlURI.setFetchStatus(fetchStatus);
        crawlURI.skipToProcessorChain(getController().getPostprocessorChain());
    }
}
