package org.archive.crawler.scope;

import java.util.Iterator;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.filter.FilePatternFilter;
import org.archive.crawler.filter.TransclusionFilter;
import org.archive.crawler.framework.Filter;
import org.archive.io.warc.WARCConstants;
import org.archive.net.UURI;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/scope/PathScope.class */
/**
 * Crawl scope that keeps a crawl inside the path "directories" of its seeds.
 *
 * <p>A candidate URI is in focus when it is on the same host as some seed (as
 * decided by {@code isSameHost}) and its path begins with that seed's path up
 * to, but not including, the seed path's last {@code '/'}.
 *
 * <p>The description registered in the constructor marks this scope as
 * deprecated in favour of {@code DecidingScope}.
 */
public class PathScope extends SeedCachingScope {
    private static final long serialVersionUID = -2217024073240277527L;
    private static final Logger logger = Logger.getLogger("org.archive.crawler.basic.PathScope");

    /** Definition name of the transitive filter element. */
    public static final String ATTR_TRANSITIVE_FILTER = "transitiveFilter";

    /** Definition name of the additional-focus filter element. */
    public static final String ATTR_ADDITIONAL_FOCUS_FILTER = "additionalScopeFocus";

    /** Filter consulted by {@link #additionalFocusAccepts(Object)}. */
    Filter additionalFocusFilter;

    /** Filter consulted by {@link #transitiveAccepts(Object)}; may be {@code null}. */
    Filter transitiveFilter;

    /**
     * Creates the scope and registers its two filter elements in the definition.
     *
     * @param str name passed through to the superclass constructor
     */
    public PathScope(String str) {
        super(str);
        setDescription("PathScope: A scope for path crawls *Deprecated* Use DecidingScope instead. Crawls made with this scope will be limited to a specific portion of the hosts its seeds provide. More specifically the paths those seeds provide. For example if one of the seeds is 'archive.org/example/' all URIs under the path 'examples' will be crawled (like 'archive.org/examples/hello.html') but not URIs in other paths or root (i.e. 'archive.org/index.html).");
        this.additionalFocusFilter =
                (Filter) addElementToDefinition(new FilePatternFilter(ATTR_ADDITIONAL_FOCUS_FILTER));
        this.transitiveFilter =
                (Filter) addElementToDefinition(new TransclusionFilter(ATTR_TRANSITIVE_FILTER));
    }

    /**
     * Delegates to the transitive filter.
     *
     * @param obj candidate to test
     * @return {@code true} when no transitive filter is set, otherwise the
     *         filter's verdict
     */
    @Override // org.archive.crawler.scope.ClassicScope
    protected boolean transitiveAccepts(Object obj) {
        if (this.transitiveFilter == null) {
            return true;
        }
        return this.transitiveFilter.accepts(obj);
    }

    /**
     * Tests whether the candidate lies under the path of any same-host seed.
     *
     * <p>Seeds on a different host, seeds with a {@code null} path, and
     * candidates with a {@code null} path never match. A {@link URIException}
     * raised while reading a path is logged and that seed is skipped.
     *
     * @param obj candidate; converted with {@code UURI.from}
     * @return {@code true} if some seed's path prefix matches the candidate's
     *         path; {@code false} if {@code obj} yields no {@code UURI} or no
     *         seed matches
     */
    @Override // org.archive.crawler.scope.ClassicScope
    protected boolean focusAccepts(Object obj) {
        UURI candidate = UURI.from(obj);
        if (candidate == null) {
            return false;
        }
        Iterator<UURI> seeds = seedsIterator();
        try {
            while (seeds.hasNext()) {
                UURI seed = seeds.next();
                if (!isSameHost(seed, candidate)) {
                    continue;
                }
                try {
                    String seedPath = seed.getPath();
                    String candidatePath = candidate.getPath();
                    if (seedPath == null || candidatePath == null) {
                        // Nothing to compare; without this skip the prefix
                        // test below would dereference null.
                        continue;
                    }
                    // Compare everything before the seed path's last '/'.
                    // If the seed path has no '/', the length is -1 and
                    // String.regionMatches compares no characters, so it
                    // reports a match.
                    if (seedPath.regionMatches(0, candidatePath, 0, seedPath.lastIndexOf('/'))) {
                        return true;
                    }
                } catch (URIException e) {
                    logger.severe("Failed get path on " + candidate + " or " + seed
                            + WARCConstants.COLON_SPACE + e.getMessage());
                }
            }
            return false;
        } finally {
            // Hand the iterator to checkClose on every exit, including
            // unexpected runtime exceptions.
            checkClose(seeds);
        }
    }

    /**
     * Delegates to the additional-focus filter.
     *
     * @param obj candidate to test
     * @return the filter's verdict
     */
    @Override // org.archive.crawler.scope.ClassicScope
    protected boolean additionalFocusAccepts(Object obj) {
        return this.additionalFocusFilter.accepts(obj);
    }
}
