package org.archive.crawler.frontier;

import java.io.IOException;
import java.util.Hashtable;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.management.AttributeNotFoundException;
import javax.management.MBeanException;
import javax.management.ReflectionException;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.event.CrawlURIDispositionListener;
import org.archive.crawler.filter.OrFilter;
import org.archive.crawler.filter.URIRegExpFilter;
import org.archive.crawler.framework.CrawlController;
import org.archive.crawler.framework.Frontier;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.scope.ClassicScope;
import org.archive.crawler.settings.CrawlerSettings;
import org.archive.crawler.settings.SimpleType;
import org.archive.net.UURIFactory;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/frontier/DomainSensitiveFrontier.class */
public class DomainSensitiveFrontier extends BdbFrontier implements CrawlURIDispositionListener {
    private static final long serialVersionUID = -3330190056282726202L;
    public static final String ATTR_MAX_DOCS = "max-docs";
    public static final String ATTR_COUNTER_MODE = "counter-mode";
    public static final String COUNT_OVERRIDE = "count-per-override";
    public static final String DEFAULT_MODE = "count-per-override";
    private Hashtable<String, Long> hostCounters;
    private boolean countPerOverride;
    private String counterMode;
    private static final Logger logger = Logger.getLogger(DomainSensitiveFrontier.class.getName());
    public static final String COUNT_HOST = "count-per-host";
    public static final String COUNT_DOMAIN = "count-per-domain";
    public static final String[] ATTR_AVAILABLE_MODES = {"count-per-override", COUNT_HOST, COUNT_DOMAIN};

    public DomainSensitiveFrontier(String str) {
        super(Frontier.ATTR_NAME, "DomainSensitiveFrontier. *Deprecated* Use BdbFrontier+QuotaEnforcer instead. Overrides BdbFrontier to add specification of number of documents to download (Expects 'exclude-filter' to be part of CrawlScope).");
        this.hostCounters = new Hashtable<>();
        this.countPerOverride = true;
        addElementToDefinition(new SimpleType(ATTR_MAX_DOCS, "Maximum number of documents to download for host or domain (Zero means no limit).", new Long(0L))).setOverrideable(true);
        addElementToDefinition(new SimpleType(ATTR_COUNTER_MODE, "If count-per-override, acts like the crawl order maximum download count and the crawler will download this total amount of docs only. Override to change the max count for the overridden domain or host. Else if count-per-host the crawler will download max-docs per host. Add an override to change max count on a per-domain or a per-host basis.For example, if you set max-docs to 30 in this mode, the crawler will download 30 docs from each host in scope. If you  override for kb.se setting max-docs to 20, it will instead download only 20 docs from each host of kb.se. (It can be a larger as well as a smaller value here.). Finally count-per-domain behaves similar to count-per-host, but instead sets max on a per-domain basis.Here you can do overrides on the domain-level, but not on the host-level. So if you here set max-docs to 30 the crawler will download 30 docs from each domain in scope. If you  override for kb.se setting max-docs to 20, it will instead download only 20 docs in total from the whole kb.se domain. (It can be a larger as well as a smaller value here.)", "count-per-override", ATTR_AVAILABLE_MODES)).setOverrideable(false);
    }

    @Override // org.archive.crawler.frontier.BdbFrontier, org.archive.crawler.frontier.WorkQueueFrontier, org.archive.crawler.frontier.AbstractFrontier, org.archive.crawler.framework.Frontier
    public void initialize(CrawlController crawlController) throws FatalConfigurationException, IOException {
        super.initialize(crawlController);
        this.controller.addCrawlURIDispositionListener(this);
        try {
            this.counterMode = (String) getAttribute(ATTR_COUNTER_MODE);
            if (this.counterMode.equalsIgnoreCase(COUNT_DOMAIN) || this.counterMode.equalsIgnoreCase(COUNT_HOST)) {
                this.countPerOverride = false;
            } else {
                this.countPerOverride = true;
            }
        } catch (MBeanException e) {
            e.printStackTrace();
        } catch (ReflectionException e2) {
            e2.printStackTrace();
        } catch (AttributeNotFoundException e3) {
            e3.printStackTrace();
        }
    }

    private synchronized boolean checkDownloadLimits(CrawlURI crawlURI) {
        String substring;
        boolean z = false;
        boolean z2 = false;
        if (crawlURI.getUURI().getScheme().equals("dns")) {
            return false;
        }
        try {
            String host = crawlURI.getUURI().getHost();
            CrawlerSettings settings = this.controller.getSettingsHandler().getSettings(host);
            do {
                if (this.counterMode.equalsIgnoreCase("count-per-override")) {
                    substring = settings.getScope() != null ? settings.getScope() : "root";
                } else {
                    substring = this.counterMode.equalsIgnoreCase(COUNT_HOST) ? host : host.substring(host.lastIndexOf(".", host.lastIndexOf(".") - 1) + 1, host.length());
                }
                long longValue = ((Long) getAttribute(settings, ATTR_MAX_DOCS)).longValue();
                long longValue2 = this.hostCounters.get(substring) != null ? this.hostCounters.get(substring).longValue() : 0L;
                if (longValue > 0 && longValue2 >= longValue) {
                    logger.fine("Discarding Queue: " + host + UURIFactory.SPACE);
                    crawlURI.addAnnotation("dsfLimit");
                    if (!z) {
                        WorkQueue queueFor = getQueueFor(crawlURI);
                        queueFor.unpeek();
                        decrementQueuedCount(0 + queueFor.deleteMatching(this, ".*"));
                        z = true;
                    }
                    OrFilter orFilter = (OrFilter) this.controller.getScope().getAttribute(ClassicScope.ATTR_EXCLUDE_FILTER);
                    String str = substring.equalsIgnoreCase("root") ? ".*" : "^((https?://)?[a-zA-Z0-9\\.]*)" + substring + "($|/.*)";
                    logger.fine("Adding filter: [" + str + "].");
                    orFilter.addFilter(this.controller.getSettingsHandler().getSettings(null), new URIRegExpFilter(crawlURI.toString(), str));
                    z2 = true;
                }
                CrawlerSettings parent = settings.getParent();
                settings = parent;
                if (parent == null) {
                    break;
                }
            } while (this.countPerOverride);
        } catch (Exception e) {
            logger.severe("ERROR: checkDownloadLimits(), while processing {" + crawlURI.toString() + UURIFactory.RCURBRACKET + e.getClass() + "message: " + e.getMessage() + ".  Stack trace:");
            e.printStackTrace();
        }
        return z2;
    }

    protected synchronized void incrementHostCounters(CrawlURI crawlURI) {
        String substring;
        if (crawlURI.getUURI().toString().startsWith("dns:")) {
            return;
        }
        try {
            String host = crawlURI.getUURI().getHost();
            CrawlerSettings settings = this.controller.getSettingsHandler().getSettings(host);
            do {
                if (this.counterMode.equalsIgnoreCase("count-per-override")) {
                    substring = settings.getScope() != null ? settings.getScope() : "root";
                } else {
                    substring = this.counterMode.equalsIgnoreCase(COUNT_HOST) ? host : host.substring(host.lastIndexOf(".", host.lastIndexOf(".") - 1) + 1, host.length());
                }
                this.hostCounters.put(substring, new Long((this.hostCounters.get(substring) != null ? this.hostCounters.get(substring).longValue() : 0L) + 1));
                CrawlerSettings parent = settings.getParent();
                settings = parent;
                if (parent == null) {
                    break;
                }
            } while (this.countPerOverride);
        } catch (Exception e) {
            logger.severe("ERROR: incrementHostCounters() " + e.getMessage());
        }
    }

    @Override // org.archive.crawler.event.CrawlURIDispositionListener
    public void crawledURISuccessful(CrawlURI crawlURI) {
        incrementHostCounters(crawlURI);
        checkDownloadLimits(crawlURI);
    }

    @Override // org.archive.crawler.event.CrawlURIDispositionListener
    public void crawledURINeedRetry(CrawlURI crawlURI) {
    }

    @Override // org.archive.crawler.event.CrawlURIDispositionListener
    public void crawledURIDisregard(CrawlURI crawlURI) {
    }

    @Override // org.archive.crawler.event.CrawlURIDispositionListener
    public void crawledURIFailure(CrawlURI crawlURI) {
    }
}
