package org.archive.crawler.datamodel;

import java.io.File;
import java.io.Serializable;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.management.AttributeNotFoundException;
import org.archive.crawler.framework.CrawlController;
import org.archive.crawler.framework.CrawlScope;
import org.archive.crawler.framework.Frontier;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.ModuleType;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.Type;
import org.archive.crawler.url.canonicalize.BaseRule;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/datamodel/CrawlOrder.class */
/**
 * Heritrix crawl order: the root module of the settings framework.
 *
 * <p>The constructor registers every top-level crawl setting (paths, limits,
 * HTTP headers, processor chains, loggers, recovery options, credential
 * store) with the settings definition. The accessor methods read those
 * settings back.
 *
 * <p>NOTE(review): {@code httpHeaders}, {@code loggers} and {@code controller}
 * are {@code transient} and are only assigned in the constructor or via
 * {@link #setController(CrawlController)}; presumably they are null on an
 * instance restored through Java serialization -- confirm how such instances
 * are re-initialised before relying on the accessors.
 */
public class CrawlOrder extends ModuleType implements Serializable {
    private static final long serialVersionUID = -6715840285961511669L;

    public static final String ATTR_NAME = "crawl-order";
    public static final String ATTR_SETTINGS_DIRECTORY = "settings-directory";
    public static final String ATTR_DISK_PATH = "disk-path";
    public static final String ATTR_LOGS_PATH = "logs-path";
    public static final String ATTR_CHECKPOINTS_PATH = "checkpoints-path";
    public static final String ATTR_STATE_PATH = "state-path";
    public static final String ATTR_SCRATCH_PATH = "scratch-path";
    public static final String ATTR_RECOVER_PATH = "recover-path";
    public static final String ATTR_RECOVER_RETAIN_FAILURES = "recover-retain-failures";
    public static final String ATTR_MAX_BYTES_DOWNLOAD = "max-bytes-download";
    public static final String ATTR_MAX_DOCUMENT_DOWNLOAD = "max-document-download";
    public static final String ATTR_MAX_TIME_SEC = "max-time-sec";
    public static final String ATTR_MAX_TOE_THREADS = "max-toe-threads";
    public static final String ATTR_HTTP_HEADERS = "http-headers";
    public static final String ATTR_USER_AGENT = "user-agent";
    public static final String ATTR_FROM = "from";
    public static final String ATTR_PRE_FETCH_PROCESSORS = "pre-fetch-processors";
    public static final String ATTR_FETCH_PROCESSORS = "fetch-processors";
    public static final String ATTR_EXTRACT_PROCESSORS = "extract-processors";
    public static final String ATTR_WRITE_PROCESSORS = "write-processors";
    public static final String ATTR_POST_PROCESSORS = "post-processors";
    public static final String ATTR_LOGGERS = "loggers";
    public static final String ATTR_RULES = "uri-canonicalization-rules";
    public static final String ATTR_RECORDER_OUT_BUFFER = "recorder-out-buffer-bytes";
    public static final String ATTR_RECORDER_IN_BUFFER = "recorder-in-buffer-bytes";
    public static final String ATTR_BDB_CACHE_PERCENT = "bdb-cache-percent";
    public static final String ATTR_CHECKPOINT_COPY_BDBJE_LOGS = "checkpoint-copy-bdbje-logs";

    /** The 'http-headers' map; holds the 'user-agent' and 'from' elements. */
    private transient MapType httpHeaders;

    /** The 'loggers' map returned by {@link #getLoggers()}. */
    private transient MapType loggers;

    /** Controller set through {@link #setController(CrawlController)}. */
    private transient CrawlController controller;

    private static final Logger logger = Logger.getLogger("org.archive.crawler.datamodel.CrawlOrder");

    public static final Boolean DEFAULT_CHECKPOINT_COPY_BDBJE_LOGS = Boolean.TRUE;
    private static final Integer DEFAULT_BDB_CACHE_PERCENT = Integer.valueOf(0);

    /** Regex a user-agent must fully match: some text, then a parenthesised part containing "+http(s)://host.tld". */
    private static final String ACCEPTABLE_USER_AGENT = "\\S+.*\\(.*\\+http(s)?://\\S+\\.\\S+.*\\).*";

    /** Regex a 'from' value must fully match: a minimal "name@host.tld" shape. */
    private static final String ACCEPTABLE_FROM = "\\S+@\\S+\\.\\S+";

    /**
     * Builds the crawl order and registers all of its setting definitions.
     */
    public CrawlOrder() {
        super(ATTR_NAME, "Heritrix crawl order. This forms the root of the settings framework.");

        // Directory settings: all expert-only and not overrideable.
        addExpertElement(new SimpleType(ATTR_SETTINGS_DIRECTORY, "Directory where override settings are kept. The settings for many modules can be overridden based on the domain or subdomain of the URI being processed. This setting specifies a file level directory to store those settings. The path is relative to 'disk-path' unless an absolute path is provided.", "settings"), false);
        addExpertElement(new SimpleType(ATTR_DISK_PATH, "Directory where logs, arcs and other run time files will be kept. If this path is a relative path, it will be relative to the crawl order.", ""), false);
        addExpertElement(new SimpleType(ATTR_LOGS_PATH, "Directory where crawler log files will be kept. If this path is a relative path, it will be relative to the 'disk-path'.", "logs"), false);
        addExpertElement(new SimpleType(ATTR_CHECKPOINTS_PATH, "Directory where crawler checkpoint files will be kept. If this path is a relative path, it will be relative to the 'disk-path'.", "checkpoints"), false);
        addExpertElement(new SimpleType(ATTR_STATE_PATH, "Directory where crawler-state files will be kept. If this path is a relative path, it will be relative to the 'disk-path'.", "state"), false);
        addExpertElement(new SimpleType(ATTR_SCRATCH_PATH, "Directory where discardable temporary files will be kept. If this path is a relative path, it will be relative to the 'disk-path'.", "scratch"), false);

        // Crawl-wide limits: visible to all users, not overrideable.
        addElementToDefinition(new SimpleType(ATTR_MAX_BYTES_DOWNLOAD, "Maximum number of bytes to download. Once this number is exceeded the crawler will stop. A value of zero means no upper limit.", Long.valueOf(0L))).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_MAX_DOCUMENT_DOWNLOAD, "Maximum number of documents to download. Once this number is exceeded the crawler will stop. A value of zero means no upper limit.", Long.valueOf(0L))).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_MAX_TIME_SEC, "Maximum amount of time to crawl (in seconds). Once this much time has elapsed the crawler will stop. A value of zero means no upper limit.", Long.valueOf(0L))).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_MAX_TOE_THREADS, "Maximum number of threads processing URIs at the same time.", Integer.valueOf(100))).setOverrideable(false);

        // Buffer and cache tuning: expert-only, not overrideable.
        addExpertElement(new SimpleType(ATTR_RECORDER_OUT_BUFFER, "Size in bytes of in-memory buffer to record outbound traffic. One such buffer is reserved for every ToeThread.", Integer.valueOf(4096)), false);
        addExpertElement(new SimpleType(ATTR_RECORDER_IN_BUFFER, "Size in bytes of in-memory buffer to record inbound traffic. One such buffer is reserved for every ToeThread.", Integer.valueOf(65536)), false);
        addExpertElement(new SimpleType(ATTR_BDB_CACHE_PERCENT, "Percentage of heap to allocate to BerkeleyDB JE cache. Default of zero means no preference (accept BDB's default, usually 60%, or the je.maxMemoryPercent property value).", DEFAULT_BDB_CACHE_PERCENT), false);

        addElementToDefinition(new CrawlScope());

        // HTTP identification headers.
        this.httpHeaders = (MapType) addElementToDefinition(new MapType(ATTR_HTTP_HEADERS, "HTTP headers. Information that will be used when constructing the HTTP headers of the crawler's HTTP requests."));
        this.httpHeaders.addElementToDefinition(new SimpleType(ATTR_USER_AGENT, "User agent to act as. Field must contain valid URL that links to website of person or organization running the crawl. Replace 'PROJECT_URL_HERE' in initial template. E.g. If organization is Library of Congress, a valid user agent would be:'Mozilla/5.0 (compatible; loc-crawler/0.11.0 +http://loc.gov)'. Note, you must preserve the '+' before the 'http'.", "Mozilla/5.0 (compatible; heritrix/@VERSION@ +PROJECT_URL_HERE)"));
        this.httpHeaders.addElementToDefinition(new SimpleType(ATTR_FROM, "Contact information. This field must contain a valid e-mail address for the person or organization responsiblefor this crawl: e.g. 'webmaster@loc.gov'", "CONTACT_EMAIL_ADDRESS_HERE"));

        addElementToDefinition(new RobotsHonoringPolicy());
        addElementToDefinition(new ModuleType(Frontier.ATTR_NAME, "Frontier")).setLegalValueType(Frontier.class);

        // Canonicalization rules are the one expert map that stays overrideable.
        addExpertElement(new MapType(ATTR_RULES, "Ordered list of url canonicalization rules. Rules are applied in the order listed from top to bottom.", BaseRule.class), true);

        // Processor chains, registered in pipeline order.
        addElementToDefinition(new MapType(ATTR_PRE_FETCH_PROCESSORS, "Processors to run prior to fetching anything from the network.", Processor.class)).setOverrideable(false);
        addElementToDefinition(new MapType(ATTR_FETCH_PROCESSORS, "Processors that fetch documents.", Processor.class)).setOverrideable(false);
        addElementToDefinition(new MapType(ATTR_EXTRACT_PROCESSORS, "Processors that extract new URIs from fetched documents.", Processor.class)).setOverrideable(false);
        addElementToDefinition(new MapType(ATTR_WRITE_PROCESSORS, "Processors that write documents to archives.", Processor.class)).setOverrideable(false);
        addElementToDefinition(new MapType(ATTR_POST_PROCESSORS, "Processors that do cleanup and feed the frontier with new URIs.", Processor.class)).setOverrideable(false);

        this.loggers = (MapType) addElementToDefinition(new MapType(ATTR_LOGGERS, "Statistics tracking modules. Any number of specialized statistics tracker that monitor a crawl and write logs, reports and/or provide information to the user interface."));

        // Recovery / checkpoint options: expert-only, not overrideable.
        addExpertElement(new SimpleType(ATTR_RECOVER_PATH, "Optional. Points at recover log (or recover.gz log) OR the checkpoint directory to use recovering a crawl.", ""), false);
        addExpertElement(new SimpleType(ATTR_CHECKPOINT_COPY_BDBJE_LOGS, "When true, on a checkpoint, we copy off the bdbje log files to the checkpoint directory. To recover a checkpoint, just set the recover-path to point at the checkpoint directory to recover.  This is default setting. But if crawl is large, copying bdbje log files can take tens of minutes and even upwards of an hour (Copying bdbje log files will consume bulk of time checkpointing). If this setting is false, we do NOT copy bdbje logs on checkpoint AND we set bdbje to NEVER delete log files (instead we have it rename files-to-delete with a '.del'extension). Assumption is that when this setting is false, an external process is managing the removal of bdbje log files and that come time to recover from a checkpoint, the files that comprise a checkpoint are manually assembled. This is an expert setting.", DEFAULT_CHECKPOINT_COPY_BDBJE_LOGS), false);
        addExpertElement(new SimpleType(ATTR_RECOVER_RETAIN_FAILURES, "When recovering via the recover.log, should failures in the log be retained in the recovered crawl, preventing the corresponding URIs from being retried. Default is false, meaning failures are forgotten, and the corresponding URIs will be retried in the recovered crawl.", Boolean.FALSE), false);

        addExpertElement(new CredentialStore(CredentialStore.ATTR_NAME), true);
    }

    /**
     * Registers {@code element} with this order's definition and flags it as
     * an expert setting with the given overrideable state.
     *
     * @param element      the setting definition to register
     * @param overrideable value passed to {@code setOverrideable}
     * @return the value returned by {@code addElementToDefinition}
     */
    private Type addExpertElement(Type element, boolean overrideable) {
        Type added = addElementToDefinition(element);
        added.setOverrideable(overrideable);
        added.setExpertSetting(true);
        return added;
    }

    /**
     * Returns the 'user-agent' value from the 'http-headers' map.
     *
     * @param crawlURI context passed to the attribute lookup; callers in this
     *                 class pass {@code null}
     * @return the configured user-agent string
     */
    public String getUserAgent(CrawlURI crawlURI) {
        return (String) this.httpHeaders.getUncheckedAttribute(crawlURI, ATTR_USER_AGENT);
    }

    /**
     * Returns the 'from' value from the 'http-headers' map.
     *
     * @param crawlURI context passed to the attribute lookup; callers in this
     *                 class pass {@code null}
     * @return the configured 'from' string, or {@code null} if the attribute
     *         lookup fails (the failure is logged)
     */
    public String getFrom(CrawlURI crawlURI) {
        try {
            return (String) this.httpHeaders.getAttribute(ATTR_FROM, crawlURI);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, e.getMessage(), e);
            return null;
        }
    }

    /**
     * Returns the 'max-toe-threads' setting.
     *
     * @return the configured maximum number of threads
     * @throws IllegalStateException if the attribute cannot be found; the
     *         original lookup failure is attached as the cause
     */
    public int getMaxToes() {
        try {
            return ((Integer) getAttribute((Object) null, ATTR_MAX_TOE_THREADS)).intValue();
        } catch (AttributeNotFoundException e) {
            // Previously this path logged and then dereferenced null, failing
            // with an uninformative NullPointerException.
            throw new IllegalStateException("Missing '" + ATTR_MAX_TOE_THREADS + "' attribute on crawl order", e);
        }
    }

    /**
     * Returns the robots honoring policy registered on this order.
     *
     * @return the policy, or {@code null} if the attribute lookup fails (the
     *         failure is logged)
     */
    public RobotsHonoringPolicy getRobotsHonoringPolicy() {
        try {
            return (RobotsHonoringPolicy) getAttribute((Object) null, RobotsHonoringPolicy.ATTR_NAME);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, e.getMessage(), e);
            return null;
        }
    }

    /**
     * Returns the name of the settings object obtained from the settings
     * handler with a {@code null} argument.
     *
     * @return the crawl order name
     */
    public String getCrawlOrderName() {
        return getSettingsHandler().getSettingsObject(null).getName();
    }

    /**
     * @return the controller previously given to
     *         {@link #setController(CrawlController)}, or {@code null} if none
     */
    public CrawlController getController() {
        return this.controller;
    }

    /**
     * Stores the controller associated with this order.
     *
     * @param crawlController the controller to remember
     */
    public void setController(CrawlController crawlController) {
        this.controller = crawlController;
    }

    /**
     * @return the 'loggers' map registered by the constructor
     */
    public MapType getLoggers() {
        return this.loggers;
    }

    /**
     * Validates the configured 'user-agent' and 'from' values against
     * {@link #ACCEPTABLE_USER_AGENT} and {@link #ACCEPTABLE_FROM}.
     *
     * @throws FatalConfigurationException if either value is missing or does
     *         not match its pattern
     */
    public void checkUserAgentAndFrom() throws FatalConfigurationException {
        String userAgent = getUserAgent(null);
        String from = getFrom(null);
        // A missing value is reported as a configuration error rather than
        // surfacing as a NullPointerException.
        boolean userAgentOk = userAgent != null && userAgent.matches(ACCEPTABLE_USER_AGENT);
        boolean fromOk = from != null && from.matches(ACCEPTABLE_FROM);
        if (!userAgentOk || !fromOk) {
            throw new FatalConfigurationException("unacceptable user-agent  or from (Reedit your order file).");
        }
    }

    /**
     * Returns the 'checkpoints-path' directory resolved against 'disk-path'.
     *
     * @return the checkpoints directory, or {@code null} if either attribute
     *         cannot be found (the failure is logged)
     */
    public File getCheckpointsDirectory() {
        try {
            String checkpointsPath = (String) getAttribute((Object) null, ATTR_CHECKPOINTS_PATH);
            return getDirectoryRelativeToDiskPath(checkpointsPath);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.SEVERE, "Unable to resolve checkpoints directory", e);
            return null;
        }
    }

    /**
     * Resolves {@code relativePath} beneath the 'disk-path' setting, which is
     * itself resolved through the settings handler's
     * {@code getPathRelativeToWorkingDirectory}.
     *
     * @param relativePath child path to append to the disk path
     * @return the combined file
     * @throws AttributeNotFoundException if 'disk-path' cannot be found
     */
    private File getDirectoryRelativeToDiskPath(String relativePath) throws AttributeNotFoundException {
        return new File(
                getSettingsHandler().getPathRelativeToWorkingDirectory(
                        (String) getAttribute((Object) null, ATTR_DISK_PATH)),
                relativePath);
    }

    /**
     * Looks up the path stored under attribute {@code str}, resolves it
     * against 'disk-path' when it is not absolute, and creates the directory
     * if it does not exist yet.
     *
     * @param str name of the attribute holding the directory path
     * @return the resolved directory
     * @throws AttributeNotFoundException if {@code str} or 'disk-path' cannot
     *         be found
     */
    public File getSettingsDir(String str) throws AttributeNotFoundException {
        String configuredPath = (String) getAttribute((Object) null, str);
        File dir = new File(configuredPath);
        if (!dir.isAbsolute()) {
            dir = getDirectoryRelativeToDiskPath(configuredPath);
        }
        // mkdirs() also returns false when another thread or process created
        // the directory first, so re-check before reporting a failure.
        if (!dir.exists() && !dir.mkdirs() && !dir.isDirectory()) {
            logger.warning("Unable to create directory " + dir.getAbsolutePath());
        }
        return dir;
    }
}
