package org.archive.crawler.datamodel;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.Serializable;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import java.util.zip.Checksum;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CrawlSubstats;
import org.archive.crawler.datamodel.credential.CredentialAvatar;
import org.archive.crawler.framework.Checkpointer;
import org.archive.crawler.framework.ToeThread;
import org.archive.crawler.settings.CrawlerSettings;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.io.ReplayInputStream;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/datamodel/CrawlServer.class */
/**
 * Per-server crawl state, identified by a server name string (see
 * {@link #getServerKey(CandidateURI)} for how such names are derived from URIs).
 *
 * <p>An instance holds the robots exclusion policy last computed for the
 * server, the time robots.txt handling was last attempted, a counter of
 * consecutive connection errors, crawl sub-statistics and an optional set of
 * credential avatars.
 *
 * <p>The settings handler and the credential avatars are {@code transient}.
 * On deserialization the settings handler is re-acquired from the current
 * thread (see {@code readObject}); the avatar set is left {@code null} until
 * {@link #addCredentialAvatar(CredentialAvatar)} is called again.
 */
public class CrawlServer implements Serializable, CrawlSubstats.HasCrawlSubstats {
    private static final long serialVersionUID = -989714570750970369L;

    /** Value of the robots fetch time while no robots.txt handling has happened yet. */
    public static final long ROBOTS_NOT_FETCHED = -1;

    /**
     * Number of fetch attempts below which a failed robots.txt fetch simply
     * marks the robots information as invalid (see {@link #updateRobots(CrawlURI)}).
     */
    public static final long MIN_ROBOTS_RETRIES = 2;

    /** Server name as passed to the constructor, e.g. {@code host} or {@code host:port}. */
    private final String server;

    /** Port parsed from the server name, or -1 if none could be parsed. */
    private int port;

    /** Source of crawl settings; not serialized, restored in {@code readObject}. */
    private transient SettingsHandler settingsHandler;

    /** Robots policy currently in effect for this server; may be null. */
    private RobotsExclusionPolicy robots;

    /** Not read or written by any method of this class. */
    Checksum robotstxtChecksum;

    /** Wall-clock millis of the last {@link #updateRobots(CrawlURI)} call. */
    long robotsFetched = ROBOTS_NOT_FETCHED;

    /** Whether the last {@link #updateRobots(CrawlURI)} call produced a usable policy. */
    boolean validRobots = false;

    /** Crawl sub-statistics exposed through {@link #getSubstats()}. */
    CrawlSubstats substats = new CrawlSubstats();

    /** Connection errors counted since the last reset. */
    protected int consecutiveConnectionErrors = 0;

    /** Lazily created set of credential avatars; not serialized. */
    private transient Set<CredentialAvatar> avatars = null;

    /**
     * Creates a server record for the given name.
     *
     * <p>The text after the last {@code ':'} is parsed as the port. If the
     * name contains no colon, or that text is not a valid integer, the port
     * is set to -1.
     *
     * @param str server name, optionally ending in {@code :port}
     */
    public CrawlServer(String str) {
        this.server = str;
        int lastIndexOf = this.server.lastIndexOf(":");
        if (lastIndexOf < 0) {
            this.port = -1;
            return;
        }
        try {
            this.port = Integer.parseInt(this.server.substring(lastIndexOf + 1));
        } catch (NumberFormatException e) {
            // Trailing text is not a number: treat as "no port given".
            this.port = -1;
        }
    }

    /**
     * @return the robots policy currently held for this server, possibly null
     */
    public RobotsExclusionPolicy getRobots() {
        return this.robots;
    }

    /**
     * Replaces the robots policy held for this server.
     *
     * @param robotsExclusionPolicy the new policy
     */
    public void setRobots(RobotsExclusionPolicy robotsExclusionPolicy) {
        this.robots = robotsExclusionPolicy;
    }

    /**
     * @return {@code "CrawlServer(" + name + ")"}
     */
    public String toString() {
        return "CrawlServer(" + this.server + ")";
    }

    /**
     * Updates the robots policy of this server from the outcome of a
     * robots.txt fetch.
     *
     * <p>The fetch time is always set to the current time. After that:
     * <ul>
     * <li>if nothing was fetched and fewer than {@link #MIN_ROBOTS_RETRIES}
     *     attempts were made, the robots information is marked invalid;</li>
     * <li>if the honoring policy type is 1, everything is allowed;</li>
     * <li>if nothing was fetched, the robots information is marked invalid;</li>
     * <li>if the response was not a 2xx success, everything is allowed;</li>
     * <li>otherwise a policy is built from the custom robots text (policy
     *     type 2) or from the recorded response body.</li>
     * </ul>
     * An {@link IOException} while building the policy results in an
     * allow-all policy and a localized error recorded on the URI.
     *
     * @param crawlURI the URI whose fetch result describes robots.txt
     */
    public void updateRobots(CrawlURI crawlURI) {
        RobotsHonoringPolicy robotsHonoringPolicy = this.settingsHandler.getOrder().getRobotsHonoringPolicy();
        this.robotsFetched = System.currentTimeMillis();

        // "Got something" = positive fetch status on an HTTP transaction.
        boolean gotSomething = crawlURI.getFetchStatus() > 0 && crawlURI.isHttpTransaction();
        if (!gotSomething && crawlURI.getFetchAttempts() < MIN_ROBOTS_RETRIES) {
            this.validRobots = false;
            return;
        }

        CrawlerSettings settings = getSettings(crawlURI);
        int type = robotsHonoringPolicy.getType(settings);
        // NOTE(review): 1 is presumably RobotsHonoringPolicy's "ignore" type - confirm.
        if (type == 1) {
            this.robots = RobotsExclusionPolicy.ALLOWALL;
            this.validRobots = true;
            return;
        }
        if (!gotSomething) {
            this.validRobots = false;
            return;
        }
        if (!crawlURI.is2XXSuccess()) {
            // Any non-2xx answer is treated as granting full access.
            this.robots = RobotsExclusionPolicy.ALLOWALL;
            this.validRobots = true;
            return;
        }

        ReplayInputStream replayInputStream = null;
        try {
            try {
                BufferedReader bufferedReader;
                // NOTE(review): 2 is presumably RobotsHonoringPolicy's "custom" type - confirm.
                if (type == 2) {
                    bufferedReader = new BufferedReader(new StringReader(robotsHonoringPolicy.getCustomRobots(settings)));
                } else {
                    replayInputStream = crawlURI.getHttpRecorder().getRecordedInput().getContentReplayInputStream();
                    replayInputStream.setToResponseBodyStart();
                    bufferedReader = new BufferedReader(new InputStreamReader(replayInputStream));
                }
                this.robots = RobotsExclusionPolicy.policyFor(settings, bufferedReader, robotsHonoringPolicy);
                this.validRobots = true;
            } finally {
                // Close exactly once, whether or not parsing succeeded.
                if (replayInputStream != null) {
                    replayInputStream.close();
                }
            }
        } catch (IOException e) {
            this.robots = RobotsExclusionPolicy.ALLOWALL;
            this.validRobots = true;
            crawlURI.addLocalizedError(getName(), e, "robots.txt parsing IOException");
        }
    }

    /**
     * @return wall-clock millis of the last {@link #updateRobots(CrawlURI)}
     *         call, or {@link #ROBOTS_NOT_FETCHED} if there was none
     */
    public long getRobotsFetchedTime() {
        return this.robotsFetched;
    }

    /**
     * @return the server name given at construction
     */
    public String getName() {
        return this.server;
    }

    /**
     * @return the port parsed from the server name, or -1 if none
     */
    public int getPort() {
        return this.port;
    }

    /**
     * Restores the serialized fields, then re-acquires the transient settings
     * handler from the controller of the current thread.
     *
     * @throws RuntimeException if the current thread is neither a
     *         {@code Checkpointer.CheckpointingThread} nor a {@code ToeThread}
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        Thread currentThread = Thread.currentThread();
        if (currentThread instanceof Checkpointer.CheckpointingThread) {
            this.settingsHandler = ((Checkpointer.CheckpointingThread) currentThread).getController().getSettingsHandler();
        } else if (currentThread instanceof ToeThread) {
            this.settingsHandler = ((ToeThread) currentThread).getController().getSettingsHandler();
        } else {
            // Without a settings handler later calls would fail anyway; fail early.
            throw new RuntimeException("CrawlServer must deserialize in a ToeThread or CheckpointingThread");
        }
        postDeserialize();
    }

    /**
     * Re-attaches the current honoring policy to a deserialized robots policy,
     * if one is present.
     */
    private void postDeserialize() {
        if (this.robots != null) {
            this.robots.honoringPolicy = this.settingsHandler.getOrder().getRobotsHonoringPolicy();
        }
    }

    /**
     * @return the settings handler in use, possibly null if never set
     */
    public SettingsHandler getSettingsHandler() {
        return this.settingsHandler;
    }

    /**
     * Looks up the settings applying to the given URI.
     *
     * @param candidateURI URI to look up settings for
     * @return the settings, or null if the URI's referenced host cannot be
     *         obtained because of a {@link URIException}
     */
    private CrawlerSettings getSettings(CandidateURI candidateURI) {
        try {
            return this.settingsHandler.getSettings(candidateURI.getUURI().getReferencedHost(), candidateURI.getUURI());
        } catch (URIException e) {
            // Deliberate best-effort: callers receive null settings.
            return null;
        }
    }

    /**
     * Sets the settings handler used for settings and honoring-policy lookups.
     *
     * @param settingsHandler the handler to use
     */
    public void setSettingsHandler(SettingsHandler settingsHandler) {
        this.settingsHandler = settingsHandler;
    }

    /** Adds one to the consecutive connection error counter. */
    public void incrementConsecutiveConnectionErrors() {
        this.consecutiveConnectionErrors++;
    }

    /** Sets the consecutive connection error counter back to zero. */
    public void resetConsecutiveConnectionErrors() {
        this.consecutiveConnectionErrors = 0;
    }

    /**
     * @return the internal set of credential avatars, or null if none was
     *         ever added (the set is returned directly, not copied)
     */
    public Set getCredentialAvatars() {
        return this.avatars;
    }

    /**
     * @return true if at least one credential avatar has been added
     */
    public boolean hasCredentialAvatars() {
        return this.avatars != null && this.avatars.size() > 0;
    }

    /**
     * Adds a credential avatar, creating the backing set on first use.
     *
     * @param credentialAvatar avatar to add
     */
    public void addCredentialAvatar(CredentialAvatar credentialAvatar) {
        if (this.avatars == null) {
            this.avatars = new HashSet<CredentialAvatar>();
        }
        this.avatars.add(credentialAvatar);
    }

    /**
     * @return whether the last {@link #updateRobots(CrawlURI)} call left a
     *         usable robots policy
     */
    public boolean isValidRobots() {
        return this.validRobots;
    }

    /**
     * Derives the server key for a URI.
     *
     * <p>The key is the URI's authority without userinfo. If there is no
     * authority, the current hierarchical path is used instead, but only when
     * it consists solely of word characters, dashes, underscores, dots and
     * colons. For {@code https} URIs whose key does not already end in
     * {@code :<digits>}, {@code ":443"} is appended, so that
     * {@code https://host/} and {@code https://host:443/} yield the same key
     * (and, presumably, so that the key differs from the plain-http one).
     *
     * @param candidateURI URI to derive the key from
     * @return the server key, or null if none can be derived
     * @throws URIException if the URI components cannot be read
     */
    public static String getServerKey(CandidateURI candidateURI) throws URIException {
        String key = candidateURI.getUURI().getAuthorityMinusUserinfo();
        if (key == null) {
            // No authority: fall back to the path, if it looks like a host name.
            key = candidateURI.getUURI().getCurrentHierPath();
            if (key != null && !key.matches("[-_\\w\\.:]+")) {
                key = null;
            }
        }
        if (key != null && candidateURI.getUURI().getScheme().equals("https") && !key.matches(".+:[0-9]+")) {
            // Single colon: the key must have the regular host:port shape.
            key = key + ":443";
        }
        return key;
    }

    /**
     * @return the crawl sub-statistics kept for this server
     */
    @Override
    public CrawlSubstats getSubstats() {
        return this.substats;
    }
}
