package org.archive.crawler.postprocessor;

import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.settings.SimpleType;
import org.archive.util.IoUtils;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/postprocessor/LowDiskPauseProcessor.class */
/**
 * Processor that watches free disk space and requests a crawl pause when a
 * monitored mount runs low.
 *
 * <p>Each processed URI adds its content size to a running counter. Once the
 * counter (in KB) exceeds the {@link #ATTR_RECHECK_THRESHOLD} setting, the
 * output of {@code df -k} is parsed; if any mount listed in
 * {@link #ATTR_MONITOR_MOUNTS} reports fewer available kilobytes than
 * {@link #ATTR_PAUSE_THRESHOLD}, a crawl pause is requested via the controller
 * and a SEVERE message is logged.
 */
public class LowDiskPauseProcessor extends Processor {
    private static final long serialVersionUID = 3338337700768396302L;

    /** Setting name: whitespace-delimited list of mount points to monitor. */
    public static final String ATTR_MONITOR_MOUNTS = "monitor-mounts";
    /** Default for {@link #ATTR_MONITOR_MOUNTS}: nothing is monitored. */
    public static final String DEFAULT_MONITOR_MOUNTS = "";
    /** Setting name: available-space level (KB) below which a pause is requested. */
    public static final String ATTR_PAUSE_THRESHOLD = "pause-threshold-kb";
    /** Default for {@link #ATTR_PAUSE_THRESHOLD}, in KB. */
    public static final int DEFAULT_PAUSE_THRESHOLD = 512000;
    /** Setting name: amount of observed content (KB) between two {@code df} checks. */
    public static final String ATTR_RECHECK_THRESHOLD = "recheck-threshold-kb";
    /** Default for {@link #ATTR_RECHECK_THRESHOLD}, in KB. */
    public static final int DEFAULT_RECHECK_THRESHOLD = 204800;

    /**
     * Content size accumulated since the last check. It is divided by 1024
     * before being compared with the KB recheck threshold. Read and written
     * only while holding this instance's monitor.
     */
    protected int contentSinceCheck;

    private static final Logger logger = Logger.getLogger(LowDiskPauseProcessor.class.getName());

    /** Accepts {@code df} output only if it starts with the expected column header line. */
    public static final Pattern VALID_DF_OUTPUT = Pattern.compile("(?s)^Filesystem\\s+1K-blocks\\s+Used\\s+Available\\s+Use%\\s+Mounted on\\n.*");
    /**
     * Per-line extractor: group 1 is the number preceding the percentage
     * column (used as available KB), group 2 is the last token on the line
     * (used as the mount point).
     */
    public static final Pattern AVAILABLE_EXTRACTOR = Pattern.compile("(?m)\\s(\\d+)\\s+\\d+%\\s+(\\S+)$");

    /**
     * Creates the processor and registers its three settings.
     *
     * @param str name passed to the {@link Processor} superclass constructor
     */
    public LowDiskPauseProcessor(String str) {
        super(str, "LowDiskPause processor");
        this.contentSinceCheck = 0;
        addElementToDefinition(new SimpleType(ATTR_MONITOR_MOUNTS, "Space-delimited list of filessystem mounts whose 'available' space should be monitored via 'df' (if available).", "")).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_PAUSE_THRESHOLD, "When available space on any monitored mounts falls below this threshold, the crawl will be paused. ", Integer.valueOf(DEFAULT_PAUSE_THRESHOLD)));
        addElementToDefinition(new SimpleType(ATTR_RECHECK_THRESHOLD, "Available space via 'df' is rechecked after every increment of this much content (uncompressed) is observed. ", Integer.valueOf(DEFAULT_RECHECK_THRESHOLD))).setOverrideable(false);
    }

    /**
     * Adds the URI's content size to the running counter and, once the
     * recheck threshold is exceeded, checks available disk space and resets
     * the counter.
     *
     * @param crawlURI the URI that was just processed
     */
    @Override // org.archive.crawler.framework.Processor
    protected void innerProcess(CrawlURI crawlURI) {
        synchronized (this) {
            // Update and test the counter under the same lock so concurrent
            // callers cannot lose increments. Sum as long and clamp so the
            // narrowing to int cannot wrap around to a negative value.
            long accumulated = (long) this.contentSinceCheck + crawlURI.getContentSize();
            this.contentSinceCheck = (int) Math.min(accumulated, (long) Integer.MAX_VALUE);

            int recheckThresholdKb = ((Integer) getUncheckedAttribute(null, ATTR_RECHECK_THRESHOLD)).intValue();
            if (this.contentSinceCheck / 1024 > recheckThresholdKb) {
                checkAvailableSpace(crawlURI);
                this.contentSinceCheck = 0;
            }
        }
    }

    /**
     * Runs {@code df -k} and requests a crawl pause if any monitored mount has
     * less available space than the pause threshold. Stops at the first mount
     * found below the threshold.
     *
     * @param crawlURI URI on which an I/O failure while running {@code df} is recorded
     */
    private void checkAvailableSpace(CrawlURI crawlURI) {
        List<String> monitoredMounts = parseMonitoredMounts((String) getUncheckedAttribute(null, ATTR_MONITOR_MOUNTS));
        if (monitoredMounts.isEmpty()) {
            // Nothing to compare against, so do not fork a process at all.
            return;
        }
        try {
            String dfOutput = runDf();
            if (!VALID_DF_OUTPUT.matcher(dfOutput).matches()) {
                logger.severe("'df -k' output unacceptable for low-disk checking");
                return;
            }
            Matcher matcher = AVAILABLE_EXTRACTOR.matcher(dfOutput);
            while (matcher.find()) {
                String mount = matcher.group(2);
                if (!monitoredMounts.contains(mount)) {
                    continue;
                }
                long availableKb = Long.parseLong(matcher.group(1));
                int pauseThresholdKb = ((Integer) getUncheckedAttribute(null, ATTR_PAUSE_THRESHOLD)).intValue();
                if (availableKb < pauseThresholdKb) {
                    getController().requestCrawlPause();
                    logger.log(Level.SEVERE, "Low Disk Pause", availableKb + "K available on " + mount + " (below threshold " + pauseThresholdKb + "K)");
                    break;
                }
            }
        } catch (IOException e) {
            crawlURI.addLocalizedError(getName(), e, "problem checking available space via 'df'");
        }
    }

    /**
     * Splits the monitor-mounts setting into individual mount points.
     *
     * <p>Splits on runs of one or more whitespace characters. A pattern that
     * can match the empty string would instead split between every character
     * and break any mount name longer than one character.
     *
     * @param setting raw setting value; may be {@code null} or blank
     * @return the mount points, or an empty list when none are configured
     */
    private static List<String> parseMonitoredMounts(String setting) {
        String trimmed = (setting == null) ? "" : setting.trim();
        if (trimmed.length() == 0) {
            return Collections.<String>emptyList();
        }
        return Arrays.asList(trimmed.split("\\s+"));
    }

    /**
     * Executes {@code df -k} and returns everything it wrote to standard output.
     * The child process and its pipes are always released before returning.
     *
     * @return the captured standard output
     * @throws IOException if the command cannot be started or its output cannot be read
     */
    private static String runDf() throws IOException {
        Process df = Runtime.getRuntime().exec(new String[] {"df", "-k"});
        try {
            return IoUtils.readFullyAsString(df.getInputStream());
        } finally {
            closeQuietly(df.getInputStream());
            closeQuietly(df.getErrorStream());
            closeQuietly(df.getOutputStream());
            df.destroy();
        }
    }

    /** Closes a stream, ignoring any failure: this is best-effort cleanup only. */
    private static void closeQuietly(Closeable stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if releasing a pipe fails.
        }
    }
}
