package org.archive.crawler.framework;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Logger;
import javax.management.AttributeNotFoundException;
import javax.management.MBeanException;
import javax.management.ReflectionException;
import javax.xml.transform.SourceLocator;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.archive.crawler.Heritrix;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlHost;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.deciderules.recrawl.IdenticalDigestDecideRule;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.settings.SimpleType;
import org.archive.crawler.settings.StringList;
import org.archive.crawler.settings.Type;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.io.ObjectPlusFilesInputStream;
import org.archive.io.WriterPool;
import org.archive.io.WriterPoolMember;
import org.archive.io.arc.ARCConstants;
import org.archive.net.UURIFactory;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/framework/WriterPoolProcessor.class */
public abstract class WriterPoolProcessor extends Processor implements CoreAttributeConstants, CrawlStatusListener, FetchStatusCodes {
    /** Logger named after the concrete runtime class; assigned in the two-argument constructor. */
    private final Logger logger;
    /** Setting name: whether files are compressed when written to disk. */
    public static final String ATTR_COMPRESS = "compress";
    /** Compression default; {@link #isCompressed()} falls back to the same value. */
    public static final boolean DEFAULT_COMPRESS = true;
    /** Setting name: prefix used when naming writer files. */
    public static final String ATTR_PREFIX = "prefix";
    /** Setting name: list of output directories. */
    public static final String ATTR_PATH = "path";
    /** Setting name: suffix tagged onto file names. */
    public static final String ATTR_SUFFIX = "suffix";
    /** Setting name: maximum size of each file. */
    public static final String ATTR_MAX_SIZE_BYTES = "max-size-bytes";
    /** Setting name: maximum number of active files in the pool. */
    public static final String ATTR_POOL_MAX_ACTIVE = "pool-max-active";
    /** Setting name: maximum wait on a pool element, in milliseconds. */
    public static final String ATTR_POOL_MAX_WAIT = "pool-max-wait";
    /** Setting name: total bytes to write before the crawl is asked to stop (zero means no limit). */
    public static final String ATTR_MAX_BYTES_WRITTEN = "total-bytes-to-write";
    /** Setting name: whether to skip records whose content digest matches the prior fetch. */
    public static final String ATTR_SKIP_IDENTICAL_DIGESTS = "skip-identical-digests";
    /** Leading part of the "unwritten:..." annotations that shouldWrite attaches to URIs it declines. */
    protected static final String ANNOTATION_UNWRITTEN = "unwritten";
    // NOTE(review): not referenced in the visible class body; getMaxSize() falls back to
    // ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE instead.
    private static final int DEFAULT_MAX_FILE_SIZE = 100000000;
    /** Default output directory list, used when the "path" setting cannot be read. */
    private static final String[] DEFAULT_PATH = {"crawl-store"};
    /** Writer pool; transient, and re-created through setupPool (see readObject and crawlCheckpoint). */
    private transient WriterPool pool;
    /** Running byte total that checkBytesWritten compares against the configured limit. */
    private long totalBytesWritten;
    /** Lazily built metadata list; populated by cacheMetadata and transient. */
    private transient List<String> cachedMetadata;

    /**
     * Convenience constructor that delegates to the two-argument constructor with the
     * fixed second argument "Pool of files processor".
     *
     * @param str first argument forwarded to the two-argument constructor
     *            (presumably the processor name -- confirm against Processor)
     */
    public WriterPoolProcessor(String str) {
        this(str, "Pool of files processor");
    }

    /**
     * Initializes the instance state and registers every configurable setting of this
     * processor together with its default value.
     *
     * <p>Defaults are supplied through {@code Boolean.TRUE}/{@code FALSE} and
     * {@code valueOf} rather than the boxing constructors, which are deprecated for
     * removal in current JDKs. All setting names and description texts are unchanged.
     *
     * <p>Note that the default for the "path" setting comes from {@link #getDefaultPath()},
     * which subclasses may override; that method therefore runs before the subclass
     * constructor body has executed.
     *
     * @param str  first argument handed to the superclass constructor
     * @param str2 second argument handed to the superclass constructor
     */
    public WriterPoolProcessor(String str, String str2) {
        super(str, str2);
        this.logger = Logger.getLogger(getClass().getName());
        this.pool = null;
        this.totalBytesWritten = 0L;
        this.cachedMetadata = null;
        addElementToDefinition(new SimpleType(ATTR_COMPRESS, "Compress files when writing to disk.", Boolean.TRUE)).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_PREFIX, "File prefix. The text supplied here will be used as a prefix naming writer files.  For example if the prefix is 'IAH', then file names will look like IAH-20040808101010-0001-HOSTNAME.arc.gz ...if writing ARCs (The prefix will be separated from the date by a hyphen).", WriterPoolMember.DEFAULT_PREFIX));
        addElementToDefinition(new SimpleType(ATTR_SUFFIX, "Suffix to tag onto files. If value is '${HOSTNAME}', will use hostname for suffix. If empty, no suffix will be added.", "${HOSTNAME}")).setOverrideable(false);
        addElementToDefinition(new SimpleType("max-size-bytes", "Max size of each file", Long.valueOf(ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE))).setOverrideable(false);
        addElementToDefinition(new StringList("path", "Where to files. Supply absolute or relative path.  If relative, files will be written relative to the disk-pathsetting. If more than one path specified, we'll round-robin dropping files to each.  This setting is safe to change midcrawl (You can remove and add new dirs as the crawler progresses).", getDefaultPath())).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_POOL_MAX_ACTIVE, "Maximum active files in pool. This setting cannot be varied over the life of a crawl.", Integer.valueOf(5))).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_POOL_MAX_WAIT, "Maximum time to wait on pool element (milliseconds). This setting cannot be varied over the life of a crawl.", Integer.valueOf(WriterPool.DEFAULT_MAXIMUM_WAIT))).setOverrideable(false);

        // The two remaining settings are additionally flagged as expert settings.
        Type totalBytesSetting = addElementToDefinition(new SimpleType("total-bytes-to-write", "Total file bytes to write to disk. Once the size of all files on disk has exceeded this limit, this processor will stop the crawler. A value of zero means no upper limit.", Long.valueOf(0L)));
        totalBytesSetting.setOverrideable(false);
        totalBytesSetting.setExpertSetting(true);

        Type skipDigestSetting = addElementToDefinition(new SimpleType(ATTR_SKIP_IDENTICAL_DIGESTS, "Whether to skip the writing of a record when URI history information is available and indicates the prior fetch had an identical content digest. Default is false.", Boolean.FALSE));
        skipDigestSetting.setOverrideable(true);
        skipDigestSetting.setExpertSetting(true);
    }

    /**
     * Returns the default output directories used for the "path" setting.
     *
     * <p>A fresh copy is returned on every call so that callers (or subclasses) cannot
     * alter the shared static default by writing into the returned array.
     *
     * @return a new array holding the default path entries
     */
    protected String[] getDefaultPath() {
        return DEFAULT_PATH.clone();
    }

    /**
     * Registers this processor as a crawl status listener, builds the writer pool with a
     * fresh serial counter and, when the controller reports a checkpoint recovery,
     * restores the serial number from the checkpoint.
     */
    @Override // org.archive.crawler.framework.Processor
    public synchronized void initialTasks() {
        // Listener registration comes first, then the pool is created.
        getSettingsHandler().getOrder().getController().addCrawlStatusListener(this);
        setupPool(new AtomicInteger());

        final boolean recovering = getSettingsHandler().getOrder().getController().isCheckpointRecover();
        if (!recovering) {
            return;
        }
        checkpointRecover();
    }

    /**
     * Returns the serial-number counter held by the current pool.
     *
     * @return the pool's serial counter, obtained through {@link #getPool()}
     */
    protected AtomicInteger getSerialNo() {
        return getPool().getSerialNo();
    }

    /**
     * Builds the writer pool for this processor. Called from initialTasks, crawlCheckpoint
     * and the deserialization finish task in this class.
     *
     * @param atomicInteger serial-number counter the new pool should start from
     *                      (presumably installed via setPool -- confirm in subclasses)
     */
    protected abstract void setupPool(AtomicInteger atomicInteger);

    /**
     * Processing hook inherited from Processor; the concrete behavior is left to subclasses.
     *
     * @param crawlURI the URI being processed
     */
    @Override // org.archive.crawler.framework.Processor
    protected abstract void innerProcess(CrawlURI crawlURI);

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Asks the controller to stop the crawl once the running byte total has reached the
     * configured maximum. A maximum of zero (or less) disables the check.
     */
    public void checkBytesWritten() {
        final long limit = getMaxToWrite();
        if (limit <= 0) {
            // No upper limit configured.
            return;
        }
        if (this.totalBytesWritten < limit) {
            return;
        }
        getController().requestCrawlStop("Finished - Maximum bytes (" + limit + ") written");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Decides whether a record should be written for the given URI. Whenever the answer
     * is {@code false}, an "unwritten:..." annotation naming the reason is added to the URI.
     *
     * <ul>
     *   <li>identical-digest skipping enabled and the digest is identical: not written;</li>
     *   <li>dns: written only when the fetch status equals 1;</li>
     *   <li>http/https: written only for a positive fetch status on an HTTP transaction;</li>
     *   <li>ftp: written only when the fetch status equals 200;</li>
     *   <li>any other scheme: not written.</li>
     * </ul>
     *
     * @param crawlURI URI under consideration
     * @return {@code true} if the record should be written
     */
    public boolean shouldWrite(CrawlURI crawlURI) {
        final Boolean skipIdentical = (Boolean) getUncheckedAttribute(crawlURI, ATTR_SKIP_IDENTICAL_DIGESTS);
        if (skipIdentical.booleanValue() && IdenticalDigestDecideRule.hasIdenticalDigest(crawlURI)) {
            crawlURI.addAnnotation("unwritten:identicalDigest");
            return false;
        }

        final String scheme = crawlURI.getUURI().getScheme().toLowerCase();
        final boolean statusAcceptable;
        if (scheme.equals("dns")) {
            statusAcceptable = crawlURI.getFetchStatus() == 1;
        } else if (scheme.equals("http") || scheme.equals("https")) {
            statusAcceptable = crawlURI.getFetchStatus() > 0 && crawlURI.isHttpTransaction();
        } else if (scheme.equals("ftp")) {
            statusAcceptable = crawlURI.getFetchStatus() == 200;
        } else {
            crawlURI.addAnnotation("unwritten:scheme");
            return false;
        }

        if (!statusAcceptable) {
            crawlURI.addAnnotation("unwritten:status");
            return false;
        }
        return true;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Resolves the address string to record for the given URI.
     *
     * <p>For dns URIs the value stored under
     * {@code CoreAttributeConstants.A_DNS_SERVER_IP_LABEL} is returned. For everything
     * else the host is looked up in the controller's server cache and its IP is returned.
     *
     * @param crawlURI URI whose host address is wanted
     * @return the address string
     * @throws NullPointerException if no host is cached for the URI, or the host has no IP
     */
    public String getHostAddress(CrawlURI crawlURI) {
        final boolean dnsScheme = crawlURI.getUURI().getScheme().toLowerCase().equals("dns");
        if (dnsScheme) {
            return crawlURI.getString(CoreAttributeConstants.A_DNS_SERVER_IP_LABEL);
        }

        final CrawlHost host = getController().getServerCache().getHostFor(crawlURI);
        if (host == null) {
            throw new NullPointerException("Crawlhost is null for " + crawlURI + UURIFactory.SPACE + ((Object) crawlURI.getVia()));
        }
        if (host.getIP() != null) {
            return host.getIP().getHostAddress();
        }

        // No address available: describe when (if ever) the lookup happened.
        final String messageHead = "Address is null for " + crawlURI + UURIFactory.SPACE + ((Object) crawlURI.getVia()) + ". Address ";
        final String lookupAge;
        if (host.getIpFetched() == -2) {
            lookupAge = "was never looked up.";
        } else {
            lookupAge = (System.currentTimeMillis() - host.getIpFetched()) + " ms ago.";
        }
        throw new NullPointerException(messageHead + lookupAge);
    }

    /**
     * Reads a setting through the superclass {@code getAttribute}, converting the checked
     * JMX exceptions into a logged warning.
     *
     * @param str name of the attribute to read
     * @return the attribute value, or {@code null} if reading it failed
     */
    public Object getAttributeUnchecked(String str) {
        try {
            return super.getAttribute(str);
        } catch (MBeanException mbeanFailure) {
            this.logger.warning(mbeanFailure.getLocalizedMessage());
        } catch (ReflectionException reflectionFailure) {
            this.logger.warning(reflectionFailure.getLocalizedMessage());
        } catch (AttributeNotFoundException missingAttribute) {
            this.logger.warning(missingAttribute.getLocalizedMessage());
        }
        // Reached only after one of the failures above has been logged.
        return null;
    }

    /**
     * Returns the configured maximum file size.
     *
     * @return the "max-size-bytes" setting, or
     *         {@code ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE} when it cannot be read
     */
    public long getMaxSize() {
        final Object configured = getAttributeUnchecked("max-size-bytes");
        if (configured != null) {
            return ((Long) configured).longValue();
        }
        return ARCConstants.DEFAULT_MAX_ARC_FILE_SIZE;
    }

    /**
     * Returns the configured file-name prefix.
     *
     * @return the "prefix" setting, or {@code WriterPoolMember.DEFAULT_PREFIX} when it
     *         cannot be read
     */
    public String getPrefix() {
        final Object configured = getAttributeUnchecked(ATTR_PREFIX);
        if (configured != null) {
            return (String) configured;
        }
        return WriterPoolMember.DEFAULT_PREFIX;
    }

    /**
     * Resolves the configured "path" entries to directories, creating any that are missing.
     *
     * <p>Relative entries are resolved against the controller's disk location. Directory
     * creation is best-effort: a failure is reported through the class logger (instead of
     * being ignored or printed to stderr) and the directory is still included in the
     * result, as before.
     *
     * @return one {@link File} per configured path, in configuration order
     */
    public List<File> getOutputDirs() {
        Object attributeUnchecked = getAttributeUnchecked("path");
        // Fall back to the built-in default when the setting cannot be read.
        List<String> configuredPaths = attributeUnchecked == null ? Arrays.asList(DEFAULT_PATH) : (StringList) attributeUnchecked;
        List<File> outputDirs = new ArrayList<File>();
        for (String path : configuredPaths) {
            File dir = new File(path);
            if (!dir.isAbsolute()) {
                dir = new File(getController().getDisk(), path);
            }
            if (!dir.exists()) {
                try {
                    // mkdirs() also returns false when another thread created the
                    // directory in the meantime, so re-check before complaining.
                    if (!dir.mkdirs() && !dir.isDirectory()) {
                        this.logger.warning("Failed to create output directory " + dir);
                    }
                } catch (SecurityException e) {
                    this.logger.warning("Not permitted to create output directory " + dir + ": " + e);
                }
            }
            outputDirs.add(dir);
        }
        return outputDirs;
    }

    /**
     * Tells whether files are written compressed.
     *
     * @return the "compress" setting, or {@code true} when it cannot be read
     */
    public boolean isCompressed() {
        final Object configured = getAttributeUnchecked(ATTR_COMPRESS);
        return configured == null || ((Boolean) configured).booleanValue();
    }

    /**
     * Returns the maximum number of active files in the pool.
     *
     * @return the "pool-max-active" setting, or 5 when it cannot be read
     */
    public int getPoolMaximumActive() {
        final Object configured = getAttributeUnchecked(ATTR_POOL_MAX_ACTIVE);
        return configured == null ? 5 : ((Integer) configured).intValue();
    }

    /**
     * Returns the maximum time to wait on a pool element.
     *
     * @return the "pool-max-wait" setting, or {@code WriterPool.DEFAULT_MAXIMUM_WAIT}
     *         when it cannot be read
     */
    public int getPoolMaximumWait() {
        final Object configured = getAttributeUnchecked(ATTR_POOL_MAX_WAIT);
        if (configured != null) {
            return ((Integer) configured).intValue();
        }
        return WriterPool.DEFAULT_MAXIMUM_WAIT;
    }

    /**
     * Returns the suffix to tag onto file names.
     *
     * <p>When the setting (after trimming) is the placeholder "${HOSTNAME}", the local
     * host name is substituted; if the local host cannot be determined the failure is
     * logged and "localhost.localdomain" is used. Any other value is returned untouched.
     *
     * @return the effective suffix
     */
    public String getSuffix() {
        final Object configured = getAttributeUnchecked(ATTR_SUFFIX);
        final String suffix = configured == null ? "${HOSTNAME}" : (String) configured;
        if (suffix == null || !suffix.trim().equals("${HOSTNAME}")) {
            return suffix;
        }
        try {
            return InetAddress.getLocalHost().getHostName();
        } catch (UnknownHostException e) {
            this.logger.severe("Failed getHostAddress for this host: " + e);
            return "localhost.localdomain";
        }
    }

    /**
     * Returns the total number of bytes this processor may write before requesting a stop.
     *
     * @return the "total-bytes-to-write" setting, or 0 (no limit) when it cannot be read
     */
    public long getMaxToWrite() {
        final Object configured = getAttributeUnchecked("total-bytes-to-write");
        return configured == null ? 0L : ((Long) configured).longValue();
    }

    /**
     * Crawl-status callback: closes the writer pool when the crawl is ending.
     *
     * @param str message supplied with the event; not used here
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlEnding(String str) {
        this.pool.close();
    }

    /**
     * Crawl-status callback; intentionally does nothing.
     *
     * @param str message supplied with the event; not used here
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlEnded(String str) {
    }

    /**
     * Crawl-status callback; intentionally does nothing.
     *
     * @param str message supplied with the event; not used here
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlStarted(String str) {
    }

    /**
     * Returns the name of the checkpoint state file: the runtime class name followed by
     * ".state".
     *
     * @return the state file name (no directory component)
     */
    protected String getCheckpointStateFile() {
        return getClass().getName() + ".state";
    }

    /**
     * Crawl-status callback for a checkpoint: persists the writer serial number, closes
     * the current pool and builds a new one that continues from that serial number.
     *
     * <p>If the pool currently has active members the serial counter is advanced first,
     * and the advanced value is the one saved and handed to the new pool.
     *
     * <p>The pool is rebuilt in a {@code finally} clause so that it happens exactly once
     * whether or not closing the old pool fails; a failure of {@code setupPool} itself is
     * propagated instead of triggering a second {@code setupPool} attempt.
     *
     * @param file checkpoint directory the serial number is saved into
     * @throws IOException if the serial number cannot be written
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlCheckpoint(File file) throws IOException {
        int serial = getSerialNo().get();
        if (this.pool.getNumActive() > 0) {
            serial = getSerialNo().incrementAndGet();
        }
        saveCheckpointSerialNumber(file, serial);
        try {
            this.pool.close();
        } finally {
            setupPool(new AtomicInteger(serial));
        }
    }

    /**
     * Crawl-status callback; intentionally does nothing.
     *
     * @param str message supplied with the event; not used here
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlPausing(String str) {
    }

    /**
     * Crawl-status callback; intentionally does nothing.
     *
     * @param str message supplied with the event; not used here
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlPaused(String str) {
    }

    /**
     * Crawl-status callback; intentionally does nothing.
     *
     * @param str message supplied with the event; not used here
     */
    @Override // org.archive.crawler.event.CrawlStatusListener
    public void crawlResuming(String str) {
    }

    /**
     * Custom deserialization hook: restores the serialized fields, then registers a finish
     * task on the stream that re-creates the (transient) writer pool with a fresh serial
     * counter.
     *
     * @param objectInputStream stream being read; cast to {@code ObjectPlusFilesInputStream}
     * @throws IOException            if the default field read fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    private void readObject(ObjectInputStream objectInputStream) throws IOException, ClassNotFoundException {
        objectInputStream.defaultReadObject();
        final ObjectPlusFilesInputStream filesStream = (ObjectPlusFilesInputStream) objectInputStream;
        final Runnable rebuildPool = new Runnable() {
            @Override // java.lang.Runnable
            public void run() {
                WriterPoolProcessor.this.setupPool(new AtomicInteger());
            }
        };
        filesStream.registerFinishTask(rebuildPool);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the current writer pool.
     *
     * @return the pool field; {@code null} until a pool has been installed
     */
    public WriterPool getPool() {
        return this.pool;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Installs the writer pool used by this processor.
     *
     * @param writerPool pool to store in the pool field
     */
    public void setPool(WriterPool writerPool) {
        this.pool = writerPool;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Returns the running total of bytes written.
     *
     * @return the current value of the byte counter
     */
    public long getTotalBytesWritten() {
        return this.totalBytesWritten;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Replaces the running total of bytes written.
     *
     * @param j new value of the byte counter
     */
    public void setTotalBytesWritten(long j) {
        this.totalBytesWritten = j;
    }

    /**
     * Restores the pool's serial counter from the checkpoint state file. Nothing happens
     * when no serial number could be loaded (signalled by -1).
     */
    protected void checkpointRecover() {
        final int restoredSerial = loadCheckpointSerialNumber();
        if (restoredSerial == -1) {
            return;
        }
        getSerialNo().set(restoredSerial);
    }

    /**
     * Reads the writer serial number saved in the checkpoint being recovered.
     *
     * <p>The state file holds a single 16-bit value. It is read as an <em>unsigned</em>
     * short so that serial numbers in the range 32768..65535 come back unchanged; a signed
     * read would turn them into negative numbers, and 65535 would be mistaken for the -1
     * "not available" result.
     *
     * <p>Read and close failures are reported through the class logger.
     *
     * @return the saved serial number (0..65535), or -1 if the state file is missing or
     *         cannot be read
     */
    protected int loadCheckpointSerialNumber() {
        File stateFile = new File(getSettingsHandler().getOrder().getController().getCheckpointRecover().getDirectory(), getCheckpointStateFile());
        if (!stateFile.exists()) {
            this.logger.info(stateFile.getAbsolutePath() + " doesn't exist so cannot restore Writer serial number.");
            return -1;
        }

        DataInputStream in = null;
        try {
            in = new DataInputStream(new FileInputStream(stateFile));
            return in.readUnsignedShort();
        } catch (IOException e) {
            // Also covers FileNotFoundException (file removed after the exists() check).
            this.logger.severe("Failed reading Writer serial number from " + stateFile.getAbsolutePath() + ": " + e);
            return -1;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    // The value (if any) has already been read; a failed close is not fatal.
                    this.logger.warning("Failed closing " + stateFile.getAbsolutePath() + ": " + e);
                }
            }
        }
    }

    /**
     * Writes the writer serial number into the checkpoint state file inside the given
     * directory.
     *
     * <p>The on-disk format is a single 16-bit value ({@code writeShort}), so only the low
     * 16 bits of the serial number are stored. The format is left unchanged to stay
     * readable by existing checkpoints; a warning is logged when the value does not fit.
     *
     * <p>The stream is closed exactly once, in a {@code finally} clause.
     *
     * @param file checkpoint directory that receives the state file
     * @param i    serial number to persist
     * @throws IOException if the state file cannot be created, written or closed
     */
    protected void saveCheckpointSerialNumber(File file, int i) throws IOException {
        if (i < 0 || i > 0xFFFF) {
            this.logger.warning("Writer serial number " + i + " does not fit in 16 bits; only its low 16 bits are saved in the checkpoint.");
        }
        DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(file, getCheckpointStateFile())));
        try {
            out.writeShort(i);
        } finally {
            out.close();
        }
    }

    /**
     * Returns the metadata list, building and caching it on first use.
     *
     * @return the cached list if present, otherwise whatever {@code cacheMetadata()} returns
     */
    public synchronized List<String> getMetadata() {
        if (this.cachedMetadata == null) {
            return cacheMetadata();
        }
        return this.cachedMetadata;
    }

    /**
     * Builds the metadata list and stores it in the cache field.
     *
     * <ul>
     *   <li>Already cached: the cached list is returned.</li>
     *   <li>No first-record stylesheet configured: a one-element list holding the empty
     *       string is cached and returned.</li>
     *   <li>Settings handler is not an {@code XMLSettingsHandler}: a warning is logged and
     *       {@code null} is returned without touching the cache.</li>
     *   <li>Otherwise the order file is transformed via {@code getFirstrecordBody} and the
     *       result is cached as a one-element list; if the order file is missing or
     *       unreadable an error is logged and the result is {@code null}.</li>
     * </ul>
     *
     * @return the metadata list, or {@code null} if it could not be produced
     */
    protected synchronized List<String> cacheMetadata() {
        if (this.cachedMetadata != null) {
            return this.cachedMetadata;
        }

        String stylesheet = getFirstrecordStylesheet();
        if (stylesheet == null || stylesheet.length() == 0) {
            List<String> emptyBody = new ArrayList<String>(1);
            emptyBody.add("");
            this.cachedMetadata = emptyBody;
            return this.cachedMetadata;
        }

        if (!(getSettingsHandler() instanceof XMLSettingsHandler)) {
            this.logger.warning("Expected xml settings handler (No warcinfo).");
            return null;
        }

        File orderFile = ((XMLSettingsHandler) getSettingsHandler()).getOrderFile();
        List<String> metadata = null;
        if (orderFile.exists() && orderFile.canRead()) {
            metadata = new ArrayList<String>(1);
            // NOTE: getFirstrecordBody returns null when the transform fails, in which
            // case the cached list holds a single null element (unchanged behavior).
            metadata.add(getFirstrecordBody(orderFile));
        } else {
            this.logger.severe("File " + orderFile.getAbsolutePath() + " does not exist or is not readable.");
        }
        this.cachedMetadata = metadata;
        return this.cachedMetadata;
    }

    /**
     * Returns the resource name of the stylesheet used to build the first-record body.
     * This base implementation returns {@code null}, which makes cacheMetadata produce a
     * single empty-string entry.
     *
     * @return the stylesheet resource name, or {@code null} for none
     */
    protected String getFirstrecordStylesheet() {
        return null;
    }

    /**
     * Produces the first-record body by running the given file through the stylesheet
     * named by {@code getFirstrecordStylesheet()}.
     *
     * <p>The transformer receives three parameters: "software" (Heritrix version string),
     * "ip" and "hostname" (both taken from the local host). Every failure is logged and
     * results in a {@code null} return value.
     *
     * <p>The input stream opened on {@code file} is always closed, and a
     * {@code TransformerException} without location information is logged without
     * dereferencing its (null) locator.
     *
     * @param file XML file to transform (the order file in cacheMetadata)
     * @return the transformation output, or {@code null} if the transformation failed
     */
    protected String getFirstrecordBody(File file) {
        String body = null;
        FileInputStream orderStream = null;
        try {
            Transformer transformer = TransformerFactory.newInstance().newTemplates(new StreamSource(getClass().getResourceAsStream(getFirstrecordStylesheet()))).newTransformer();
            transformer.setParameter("software", "Heritrix " + Heritrix.getVersion() + " http://crawler.archive.org");
            InetAddress localHost = InetAddress.getLocalHost();
            transformer.setParameter("ip", localHost.getHostAddress());
            transformer.setParameter("hostname", localHost.getHostName());
            orderStream = new FileInputStream(file);
            StringWriter output = new StringWriter();
            transformer.transform(new StreamSource(orderStream), new StreamResult(output));
            body = output.toString();
        } catch (FileNotFoundException e) {
            this.logger.severe("Failed transform, file not found " + e);
        } catch (UnknownHostException e2) {
            this.logger.severe("Failed transform, unknown host " + e2);
        } catch (TransformerConfigurationException e3) {
            this.logger.severe("Failed transform " + e3);
        } catch (TransformerException e4) {
            // getLocator() is null when the error carries no location information.
            SourceLocator locator = e4.getLocator();
            if (locator == null) {
                this.logger.severe("Transform error " + e4);
            } else {
                this.logger.severe("Transform error " + e4 + ", col " + locator.getColumnNumber() + ", line " + locator.getLineNumber() + ", publicId " + locator.getPublicId() + ", systemId " + locator.getSystemId());
            }
        } finally {
            if (orderStream != null) {
                try {
                    orderStream.close();
                } catch (IOException closeFailure) {
                    this.logger.warning("Failed closing " + file + ": " + closeFailure);
                }
            }
        }
        return body;
    }
}
