package org.archive.crawler.writer;

import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.management.AttributeNotFoundException;
import javax.management.MBeanException;
import javax.management.ReflectionException;
import org.archive.crawler.datamodel.CoreAttributeConstants;
import org.archive.crawler.datamodel.CrawlHost;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.Processor;
import org.archive.crawler.settings.SimpleType;
import org.archive.io.ReplayInputStream;
import org.archive.net.UURIFactory;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/writer/Kw3WriterProcessor.class */
/**
 * Processor that writes each successfully fetched http/https URI to its own
 * multipart MIME file below a configurable top-level directory.
 *
 * <p>Each file consists of an "ArchiveInfo" part (collection, harvester, URL,
 * IP address, header/content lengths and digests, archive time, status code),
 * a "Header" part holding the recorded HTTP header, and, when the recorded
 * content is non-empty, a "Content" part holding the recorded body (truncated
 * to the configured maximum size).
 *
 * <p>Files are stored as
 * {@code <path>/<first two hex chars of md5(host)>/<host>/current/<md5(uri)>.<fetch-begin-seconds>}.
 */
public class Kw3WriterProcessor extends Processor implements CoreAttributeConstants, Kw3Constants {
    private static final long serialVersionUID = 7171448068924684594L;

    /** Setting name: top-level directory for archive files. */
    public static final String ATTR_PATH = "path";
    private static final String DEFAULT_PATH = "arcs";

    /** Setting name: maximum number of content bytes written per file. */
    public static final String ATTR_MAX_SIZE_BYTES = "max-size-bytes";
    public static final int DEFAULT_MAX_FILE_SIZE = 10000000;

    /** Setting name: whether permissions of newly created directories are changed. */
    public static final String ATTR_CHMOD = "chmod";

    /** Setting name: the mode handed to the {@code chmod} command. */
    public static final String ATTR_CHMOD_VALUE = "chmod-value";
    public static final String DEFAULT_CHMOD_VALUE = "777";

    /** Not referenced inside this class. */
    public static final String ATTR_MAX_BYTES_WRITTEN = "total-bytes-to-write";

    /** Setting name: collection name written into the ArchiveInfo part. */
    public static final String ATTR_COLLECTION = "collection";
    public static final String DEFAULT_COLLECTION_VALUE = "kw3";

    /** Setting name: harvester name written into the ArchiveInfo part. */
    public static final String ATTR_HARVESTER = "harvester";
    public static final String DEFAULT_HARVESTER_VALUE = "heritrix";

    private static final String COLON = ":";
    private static final String WS = UURIFactory.SPACE;
    private static final String LF = "\n";
    private static final String BOUNDARY_START = "KulturArw3_";

    private static final Logger logger = Logger.getLogger(Kw3WriterProcessor.class.getName());

    /** Resolved top-level directory; set in {@link #initialTasks()}. */
    private File arcsDir;
    private boolean chmod;
    private String chmodValue;
    private int maxSize;
    private String collection;
    private String harvester;

    /**
     * Creates the processor and registers its (non-overrideable) settings.
     *
     * @param str name handed to the {@code Processor} super constructor
     */
    public Kw3WriterProcessor(String str) {
        super(str, "Kw3Writer processor. A writer that writes files in the MIME format of The Swedish National Library.  See this class's javadoc forformat exposition.");
        addElementToDefinition(new SimpleType(ATTR_PATH, "Top-level directory for archive files.", DEFAULT_PATH)).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_COLLECTION, "Name of collection.", DEFAULT_COLLECTION_VALUE)).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_HARVESTER, "Name of the harvester that is used for the web harvesting.", DEFAULT_HARVESTER_VALUE)).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_MAX_SIZE_BYTES, "Max size of each file", Integer.valueOf(DEFAULT_MAX_FILE_SIZE))).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_CHMOD, "Should permissions be changed for the newly created dirs", Boolean.TRUE)).setOverrideable(false);
        addElementToDefinition(new SimpleType(ATTR_CHMOD_VALUE, "What should the permissions be set to. Given as three octal digits, as to the UNIX 'chmod' command. Ex. 777 for all permissions to everyone.", DEFAULT_CHMOD_VALUE)).setOverrideable(false);
    }

    /**
     * Reads the configured settings into fields. A relative {@code path} is
     * resolved against the controller's disk directory. Attribute lookup
     * failures are logged as warnings; fields assigned before the failure
     * keep their values, the remaining ones keep their defaults.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // org.archive.crawler.framework.Processor
    public void initialTasks() {
        try {
            String configuredPath = (String) getAttribute(ATTR_PATH);
            File dir = new File(configuredPath);
            if (!dir.isAbsolute()) {
                dir = new File(getController().getDisk(), configuredPath);
            }
            this.arcsDir = dir;
            this.collection = (String) getAttribute(ATTR_COLLECTION);
            this.harvester = (String) getAttribute(ATTR_HARVESTER);
            this.chmod = ((Boolean) getAttribute(ATTR_CHMOD)).booleanValue();
            this.chmodValue = (String) getAttribute(ATTR_CHMOD_VALUE);
            this.maxSize = ((Integer) getAttribute(ATTR_MAX_SIZE_BYTES)).intValue();
        } catch (ReflectionException e) {
            logger.log(Level.WARNING, "attribute error", e);
        } catch (MBeanException e) {
            logger.log(Level.WARNING, "attribute error", e);
        } catch (AttributeNotFoundException e) {
            logger.log(Level.WARNING, "attribute error", e);
        }
    }

    /**
     * Writes a MIME file for every successful URI whose scheme is http or
     * https (compared case-insensitively). I/O failures are logged as
     * warnings and do not propagate.
     *
     * @param crawlURI the URI being processed
     */
    @Override // org.archive.crawler.framework.Processor
    protected void innerProcess(CrawlURI crawlURI) {
        if (!crawlURI.isSuccess()) {
            return;
        }
        // equalsIgnoreCase on the literal makes a separate lower-casing step
        // unnecessary and tolerates a null scheme.
        String scheme = crawlURI.getUURI().getScheme();
        if (!"http".equalsIgnoreCase(scheme) && !"https".equalsIgnoreCase(scheme)) {
            return;
        }
        try {
            writeMimeFile(crawlURI);
        } catch (IOException e) {
            logger.log(Level.WARNING, "i/o error", e);
        }
    }

    /**
     * Writes the complete multipart file for one URI: ArchiveInfo part,
     * Header part, Content part and the closing boundary.
     *
     * <p>Both the replay stream and the output stream are always closed; the
     * output stream is closed even when closing the replay stream fails.
     *
     * @param crawlURI the fetched URI whose recorded input is written
     * @throws IOException if reading the recording or writing the file fails
     */
    protected void writeMimeFile(CrawlURI crawlURI) throws IOException {
        ReplayInputStream replayInputStream = null;
        OutputStream outputStream = null;
        try {
            String boundary = BOUNDARY_START + stringToMD5(crawlURI.toString());
            replayInputStream = crawlURI.getHttpRecorder().getRecordedInput().getReplayInputStream();
            outputStream = initOutputStream(crawlURI);
            writeArchiveInfoPart(boundary, crawlURI, replayInputStream, outputStream);
            writeHeaderPart(boundary, replayInputStream, outputStream);
            writeContentPart(boundary, crawlURI, replayInputStream, outputStream);
            outputStream.write(("\n--" + boundary + "--\n").getBytes());
        } finally {
            // Nested finally: a failing close() on the replay stream must not
            // leave the file handle open.
            try {
                if (replayInputStream != null) {
                    replayInputStream.close();
                }
            } finally {
                if (outputStream != null) {
                    outputStream.close();
                }
            }
        }
    }

    /**
     * Opens a buffered output stream on the target file for the given URI,
     * creating (and optionally chmod-ing) the directory hierarchy on demand.
     *
     * <p>The host directory name includes the port unless the port is 80 or
     * not positive.
     *
     * @param crawlURI the URI to derive the file location from
     * @return a buffered stream writing to the target file
     * @throws IOException if the directory cannot be created or the file
     *         cannot be opened
     */
    protected OutputStream initOutputStream(CrawlURI crawlURI) throws IOException {
        String uri = crawlURI.toString();
        int port = crawlURI.getUURI().getPort();
        String host = crawlURI.getUURI().getHost();
        if (port != 80 && port > 0) {
            host = host + ":" + port;
        }
        // Fetch start time is divided by 1000 before use as the file suffix.
        long fetchBeganSeconds = crawlURI.getLong(CoreAttributeConstants.A_FETCH_BEGAN_TIME) / 1000;
        File dir = new File(this.arcsDir, stringToMD5(host).substring(0, 2) + "/" + host + "/current");
        if (!dir.exists()) {
            // mkdirs() also returns false when someone else created the
            // directory in the meantime; only a genuinely missing directory
            // is an error.
            if (!dir.mkdirs() && !dir.isDirectory()) {
                throw new IOException("Could not create directory " + dir.getAbsolutePath());
            }
            if (this.chmod) {
                chmods(dir, this.arcsDir);
            }
        }
        File target = new File(dir, stringToMD5(uri) + "." + fetchBeganSeconds);
        return new FastBufferedOutputStream(new FileOutputStream(target));
    }

    /**
     * Writes the MIME preamble and the ArchiveInfo fields.
     *
     * <p>The header digest is computed over the raw header bytes as read
     * from the replay stream, so it is not affected by charset conversion.
     *
     * @param str the multipart boundary
     * @param crawlURI the URI being archived
     * @param replayInputStream recorded response; its header is read here
     * @param outputStream destination stream
     * @throws IOException if reading the header or writing fails
     */
    protected void writeArchiveInfoPart(String str, CrawlURI crawlURI, ReplayInputStream replayInputStream, OutputStream outputStream) throws IOException {
        String uri = crawlURI.toString();
        String hostAddress = getHostAddress(crawlURI);
        long headerSize = replayInputStream.getHeaderSize();
        long contentSize = replayInputStream.getContentSize();
        long archiveTimeSeconds = System.currentTimeMillis() / 1000;
        int fetchStatus = crawlURI.getFetchStatus();

        ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
        replayInputStream.readHeaderTo(headerBytes);
        String headerMd5 = bytesToMD5(headerBytes.toByteArray());

        // A missing content digest is written out as the text "null".
        Object rawDigest = crawlURI.getContentDigest();
        String contentMd5 = rawDigest == null ? null : getHexString((byte[]) rawDigest);

        StringBuilder info = new StringBuilder();
        info.append("MIME-version: 1.1").append(LF);
        info.append("Content-Type: multipart/mixed; boundary=").append(str).append(LF);
        info.append("HTTP-Part: ArchiveInfo").append(LF);
        info.append(Kw3Constants.COLLECTION_KEY).append(COLON).append(WS).append(this.collection).append(LF);
        info.append(Kw3Constants.HARVESTER_KEY).append(COLON).append(WS).append(this.harvester).append(LF);
        info.append(Kw3Constants.URL_KEY).append(COLON).append(WS).append(uri).append(LF);
        info.append(Kw3Constants.IP_ADDRESS_KEY).append(COLON).append(WS).append(hostAddress).append(LF);
        info.append(Kw3Constants.HEADER_LENGTH_KEY).append(COLON).append(WS).append(headerSize).append(LF);
        info.append(Kw3Constants.HEADER_MD5_KEY).append(COLON).append(WS).append(headerMd5).append(LF);
        info.append(Kw3Constants.CONTENT_LENGTH_KEY).append(COLON).append(WS).append(contentSize).append(LF);
        info.append(Kw3Constants.CONTENT_MD5_KEY).append(COLON).append(WS).append(contentMd5).append(LF);
        info.append(Kw3Constants.ARCHIVE_TIME_KEY).append(COLON).append(WS).append(archiveTimeSeconds).append(LF);
        info.append(Kw3Constants.STATUS_CODE_KEY).append(COLON).append(WS).append(fetchStatus).append(LF).append(LF);
        outputStream.write(info.toString().getBytes());
    }

    /**
     * Writes the Header part: boundary line, part headers, then the recorded
     * HTTP header copied from the replay stream.
     *
     * @param str the multipart boundary
     * @param replayInputStream recorded response supplying the header
     * @param outputStream destination stream
     * @throws IOException if reading or writing fails
     */
    protected void writeHeaderPart(String str, ReplayInputStream replayInputStream, OutputStream outputStream) throws IOException {
        StringBuilder partHeader = new StringBuilder();
        partHeader.append("--").append(str).append(LF);
        partHeader.append("Content-Type: text/plain; charset=\"US-ascii\"").append(LF);
        partHeader.append("HTTP-Part: Header").append(LF).append(LF);
        outputStream.write(partHeader.toString().getBytes());
        replayInputStream.readHeaderTo(outputStream);
    }

    /**
     * Writes the Content part. Nothing is written when the recorded content
     * size is zero. Content larger than the configured maximum is truncated
     * to that maximum and the truncation is logged at INFO level.
     *
     * @param str the multipart boundary
     * @param crawlURI the URI being archived (supplies the content type)
     * @param replayInputStream recorded response supplying the content
     * @param outputStream destination stream
     * @throws IOException if reading or writing fails
     */
    protected void writeContentPart(String str, CrawlURI crawlURI, ReplayInputStream replayInputStream, OutputStream outputStream) throws IOException {
        long contentSize = replayInputStream.getContentSize();
        if (contentSize == 0) {
            return;
        }
        String uri = crawlURI.toString();
        String contentType = crawlURI.getContentType();

        StringBuilder partHeader = new StringBuilder();
        partHeader.append("--").append(str).append(LF);
        partHeader.append("Content-Type: ").append(contentType).append(LF);
        partHeader.append("HTTP-Part: Content").append(LF).append(LF);
        outputStream.write(partHeader.toString().getBytes());

        if (contentSize <= this.maxSize) {
            replayInputStream.readContentTo(outputStream);
        } else {
            replayInputStream.readContentTo(outputStream, this.maxSize);
            logger.info(" Truncated url: " + uri + ", Size: " + contentSize + ", Content-type: " + contentType);
        }
    }

    /**
     * Returns the lower-case hex MD5 digest of the string's bytes in the
     * platform default charset.
     *
     * @param str text to digest
     * @return 32-character hex digest
     */
    private String stringToMD5(String str) {
        return bytesToMD5(str.getBytes());
    }

    /**
     * Returns the lower-case hex MD5 digest of the given bytes.
     *
     * @param bytes data to digest
     * @return 32-character hex digest
     * @throws IllegalStateException if the runtime offers no MD5 provider
     */
    private String bytesToMD5(byte[] bytes) {
        try {
            MessageDigest md5 = MessageDigest.getInstance("MD5");
            md5.update(bytes);
            return getHexString(md5.digest());
        } catch (NoSuchAlgorithmException e) {
            // Returning null here would silently produce "..._null" boundaries
            // and fail later with an unrelated NullPointerException.
            logger.log(Level.WARNING, "md5 error", e);
            throw new IllegalStateException("MD5 digest algorithm is not available", e);
        }
    }

    /**
     * Renders the bytes as lower-case hexadecimal, two digits per byte.
     *
     * @param bArr bytes to render
     * @return hex string of length {@code 2 * bArr.length}
     */
    private String getHexString(byte[] bArr) {
        StringBuilder hex = new StringBuilder(bArr.length * 2);
        for (byte b : bArr) {
            int unsigned = b & 255;
            if (unsigned < 0x10) {
                hex.append('0');
            }
            hex.append(Integer.toHexString(unsigned));
        }
        return hex.toString();
    }

    /**
     * Applies the configured mode to {@code file} and to each of its
     * ancestors up to, but not including, {@code file2}. The walk also stops
     * if the file system root is reached without encountering {@code file2}.
     *
     * @param file deepest directory to change
     * @param file2 directory at which the upward walk stops (paths are
     *        compared case-insensitively)
     */
    private void chmods(File file, File file2) {
        String stopPath = file2.getAbsolutePath();
        chmod(file, this.chmodValue);
        File dir = file.getParentFile();
        while (dir != null && !dir.getAbsolutePath().equalsIgnoreCase(stopPath)) {
            chmod(dir, this.chmodValue);
            dir = dir.getParentFile();
        }
    }

    /**
     * Runs the external {@code chmod} command on one file. Failures are
     * logged as warnings and never propagate. If the calling thread is
     * interrupted while waiting, its interrupt status is restored.
     *
     * @param file file or directory whose mode is changed
     * @param str mode argument for {@code chmod}
     */
    private void chmod(File file, String str) {
        if (str == null) {
            logger.log(Level.WARNING, "chmod failed: no mode configured for " + file.getAbsolutePath());
            return;
        }
        Process process = null;
        try {
            // Argument array instead of a single command string: the path is
            // passed as one argument even when it contains whitespace.
            process = Runtime.getRuntime().exec(new String[] {"chmod", str.trim(), file.getAbsolutePath()});
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                logger.log(Level.WARNING, "chmod failed: exit code " + exitCode + " for " + file.getAbsolutePath());
            }
        } catch (IOException e) {
            logger.log(Level.WARNING, "chmod failed", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.log(Level.WARNING, "chmod failed", e);
        } finally {
            if (process != null) {
                releaseProcess(process);
            }
        }
    }

    /** Closes all three streams of the process and destroys it. */
    private void releaseProcess(Process process) {
        try {
            process.getInputStream().close();
        } catch (IOException e) {
            logger.log(Level.WARNING, "chmod failed", e);
        }
        try {
            process.getOutputStream().close();
        } catch (IOException e) {
            logger.log(Level.WARNING, "chmod failed", e);
        }
        try {
            process.getErrorStream().close();
        } catch (IOException e) {
            logger.log(Level.WARNING, "chmod failed", e);
        }
        process.destroy();
    }

    /**
     * Looks up the IP address recorded for the URI's host in the controller's
     * server cache.
     *
     * @param crawlURI the URI whose host address is wanted
     * @return the textual IP address
     * @throws NullPointerException if no host entry exists or the entry has
     *         no IP address
     */
    private String getHostAddress(CrawlURI crawlURI) {
        CrawlHost hostFor = getController().getServerCache().getHostFor(crawlURI);
        if (hostFor == null) {
            throw new NullPointerException("Crawlhost is null for " + crawlURI + WS + crawlURI.getVia());
        }
        if (hostFor.getIP() == null) {
            // -2 is treated as the "never looked up" marker for the fetch time.
            String lookupInfo = hostFor.getIpFetched() == -2
                    ? "was never looked up."
                    : (System.currentTimeMillis() - hostFor.getIpFetched()) + " ms ago.";
            throw new NullPointerException("Address is null for " + crawlURI + WS + crawlURI.getVia() + ". Address " + lookupInfo);
        }
        return hostFor.getIP().getHostAddress();
    }
}
