package it.unimi.dsi.law.warc.io;

import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
import it.unimi.dsi.fastutil.io.MeasurableInputStream;
import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenCustomHashMap;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.law.bubing.util.BURL;
import it.unimi.dsi.law.warc.util.Util;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.EnumSet;
import java.util.Map;
import java.util.UUID;
import java.util.zip.CRC32;

/* loaded from: input_file:it/unimi/dsi/law/warc/io/WarcRecord.class */
public class WarcRecord {
    /** Debug switch; always {@code false} and not referenced by any code visible in this file. */
    public static final boolean DEBUG = false;
    /** Assertion switch; always {@code true} and not referenced by any code visible in this file. */
    public static final boolean ASSERTS = true;
    /** Always {@code false}; not referenced by any code visible in this file. */
    public static final boolean USE_POSITION_INSTEAD_OF_SKIP = false;
    /** Maps the ASCII byte representation of a content type to its {@link ContentType}; filled by the static initializer. */
    public static final Object2ObjectMap<byte[], ContentType> BYTE_REPRESENTATION_TO_CONTENT_TYPE;
    /** Maps the ASCII byte representation of a record type to its {@link RecordType}; filled by the static initializer. */
    public static final Object2ObjectMap<byte[], RecordType> BYTE_REPRESENTATION_TO_RECORD_TYPE;
    /** Size in bytes of the internal buffer allocated by the no-argument constructor. */
    public static final int DEFAULT_BUFFER_SIZE = 4096;
    /** Charset (UTF-8) passed to the ANVL header reader and writer. */
    private static final Charset ANVL_CHARSET;
    /** ASCII bytes of the format identifier ({@code warc/0.9}) that opens every header line. */
    public static final byte[] WARC_ID;
    /** ASCII bytes of the record-id scheme name ({@code uuid}). */
    public static final byte[] UUID_FIELD_NAME;
    /** The two-byte CR LF sequence. */
    public static final byte[] CRLF;
    /** Line terminators accepted when reading the header line; set to CR_LF only by the constructor. */
    final EnumSet<FastBufferedInputStream.LineTerminator> LINE_TERMINATOR;
    /**
     * Format ({@code yyyyMMddHHmmss}) of the creation date in the header line.
     * NOTE(review): this single SimpleDateFormat is shared by all instances and the JDK documents
     * SimpleDateFormat as not synchronized, so concurrent reads/writes would need external locking.
     */
    private static final SimpleDateFormat DATE_FORMAT;
    /** Scratch buffer used for the header line, for prebuffering the header on write, and for copying the block. */
    private final byte[] buffer;
    /** Tokenizer applied to the header line last read. */
    private final MinimalisticParser minimalisticParser;
    /** Stream position at which the last header line started, or -1 if no header has been read since the last reset. */
    private long positionOfLastHeader;
    /** Data length found in the last header line read. */
    private long dataLengthInLastHeader;
    /** Optional checksum; {@code null} by default. When non-null, {@code write} feeds it every byte it emits. */
    protected CRC32 crc;
    /** The header of this record; filled by {@code read} and used by {@code write}. */
    public final Header header;
    /** The content of this record; set by {@code read} and consumed by {@code write}. */
    public MeasurableInputStream block;
    /** Compiler-generated flag (kept by the decompiler): true when assertions are disabled for this class. */
    static final /* synthetic */ boolean $assertionsDisabled;

    /* loaded from: input_file:it/unimi/dsi/law/warc/io/WarcRecord$ContentType.class */
    /** The content types that may appear in a record header. */
    public enum ContentType {
        HTTP("message/http"),
        HTTPS("message/https");

        /** The ASCII bytes of the content-type name, exactly as written in the header line. */
        public final byte[] byteRepresentation;

        /**
         * Creates a content type.
         *
         * @param name the textual name, converted to bytes with {@code Util.getASCIIBytes}.
         */
        ContentType(String name) {
            byteRepresentation = Util.getASCIIBytes(name);
        }
    }

    /* loaded from: input_file:it/unimi/dsi/law/warc/io/WarcRecord$FormatException.class */
    /** Signals that the data being read does not conform to the expected record format. */
    public static class FormatException extends Exception {
        private static final long serialVersionUID = -1;

        /**
         * Creates a format exception with a detail message.
         *
         * @param str the detail message.
         */
        public FormatException(String str) {
            super(str);
        }

        /**
         * Creates a format exception with a detail message and the underlying cause,
         * so that the original stack trace is not lost when a lower-level error is wrapped.
         *
         * @param str the detail message.
         * @param cause the exception that triggered this one (may be {@code null}).
         */
        public FormatException(String str, Throwable cause) {
            super(str, cause);
        }
    }

    /* loaded from: input_file:it/unimi/dsi/law/warc/io/WarcRecord$Header.class */
    /**
     * The header of a record: the fields of the header line plus the ANVL fields that follow it.
     *
     * <p>Two headers are equal when their {@link #recordId}s are equal; headers whose record id
     * has not been set yet (i.e. is {@code null}) are equal to each other.
     */
    public static class Header {
        /** The data length declared in the header line. */
        public long dataLength;
        /** The record type. */
        public RecordType recordType;
        /** The subject URI. */
        public URI subjectUri;
        /** The creation date. */
        public Date creationDate;
        /** The content type. */
        public ContentType contentType;
        /** The record id; {@code null} until assigned. */
        public UUID recordId;
        /** The ANVL fields; keys are hashed with {@code Util.CASE_INSENSITIVE_STRING_HASH_STRATEGY}. */
        public final Map<String, String> anvlFields = new Object2ObjectOpenCustomHashMap(Util.CASE_INSENSITIVE_STRING_HASH_STRATEGY);

        /**
         * Copies every field of another header into this one. The ANVL fields are copied
         * entry by entry into this header's own map; the other fields are copied by reference.
         *
         * @param header the header to copy from.
         */
        public void copy(Header header) {
            if (header == this) {
                // Copying onto itself must be a no-op: clearing anvlFields first would lose them.
                return;
            }
            this.dataLength = header.dataLength;
            this.recordType = header.recordType;
            this.subjectUri = header.subjectUri;
            this.creationDate = header.creationDate;
            this.contentType = header.contentType;
            this.recordId = header.recordId;
            this.anvlFields.clear();
            this.anvlFields.putAll(header.anvlFields);
        }

        /**
         * Returns the hash code of the record id, or 0 if the record id is not set.
         */
        @Override
        public int hashCode() {
            return this.recordId == null ? 0 : this.recordId.hashCode();
        }

        /**
         * Compares headers by record id; safe to call when either record id is {@code null}.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof Header)) {
                return false;
            }
            final UUID otherId = ((Header) obj).recordId;
            return this.recordId == null ? otherId == null : this.recordId.equals(otherId);
        }

        /**
         * Returns a human-readable, comma-separated dump of all the fields.
         */
        @Override
        public String toString() {
            MutableString s = new MutableString();
            s.append("dataLength: ");
            s.append(this.dataLength);
            s.append(", recordType: ");
            s.append(this.recordType);
            s.append(", subjectUri: ");
            s.append(this.subjectUri);
            s.append(", creationDate: ");
            s.append(this.creationDate);
            s.append(", contentType: ");
            s.append(this.contentType);
            s.append(", recordId: ");
            s.append(this.recordId);
            s.append(", anvlFields: ");
            s.append(this.anvlFields);
            return s.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:it/unimi/dsi/law/warc/io/WarcRecord$MinimalisticParser.class */
    /**
     * A tiny tokenizer that walks a byte-array fragment one whitespace-delimited word at a time.
     *
     * <p>After {@link #setInput(byte[], int, int)} the current word is empty; each call to
     * {@link #positionAtNextWord()} moves the current word to the next run of non-whitespace
     * bytes. The {@code as...} methods expose the current word in various forms.
     */
    public static class MinimalisticParser {
        private static final boolean DEBUG = false;
        /** Index of the first byte of the current word. */
        private int start;
        /** Index just past the last byte of the current word. */
        private int end;
        /** Index just past the last byte of the input fragment. */
        private int limit;
        /** The array holding the input. */
        private byte[] buf;

        private MinimalisticParser() {
        }

        /**
         * Sets the whole array as input.
         *
         * @param bArr the input bytes.
         */
        public void setInput(byte[] bArr) {
            setInput(bArr, 0, bArr.length);
        }

        /**
         * Sets a fragment of an array as input and places the (empty) current word at its start.
         *
         * @param bArr the array holding the input.
         * @param offset index of the first input byte.
         * @param length number of input bytes starting at {@code offset}.
         */
        public void setInput(byte[] bArr, int offset, int length) {
            this.buf = bArr;
            this.start = offset;
            this.end = offset;
            // The scanning bound is an absolute index, so it must include the offset
            // (storing the bare length is only correct when offset is 0).
            this.limit = offset + length;
        }

        /**
         * Moves the current word to the next word: skips whitespace after the current word,
         * then extends up to (excluding) the next whitespace byte or the end of the input.
         * At the end of the input the current word becomes empty.
         */
        public void positionAtNextWord() {
            int pos = this.end;
            // Bytes are widened to int, so Character.isWhitespace(int) is the overload used.
            while (pos < this.limit && Character.isWhitespace(this.buf[pos])) {
                pos++;
            }
            this.start = pos;
            while (pos < this.limit && !Character.isWhitespace(this.buf[pos])) {
                pos++;
            }
            this.end = pos;
        }

        /**
         * Checks whether the input, from the start of the current word, begins with the given bytes.
         * The comparison is bounded by the end of the input, not by the end of the current word.
         *
         * @param bArr the prefix to look for.
         * @return true if all bytes of {@code bArr} match.
         */
        public boolean startsWith(byte[] bArr) {
            final int wanted = bArr.length;
            int matched = 0;
            while (matched < wanted && this.start + matched < this.limit && this.buf[this.start + matched] == bArr[matched]) {
                matched++;
            }
            return matched == wanted;
        }

        /**
         * Interprets the current word as a decimal number. No validation is performed:
         * non-digit bytes produce a meaningless value, an empty word yields 0, and values
         * beyond the {@code int} range wrap around.
         *
         * @return the accumulated value.
         */
        public int asInt() {
            int value = 0;
            for (int k = this.start; k < this.end; k++) {
                value = (value * 10) + (this.buf[k] - '0');
            }
            return value;
        }

        /**
         * Returns the current word as a string, as built by {@code Util.getString}.
         * (The misspelled method name is kept for compatibility with existing callers.)
         */
        public String asAsciiSting() {
            return Util.getString(this.buf, this.start, this.end - this.start);
        }

        /**
         * Returns a fresh copy of the bytes of the current word.
         */
        public byte[] asByteArray() {
            final int wordLength = this.end - this.start;
            byte[] copy = new byte[wordLength];
            System.arraycopy(this.buf, this.start, copy, 0, wordLength);
            return copy;
        }
    }

    /* loaded from: input_file:it/unimi/dsi/law/warc/io/WarcRecord$RecordType.class */
    /** The record types that may appear in a record header. */
    public enum RecordType {
        WARCINFO("warcinfo"),
        RESPONSE("response"),
        RESOURCE("resource"),
        REQUEST("request"),
        METADATA("metadata"),
        REVISIT("revisit"),
        CONVERSION("conversion"),
        CONTINUATION("continuation");

        /** The ASCII bytes of the record-type name, exactly as written in the header line. */
        public final byte[] byteRepresentation;

        /**
         * Creates a record type.
         *
         * @param name the textual name, converted to bytes with {@code Util.getASCIIBytes}.
         */
        RecordType(String name) {
            byteRepresentation = Util.getASCIIBytes(name);
        }
    }

    /**
     * Creates a record that uses the given array as its internal scratch buffer.
     * The new record has an empty header, no block, no checksum, and no header position remembered.
     *
     * @param bArr the array to be used as buffer (it is used directly, not copied).
     */
    public WarcRecord(byte[] bArr) {
        this.buffer = bArr;
        this.header = new Header();
        this.minimalisticParser = new MinimalisticParser();
        this.LINE_TERMINATOR = EnumSet.of(FastBufferedInputStream.LineTerminator.CR_LF);
        this.crc = null;
        this.positionOfLastHeader = -1L;
    }

    /**
     * Creates a record with an internal buffer of {@link #DEFAULT_BUFFER_SIZE} bytes.
     */
    public WarcRecord() {
        this(new byte[DEFAULT_BUFFER_SIZE]);
    }

    /**
     * Makes this record a copy of another one: the header fields are copied via
     * {@link Header#copy(Header)}, while the block is shared (the same stream instance
     * is referenced by both records afterwards).
     *
     * @param warcRecord the record to copy from.
     */
    public void copy(WarcRecord warcRecord) {
        final Header source = warcRecord.header;
        header.copy(source);
        block = warcRecord.block;
    }

    /**
     * Forgets the position of the last header read, so that the next read or skip
     * starts parsing at the current position of the stream it is given instead of first
     * advancing to the end of the previously read record.
     */
    public void resetRead() {
        positionOfLastHeader = -1L;
    }

    /**
     * Reads the next header line and then moves the stream past the whole record
     * without parsing the rest of the header or exposing the block.
     *
     * @param fastBufferedInputStream the stream to read from.
     * @return the data length declared by the skipped record, or -1 if no header line could be read.
     * @throws IOException if the underlying stream fails.
     * @throws FormatException if the header line is malformed.
     */
    public long skip(FastBufferedInputStream fastBufferedInputStream) throws IOException, FormatException {
        final boolean noMoreRecords = readHeaderLine(fastBufferedInputStream) == -1;
        if (noMoreRecords) {
            return -1L;
        }
        // The record ends dataLength bytes after the start of its header line.
        final long recordEnd = positionOfLastHeader + dataLengthInLastHeader;
        final long here = fastBufferedInputStream.position();
        if (!$assertionsDisabled && recordEnd < here) {
            throw new AssertionError();
        }
        fastBufferedInputStream.skip(recordEnd - here);
        return dataLengthInLastHeader;
    }

    /**
     * Reads the next record from the stream: parses the header line and the ANVL fields into
     * {@link #header}, and sets {@link #block} to a bounded view of the stream covering the
     * record content.
     *
     * <p>The header line is expected to contain, after the format id and the data length,
     * five whitespace-separated words: record type, subject URI, creation date, content type
     * and record id (the {@code uuid} scheme name, one separator character, then the UUID).
     * A record type or content type that is not in the lookup maps leaves the corresponding
     * header field {@code null}. If a {@link FormatException} is thrown, the header fields
     * parsed before the error have already been overwritten.
     *
     * @param fastBufferedInputStream the stream to read from.
     * @return the data length declared by the record, or -1 if no header line could be read.
     * @throws IOException if the underlying stream fails.
     * @throws FormatException if the header line is malformed, or if the declared data length
     *         is too small to hold the header that was actually read.
     */
    public long read(FastBufferedInputStream fastBufferedInputStream) throws IOException, FormatException {
        if (readHeaderLine(fastBufferedInputStream) == -1) {
            return -1L;
        }
        final MinimalisticParser parser = this.minimalisticParser;
        this.header.dataLength = this.dataLengthInLastHeader;

        parser.positionAtNextWord();
        this.header.recordType = BYTE_REPRESENTATION_TO_RECORD_TYPE.get(parser.asByteArray());

        parser.positionAtNextWord();
        this.header.subjectUri = BURL.parse(parser.asAsciiSting());

        parser.positionAtNextWord();
        try {
            this.header.creationDate = DATE_FORMAT.parse(parser.asAsciiSting());
        } catch (ParseException e) {
            final FormatException wrapped = new FormatException("Error parsing creation-date: " + e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }

        parser.positionAtNextWord();
        this.header.contentType = BYTE_REPRESENTATION_TO_CONTENT_TYPE.get(parser.asByteArray());

        parser.positionAtNextWord();
        final String recordIdWord = parser.asAsciiSting();
        if (!parser.startsWith(UUID_FIELD_NAME)) {
            throw new FormatException("Unknown type of record-id." + recordIdWord);
        }
        // The UUID starts after the scheme name and one separator character; a word that
        // stops right after the scheme name would make substring() throw an unchecked exception.
        final int uuidStart = UUID_FIELD_NAME.length + 1;
        if (recordIdWord.length() < uuidStart) {
            throw new FormatException("Error parsing record-id '" + recordIdWord + "'; missing UUID");
        }
        try {
            this.header.recordId = UUID.fromString(recordIdWord.substring(uuidStart));
        } catch (IllegalArgumentException e) {
            final FormatException wrapped = new FormatException("Error parsing record-id '" + recordIdWord + "'; " + e.getMessage());
            wrapped.initCause(e);
            throw wrapped;
        }

        this.header.anvlFields.clear();
        Util.readANVLHeaders(fastBufferedInputStream, this.header.anvlFields, ANVL_CHARSET);

        // What is left of the declared length after the header bytes consumed so far, minus the
        // 4 trailing bytes (write() closes every record with two CRLF pairs).
        final long headerBytes = fastBufferedInputStream.position() - this.positionOfLastHeader;
        final long blockLength = this.header.dataLength - headerBytes - 4;
        if (blockLength < 0) {
            // This depends on the input, so it is reported as a format error rather than asserted.
            throw new FormatException("Declared data-length " + this.header.dataLength
                    + " is too small for a header of " + headerBytes + " bytes");
        }
        this.block = new BoundedCountingInputStream(fastBufferedInputStream, blockLength);
        return this.dataLengthInLastHeader;
    }

    /**
     * Writes this record to the given stream.
     *
     * <p>The output is: the format id, a space, the total data length in decimal, a space,
     * the rest of the header (as prepared by {@code prebufferHeader}), the content of
     * {@link #block}, and finally two CRLF pairs. As a side effect {@code header.dataLength}
     * is set to the computed total length and {@link #block} is consumed. If {@link #crc}
     * is not {@code null} it is updated with exactly the bytes written.
     *
     * @param outputStream the stream to write to.
     * @throws IOException if reading the block or writing fails, or if the block ends before
     *         {@code block.length()} bytes have been read (in which case the header and a
     *         partial block have already been written).
     */
    public void write(OutputStream outputStream) throws IOException {
        // The header is staged in the scratch buffer first, because its size is needed to compute the data length.
        FastByteArrayOutputStream headerBytes = new FastByteArrayOutputStream(this.buffer);
        this.header.dataLength = prebufferHeader(headerBytes);
        byte[] lengthDigits = Util.getASCIIBytes(Long.toString(this.header.dataLength));

        outputStream.write(WARC_ID);
        outputStream.write(' ');
        outputStream.write(lengthDigits);
        outputStream.write(' ');
        outputStream.write(headerBytes.array, 0, headerBytes.length);
        if (this.crc != null) {
            this.crc.update(WARC_ID);
            this.crc.update(' ');
            this.crc.update(lengthDigits);
            this.crc.update(' ');
            this.crc.update(headerBytes.array, 0, headerBytes.length);
        }

        // From here on the scratch buffer is reused to copy the block (the header is already out).
        long remaining = this.block.length();
        while (remaining > 0) {
            int read = this.block.read(this.buffer, 0, (int) Math.min(remaining, this.buffer.length));
            if (read == -1) {
                // The declared length has already been written; a short block would silently
                // produce a record that does not match it.
                throw new IOException("Block ended " + remaining + " byte(s) before its declared length");
            }
            outputStream.write(this.buffer, 0, read);
            if (this.crc != null) {
                this.crc.update(this.buffer, 0, read);
            }
            remaining -= read;
        }

        outputStream.write(CRLF);
        outputStream.write(CRLF);
        if (this.crc != null) {
            this.crc.update(CRLF);
            this.crc.update(CRLF);
        }
    }

    /**
     * Returns the string representation of this record's header; the block is not included.
     */
    @Override
    public String toString() {
        final Header h = this.header;
        return h.toString();
    }

    /**
     * Writes into the given buffer the part of the header that follows the data length
     * (record type, subject URI, creation date, content type and record id separated by
     * spaces, a CRLF, the ANVL fields, and a closing CRLF) and computes the total data
     * length of the record.
     *
     * <p>The total length counts the format id, the buffered header, the block, 6 more bytes
     * (the two spaces around the length and the two trailing CRLF pairs emitted by
     * {@code write}), and the decimal digits of the total length itself.
     *
     * @param fastByteArrayOutputStream the buffer receiving the header bytes.
     * @return the total data length to be declared in the header line.
     * @throws IOException if writing to the buffer fails.
     */
    private long prebufferHeader(FastByteArrayOutputStream fastByteArrayOutputStream) throws IOException {
        fastByteArrayOutputStream.write(this.header.recordType.byteRepresentation);
        fastByteArrayOutputStream.write(' ');
        fastByteArrayOutputStream.write(BURL.toByteArray(this.header.subjectUri));
        fastByteArrayOutputStream.write(' ');
        fastByteArrayOutputStream.write(Util.getASCIIBytes(DATE_FORMAT.format(this.header.creationDate)));
        fastByteArrayOutputStream.write(' ');
        fastByteArrayOutputStream.write(this.header.contentType.byteRepresentation);
        fastByteArrayOutputStream.write(' ');
        fastByteArrayOutputStream.write(UUID_FIELD_NAME);
        fastByteArrayOutputStream.write(':');
        fastByteArrayOutputStream.write(Util.getASCIIBytes(this.header.recordId.toString()));
        fastByteArrayOutputStream.write(CRLF);
        Util.writeANVLHeaders(fastByteArrayOutputStream, this.header.anvlFields, ANVL_CHARSET);
        fastByteArrayOutputStream.write(CRLF);

        // Kept as a long: narrowing to int would truncate the length of records of 2 GiB or more.
        final long lengthWithoutDigits = WARC_ID.length + fastByteArrayOutputStream.length + this.block.length() + 6;
        // The length field counts its own digits; adding them may carry into one more digit.
        final int digits = Util.digits(lengthWithoutDigits);
        final int ownDigits = Util.digits(lengthWithoutDigits + ((long) digits)) == digits ? digits : digits + 1;
        return lengthWithoutDigits + ownDigits;
    }

    /**
     * Reads the next header line and parses its first two words: the format id and the data length.
     *
     * <p>If a header was read before (and not reset), the stream is first advanced to the end
     * of that record. Empty lines are skipped. On return the parser is positioned on the
     * data-length word, so that callers can go on with the remaining words of the line, and
     * {@code positionOfLastHeader} and {@code dataLengthInLastHeader} describe the new record.
     *
     * @param fastBufferedInputStream the stream to read from.
     * @return the declared data length (never negative), or -1 if the end of the stream was reached.
     * @throws IOException if the underlying stream fails.
     * @throws FormatException if the line does not start with the format id, or if the data
     *         length is missing, is not a sequence of decimal digits, or does not fit a {@code long}.
     */
    private long readHeaderLine(FastBufferedInputStream fastBufferedInputStream) throws IOException, FormatException {
        int readLine;
        if (this.positionOfLastHeader != -1) {
            // Move past whatever is left of the previous record.
            long previousEnd = this.positionOfLastHeader + this.dataLengthInLastHeader;
            if (!$assertionsDisabled && previousEnd < fastBufferedInputStream.position()) {
                throw new AssertionError();
            }
            fastBufferedInputStream.skip(previousEnd - fastBufferedInputStream.position());
        }
        int lineLength = 0;
        // Local alias: a line longer than the buffer is read into a grown copy, the field is not replaced.
        byte[] line = this.buffer;
        do {
            this.positionOfLastHeader = fastBufferedInputStream.position();
            while (true) {
                readLine = fastBufferedInputStream.readLine(line, lineLength, line.length - lineLength, this.LINE_TERMINATOR);
                if (readLine != line.length - lineLength) {
                    break;
                }
                // The available space was filled completely: the line may continue, so grow and read on.
                lineLength += readLine;
                line = ByteArrays.grow(line, line.length + 1);
            }
            if (readLine == -1) {
                return -1L;
            }
            lineLength += readLine;
        } while (lineLength == 0);
        this.minimalisticParser.setInput(line, 0, lineLength);
        this.minimalisticParser.positionAtNextWord();
        if (!this.minimalisticParser.startsWith(WARC_ID)) {
            throw new FormatException("Missing or incorrect warc-id.");
        }
        this.minimalisticParser.positionAtNextWord();
        this.dataLengthInLastHeader = parseDataLength(this.minimalisticParser.asByteArray());
        return this.dataLengthInLastHeader;
    }

    /**
     * Parses a non-negative decimal number made of ASCII digits only, rejecting empty input,
     * non-digit bytes and values that do not fit a {@code long}.
     */
    private static long parseDataLength(byte[] digits) throws FormatException {
        if (digits.length == 0) {
            throw new FormatException("Missing data-length.");
        }
        long value = 0;
        for (int k = 0; k < digits.length; k++) {
            final int digit = digits[k] - '0';
            if (digit < 0 || digit > 9) {
                throw new FormatException("Malformed data-length: unexpected byte " + digits[k] + " at offset " + k);
            }
            if (value > (Long.MAX_VALUE - digit) / 10) {
                throw new FormatException("Data-length too large.");
            }
            value = value * 10 + digit;
        }
        return value;
    }

    static {
        $assertionsDisabled = !WarcRecord.class.desiredAssertionStatus();
        BYTE_REPRESENTATION_TO_CONTENT_TYPE = new Object2ObjectOpenCustomHashMap(ByteArrays.HASH_STRATEGY);
        BYTE_REPRESENTATION_TO_RECORD_TYPE = new Object2ObjectOpenCustomHashMap(ByteArrays.HASH_STRATEGY);
        for (ContentType contentType : ContentType.values()) {
            BYTE_REPRESENTATION_TO_CONTENT_TYPE.put(contentType.byteRepresentation, contentType);
        }
        for (RecordType recordType : RecordType.values()) {
            BYTE_REPRESENTATION_TO_RECORD_TYPE.put(recordType.byteRepresentation, recordType);
        }
        ANVL_CHARSET = Charset.forName("UTF-8");
        WARC_ID = Util.getASCIIBytes("warc/0.9");
        UUID_FIELD_NAME = Util.getASCIIBytes("uuid");
        CRLF = new byte[]{13, 10};
        DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss");
    }
}
