package it.unimi.dsi.law.warc.io;

import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.law.bubing.util.BURL;
import it.unimi.dsi.law.warc.io.WarcRecord;
import it.unimi.dsi.law.warc.util.HttpResponse;
import it.unimi.dsi.law.warc.util.Util;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import org.apache.commons.lang.ArrayUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:it/unimi/dsi/law/warc/io/GZWarcRecord.class */
public class GZWarcRecord extends WarcRecord {
    /** Logger of this class; assigned in the static initializer. */
    private static final Logger LOGGER;
    // NOTE(review): ASSERTS and USE_POSITION_INSTEAD_OF_SKIP are not referenced anywhere in the
    // visible code (this file looks decompiled, so their uses may have been folded away) — confirm
    // before removing.
    public static final boolean ASSERTS = true;
    public static final boolean USE_POSITION_INSTEAD_OF_SKIP = false;
    /** Same value as the first byte of {@link #XFL_OS}. Not referenced by name in the visible code. */
    private static final byte XFL = 9;
    // GZip FLG bits. readGZHeader tests bits 2, 4, 8 and 16 of the flag byte using literal values;
    // the names below are presumably the RFC 1952 flag names — confirm.
    private static final byte FTEXT = 1;
    private static final byte FHCRC = 2;
    private static final byte FEXTRA = 4;
    private static final byte FNAME = 8;
    private static final byte FCOMMENT = 16;
    /** The first four bytes written for every record: magic numbers, compression method and flags. */
    private static final byte[] GZIP_START;
    /** The two bytes written right after the modification time. */
    private static final byte[] XFL_OS;
    /** Two-byte identifier of the extra subfield carrying the compressed/uncompressed skip lengths. */
    private static final byte[] SKIP_LEN;
    // The three lengths below mirror literals used by write() and read(): 8 is the payload size of
    // the skip-length subfield, 12 the total extra-field length, 8 the trailer size (CRC32 + ISIZE).
    private static final short SUB_LEN = 8;
    private static final short XLEN = 12;
    private static final short TRAILER_LEN = 8;
    /** Number of bytes of a written record that do not depend on name, comment or payload. */
    private static final int FIX_LEN;
    // readGZTrailer only finishes the record (and checks the CRC) when fewer than this many
    // uncompressed bytes are still unread; it uses the literal 16.
    private static final int PARTIAL_UNCOMPRESSED_READ_THRESHOLD = 16;
    /** Buffer size of the stream wrapping the inflated record, and of the scratch buffer used to drain it. */
    private static final int UNCOMPRESSED_RECORD_STREAM_BUFFER_SIZE = 1024;
    /** Size of {@link #headerBuffer}. */
    private static final int HEADER_BUFFER_SIZE = 16384;
    /** Stream position at which the last GZip header started, or -1 when no record is pending. */
    private long positionOfLastGZHeader;
    /** Compressed skip length found in the last parsed GZip header. */
    private long compressedDataLengthInLastGZHeader;
    /** Stream over the inflated bytes of the current record; set to null by skip(). */
    private FastBufferedInputStream uncompressedRecordStream;
    // Compiler-generated assertion switch that the decompiler surfaced as an explicit field; it is
    // assigned in the static initializer and guards the hand-expanded assertions in this class.
    static final /* synthetic */ boolean $assertionsDisabled;
    /** Raw-deflate (no zlib wrapper) compressor at level 9, reset for every written record. */
    private final Deflater deflater = new Deflater(9, true);
    /** Raw-inflate decompressor, reset for every record that is read. */
    private final Inflater inflater = new Inflater(true);
    /** In-memory buffer receiving the deflated record, so its size is known before the header is written. */
    private final FastByteArrayOutputStream compressedOutputStream = new FastByteArrayOutputStream();
    /** Scratch buffer shared by the header reading and writing code. */
    private final byte[] headerBuffer = new byte[HEADER_BUFFER_SIZE];
    /** The GZip header of the record most recently read or written. */
    public final GZHeader gzheader = new GZHeader();

    /* loaded from: input_file:it/unimi/dsi/law/warc/io/GZWarcRecord$GZHeader.class */
    /**
     * The GZip-level metadata of a record: the two skip lengths carried in the extra field,
     * the modification time and the optional name and comment.
     */
    public static class GZHeader {
        /** Compressed skip length, as read from (or written to) the skip-length extra subfield. */
        public int compressedSkipLength;
        /** Uncompressed skip length; the trailer's ISIZE is checked against this value when reading. */
        public int uncompressedSkipLength;
        /** Modification time; the writer stores the record creation date in seconds. */
        public int mtime;
        /** Bytes of the GZip name field, or {@code null} if none has been set. */
        public byte[] name;
        /** Bytes of the GZip comment field, or {@code null} if none has been set. */
        public byte[] comment;

        /**
         * Returns a one-line, human-readable dump of all the fields; missing name or comment
         * are rendered as {@code <null>}.
         */
        @Override
        public String toString() {
            MutableString mutableString = new MutableString();
            mutableString.append("compressedSkipLength: ");
            mutableString.append(this.compressedSkipLength);
            mutableString.append(", uncompressedSkipLength: ");
            mutableString.append(this.uncompressedSkipLength);
            mutableString.append(", mtime: ");
            mutableString.append(this.mtime);
            mutableString.append(", name: ");
            mutableString.append(this.name == null ? "<null>" : Util.getString(this.name));
            mutableString.append(", comment: ");
            mutableString.append(this.comment == null ? "<null>" : Util.getString(this.comment));
            return mutableString.toString();
        }

        /**
         * Hashes on the name only, which is consistent with {@link #equals(Object)}.
         *
         * @return the hash of the name rendered as a string, or 0 when there is no name.
         */
        @Override
        public int hashCode() {
            // A freshly created header has no name; equals() and toString() already accept that,
            // so hashing must not fail on it either.
            return this.name == null ? 0 : Util.getString(this.name).hashCode();
        }

        /**
         * Two headers are equal when all five fields match (arrays are compared by content).
         */
        @Override
        public boolean equals(Object obj) {
            if (!(obj instanceof GZHeader)) {
                return false;
            }
            GZHeader other = (GZHeader) obj;
            return this.compressedSkipLength == other.compressedSkipLength
                    && this.uncompressedSkipLength == other.uncompressedSkipLength
                    && this.mtime == other.mtime
                    && Arrays.equals(this.name, other.name)
                    && Arrays.equals(this.comment, other.comment);
        }
    }

    /**
     * Creates a record with no pending GZip member and a CRC32 as checksum.
     */
    public GZWarcRecord() {
        this.positionOfLastGZHeader = -1L;
        this.crc = new CRC32();
    }

    /**
     * Forgets the pending GZip member, if any: the next header read will not try to consume
     * the trailer of a previous record first.
     */
    @Override
    public void resetRead() {
        this.positionOfLastGZHeader = -1;
    }

    /**
     * Skips the next record without inflating it.
     *
     * <p>The header is parsed, the stream is moved to the trailer and the trailer is read
     * (no CRC check is possible, as nothing has been decompressed).
     *
     * @param fastBufferedInputStream the stream positioned at the start of a GZip member.
     * @return the compressed skip length found in the header, or -1 if the header reader
     *         reported the end of the stream.
     */
    @Override
    public long skip(FastBufferedInputStream fastBufferedInputStream) throws IOException, WarcRecord.FormatException {
        final long headerOutcome = readGZHeader(fastBufferedInputStream);
        if (headerOutcome == -1) {
            return -1L;
        }
        // A null stream tells the trailer reader that this record was skipped, not read.
        this.uncompressedRecordStream = null;
        readGZTrailer(fastBufferedInputStream);
        final long skipped = this.gzheader.compressedSkipLength;
        return skipped;
    }

    /**
     * Reads the next record: parses the GZip header, then lets the superclass parse the
     * record from the inflated payload.
     *
     * <p>The payload is exposed through a chain of streams: the compressed bytes are bounded
     * to the payload length, inflated, bounded again to the uncompressed skip length (the
     * checksum is passed to this outer layer) and finally buffered.
     *
     * @param fastBufferedInputStream the stream positioned at the start of a GZip member.
     * @return the compressed length found in the GZip header, or -1 if the header reader
     *         reported the end of the stream.
     */
    @Override
    public long read(FastBufferedInputStream fastBufferedInputStream) throws IOException, WarcRecord.FormatException {
        final long headerOutcome = readGZHeader(fastBufferedInputStream);
        if (headerOutcome == -1) {
            return -1L;
        }
        this.inflater.reset();
        this.crc.reset();

        // Whatever remains of the member after the header, minus the 8-byte trailer, is deflated data.
        final long headerBytes = fastBufferedInputStream.position() - this.positionOfLastGZHeader;
        final long compressedPayloadLength = this.gzheader.compressedSkipLength - headerBytes - 8;
        if (!$assertionsDisabled && compressedPayloadLength <= 0) {
            throw new AssertionError();
        }

        final BoundedCountingInputStream compressedPayload =
                new BoundedCountingInputStream(fastBufferedInputStream, compressedPayloadLength);
        final InflaterInputStream inflated = new InflaterInputStream(compressedPayload, this.inflater);
        final BoundedCountingInputStream uncompressedPayload =
                new BoundedCountingInputStream(inflated, this.gzheader.uncompressedSkipLength, this.crc);
        this.uncompressedRecordStream =
                new FastBufferedInputStream(uncompressedPayload, UNCOMPRESSED_RECORD_STREAM_BUFFER_SIZE);

        super.resetRead();
        super.read(this.uncompressedRecordStream);
        return this.compressedDataLengthInLastGZHeader;
    }

    /**
     * Writes this record as a single GZip member.
     *
     * <p>The record is first deflated into an in-memory buffer, so that the compressed size
     * can be stored in the header. The member is then emitted as: fixed start bytes,
     * modification time, XFL/OS, an extra field holding the skip-length subfield (compressed
     * and uncompressed lengths), the zero-terminated name (the record id), the zero-terminated
     * comment (digest, or record id when there is no digest, followed by a tab and the subject
     * URI), the deflated data, and the trailer (CRC32 and uncompressed length).
     *
     * <p>The comment is assembled in an array of exactly the required size, so a digest/URI
     * pair of any length is accepted (it used to be staged in the fixed-size header buffer,
     * which failed with an {@link ArrayIndexOutOfBoundsException} on long URIs).
     *
     * @param outputStream the stream the GZip member is written to.
     * @throws IOException if the underlying stream fails.
     */
    @Override
    public void write(OutputStream outputStream) throws IOException {
        // Deflate the whole record in memory; the superclass produces the uncompressed bytes.
        this.deflater.reset();
        this.compressedOutputStream.reset();
        DeflaterOutputStream deflaterOutputStream = new DeflaterOutputStream((OutputStream) this.compressedOutputStream, this.deflater);
        this.crc.reset();
        super.write(deflaterOutputStream);
        deflaterOutputStream.finish();

        // Name: the record id. Comment: "<digest or record id>\t<subject URI>".
        final byte[] recordId = Util.getASCIIBytes(this.header.recordId.toString());
        final String digest = this.header.anvlFields.get(HttpResponse.DIGEST_HEADER);
        this.gzheader.name = recordId;
        final byte[] commentPrefix = digest != null ? Util.getASCIIBytes(digest) : recordId;
        final byte[] subjectUri = BURL.toByteArray(this.header.subjectUri);
        final byte[] comment = new byte[commentPrefix.length + 1 + subjectUri.length];
        System.arraycopy(commentPrefix, 0, comment, 0, commentPrefix.length);
        comment[commentPrefix.length] = (byte) '\t';
        System.arraycopy(subjectUri, 0, comment, commentPrefix.length + 1, subjectUri.length);
        this.gzheader.comment = comment;

        // Total member size: fixed part, both zero-terminated strings, and the deflated data.
        this.gzheader.compressedSkipLength = FIX_LEN + this.gzheader.name.length + 1 + this.gzheader.comment.length + 1 + this.compressedOutputStream.length;
        // The mask is a no-op; the cast keeps the low 32 bits of the data length.
        this.gzheader.uncompressedSkipLength = (int) (this.header.dataLength & (-1));
        this.gzheader.mtime = (int) (this.header.creationDate.getTime() / 1000);

        outputStream.write(GZIP_START);
        writeLEInt(outputStream, this.gzheader.mtime);
        outputStream.write(XFL_OS);
        writeLEShort(outputStream, (short) 12); // total length of the extra field
        outputStream.write(SKIP_LEN);
        writeLEShort(outputStream, (short) 8); // payload length of the skip-length subfield
        writeLEInt(outputStream, this.gzheader.compressedSkipLength);
        writeLEInt(outputStream, this.gzheader.uncompressedSkipLength);
        outputStream.write(this.gzheader.name);
        outputStream.write(0);
        outputStream.write(this.gzheader.comment);
        outputStream.write(0);
        outputStream.write(this.compressedOutputStream.array, 0, this.compressedOutputStream.length);
        // Trailer: checksum followed by the uncompressed length.
        writeLEInt(outputStream, (int) (this.crc.getValue() & (-1)));
        writeLEInt(outputStream, this.gzheader.uncompressedSkipLength);
    }

    /**
     * Drains what is left of the current record and then reads the GZip trailer.
     *
     * @param fastBufferedInputStream the stream the current record was read from.
     * @throws IllegalStateException if there is no pending record, or the last record was
     *         skipped rather than read.
     */
    public void checkCRC(FastBufferedInputStream fastBufferedInputStream) throws IOException, WarcRecord.FormatException {
        final boolean recordPending = this.positionOfLastGZHeader != -1;
        if (!(recordPending && this.uncompressedRecordStream != null)) {
            throw new IllegalStateException();
        }
        consumeUncompressedRecord();
        readGZTrailer(fastBufferedInputStream);
    }

    /**
     * Returns the GZip header description and the record header description, one per line.
     */
    @Override
    public String toString() {
        final StringBuilder description = new StringBuilder();
        description.append(this.gzheader.toString());
        description.append("\n");
        description.append(this.header.toString());
        return description.toString();
    }

    /**
     * Parses a GZip member header from the given stream into {@link #gzheader}.
     *
     * <p>If a previous record is still pending, its trailer is consumed first. The position of
     * the header is recorded, then magic numbers, compression method, flags, modification
     * time, the extra field (looking for the skip-length subfield), name, comment and header
     * CRC are read as dictated by the flags. Name and comment are left untouched when the
     * corresponding flag is not set.
     *
     * <p>Truncated input is reported as a {@link WarcRecord.FormatException}; in particular a
     * stream ending inside the name or the comment no longer overruns the header buffer, and
     * names, comments and unknown extra subfields larger than the header buffer are accepted.
     *
     * @param fastBufferedInputStream the stream positioned at the start of a GZip member.
     * @return the compressed skip length found in the header, or -1 if the stream was already
     *         at its end.
     * @throws WarcRecord.FormatException if the header is malformed, truncated, or lacks the
     *         skip-length extra subfield.
     */
    private long readGZHeader(FastBufferedInputStream fastBufferedInputStream) throws IOException, WarcRecord.FormatException {
        final byte[] buffer = this.headerBuffer;
        // A pending member must be closed (its trailer consumed) before the next header starts.
        if (this.positionOfLastGZHeader != -1) {
            readGZTrailer(fastBufferedInputStream);
        }
        this.positionOfLastGZHeader = fastBufferedInputStream.position();

        // Magic numbers, compression method and flags.
        final int fixedBytesRead = readHeaderBytes(fastBufferedInputStream, buffer, 4);
        if (fixedBytesRead == 0) {
            return -1L;
        }
        if (fixedBytesRead < 4) {
            throw new WarcRecord.FormatException("Truncated GZip header: only " + fixedBytesRead + " byte(s) available");
        }
        if (buffer[0] != GZIP_START[0] || buffer[1] != GZIP_START[1]) {
            throw new WarcRecord.FormatException("Missing GZip magic numbers, found: " + ((int) buffer[0]) + " " + ((int) buffer[1]));
        }
        if (buffer[2] != 8) {
            throw new WarcRecord.FormatException("Unknown compression method: " + ((int) buffer[2]));
        }
        final byte flags = buffer[3];
        this.gzheader.mtime = readLEInt(fastBufferedInputStream);
        // Two bytes (written as XFL and OS) that carry no information we need.
        requireHeaderBytes(fastBufferedInputStream, buffer, 2);

        this.gzheader.compressedSkipLength = -1;
        if ((flags & 4) != 0) { // extra field present
            short remaining = readLEShort(fastBufferedInputStream);
            while (remaining > 0) {
                requireHeaderBytes(fastBufferedInputStream, buffer, 2); // subfield identifier
                final short subfieldLength = readLEShort(fastBufferedInputStream);
                if (buffer[0] == SKIP_LEN[0] && buffer[1] == SKIP_LEN[1]) {
                    final int compressedSkipLength = readLEInt(fastBufferedInputStream);
                    this.gzheader.compressedSkipLength = compressedSkipLength;
                    this.compressedDataLengthInLastGZHeader = compressedSkipLength;
                    this.gzheader.uncompressedSkipLength = readLEInt(fastBufferedInputStream);
                } else {
                    if (subfieldLength < 0) {
                        throw new WarcRecord.FormatException("Invalid extra subfield length: " + subfieldLength);
                    }
                    // Discard the unknown subfield in buffer-sized chunks, whatever its size.
                    int toDiscard = subfieldLength;
                    while (toDiscard > 0) {
                        final int chunk = Math.min(toDiscard, buffer.length);
                        requireHeaderBytes(fastBufferedInputStream, buffer, chunk);
                        toDiscard -= chunk;
                    }
                }
                // Each subfield accounts for its payload plus 4 bytes of identifier and length.
                remaining = (short) (remaining - (subfieldLength + 4));
            }
        }
        if (this.gzheader.compressedSkipLength < 0) {
            throw new WarcRecord.FormatException("Missing SL extra field, or negative compressed-skip-length");
        }
        if ((flags & 8) != 0) { // name present
            this.gzheader.name = readHeaderString(fastBufferedInputStream, "name");
        }
        if ((flags & 16) != 0) { // comment present
            this.gzheader.comment = readHeaderString(fastBufferedInputStream, "comment");
        }
        if ((flags & 2) != 0) { // header CRC present: read and ignored
            requireHeaderBytes(fastBufferedInputStream, buffer, 2);
        }
        return this.compressedDataLengthInLastGZHeader;
    }

    /** Reads up to {@code length} bytes into {@code buffer}, stopping early only at end of stream; returns the count. */
    private int readHeaderBytes(FastBufferedInputStream in, byte[] buffer, int length) throws IOException {
        int total = 0;
        while (total < length) {
            final int read = in.read(buffer, total, length - total);
            if (read <= 0) {
                break;
            }
            total += read;
        }
        return total;
    }

    /** Reads exactly {@code length} bytes into {@code buffer}, or fails with a format exception if the stream ends first. */
    private void requireHeaderBytes(FastBufferedInputStream in, byte[] buffer, int length) throws IOException, WarcRecord.FormatException {
        if (readHeaderBytes(in, buffer, length) < length) {
            throw new WarcRecord.FormatException("Unexpected end of stream inside the GZip header");
        }
    }

    /** Reads a zero-terminated header string (terminator excluded), failing if the stream ends before the terminator. */
    private byte[] readHeaderString(FastBufferedInputStream in, String what) throws IOException, WarcRecord.FormatException {
        byte[] buffer = this.headerBuffer;
        int length = 0;
        for (int b = in.read(); b != 0; b = in.read()) {
            if (b == -1) {
                throw new WarcRecord.FormatException("Unexpected end of stream inside the GZip header " + what);
            }
            if (length == buffer.length) {
                // Longer than the shared scratch buffer: continue in a private, larger copy.
                buffer = Arrays.copyOf(buffer, 2 * buffer.length);
            }
            buffer[length++] = (byte) b;
        }
        return ArrayUtils.subarray(buffer, 0, length);
    }

    /**
     * Moves the stream to the trailer of the pending GZip member and reads it.
     *
     * <p>Does nothing when no member is pending. Otherwise the stream is advanced to 8 bytes
     * before the end of the member, and the two trailer integers are read: the first is
     * compared with the computed checksum only if the uncompressed record could be drained
     * (that is, it was read and between 1 and 15 bytes were left unread); the second must
     * always equal the uncompressed skip length of the header. On success the member is no
     * longer pending.
     *
     * @param fastBufferedInputStream the stream the pending member is being read from.
     * @throws WarcRecord.FormatException on checksum or length mismatch.
     */
    private void readGZTrailer(FastBufferedInputStream fastBufferedInputStream) throws IOException, WarcRecord.FormatException {
        if (this.positionOfLastGZHeader == -1) {
            return;
        }

        boolean checksumIsComplete = false;
        if (this.uncompressedRecordStream == null) {
            LOGGER.debug("Omitting CRC check, since coming from a skip.");
        } else {
            final long unread = this.uncompressedRecordStream.length() - this.uncompressedRecordStream.position();
            if (!$assertionsDisabled && unread < 0) {
                throw new AssertionError();
            }
            if (0 < unread && unread < 16) {
                // Only a short tail is left: finish it so that the checksum covers the whole record.
                consumeUncompressedRecord();
                checksumIsComplete = true;
            } else {
                LOGGER.debug("Omitting CRC check, since the last read was partial.");
            }
        }

        // The trailer occupies the last 8 bytes of the member.
        final long trailerStart = (this.positionOfLastGZHeader + this.compressedDataLengthInLastGZHeader) - 8;
        if (!$assertionsDisabled && trailerStart < fastBufferedInputStream.position()) {
            throw new AssertionError();
        }
        fastBufferedInputStream.skip(trailerStart - fastBufferedInputStream.position());

        final int storedCrc = readLEInt(fastBufferedInputStream);
        if (checksumIsComplete) {
            final int computedCrc = (int) (this.crc.getValue() & (-1));
            if (storedCrc != computedCrc) {
                throw new WarcRecord.FormatException("CRC32 mismatch, expected: " + storedCrc + ", actual: " + computedCrc);
            }
            LOGGER.debug("CRC check OK.");
        }

        final int storedLength = readLEInt(fastBufferedInputStream);
        if (this.gzheader.uncompressedSkipLength != storedLength) {
            throw new WarcRecord.FormatException("Length mismatch between (warc) extra gzip fields uncompressed-skip-length (" + this.gzheader.uncompressedSkipLength + ") and ISIZE (" + storedLength + ")");
        }
        this.positionOfLastGZHeader = -1L;
    }

    /**
     * Reads and discards whatever is left of the current uncompressed record.
     *
     * <p>The scratch buffer is allocated once per call rather than once per read.
     *
     * @throws IOException if reading the uncompressed stream fails.
     */
    private void consumeUncompressedRecord() throws IOException {
        if (!$assertionsDisabled && this.uncompressedRecordStream.length() - this.uncompressedRecordStream.position() < 4) {
            throw new AssertionError();
        }
        final byte[] scratch = new byte[UNCOMPRESSED_RECORD_STREAM_BUFFER_SIZE];
        while (this.uncompressedRecordStream.read(scratch) != -1) {
            // Nothing to do: the bytes are only pulled through the stream chain.
        }
        this.uncompressedRecordStream.skip(Long.MAX_VALUE);
    }

    /**
     * Reads four bytes and assembles them into a little-endian integer.
     *
     * <p>End of stream is not detected: a missing byte contributes 0xFF.
     *
     * @param inputStream the stream to read from.
     * @return the integer whose least significant byte was read first.
     */
    private static int readLEInt(InputStream inputStream) throws IOException {
        int value = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            value |= (inputStream.read() & 0xFF) << shift;
        }
        return value;
    }

    /**
     * Reads two bytes and assembles them into a little-endian short.
     *
     * <p>Both bytes are masked before being combined: casting them to {@code byte} would
     * sign-extend a low byte of 0x80 or more and wipe out the high byte.
     *
     * @param inputStream the stream to read from.
     * @return the short whose least significant byte was read first.
     */
    private static short readLEShort(InputStream inputStream) throws IOException {
        final int low = inputStream.read() & 0xFF;
        final int high = inputStream.read() & 0xFF;
        return (short) (low | (high << 8));
    }

    /**
     * Writes an integer as four bytes, least significant byte first.
     *
     * @param outputStream the stream to write to.
     * @param i the integer to write.
     */
    private static void writeLEInt(OutputStream outputStream, int i) throws IOException {
        for (int shift = 0; shift < 32; shift += 8) {
            outputStream.write((byte) (i >> shift));
        }
    }

    /**
     * Writes a short as two bytes, least significant byte first.
     *
     * @param outputStream the stream to write to.
     * @param s the short to write.
     */
    private static void writeLEShort(OutputStream outputStream, short s) throws IOException {
        final byte low = (byte) s;
        final byte high = (byte) (s >> 8);
        outputStream.write(low);
        outputStream.write(high);
    }

    static {
        // Assertions are expanded by hand in this class, so the switch is computed explicitly.
        $assertionsDisabled = !GZWarcRecord.class.desiredAssertionStatus();
        LOGGER = LoggerFactory.getLogger(GZWarcRecord.class);
        // Magic numbers (0x1f, 0x8b), compression method 8, and flags 0x1c (bits 4, 8 and 16).
        GZIP_START = new byte[] {0x1f, (byte) 0x8b, 8, 0x1c};
        XFL_OS = new byte[] {9, (byte) 0xff};
        // Identifier of the skip-length extra subfield.
        SKIP_LEN = new byte[] {'s', 'l'};
        // Start bytes + 4 (modification time) + XFL/OS + 14 (extra field with its length) + 8 (trailer).
        FIX_LEN = GZIP_START.length + 4 + XFL_OS.length + 14 + 8;
    }
}
