package org.archive.io.warc.v10;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.archive.io.ArchiveFileConstants;
import org.archive.io.ArchiveRecord;
import org.archive.io.ArchiveRecordHeader;
import org.archive.io.warc.WARCConstants;
import org.archive.util.LongWrapper;
import org.archive.util.anvl.ANVLRecord;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/io/warc/v10/WARCRecord.class */
/**
 * An {@link ArchiveRecord} whose header is read from a stream as one
 * {@code WARC/<version> ...} header line (terminated by CRLF) followed by a
 * block of named fields loaded through {@link ANVLRecord}.
 */
public class WARCRecord extends ArchiveRecord implements WARCConstants {
    /**
     * Shape of the header line: the literal {@code WARC/} plus a dotted version,
     * an all-digit field, one of the listed record-type keywords, a token, a
     * 14-digit field, another token and a free-form remainder, each separated by
     * runs of tabs or spaces.
     */
    private static final Pattern HEADER_LINE = Pattern.compile("^WARC/([0-9]+\\.[0-9]+(?:\\.[0-9]+)?)[\\t ]+([0-9]+)[\\t ]+(request|response|warcinfo|resource|metadata|revisit|conversion)[\\t ]+([^\\t ]+)[\\t ]+([0-9]{14})[\\t ]+([^\\t ]+)[\\t ]+(.+)$");

    /** Matches a single whitespace character; compiled once and shared by all records. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    /** Charset used to decode header-line bytes, both for parsing and for diagnostics. */
    private static final String LINE_CHARSET_NAME = "ISO-8859-1";

    /** Upper bound on the number of bytes {@link #readLine} reads while looking for CRLF. */
    private static final int LINE_BYTE_LIMIT = 102400;

    /**
     * Convenience constructor; delegates to the five-argument constructor with
     * {@code z == true} and {@code z2 == false}.
     *
     * @param inputStream stream positioned at the start of the record header
     * @param str value stored in the header under the reader-identifier key
     * @param j value stored in the header under the absolute-offset key
     * @throws IOException if the header cannot be read or parsed
     */
    public WARCRecord(InputStream inputStream, String str, long j) throws IOException {
        this(inputStream, str, j, true, false);
    }

    /**
     * Creates a record around an already-built header; nothing is parsed from the stream here.
     *
     * @param inputStream stream handed to the superclass
     * @param archiveRecordHeader header handed to the superclass
     * @throws IOException if the superclass constructor fails
     */
    public WARCRecord(InputStream inputStream, ArchiveRecordHeader archiveRecordHeader) throws IOException {
        super(inputStream, archiveRecordHeader, 0, true, false);
    }

    /**
     * Creates a record and parses its header from {@code inputStream}.
     *
     * @param inputStream stream positioned at the start of the record header
     * @param str value stored in the header under the reader-identifier key
     * @param j value stored in the header under the absolute-offset key
     * @param z forwarded to the superclass constructor (its meaning is not visible in this file)
     * @param z2 forwarded to the superclass constructor and to {@link #parseHeaders} as its
     *           {@code z} argument
     * @throws IOException if the header cannot be read or parsed
     */
    public WARCRecord(InputStream inputStream, String str, long j, boolean z, boolean z2) throws IOException {
        super(inputStream, null, 0, z, z2);
        // NOTE(review): this invokes an overridable method from a constructor; kept because
        // subclasses may rely on it.
        setHeader(parseHeaders(inputStream, str, j, z2));
    }

    /**
     * Reads the header line and the named fields from {@code inputStream}, advances this
     * record's position by the number of bytes consumed, and returns a header view backed by
     * the collected field map.
     *
     * @param inputStream stream positioned at the start of the header line
     * @param str value stored under the reader-identifier key
     * @param j value stored under the absolute-offset key
     * @param z passed through to {@link #parseHeaderLine}
     * @return header exposing the parsed fields; its content-begin value is the number of
     *         header bytes consumed here
     * @throws IOException if the header line or named fields cannot be read or parsed
     */
    protected ArchiveRecordHeader parseHeaders(final InputStream inputStream, String str, long j, boolean z) throws IOException {
        final Map<Object, Object> headerFields = new HashMap<Object, Object>();
        headerFields.put(ArchiveFileConstants.ABSOLUTE_OFFSET_KEY, Long.valueOf(j));
        headerFields.put(ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY, str);
        int headerLineBytes = parseHeaderLine(inputStream, headerFields, z);

        // Wrap the stream so that every byte the named-field parser pulls is counted.
        final LongWrapper namedFieldBytes = new LongWrapper(0L);
        InputStream countingStream = new InputStream() {
            @Override
            public int read() throws IOException {
                int b = inputStream.read();
                if (b != -1) {
                    namedFieldBytes.longValue++;
                }
                return b;
            }
        };
        parseNamedFields(countingStream, headerFields);

        final int contentOffset = (int) (headerLineBytes + namedFieldBytes.longValue);
        incrementPosition(contentOffset);

        return new ArchiveRecordHeader() {
            private final Map<Object, Object> fields = headerFields;
            private final int contentBegin = contentOffset;

            @Override
            public String getDate() {
                return (String) this.fields.get(ArchiveFileConstants.DATE_FIELD_KEY);
            }

            @Override
            public String getDigest() {
                return (String) this.fields.get(WARCConstants.NAMED_FIELD_CHECKSUM_LABEL);
            }

            @Override
            public String getReaderIdentifier() {
                return (String) this.fields.get(ArchiveFileConstants.READER_IDENTIFIER_FIELD_KEY);
            }

            @Override
            public Set getHeaderFieldKeys() {
                return this.fields.keySet();
            }

            @Override
            public Map getHeaderFields() {
                return this.fields;
            }

            @Override
            public Object getHeaderValue(String str2) {
                return this.fields.get(str2);
            }

            /** Returns the {@code "length"} field, or -1 when it is absent. */
            @Override
            public long getLength() {
                Object length = this.fields.get("length");
                return length == null ? -1L : ((Long) length).longValue();
            }

            @Override
            public String getMimetype() {
                return (String) this.fields.get("content-type");
            }

            /** Returns the absolute-offset field, or -1 when it is absent. */
            @Override
            public long getOffset() {
                Object offset = this.fields.get(ArchiveFileConstants.ABSOLUTE_OFFSET_KEY);
                return offset == null ? -1L : ((Long) offset).longValue();
            }

            @Override
            public String getRecordIdentifier() {
                return (String) this.fields.get(ArchiveFileConstants.RECORD_IDENTIFIER_FIELD_KEY);
            }

            @Override
            public String getUrl() {
                return (String) this.fields.get(ArchiveFileConstants.URL_FIELD_KEY);
            }

            @Override
            public String getVersion() {
                return (String) this.fields.get(ArchiveFileConstants.VERSION_FIELD_KEY);
            }

            @Override
            public int getContentBegin() {
                return this.contentBegin;
            }

            @Override
            public String toString() {
                return this.fields.toString();
            }
        };
    }

    /**
     * Reads one CRLF-terminated line, matches it against {@link #HEADER_LINE} and stores each
     * capture group in {@code map} under the corresponding {@code HEADER_FIELD_KEYS} entry.
     * The second field (all digits) is stored as a {@link Long}; every other field is stored
     * as a {@link String}.
     *
     * @param inputStream stream positioned at the start of the header line
     * @param map destination for the parsed fields
     * @param z passed through to {@link #readLine}
     * @return number of bytes in the line as returned by {@link #readLine}, CRLF included
     * @throws IOException if the line is empty, does not match the expected shape, or its
     *         numeric field does not fit in a {@code long}
     */
    protected int parseHeaderLine(InputStream inputStream, Map<Object, Object> map, boolean z) throws IOException {
        byte[] lineBytes = readLine(inputStream, z);
        if (lineBytes.length <= 2) {
            throw new IOException("No Header Line found");
        }
        // Drop the trailing CRLF before matching.
        String line = new String(lineBytes, 0, lineBytes.length - 2, LINE_CHARSET_NAME);
        Matcher matcher = HEADER_LINE.matcher(line);
        if (!matcher.matches()) {
            throw new IOException("Failed parse of Header Line: " + line);
        }
        for (int i = 0; i < HEADER_FIELD_KEYS.length; i++) {
            String value = matcher.group(i + 1);
            if (i == 1) {
                try {
                    map.put(HEADER_FIELD_KEYS[i], Long.valueOf(value));
                } catch (NumberFormatException e) {
                    // The pattern allows arbitrarily many digits; report overflow as a parse
                    // failure rather than letting an unchecked exception escape.
                    throw new IOException("Failed parse of Header Line: " + line, e);
                }
            } else {
                map.put(HEADER_FIELD_KEYS[i], value);
            }
        }
        return lineBytes.length;
    }

    /**
     * Reads bytes up to and including the first CRLF and returns all of them.
     *
     * <p>A CR that is followed by anything other than LF is rejected. When {@code z} is
     * {@code false}, whitespace bytes seen before the first non-whitespace byte are exempt
     * from that check (they are still included in the returned bytes).
     *
     * @param inputStream stream to read from
     * @param z whether the stray-CR check applies from the very first byte
     * @return every byte read, ending in CRLF
     * @throws IOException if the stream ends first, if a stray CR is found, or if
     *         102400 bytes are read without finding CRLF
     */
    protected byte[] readLine(InputStream inputStream, boolean z) throws IOException {
        boolean recordStarted = z;
        ByteArrayOutputStream line = new ByteArrayOutputStream(1024);
        int bytesRead = 0;
        int current = -1;
        while (true) {
            if (bytesRead++ >= LINE_BYTE_LIMIT) {
                throw new IOException("Read " + LINE_BYTE_LIMIT + " bytes without finding CRLF");
            }
            int previous = current;
            current = inputStream.read();
            if (current == -1) {
                throw new IOException("End-Of-Stream before CRLF:\n" + line.toString(LINE_CHARSET_NAME));
            }
            boolean previousWasCR = isCR((char) previous);
            if (previousWasCR && isLF((char) current)) {
                line.write(current);
                return line.toByteArray();
            }
            if (recordStarted || !Character.isWhitespace(current)) {
                if (previousWasCR) {
                    throw new IOException("CR in middle of Header:\n" + line.toString(LINE_CHARSET_NAME));
                }
                recordStarted = true;
            }
            line.write(current);
        }
    }

    /**
     * Loads the named fields from {@code inputStream} via {@link ANVLRecord#load} and copies
     * them into {@code map}.
     *
     * @param inputStream stream positioned just after the header line
     * @param map destination for the named fields
     * @throws IOException if the named fields cannot be read
     */
    protected void parseNamedFields(InputStream inputStream, Map<Object, Object> map) throws IOException {
        map.putAll(ANVLRecord.load(inputStream).asMap());
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is a carriage return or a line feed
     */
    public static boolean isCROrLF(char c) {
        return isCR(c) || isLF(c);
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is a carriage return
     */
    public static boolean isCR(char c) {
        return c == '\r';
    }

    /**
     * @param c character to test
     * @return {@code true} if {@code c} is a line feed
     */
    public static boolean isLF(char c) {
        return c == '\n';
    }

    /**
     * Returns the superclass's CDX mimetype with every whitespace character removed.
     *
     * @param archiveRecordHeader header passed to the superclass implementation
     * @return the whitespace-free mimetype
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // org.archive.io.ArchiveRecord
    public String getMimetype4Cdx(ArchiveRecordHeader archiveRecordHeader) {
        return WHITESPACE.matcher(super.getMimetype4Cdx(archiveRecordHeader)).replaceAll("");
    }
}
