package com.github.bottomlessarchive.warc.service;

import com.github.bottomlessarchive.warc.service.content.domain.WarcContentBlock;
import com.github.bottomlessarchive.warc.service.http.HttpParser;
import com.github.bottomlessarchive.warc.service.record.WarcRecordFactory;
import com.github.bottomlessarchive.warc.service.record.domain.WarcRecord;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Optional;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.input.BoundedInputStream;
import org.apache.http.HttpException;
import org.apache.http.message.HeaderGroup;

/* loaded from: input_file:com/github/bottomlessarchive/warc/service/WarcReader.class */
/**
 * Sequential reader for WARC records coming from an {@link InputStream} or a {@link URL}.
 *
 * <p>Records are returned one at a time by {@link #readRecord()}. The content block of a record
 * is backed by the shared underlying stream, so whatever the caller left unread is discarded
 * when the next record is requested.
 *
 * <p>The reader is {@link AutoCloseable}; closing it closes the underlying stream, including
 * the stream the reader opened itself when it was built from a {@link URL} or
 * {@link URLConnection}.
 */
public class WarcReader implements AutoCloseable {
    /** Charset used when the caller does not supply one. */
    public static final Charset DEFAULT_CHARSET = StandardCharsets.ISO_8859_1;

    /** Prefix (compared case-insensitively) every record's first line must start with. */
    private static final String VERSION_PREFIX = "warc/";

    /** Name of the header carrying the size of the record's content block. */
    private static final String CONTENT_LENGTH_HEADER = "Content-Length";

    /** Connect and read timeout, in milliseconds, applied to connections opened from a URL. */
    private static final int TIMEOUT_MILLIS = 120_000;

    private final WarcRecordFactory warcRecordFactory;
    private final InputStream input;
    private final Charset charset;

    /** Content stream of the most recently returned record; {@code null} before the first read. */
    private BoundedInputStream lastRecordStream;

    /**
     * Creates a reader for a GZIP-compressed WARC resource using {@link #DEFAULT_CHARSET}.
     *
     * @param url location of the WARC resource
     */
    public WarcReader(URL url) {
        this(url, DEFAULT_CHARSET);
    }

    /**
     * Creates a reader for a GZIP-compressed WARC resource.
     *
     * @param url     location of the WARC resource
     * @param charset charset used to decode the record lines and headers
     */
    public WarcReader(URL url, Charset charset) {
        this(url, charset, true);
    }

    /**
     * Creates a reader for a WARC resource, opening the connection with a
     * {@value #TIMEOUT_MILLIS} ms connect and read timeout.
     *
     * @param url        location of the WARC resource
     * @param charset    charset used to decode the record lines and headers
     * @param compressed {@code true} if the resource is GZIP-compressed
     * @throws WarcNetworkException if the connection or the stream cannot be opened
     */
    public WarcReader(URL url, Charset charset, boolean compressed) {
        this(buildConnection(url), charset, compressed);
    }

    /**
     * Creates a reader on top of an already configured connection.
     *
     * @param urlConnection connection to read the WARC resource from
     * @param charset       charset used to decode the record lines and headers
     * @param compressed    {@code true} if the resource is GZIP-compressed
     * @throws WarcNetworkException if the stream cannot be opened
     */
    public WarcReader(URLConnection urlConnection, Charset charset, boolean compressed) {
        // Compressed network streams are wrapped in AvailableInputStream before GZIP decoding.
        // NOTE(review): presumably this works around available() behaviour on network
        // streams - confirm against AvailableInputStream.
        this(compressed
                ? new AvailableInputStream(openConnection(urlConnection))
                : openConnection(urlConnection), charset, compressed);
    }

    /**
     * Creates a reader for a GZIP-compressed stream using {@link #DEFAULT_CHARSET}.
     *
     * @param inputStream stream containing the WARC data
     */
    public WarcReader(InputStream inputStream) {
        this(inputStream, DEFAULT_CHARSET);
    }

    /**
     * Creates a reader for a GZIP-compressed stream.
     *
     * @param inputStream stream containing the WARC data
     * @param charset     charset used to decode the record lines and headers
     */
    public WarcReader(InputStream inputStream, Charset charset) {
        this(inputStream, charset, true);
    }

    /**
     * Creates a reader for the provided stream.
     *
     * @param inputStream stream containing the WARC data
     * @param charset     charset used to decode the record lines and headers
     * @param compressed  {@code true} to decode the stream as GZIP, {@code false} to read it as is
     * @throws WarcNetworkException if the GZIP stream cannot be initialised
     */
    public WarcReader(InputStream inputStream, Charset charset, boolean compressed) {
        this.warcRecordFactory = new WarcRecordFactory();
        if (compressed) {
            try {
                this.input = new GZIPInputStream(inputStream);
            } catch (IOException e) {
                throw new WarcNetworkException("Unable to open WARC input stream!", e);
            }
        } else {
            this.input = inputStream;
        }
        this.charset = charset;
    }

    /**
     * Reads the next record from the stream.
     *
     * <p>If a record was returned earlier, the unread remainder of its content block and the
     * two lines that follow it are consumed first, so the stream is positioned at the start of
     * the next record.
     *
     * @return the next record, or an empty {@link Optional} when the stream has no more records
     * @throws WarcParsingException if skipping the previous record fails
     * @throws WarcFormatException  if the next record is malformed
     */
    public Optional<WarcRecord<WarcContentBlock>> readRecord() {
        if (this.lastRecordStream != null) {
            try {
                // A single skip() call may skip fewer bytes than requested, so drain in a loop.
                drain(this.lastRecordStream);
                HttpParser.readLine(this.input, this.charset);
                HttpParser.readLine(this.input, this.charset);
            } catch (IOException e) {
                throw new WarcParsingException("Unable to parse the next WARC record!", e);
            }
        }
        return parse();
    }

    /**
     * Parses one record starting at the current position of the stream: the version line, the
     * headers, and a content stream bounded by the {@code Content-Length} header.
     *
     * @return the parsed record, or an empty {@link Optional} if no version line could be read
     * @throws WarcFormatException if the version line, the headers or the content length are
     *                             missing or invalid
     */
    protected Optional<WarcRecord<WarcContentBlock>> parse() {
        try {
            String versionLine = HttpParser.readLine(this.input, this.charset);
            if (versionLine == null) {
                return Optional.empty();
            }
            // Locale-independent, allocation-free case-insensitive prefix check.
            if (!versionLine.regionMatches(true, 0, VERSION_PREFIX, 0, VERSION_PREFIX.length())) {
                throw new WarcFormatException("Warc version is missing");
            }
            HeaderGroup headers = new HeaderGroup();
            try {
                headers.setHeaders(HttpParser.parseHeaders(this.input, this.charset));
                try {
                    long contentLength = readContentLength(headers);
                    this.lastRecordStream = new BoundedInputStream(this.input, contentLength);
                    return Optional.of(
                            this.warcRecordFactory.createWarcRecord(headers, this.lastRecordStream));
                } catch (NumberFormatException e) {
                    throw new WarcFormatException("Cannot parse warc Content-Length");
                }
            } catch (IOException | HttpException e) {
                throw new WarcFormatException("Cannot parse warc headers");
            }
        } catch (IOException e) {
            throw new WarcFormatException("Illegal warc format");
        }
    }

    /**
     * Closes the underlying stream. No further records can be read afterwards.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        this.input.close();
    }

    /**
     * Extracts the content length of a record from its headers.
     *
     * @param headers headers of the record being parsed
     * @return the non-negative content length
     * @throws WarcFormatException   if the header is absent, has no value or is negative
     * @throws NumberFormatException if the value is not a valid {@code long}
     */
    private static long readContentLength(HeaderGroup headers) {
        if (!headers.containsHeader(CONTENT_LENGTH_HEADER)) {
            throw new WarcFormatException("Warc Content-Length header is missing");
        }
        String rawValue = headers.getFirstHeader(CONTENT_LENGTH_HEADER).getValue();
        if (rawValue == null) {
            throw new WarcFormatException("Warc Content-Length header is missing");
        }
        long contentLength = Long.parseLong(rawValue.trim());
        // A negative bound would make BoundedInputStream unbounded and the next skip would
        // consume the rest of the archive, so reject it as a format error.
        if (contentLength < 0) {
            throw new WarcFormatException("Cannot parse warc Content-Length");
        }
        return contentLength;
    }

    /**
     * Consumes the stream until it reports end of stream.
     *
     * @param stream stream to exhaust
     * @throws IOException if reading or skipping fails
     */
    private static void drain(InputStream stream) throws IOException {
        while (true) {
            long skipped = stream.skip(Long.MAX_VALUE);
            // skip() returning 0 does not imply end of stream; probe with read() to be sure.
            if (skipped <= 0 && stream.read() == -1) {
                return;
            }
        }
    }

    /**
     * Opens a connection to the URL and applies the default timeouts.
     *
     * @param url location to connect to
     * @return the configured connection
     * @throws WarcNetworkException if the connection cannot be opened
     */
    private static URLConnection buildConnection(URL url) {
        try {
            URLConnection connection = url.openConnection();
            connection.setConnectTimeout(TIMEOUT_MILLIS);
            connection.setReadTimeout(TIMEOUT_MILLIS);
            return connection;
        } catch (IOException e) {
            throw new WarcNetworkException("Unable to open WARC input stream!", e);
        }
    }

    /**
     * Obtains the input stream of a connection, translating I/O failures.
     *
     * @param urlConnection connection to read from
     * @return the connection's input stream
     * @throws WarcNetworkException if the stream cannot be opened
     */
    private static InputStream openConnection(URLConnection urlConnection) {
        try {
            return urlConnection.getInputStream();
        } catch (IOException e) {
            throw new WarcNetworkException("Unable to open WARC input stream!", e);
        }
    }
}
