package com.google.cloud.dataflow.sdk.io;

import com.google.cloud.dataflow.sdk.annotations.Experimental;
import com.google.cloud.dataflow.sdk.coders.AvroCoder;
import com.google.cloud.dataflow.sdk.io.BlockBasedSource;
import com.google.cloud.dataflow.sdk.io.Read;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.repackaged.com.google.common.base.Preconditions;
import com.google.cloud.dataflow.sdk.util.AvroUtils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.ReadableByteChannel;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;

@Experimental(Experimental.Kind.SOURCE_SINK)
/* loaded from: input_file:com/google/cloud/dataflow/sdk/io/AvroSource.class */
public class AvroSource<T> extends BlockBasedSource<T> {
    static final long DEFAULT_MIN_BUNDLE_SIZE = 128000;
    private final String readSchemaString;
    private final String fileSchemaString;
    private final Class<T> type;
    private final String codec;
    private final byte[] syncMarker;
    private transient AvroCoder<T> coder;
    private transient Schema fileSchema;
    private transient Schema readSchema;

    /* JADX INFO: Access modifiers changed from: package-private */
    @Experimental(Experimental.Kind.SOURCE_SINK)
    /* loaded from: input_file:com/google/cloud/dataflow/sdk/io/AvroSource$AvroBlock.class */
    public static class AvroBlock<T> extends BlockBasedSource.Block<T> {
        private final long numRecords;
        private T currentRecord;
        private long currentRecordIndex = 0;
        private final DatumReader<T> reader;
        private final BinaryDecoder decoder;

        /**
         * Wraps the raw bytes of one block in a stream that undoes the given compression codec.
         *
         * @param bArr the block's bytes exactly as they were read from the file
         * @param str the codec name: snappy, deflate, xz, bzip2, or the null codec (no compression)
         * @return a stream over the decoded block contents
         * @throws IOException if a decompressing stream cannot be opened over the bytes
         * @throws IllegalArgumentException if the codec name is not one of the supported values
         * @throws NullPointerException if {@code str} is null
         */
        private static InputStream decodeAsInputStream(byte[] bArr, String str) throws IOException {
            ByteArrayInputStream rawBytes = new ByteArrayInputStream(bArr);
            switch (str) {
                case DataFileConstants.SNAPPY_CODEC:
                    return new SnappyCompressorInputStream(rawBytes);
                case "deflate":
                    // nowrap=true: the data has no zlib header or checksum.
                    return new InflaterInputStream(rawBytes, new Inflater(true));
                case "xz":
                    return new XZCompressorInputStream(rawBytes);
                case "bzip2":
                    return new BZip2CompressorInputStream(rawBytes);
                case DataFileConstants.NULL_CODEC:
                    // Uncompressed: hand the bytes back untouched.
                    return rawBytes;
                default:
                    throw new IllegalArgumentException("Unsupported codec: " + str);
            }
        }

        AvroBlock(byte[] bArr, long j, AvroSource<T> avroSource) throws IOException {
            this.numRecords = j;
            this.reader = avroSource.createDatumReader();
            this.decoder = DecoderFactory.get().binaryDecoder(decodeAsInputStream(bArr, avroSource.getCodec()), (BinaryDecoder) null);
        }

        /**
         * Returns the record produced by the latest successful {@link #readNextRecord()} call, or
         * {@code null} if no record has been read yet.
         */
        @Override
        public T getCurrentRecord() {
            return currentRecord;
        }

        /**
         * Decodes the next record of this block, if any remain.
         *
         * @return {@code true} if a record was read and is now the current record; {@code false}
         *     once the block's record count has been reached
         * @throws IOException if decoding the record fails
         */
        @Override
        public boolean readNextRecord() throws IOException {
            boolean hasRemaining = currentRecordIndex < numRecords;
            if (hasRemaining) {
                // Passing null asks the reader for a fresh object rather than reusing one.
                currentRecord = reader.read(null, decoder);
                currentRecordIndex++;
            }
            return hasRemaining;
        }

        /**
         * Returns how much of this block has been read, as records read divided by records in the
         * block.
         *
         * <p>The division is done in floating point; dividing the two {@code long} counters directly
         * would truncate to 0 for every partially read block.
         *
         * @return a value in {@code [0.0, 1.0]}; {@code 1.0} for a block that holds no records
         */
        @Override
        public double getFractionOfBlockConsumed() {
            if (numRecords <= 0) {
                // Nothing to read means nothing is left; also avoids a division by zero.
                return 1.0;
            }
            return ((double) currentRecordIndex) / numRecords;
        }
    }

    @Experimental(Experimental.Kind.SOURCE_SINK)
    /* loaded from: input_file:com/google/cloud/dataflow/sdk/io/AvroSource$AvroReader.class */
    public static class AvroReader<T> extends BlockBasedSource.BlockBasedReader<T> {
        private AvroBlock<T> currentBlock;
        private long currentBlockOffset;
        private long currentBlockSizeBytes;
        private long currentOffset;
        private PushbackInputStream stream;
        private final byte[] readBuffer;
        private BinaryDecoder decoder;

        /* JADX INFO: Access modifiers changed from: package-private */
        /* loaded from: input_file:com/google/cloud/dataflow/sdk/io/AvroSource$AvroReader$Seeker.class */
        /**
         * Incremental matcher that looks for a fixed byte marker in data fed to it chunk by chunk.
         * It keeps a sliding window of the most recent bytes between calls, so a marker that is
         * split across two chunks is still found.
         */
        public static class Seeker {
            /** The byte sequence being searched for. */
            private byte[] marker;
            /** Sliding window holding the most recently seen bytes; same length as the marker. */
            private byte[] searchBuffer;
            /** Number of trailing bytes in the window that are real data since the last match. */
            private int available = 0;

            /**
             * @param bArr the marker to search for
             */
            public Seeker(byte[] bArr) {
                this.marker = bArr;
                this.searchBuffer = new byte[bArr.length];
            }

            /**
             * Feeds the first {@code i} bytes of {@code bArr} into the window.
             *
             * @param bArr chunk of input
             * @param i number of valid bytes at the start of the chunk
             * @return the index within {@code bArr} of the marker's last byte, or {@code -1} if the
             *     marker has not been completed by this chunk
             */
            public int find(byte[] bArr, int i) {
                final int windowSize = searchBuffer.length;
                int position = 0;
                while (position < i) {
                    // Slide the window left by one and append the incoming byte.
                    System.arraycopy(searchBuffer, 1, searchBuffer, 0, windowSize - 1);
                    searchBuffer[windowSize - 1] = bArr[position];
                    if (available < windowSize) {
                        available++;
                    }
                    if (windowHoldsMarker()) {
                        // Start from scratch so the next search cannot reuse these bytes.
                        available = 0;
                        return position;
                    }
                    position++;
                }
                return -1;
            }

            /** Compares the filled tail of the window with the marker (lengths must agree). */
            private boolean windowHoldsMarker() {
                ByteBuffer filledTail =
                        ByteBuffer.wrap(searchBuffer, searchBuffer.length - available, available);
                return filledTail.equals(ByteBuffer.wrap(marker));
            }
        }

        /**
         * Creates a reader for the given source with all offsets starting at zero.
         *
         * @param avroSource the source this reader reads from
         */
        public AvroReader(AvroSource<T> avroSource) {
            super(avroSource);
            // Scratch space for the two longs decoded at the start of each block in readNextBlock;
            // presumably 2 x 10 bytes, the longest variable-length long -- TODO confirm.
            readBuffer = new byte[20];
            currentOffset = 0L;
            currentBlockOffset = 0L;
            currentBlockSizeBytes = 0L;
        }

        /**
         * Returns the source being read, narrowed to {@link AvroSource}.
         */
        @Override
        public synchronized AvroSource<T> getCurrentSource() {
            AvroSource<T> source = (AvroSource<T>) super.getCurrentSource();
            return source;
        }

        /**
         * Advances to the next block in the stream.
         *
         * <p>Skips forward past the next sync marker, decodes the two longs that follow it (record
         * count, then payload size in bytes), reads the payload and exposes it as the current block.
         * Afterwards {@code currentBlockOffset} points at the start of the sync marker preceding the
         * block and {@code currentOffset} points just past the block's payload.
         *
         * @return {@code true} if a block was read; {@code false} if no bytes follow the position
         *     reached by the sync-marker search
         * @throws IOException if the stream fails, or if the header or the payload is truncated or
         *     declares an invalid size
         */
        @Override
        public boolean readNextBlock() throws IOException {
            byte[] syncMarker = getCurrentSource().getSyncMarker();
            this.currentOffset += advancePastNextSyncMarker(this.stream, syncMarker);
            // A block's offset includes the sync marker in front of it.
            this.currentBlockOffset = this.currentOffset - syncMarker.length;

            // A single read() may return fewer bytes than are available, so fill the buffer.
            int headerBytesRead = readUntilFullOrEof(this.stream, this.readBuffer);
            if (headerBytesRead <= 0) {
                return false;
            }
            this.decoder = DecoderFactory.get().binaryDecoder(this.readBuffer, this.decoder);
            long numRecords = this.decoder.readLong();
            long blockSize = this.decoder.readLong();

            // The decoder was built over the whole buffer, so whatever it has not consumed is
            // exactly the part of the buffer that lies beyond the header.
            int headerSize = this.readBuffer.length - this.decoder.inputStream().available();
            if (headerSize > headerBytesRead) {
                throw new IOException("Truncated Avro block header: needed " + headerSize
                        + " bytes but only " + headerBytesRead + " were available");
            }
            // Give back the bytes that were read past the header; they belong to the payload.
            this.stream.unread(this.readBuffer, headerSize, headerBytesRead - headerSize);

            if (blockSize < 0 || blockSize > Integer.MAX_VALUE) {
                throw new IOException("Invalid Avro block size: " + blockSize);
            }
            byte[] blockData = new byte[(int) blockSize];
            int dataBytesRead = readUntilFullOrEof(this.stream, blockData);
            if (dataBytesRead < blockData.length) {
                throw new IOException("Truncated Avro block: expected " + blockData.length
                        + " bytes but only " + dataBytesRead + " were available");
            }

            this.currentBlock = new AvroBlock<>(blockData, numRecords, getCurrentSource());
            this.currentBlockSizeBytes = blockSize;
            this.currentOffset += headerSize + blockSize;
            return true;
        }

        /**
         * Reads from {@code in} until {@code buffer} is full or the stream stops delivering bytes.
         *
         * @return the number of bytes stored at the start of {@code buffer}
         */
        private static int readUntilFullOrEof(InputStream in, byte[] buffer) throws IOException {
            int filled = 0;
            while (filled < buffer.length) {
                int n = in.read(buffer, filled, buffer.length - filled);
                if (n <= 0) {
                    break;
                }
                filled += n;
            }
            return filled;
        }

        /**
         * Returns the block produced by the latest successful {@link #readNextBlock()} call, or
         * {@code null} if none has been read yet.
         */
        @Override
        public AvroBlock<T> getCurrentBlock() {
            return currentBlock;
        }

        /**
         * Returns the offset recorded for the current block by {@link #readNextBlock()}: the position
         * just past the block's leading sync marker minus the marker's length.
         */
        @Override
        public long getCurrentBlockOffset() {
            return currentBlockOffset;
        }

        /**
         * Returns the payload size in bytes that the current block's header declared.
         */
        @Override
        public long getCurrentBlockSize() {
            return currentBlockSizeBytes;
        }

        /**
         * Wraps the channel in a stream that supports pushing bytes back.
         *
         * <p>The pushback capacity is the sync marker length plus the header buffer length.
         *
         * @param readableByteChannel the channel to read the file from
         * @return a pushback stream over the channel
         */
        private PushbackInputStream createStream(ReadableByteChannel readableByteChannel) {
            InputStream channelStream = Channels.newInputStream(readableByteChannel);
            int markerLength = getCurrentSource().getSyncMarker().length;
            int pushbackCapacity = markerLength + readBuffer.length;
            return new PushbackInputStream(channelStream, pushbackCapacity);
        }

        /* JADX INFO: Access modifiers changed from: protected */
        /**
         * Prepares this reader to read from the given channel: opens the pushback stream over it
         * and sets the current offset to the source's start offset.
         *
         * @param readableByteChannel the channel to read from
         * @throws IOException declared by the overridden method
         */
        @Override
        public void startReading(ReadableByteChannel readableByteChannel) throws IOException {
            stream = createStream(readableByteChannel);
            long startOffset = getCurrentSource().getStartOffset();
            currentOffset = startOffset;
        }

        /**
         * Consumes the stream up to and including the next occurrence of the sync marker.
         *
         * <p>Data is read in marker-sized chunks. When the marker ends inside a chunk, the bytes that
         * follow it in that chunk are pushed back so the stream is positioned directly after the
         * marker. If the stream ends first, everything has been consumed.
         *
         * @param pushbackInputStream the stream to advance
         * @param bArr the sync marker to look for
         * @return the net number of bytes the stream position moved forward
         * @throws IOException if reading from or pushing back into the stream fails
         */
        static long advancePastNextSyncMarker(PushbackInputStream pushbackInputStream, byte[] bArr) throws IOException {
            Seeker markerSeeker = new Seeker(bArr);
            byte[] chunk = new byte[bArr.length];
            long consumed = 0;
            while (true) {
                int chunkLength = pushbackInputStream.read(chunk);
                if (chunkLength < 0) {
                    // End of stream without finding the marker.
                    return consumed;
                }
                int markerEnd = markerSeeker.find(chunk, chunkLength);
                consumed += chunkLength;
                if (markerEnd >= 0) {
                    // Return the bytes read beyond the marker to the stream.
                    int surplus = chunkLength - (markerEnd + 1);
                    pushbackInputStream.unread(chunk, markerEnd + 1, surplus);
                    return consumed - surplus;
                }
                if (chunkLength == 0) {
                    // Nothing was delivered; stop instead of spinning.
                    return consumed;
                }
            }
        }

        /**
         * Bridge method (marked bridge/synthetic by the decompiler) that only forwards to the
         * superclass implementation of {@code getFractionConsumed()}.
         */
        @Override // com.google.cloud.dataflow.sdk.io.BlockBasedSource.BlockBasedReader, com.google.cloud.dataflow.sdk.io.OffsetBasedSource.OffsetBasedReader, com.google.cloud.dataflow.sdk.io.BoundedSource.BoundedReader
        public /* bridge */ /* synthetic */ Double getFractionConsumed() {
            return super.getFractionConsumed();
        }
    }

    /**
     * Creates a bounded read transform over the given file or pattern whose records are of type
     * {@code cls}. The read schema is the one {@link ReflectData} derives from the class, and the
     * default minimum bundle size is used.
     *
     * @param str the file name or pattern to read
     * @param cls the Java class of the records
     * @return a {@code Read.Bounded} wrapping the new source
     */
    public static <T> Read.Bounded<T> readFromFileWithClass(String str, Class<T> cls) {
        String schemaJson = ReflectData.get().getSchema(cls).toString();
        // Codec and sync marker are left null here; they are filled in per file later.
        AvroSource<T> source =
                new AvroSource<T>(str, DEFAULT_MIN_BUNDLE_SIZE, schemaJson, cls, null, null);
        return Read.from(source);
    }

    /**
     * Creates a source of {@link GenericRecord}s over the given file or pattern, with the default
     * minimum bundle size and no read schema, codec or sync marker set.
     *
     * @param str the file name or pattern to read
     * @return the new source
     */
    public static AvroSource<GenericRecord> from(String str) {
        final String noReadSchema = null;
        final String noCodec = null;
        final byte[] noSyncMarker = null;
        return new AvroSource<>(
                str, DEFAULT_MIN_BUNDLE_SIZE, noReadSchema, GenericRecord.class, noCodec, noSyncMarker);
    }

    /**
     * Returns a {@link GenericRecord} source like this one but reading with the given schema.
     * File pattern, minimum bundle size, codec and sync marker are carried over.
     *
     * @param str the read schema as a string
     * @return the new source
     */
    public AvroSource<GenericRecord> withSchema(String str) {
        String filePattern = getFileOrPatternSpec();
        long minBundleSize = getMinBundleSize();
        return new AvroSource<>(filePattern, minBundleSize, str, GenericRecord.class, codec, syncMarker);
    }

    /**
     * Returns a {@link GenericRecord} source like this one but reading with the given schema,
     * stored in its string form. File pattern, minimum bundle size, codec and sync marker are
     * carried over.
     *
     * @param schema the read schema
     * @return the new source
     */
    public AvroSource<GenericRecord> withSchema(Schema schema) {
        String filePattern = getFileOrPatternSpec();
        long minBundleSize = getMinBundleSize();
        String schemaJson = schema.toString();
        return new AvroSource<>(
                filePattern, minBundleSize, schemaJson, GenericRecord.class, codec, syncMarker);
    }

    /**
     * Returns a source like this one but producing records of type {@code cls}, reading with the
     * schema {@link ReflectData} derives from that class. File pattern, minimum bundle size, codec
     * and sync marker are carried over.
     *
     * @param cls the Java class of the records
     * @return the new source
     */
    public <X> AvroSource<X> withSchema(Class<X> cls) {
        String filePattern = getFileOrPatternSpec();
        long minBundleSize = getMinBundleSize();
        String schemaJson = ReflectData.get().getSchema(cls).toString();
        return new AvroSource<>(filePattern, minBundleSize, schemaJson, cls, codec, syncMarker);
    }

    /**
     * Returns a source like this one but with the given minimum bundle size. File pattern, read
     * schema, record type, codec and sync marker are carried over.
     *
     * @param j the minimum bundle size
     * @return the new source
     */
    public AvroSource<T> withMinBundleSize(long j) {
        String filePattern = getFileOrPatternSpec();
        return new AvroSource<>(filePattern, j, readSchemaString, type, codec, syncMarker);
    }

    /**
     * Creates a source over a file or pattern. The file schema is left unset.
     *
     * @param str the file name or pattern
     * @param j the minimum bundle size
     * @param str2 the read schema as a string; may be null
     * @param cls the Java class of the records
     * @param str3 the codec name; may be null
     * @param bArr the sync marker; may be null
     */
    private AvroSource(String str, long j, String str2, Class<T> cls, String str3, byte[] bArr) {
        super(str, j);
        type = cls;
        readSchemaString = str2;
        fileSchemaString = null;
        codec = str3;
        syncMarker = bArr;
        coder = null;
    }

    /**
     * Creates a source with explicit offsets and a known file schema.
     *
     * @param str the file name
     * @param j first of the three long values forwarded unchanged to the superclass constructor
     * @param j2 second long value forwarded to the superclass constructor
     * @param j3 third long value forwarded to the superclass constructor
     * @param str2 the read schema as a string
     * @param cls the Java class of the records
     * @param str3 the codec name
     * @param bArr the sync marker
     * @param str4 the file schema as a string
     */
    private AvroSource(String str, long j, long j2, long j3, String str2, Class<T> cls, String str3, byte[] bArr, String str4) {
        super(str, j, j2, j3);
        type = cls;
        readSchemaString = str2;
        fileSchemaString = str4;
        codec = str3;
        syncMarker = bArr;
        coder = null;
    }

    /**
     * Validates this source. No Avro-specific checks are added; the superclass validation is the
     * only thing that runs.
     */
    @Override
    public void validate() {
        super.validate();
    }

    /**
     * Creates a source for the range {@code [j, j2)} of a single file.
     *
     * <p>If this source does not yet know the codec, sync marker or file schema, they are read from
     * the file's metadata so the returned source carries them. A missing read schema defaults to
     * the schema stored in the file.
     *
     * @param str the name of the file; must match at most one file
     * @param j the start offset of the subrange
     * @param j2 the end offset of the subrange
     * @return a source restricted to the given range of the file
     * @throws IllegalArgumentException if {@code str} matches more than one file
     * @throws RuntimeException wrapping the {@link IOException} if the metadata cannot be read
     */
    @Override
    public AvroSource<T> createForSubrangeOfFile(String str, long j, long j2) {
        String codecName = this.codec;
        byte[] marker = this.syncMarker;
        String readSchemaJson = this.readSchemaString;
        String fileSchemaJson = this.fileSchemaString;

        if (codecName == null || marker == null || fileSchemaJson == null) {
            try {
                Preconditions.checkArgument(
                        FileBasedSource.expandFilePattern(str).size() <= 1,
                        "More than 1 file matched %s",
                        str);
                AvroUtils.AvroMetadata metadata = AvroUtils.readMetadataFromFile(str);
                codecName = metadata.getCodec();
                marker = metadata.getSyncMarker();
                fileSchemaJson = metadata.getSchemaString();
                if (readSchemaJson == null) {
                    // No read schema was requested: read with the schema the file was written with.
                    readSchemaJson = metadata.getSchemaString();
                }
            } catch (IOException e) {
                throw new RuntimeException("Error reading metadata from file " + str, e);
            }
        }
        return new AvroSource<>(
                str, getMinBundleSize(), j, j2, readSchemaJson, this.type, codecName, marker, fileSchemaJson);
    }

    /**
     * Creates a reader over this source.
     *
     * @param pipelineOptions not used by this implementation
     * @return a new {@link AvroReader} bound to this source
     */
    @Override
    public AvroReader<T> createSingleFileReader(PipelineOptions pipelineOptions) {
        AvroReader<T> reader = new AvroReader<>(this);
        return reader;
    }

    /**
     * Reports whether this source produces sorted keys; it always answers no.
     *
     * @param pipelineOptions not used by this implementation
     * @return {@code false}, always
     */
    @Override
    public boolean producesSortedKeys(PipelineOptions pipelineOptions) throws Exception {
        return false;
    }

    /**
     * Returns the coder for this source's records, built from the record type and the parsed read
     * schema. The coder is created on first use and cached in a transient field.
     */
    @Override
    public AvroCoder<T> getDefaultOutputCoder() {
        if (coder != null) {
            return coder;
        }
        Schema parsedReadSchema = new Schema.Parser().parse(readSchemaString);
        coder = AvroCoder.of(type, parsedReadSchema);
        return coder;
    }

    /**
     * Returns the read schema as a string, or {@code null} if none has been set.
     */
    public String getSchema() {
        return readSchemaString;
    }

    /**
     * Returns the parsed read schema, parsing the schema string on first use and caching the
     * result, or {@code null} if no read schema string is set.
     */
    private Schema getReadSchema() {
        if (readSchemaString != null) {
            if (readSchema == null) {
                readSchema = new Schema.Parser().parse(readSchemaString);
            }
            return readSchema;
        }
        return null;
    }

    /**
     * Returns the parsed file schema, parsing the schema string on first use and caching the
     * result, or {@code null} if no file schema string is set.
     */
    private Schema getFileSchema() {
        if (fileSchemaString != null) {
            if (fileSchema == null) {
                fileSchema = new Schema.Parser().parse(fileSchemaString);
            }
            return fileSchema;
        }
        return null;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns this source's sync marker array itself (not a copy), or {@code null} if it has not
     * been set.
     */
    public byte[] getSyncMarker() {
        return syncMarker;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the name of the codec set on this source, or {@code null} if it has not been set.
     */
    public String getCodec() {
        return codec;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Creates a datum reader that decodes records written with the file schema into the read
     * schema. A generic reader is used when the record type is {@link GenericRecord}; otherwise a
     * reflection-based reader is used.
     *
     * @return a new datum reader
     * @throws NullPointerException if either the read schema or the file schema is not set
     */
    public DatumReader<T> createDatumReader() {
        Schema readerSchema = getReadSchema();
        Schema writerSchema = getFileSchema();
        Preconditions.checkNotNull(readerSchema, "No read schema has been initialized for source %s", this);
        Preconditions.checkNotNull(writerSchema, "No file schema has been initialized for source %s", this);
        if (this.type == GenericRecord.class) {
            return new GenericDatumReader<T>(writerSchema, readerSchema);
        }
        return new ReflectDatumReader<T>(writerSchema, readerSchema);
    }
}
