package org.kitesdk.data.spi.filesystem;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.kitesdk.data.CompressionType;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetException;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Format;
import org.kitesdk.data.Formats;
import org.kitesdk.data.MiniDFSTest;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.TestHelpers;
import org.kitesdk.data.ValidationException;
import org.kitesdk.data.spi.FieldPartitioner;
import org.kitesdk.data.spi.PartitionKey;
import org.kitesdk.data.spi.PartitionedDataset;
import org.kitesdk.data.spi.filesystem.DatasetTestUtilities;
import org.kitesdk.data.spi.filesystem.FileSystemDataset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
/* loaded from: input_file:org/kitesdk/data/spi/filesystem/TestFileSystemDataset.class */
public class TestFileSystemDataset extends MiniDFSTest {
    private static final Logger LOG = LoggerFactory.getLogger(TestFileSystemDataset.class);
    private final Format format;
    private final FileSystem fileSystem;
    private final CompressionType compressionType;
    private Path testDirectory;

    /**
     * Supplies the constructor arguments for every parameterized run as
     * {@code {format, file system, compression type}} rows.
     *
     * <p>Avro is combined with Uncompressed, Snappy, Deflate and Bzip2; Parquet with
     * Uncompressed, Snappy and Deflate. Each combination is listed once against
     * {@code getDFS()} and once against {@code getFS()}.
     *
     * @return the parameter rows, ordered AVRO/DFS, AVRO/FS, PARQUET/DFS, PARQUET/FS
     * @throws IOException propagated from the file system setup and lookup calls
     */
    @Parameterized.Parameters
    public static Collection<Object[]> data() throws IOException {
        MiniDFSTest.setupFS();

        CompressionType[] avroCodecs = {
            CompressionType.Uncompressed,
            CompressionType.Snappy,
            CompressionType.Deflate,
            CompressionType.Bzip2
        };
        CompressionType[] parquetCodecs = {
            CompressionType.Uncompressed,
            CompressionType.Snappy,
            CompressionType.Deflate
        };

        List<Object[]> rows = new ArrayList<Object[]>();
        for (CompressionType codec : avroCodecs) {
            rows.add(new Object[]{Formats.AVRO, getDFS(), codec});
        }
        for (CompressionType codec : avroCodecs) {
            rows.add(new Object[]{Formats.AVRO, getFS(), codec});
        }
        for (CompressionType codec : parquetCodecs) {
            rows.add(new Object[]{Formats.PARQUET, getDFS(), codec});
        }
        for (CompressionType codec : parquetCodecs) {
            rows.add(new Object[]{Formats.PARQUET, getFS(), codec});
        }
        return rows;
    }

    /**
     * Receives one parameter row from {@link #data()}.
     *
     * @param format          storage format the datasets under test are created with
     * @param fileSystem      file system the test directory is qualified against
     * @param compressionType compression codec passed to the dataset descriptors
     */
    public TestFileSystemDataset(Format format, FileSystem fileSystem, CompressionType compressionType) {
        this.compressionType = compressionType;
        this.fileSystem = fileSystem;
        this.format = format;
    }

    /**
     * Chooses a fresh, uniquely named working directory for the test about to run.
     *
     * <p>The unique name comes from a temporary directory created on the local disk; its
     * absolute path is then qualified against {@code fileSystem} and stored in
     * {@code testDirectory}.
     *
     * @throws IOException declared for the file system calls
     */
    @Before
    public void setUp() throws IOException {
        java.io.File localTemp = Files.createTempDir();
        // tearDown only deletes the path on fileSystem. When that is not the local file system
        // the local directory would be left behind, so ask the JVM to remove it at exit. If
        // tearDown has already deleted it, the exit hook simply finds nothing to delete.
        localTemp.deleteOnExit();
        this.testDirectory = this.fileSystem.makeQualified(new Path(localTemp.getAbsolutePath()));
    }

    /**
     * Removes the per-test working directory, including everything below it.
     *
     * @throws IOException declared for the file system delete call
     */
    @After
    public void tearDown() throws IOException {
        final boolean recursive = true;
        this.fileSystem.delete(this.testDirectory, recursive);
    }

    /**
     * Writes ten test users to an unpartitioned dataset and checks that they can be read back.
     */
    @Test
    public void testWriteAndRead() throws IOException {
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schemaUri(DatasetTestUtilities.USER_SCHEMA_URL)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("test")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        Assert.assertFalse("Dataset is not partitioned", dataset.getDescriptor().isPartitioned());

        DatasetTestUtilities.writeTestUsers(dataset, 10);
        DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) dataset, 10);
    }

    /**
     * Writes ten users into a dataset hash-partitioned on {@code username} (2 buckets) and checks
     * the on-disk layout, the per-partition record counts, and that the records read from the
     * individual partitions add up to the full set of users.
     */
    @Test
    public void testPartitionedWriterSingle() throws IOException {
        PartitionStrategy strategy = new PartitionStrategy.Builder().hash("username", 2).build();
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .partitionStrategy(strategy)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        Assert.assertTrue("Dataset is partitioned", dataset.getDescriptor().isPartitioned());
        Assert.assertEquals(strategy, dataset.getDescriptor().getPartitionStrategy());

        DatasetTestUtilities.writeTestUsers(dataset, 10);

        Assert.assertTrue("Partitioned directory 0 exists",
            this.fileSystem.exists(new Path(this.testDirectory, "username_hash=0")));
        Assert.assertTrue("Partitioned directory 1 exists",
            this.fileSystem.exists(new Path(this.testDirectory, "username_hash=1")));
        DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) dataset, 10);

        PartitionKey key0 = new PartitionKey(new Object[]{0});
        PartitionKey key1 = new PartitionKey(new Object[]{1});
        int total = readTestUsersInPartition(dataset, key0, null)
            + readTestUsersInPartition(dataset, key1, null);
        Assert.assertEquals(10, total);
        DatasetTestUtilities.testPartitionKeysAreEqual(dataset, key0, key1);

        // Collect the records of each partition on its own; reading the parent dataset here
        // would make the check pass even if getPartitions() returned broken partitions.
        HashSet<GenericData.Record> records = Sets.newHashSet();
        for (Dataset<GenericData.Record> partition : dataset.getPartitions()) {
            Assert.assertFalse("Partitions should not have further partitions",
                partition.getDescriptor().isPartitioned());
            records.addAll(DatasetTestUtilities.materialize(partition));
        }
        DatasetTestUtilities.checkTestUsers(records, 10);
    }

    /**
     * Writes ten users into a dataset hash-partitioned on {@code username} (2 buckets) and then
     * {@code email} (3 buckets), and checks the record counts at both partition levels, the
     * directory layout, and that the top-level partitions together hold all users.
     */
    @Test
    public void testPartitionedWriterDouble() throws IOException {
        PartitionStrategy strategy = new PartitionStrategy.Builder()
            .hash("username", 2)
            .hash("email", 3)
            .build();
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .partitionStrategy(strategy)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        Assert.assertTrue("Dataset is partitioned", dataset.getDescriptor().isPartitioned());
        Assert.assertEquals(strategy, dataset.getDescriptor().getPartitionStrategy());

        DatasetTestUtilities.writeTestUsers(dataset, 10);
        DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) dataset, 10);

        // Both username buckets must be read: their counts only add up to ten when each
        // bucket is counted exactly once.
        PartitionKey key0 = new PartitionKey(new Object[]{0});
        PartitionKey key1 = new PartitionKey(new Object[]{1});
        int topLevelTotal = readTestUsersInPartition(dataset, key0, "email_hash")
            + readTestUsersInPartition(dataset, key1, "email_hash");
        Assert.assertEquals(10, topLevelTotal);

        int leafTotal = 0;
        for (int usernameBucket = 0; usernameBucket < 2; usernameBucket++) {
            for (int emailBucket = 0; emailBucket < 3; emailBucket++) {
                String relativeDir = "username_hash=" + usernameBucket + "/email_hash=" + emailBucket;
                Assert.assertTrue("Partitioned directory " + relativeDir + " exists",
                    this.fileSystem.exists(new Path(this.testDirectory, relativeDir)));
                PartitionKey leafKey = new PartitionKey(
                    new Object[]{Integer.valueOf(usernameBucket), Integer.valueOf(emailBucket)});
                leafTotal += readTestUsersInPartition(dataset, leafKey, null);
            }
        }
        Assert.assertEquals(10, leafTotal);
        DatasetTestUtilities.testPartitionKeysAreEqual(dataset, key0, key1);

        // Collect the records of each top-level partition on its own; reading the parent
        // dataset here would not exercise the partitions returned by getPartitions().
        HashSet<GenericData.Record> records = Sets.newHashSet();
        for (Dataset<GenericData.Record> partition : dataset.getPartitions()) {
            Assert.assertTrue("Partitions should have further partitions",
                partition.getDescriptor().isPartitioned());
            records.addAll(DatasetTestUtilities.materialize(partition));
        }
        DatasetTestUtilities.checkTestUsers(records, 10);
    }

    /**
     * Asking for a partition of an empty dataset with the create flag set to {@code false}
     * is expected to return {@code null}.
     */
    @Test
    public void testGetPartitionReturnsNullIfNoAutoCreate() throws IOException {
        PartitionStrategy strategy = new PartitionStrategy.Builder().hash("username", 2).build();
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .location(this.testDirectory)
            .partitionStrategy(strategy)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        PartitionKey key = new PartitionKey(new Object[]{1});
        Assert.assertNull(dataset.getPartition(key, false));
    }

    /**
     * Obtains the {@code username_part=1} partition directly, writes one user through it, and
     * checks that the record ends up in the expected sub-partition directory and can be read
     * back via the parent dataset.
     */
    @Test
    public void testWriteToSubpartition() throws IOException {
        PartitionStrategy strategy = new PartitionStrategy.Builder()
            .hash("username", "username_part", 2)
            .hash("email", 3)
            .build();
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .partitionStrategy(strategy)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        PartitionKey key = new PartitionKey(new Object[]{1});
        FileSystemDataset<GenericData.Record> userPartition = dataset.getPartition(key, true);
        Assert.assertEquals(key, userPartition.getPartitionKey());

        DatasetTestUtilities.writeTestUsers(userPartition, 1);

        Path expectedDir = new Path(this.testDirectory, "username_part=1/email_hash=2");
        Assert.assertTrue("Partitioned directory exists", this.fileSystem.exists(expectedDir));
        Assert.assertEquals(1, readTestUsersInPartition(dataset, key, "email_hash"));
    }

    /**
     * Drops both username partitions one after the other, checking that each directory
     * disappears, and then expects a {@link DatasetException} when a partition that is
     * already gone is dropped again.
     */
    @Test
    public void testDropPartition() throws IOException {
        PartitionStrategy strategy = new PartitionStrategy.Builder().hash("username", 2).build();
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .partitionStrategy(strategy)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();
        DatasetTestUtilities.writeTestUsers(dataset, 10);

        Path partition0 = new Path(this.testDirectory, "username_hash=0");
        Path partition1 = new Path(this.testDirectory, "username_hash=1");
        Assert.assertTrue(this.fileSystem.isDirectory(partition0));
        Assert.assertTrue(this.fileSystem.isDirectory(partition1));

        dataset.dropPartition(new PartitionKey(new Object[]{0}));
        Assert.assertFalse(this.fileSystem.isDirectory(partition0));

        dataset.dropPartition(new PartitionKey(new Object[]{1}));
        Assert.assertFalse(this.fileSystem.isDirectory(partition1));

        try {
            dataset.dropPartition(new PartitionKey(new Object[]{0}));
            // Reaching this line means the second drop of partition 0 did not throw.
            Assert.fail();
        } catch (DatasetException expected) {
            // expected: the partition no longer exists
        }
    }

    /**
     * Merges a second partitioned dataset (users 5..14) into the primary one (users 0..9) and
     * checks that afterwards the source is empty and the target holds fifteen users.
     *
     * <p>The second dataset lives in its own temporary directory, which is removed again when
     * the test finishes, whether it passes or fails.
     */
    @Test
    public void testMerge() throws IOException {
        PartitionStrategy strategy = new PartitionStrategy.Builder().hash("username", 2).build();

        FileSystemDataset<GenericData.Record> target = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.USER_SCHEMA)
                .format(this.format)
                .compressionType(this.compressionType)
                .location(this.testDirectory)
                .partitionStrategy(strategy)
                .build())
            .type(GenericData.Record.class)
            .build();
        DatasetTestUtilities.writeTestUsers(target, 10);
        DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) target, 10);

        // tearDown only knows about testDirectory, so this extra directory is cleaned up here.
        // The local directory that provides the unique name is registered for removal at JVM
        // exit in case fileSystem is not the local file system.
        java.io.File localTemp = Files.createTempDir();
        localTemp.deleteOnExit();
        Path updateDirectory = this.fileSystem.makeQualified(new Path(localTemp.getAbsolutePath()));
        try {
            FileSystemDataset<GenericData.Record> update = new FileSystemDataset.Builder()
                .namespace("ns")
                .name("partitioned-users")
                .configuration(getConfiguration())
                .descriptor(new DatasetDescriptor.Builder()
                    .schema(DatasetTestUtilities.USER_SCHEMA)
                    .format(this.format)
                    .compressionType(this.compressionType)
                    .location(updateDirectory)
                    .partitionStrategy(strategy)
                    .build())
                .type(GenericData.Record.class)
                .build();
            DatasetTestUtilities.writeTestUsers(update, 5, 10);
            DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) update, 5, 10);

            target.merge(update);

            DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) update, 0);
            DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) target, 15);
        } finally {
            this.fileSystem.delete(updateDirectory, true);
        }
    }

    /**
     * Merging a Parquet dataset into an Avro dataset is expected to fail with a
     * {@link ValidationException}.
     */
    @Test(expected = ValidationException.class)
    public void testCannotMergeDatasetsWithDifferentFormats() throws IOException {
        FileSystemDataset avroDataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.USER_SCHEMA)
                .format(Formats.AVRO)
                .location(this.testDirectory)
                .build())
            .type(GenericData.Record.class)
            .build();
        FileSystemDataset parquetDataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.USER_SCHEMA)
                .format(Formats.PARQUET)
                .location(this.testDirectory)
                .build())
            .type(GenericData.Record.class)
            .build();

        avroDataset.merge(parquetDataset);
    }

    /**
     * Merging datasets whose partition strategies differ (one hash field versus two) is
     * expected to fail with a {@link ValidationException}.
     */
    @Test(expected = ValidationException.class)
    public void testCannotMergeDatasetsWithDifferentPartitionStrategies() throws IOException {
        FileSystemDataset singleLevel = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.USER_SCHEMA)
                .location(this.testDirectory)
                .partitionStrategy(new PartitionStrategy.Builder().hash("username", 2).build())
                .build())
            .type(GenericData.Record.class)
            .build();
        FileSystemDataset twoLevel = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.USER_SCHEMA)
                .location(this.testDirectory)
                .partitionStrategy(new PartitionStrategy.Builder().hash("username", 2).hash("email", 3).build())
                .build())
            .type(GenericData.Record.class)
            .build();

        singleLevel.merge(twoLevel);
    }

    /**
     * Merging a dataset that uses the user schema into one that uses the string schema is
     * expected to fail with a {@link ValidationException}.
     */
    @Test(expected = ValidationException.class)
    public void testCannotMergeDatasetsWithDifferentSchemas() throws IOException {
        FileSystemDataset stringDataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.STRING_SCHEMA)
                .location(this.testDirectory)
                .build())
            .type(GenericData.Record.class)
            .build();
        FileSystemDataset userDataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(new DatasetDescriptor.Builder()
                .schema(DatasetTestUtilities.USER_SCHEMA)
                .location(this.testDirectory)
                .build())
            .type(GenericData.Record.class)
            .build();

        stringDataset.merge(userDataset);
    }

    /**
     * For an unpartitioned dataset, {@code dirIterator()} is expected to yield exactly one
     * path: the dataset's own location.
     */
    @Test
    public void testPathIterator_Directory() {
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        List<Path> directories = Lists.newArrayList(dataset.dirIterator());

        Assert.assertEquals("dirIterator for non-partitioned dataset should yield a single path.",
            1, directories.size());
        Assert.assertEquals("dirIterator should yield absolute paths.",
            this.testDirectory, directories.get(0));
    }

    /**
     * Checks {@code dirIterator()} and {@code keyFromDirectory()} on a dataset partitioned by
     * username hash (2 buckets) and email hash (3 buckets): the dataset yields six absolute
     * directories, a leaf partition yields one, directories map back to partition keys at every
     * level, and paths outside the expected layout are rejected with
     * {@link IllegalStateException}.
     */
    @Test
    public void testPathIterator_Partition_Directory() {
        PartitionStrategy strategy = new PartitionStrategy.Builder()
            .hash("username", 2)
            .hash("email", 3)
            .build();
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .compressionType(this.compressionType)
            .location(this.testDirectory)
            .partitionStrategy(strategy)
            .build();
        final FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("partitioned-users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        Assert.assertTrue("Dataset is partitioned", dataset.getDescriptor().isPartitioned());
        Assert.assertEquals(strategy, dataset.getDescriptor().getPartitionStrategy());

        DatasetTestUtilities.writeTestUsers(dataset, 10);
        DatasetTestUtilities.checkTestUsers((Dataset<GenericData.Record>) dataset, 10);

        // Whole dataset: one directory per (username bucket, email bucket) pair.
        List<Path> allDirectories = Lists.newArrayList(dataset.dirIterator());
        Assert.assertEquals(6, allDirectories.size());
        Assert.assertTrue("dirIterator should yield absolute paths.", allDirectories.get(0).isAbsolute());

        // Leaf partition (1, 2): a single directory.
        List<Path> leafDirectories = Lists.newArrayList(
            dataset.getPartition(new PartitionKey(new Object[]{1, 2}), false).dirIterator());
        Assert.assertEquals(1, leafDirectories.size());
        final Path leafDirectory = leafDirectories.get(0);
        Assert.assertTrue("dirIterator should yield absolute paths.", leafDirectory.isAbsolute());

        // Walking up from the leaf shortens the key by one value per level.
        Path usernameDirectory = leafDirectory.getParent();
        Path datasetRoot = usernameDirectory.getParent();
        Assert.assertEquals(new PartitionKey(new Object[]{1, 2}), dataset.keyFromDirectory(leafDirectory));
        Assert.assertEquals(new PartitionKey(new Object[]{1}), dataset.keyFromDirectory(usernameDirectory));
        Assert.assertEquals(new PartitionKey(new Object[0]), dataset.keyFromDirectory(datasetRoot));

        TestHelpers.assertThrows("Path with too many components", IllegalStateException.class,
            new Runnable() {
                @Override
                public void run() {
                    dataset.keyFromDirectory(new Path(leafDirectory, "extra_dir"));
                }
            });
        TestHelpers.assertThrows("Non-relative path", IllegalStateException.class,
            new Runnable() {
                @Override
                public void run() {
                    dataset.keyFromDirectory(new Path("hdfs://different_host/"));
                }
            });
    }

    /**
     * After {@code deleteAll()} on an unpartitioned dataset holding ten users, the call is
     * expected to report {@code true} and a new reader is expected to yield no records.
     */
    @Test
    public void testDeleteAllWithoutPartitions() {
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
            .schema(DatasetTestUtilities.USER_SCHEMA)
            .format(this.format)
            .location(this.testDirectory)
            .build();
        FileSystemDataset<GenericData.Record> dataset = new FileSystemDataset.Builder()
            .namespace("ns")
            .name("users")
            .configuration(getConfiguration())
            .descriptor(descriptor)
            .type(GenericData.Record.class)
            .build();

        DatasetTestUtilities.writeTestUsers(dataset, 10);

        boolean deleted = dataset.deleteAll();
        Assert.assertTrue(deleted);

        DatasetTestUtilities.checkReaderBehavior(dataset.newReader(), 0, (DatasetTestUtilities.RecordValidator) null);
    }

    /**
     * Reads every record of one partition, verifying that each record belongs there, and
     * returns how many records were read.
     *
     * <p>For each record the username hash (mod 2) must equal the first key value and, when the
     * key has more than one value, the email hash (mod 3) must equal the second.
     *
     * @param fileSystemDataset the partitioned dataset to read from
     * @param partitionKey      key of the partition to read; the partition must already exist
     * @param str               if non-null, the partition's own strategy must consist of exactly
     *                          one field partitioner with this name
     * @return the number of records found in the partition
     */
    private int readTestUsersInPartition(FileSystemDataset<GenericData.Record> fileSystemDataset, PartitionKey partitionKey, String str) {
        PartitionedDataset<GenericData.Record> partition = fileSystemDataset.getPartition(partitionKey, false);
        // getPartition is called without auto-create, so a missing partition comes back as null;
        // report that as a test failure rather than a NullPointerException further down.
        Assert.assertNotNull("Partition should exist for key " + partitionKey, partition);

        if (str != null) {
            List<?> subPartitioners = partition.getDescriptor().getPartitionStrategy().getFieldPartitioners();
            Assert.assertEquals(1, subPartitioners.size());
            Assert.assertEquals(str, ((FieldPartitioner) subPartitioners.get(0)).getName());
        }

        int count = 0;
        DatasetReader<GenericData.Record> reader = partition.newReader();
        try {
            for (GenericData.Record record : reader) {
                Assert.assertEquals(record.toString(), partitionKey.get(0),
                    Integer.valueOf((record.get("username").hashCode() & Integer.MAX_VALUE) % 2));
                if (partitionKey.getLength() > 1) {
                    Assert.assertEquals(partitionKey.get(1),
                        Integer.valueOf((record.get("email").hashCode() & Integer.MAX_VALUE) % 3));
                }
                count++;
            }
        } finally {
            // Close exactly once, on both the normal and the failing path.
            reader.close();
        }
        return count;
    }
}
