package org.kitesdk.data.spi.filesystem;

import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.avro.generic.IndexedRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.Formats;
import org.kitesdk.data.PartitionStrategy;
import org.kitesdk.data.RefinableView;
import org.kitesdk.data.impl.Accessor;
import org.kitesdk.data.spi.AbstractDataset;
import org.kitesdk.data.spi.Compatibility;
import org.kitesdk.data.spi.Constraints;
import org.kitesdk.data.spi.FieldPartitioner;
import org.kitesdk.data.spi.InputFormatAccessor;
import org.kitesdk.data.spi.LastModifiedAccessor;
import org.kitesdk.data.spi.Mergeable;
import org.kitesdk.data.spi.PartitionKey;
import org.kitesdk.data.spi.PartitionListener;
import org.kitesdk.data.spi.PartitionedDataset;
import org.kitesdk.data.spi.SizeAccessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/kitesdk/data/spi/filesystem/FileSystemDataset.class */
public class FileSystemDataset<E> extends AbstractDataset<E> implements Mergeable<FileSystemDataset<E>>, InputFormatAccessor<E>, LastModifiedAccessor, PartitionedDataset<E>, SizeAccessor {
    /** Class-wide logger used for debug tracing of partition and merge operations. */
    private static final Logger LOG = LoggerFactory.getLogger(FileSystemDataset.class);
    /** File system on which all existence checks, listings, renames and deletes are performed. */
    private final FileSystem fileSystem;
    /** Root directory of this dataset; the Builder passes an already-qualified path. */
    private final Path directory;
    /** Namespace reported by {@link #getNamespace()} and passed to the partition listener. */
    private final String namespace;
    /** Dataset name reported by {@link #getName()} and passed to the partition listener. */
    private final String name;
    /** Descriptor supplying the schema, format, location and partitioning information. */
    private final DatasetDescriptor descriptor;
    /**
     * Key values prepended when building keys for child directories; may be null.
     * Not final because the delegating constructor assigns it after the main constructor ran.
     */
    private PartitionKey partitionKey;
    /** URI returned by {@link #getUri()}; also handed on to partition datasets built from this one. */
    private final URI uri;
    /** Partition strategy from the descriptor, or null when the descriptor is not partitioned. */
    private final PartitionStrategy partitionStrategy;
    /** Optional callback notified when partition directories are added; may be null. */
    private final PartitionListener partitionListener;
    /** View over this dataset to which iteration, filtering, deletion and emptiness checks delegate. */
    private final FileSystemView<E> unbounded;
    /** Converter between directory names and partition values, built from the descriptor's schema. */
    private final PathConversion convert;

    /* loaded from: input_file:org/kitesdk/data/spi/filesystem/FileSystemDataset$Builder.class */
    /**
     * Fluent builder for {@link FileSystemDataset} instances.
     *
     * <p>A namespace, a name, a descriptor with a non-null location, an entity
     * type, and either a {@link Configuration} or a {@link FileSystem} must be
     * supplied before {@link #build()} is called.
     *
     * @param <E> the entity type of the dataset being built
     */
    public static class Builder<E> {
        private Configuration conf;
        private FileSystem fileSystem;
        private Path directory;
        private String namespace;
        private String name;
        private DatasetDescriptor descriptor;
        private Class<E> type;
        private URI uri;
        private PartitionKey partitionKey;
        private PartitionListener partitionListener;

        /** Sets the dataset namespace (required). */
        public Builder<E> namespace(String str) {
            namespace = str;
            return this;
        }

        /** Sets the dataset name (required). */
        public Builder<E> name(String str) {
            name = str;
            return this;
        }

        /** Sets the file system to use; when absent, one is resolved from the configuration in {@link #build()}. */
        protected Builder<E> fileSystem(FileSystem fileSystem) {
            this.fileSystem = fileSystem;
            return this;
        }

        /** Sets the configuration used to resolve a file system when none was given explicitly. */
        public Builder<E> configuration(Configuration configuration) {
            conf = configuration;
            return this;
        }

        /**
         * Sets the dataset descriptor (required).
         *
         * @throws IllegalArgumentException if the descriptor has no location
         */
        public Builder<E> descriptor(DatasetDescriptor datasetDescriptor) {
            boolean hasLocation = datasetDescriptor.getLocation() != null;
            Preconditions.checkArgument(hasLocation, "Dataset location cannot be null");
            descriptor = datasetDescriptor;
            return this;
        }

        /**
         * Sets the entity class (required).
         *
         * @throws NullPointerException if {@code cls} is null
         */
        public Builder<E> type(Class<E> cls) {
            Preconditions.checkNotNull(cls, "Type cannot be null");
            type = cls;
            return this;
        }

        /** Sets the URI the built dataset reports from {@code getUri()}. */
        public Builder<E> uri(URI uri) {
            this.uri = uri;
            return this;
        }

        /* JADX INFO: Access modifiers changed from: package-private */
        /** Sets the optional partition key of the dataset being built. */
        public Builder<E> partitionKey(@Nullable PartitionKey partitionKey) {
            this.partitionKey = partitionKey;
            return this;
        }

        /* JADX INFO: Access modifiers changed from: package-private */
        /** Sets the optional listener to be notified about added partitions. */
        public Builder<E> partitionListener(@Nullable PartitionListener partitionListener) {
            this.partitionListener = partitionListener;
            return this;
        }

        /**
         * Validates the collected settings and creates the dataset.
         *
         * <p>The dataset directory is taken from the descriptor's location. If no
         * file system was set, it is obtained from that directory using the
         * configuration. The directory handed to the dataset is qualified against
         * the file system.
         *
         * @return the new dataset
         * @throws IllegalStateException if a required setting is missing
         * @throws DatasetIOException if the file system cannot be accessed
         */
        public FileSystemDataset<E> build() {
            Preconditions.checkState(namespace != null, "No namespace defined");
            Preconditions.checkState(name != null, "No dataset name defined");
            Preconditions.checkState(descriptor != null, "No dataset descriptor defined");
            Preconditions.checkState(conf != null || fileSystem != null, "Configuration or FileSystem must be set");
            Preconditions.checkState(type != null, "No type specified");

            directory = new Path(descriptor.getLocation());
            if (fileSystem == null) {
                try {
                    fileSystem = directory.getFileSystem(conf);
                } catch (IOException e) {
                    throw new DatasetIOException("Cannot access FileSystem", e);
                }
            }

            Path qualifiedDirectory = fileSystem.makeQualified(directory);
            return new FileSystemDataset<>(fileSystem, qualifiedDirectory, namespace, name, descriptor, uri, partitionKey, partitionListener, type);
        }
    }

    /**
     * Creates a dataset without a partition key.
     *
     * @param fileSystem file system used for all file operations
     * @param path dataset root directory
     * @param str dataset namespace
     * @param str2 dataset name
     * @param datasetDescriptor descriptor providing schema, format and partitioning
     * @param uri URI reported by {@link #getUri()}
     * @param partitionListener optional listener for added partitions; may be null
     * @param cls entity class
     * @throws IllegalArgumentException if the format is Parquet and {@code cls} is
     *         neither {@code Object} nor an {@code IndexedRecord} implementation
     */
    FileSystemDataset(FileSystem fileSystem, Path path, String str, String str2, DatasetDescriptor datasetDescriptor, URI uri, @Nullable PartitionListener partitionListener, Class<E> cls) {
        super(cls, datasetDescriptor.getSchema());
        // Parquet datasets are restricted to Object or IndexedRecord entity types.
        if (Formats.PARQUET.equals(datasetDescriptor.getFormat())) {
            Preconditions.checkArgument(IndexedRecord.class.isAssignableFrom(cls) || cls == Object.class, "Parquet only supports generic and specific data models, type parameter must implement IndexedRecord");
        }
        this.fileSystem = fileSystem;
        this.directory = path;
        this.namespace = str;
        this.name = str2;
        this.descriptor = datasetDescriptor;
        // Only partitioned descriptors carry a strategy; otherwise the field stays null.
        this.partitionStrategy = datasetDescriptor.isPartitioned() ? datasetDescriptor.getPartitionStrategy() : null;
        this.partitionListener = partitionListener;
        this.convert = new PathConversion(datasetDescriptor.getSchema());
        this.uri = uri;
        // The view receives 'this' before construction completes, so it is created
        // only after the fields above have been assigned.
        this.unbounded = new FileSystemView<>(this, cls);
        this.partitionKey = null;
    }

    /**
     * Creates a dataset with an optional partition key.
     *
     * <p>Delegates to the constructor without a partition key and then stores
     * {@code partitionKey}, which may be null.
     */
    FileSystemDataset(FileSystem fileSystem, Path path, String str, String str2, DatasetDescriptor datasetDescriptor, URI uri, @Nullable PartitionKey partitionKey, @Nullable PartitionListener partitionListener, Class<E> cls) {
        this(fileSystem, path, str, str2, datasetDescriptor, uri, partitionListener, cls);
        this.partitionKey = partitionKey;
    }

    /** Returns the URI this dataset was constructed with. */
    @Override // org.kitesdk.data.Dataset, org.kitesdk.data.View
    public URI getUri() {
        return uri;
    }

    /** Returns the namespace this dataset was constructed with. */
    @Override // org.kitesdk.data.Dataset
    public String getNamespace() {
        return namespace;
    }

    /** Returns the name this dataset was constructed with. */
    @Override // org.kitesdk.data.Dataset
    public String getName() {
        return name;
    }

    /** Returns the descriptor this dataset was constructed with. */
    @Override // org.kitesdk.data.Dataset
    public DatasetDescriptor getDescriptor() {
        return descriptor;
    }

    /** Returns the partition key assigned to this dataset, or null if none was set. */
    PartitionKey getPartitionKey() {
        return partitionKey;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /** Returns the file system on which this dataset operates. */
    public FileSystem getFileSystem() {
        return fileSystem;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /** Returns the root directory of this dataset. */
    public Path getDirectory() {
        return directory;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /** Returns the partition listener, or null if none was configured. */
    public PartitionListener getPartitionListener() {
        return partitionListener;
    }

    /**
     * Deletes the contents of this dataset by delegating to
     * {@code deleteAllUnsafe()} on the unbounded view.
     *
     * @return the result reported by the view
     */
    @Override // org.kitesdk.data.spi.AbstractDataset, org.kitesdk.data.View
    public boolean deleteAll() {
        return unbounded.deleteAllUnsafe();
    }

    /** Returns the path iterator of the unbounded view of this dataset. */
    public PathIterator pathIterator() {
        return unbounded.pathIterator();
    }

    /** Returns the directory iterator of the unbounded view of this dataset. */
    public Iterator<Path> dirIterator() {
        return unbounded.dirIterator();
    }

    /** Exposes the unbounded view of this dataset as a {@link RefinableView}. */
    @Override // org.kitesdk.data.spi.AbstractDataset
    protected RefinableView<E> asRefinableView() {
        return unbounded;
    }

    /**
     * Returns the unbounded view of this dataset restricted by the given constraints.
     *
     * @param constraints constraints passed on to the unbounded view
     * @return the filtered view produced by the unbounded view
     */
    @Override // org.kitesdk.data.spi.AbstractDataset
    public FileSystemView<E> filter(Constraints constraints) {
        return unbounded.filter(constraints);
    }

    /**
     * Returns the partition of this dataset identified by the given key.
     *
     * <p>The partition directory is derived from the key and qualified against
     * the file system. If it does not exist and {@code z} is true, the directory
     * is created and the partition listener (if any) is notified with the
     * partition's relative directory. If it does not exist and {@code z} is
     * false, null is returned.
     *
     * @param partitionKey key identifying the partition; must not be null
     * @param z whether a missing partition directory may be created
     * @return a dataset rooted at the partition directory, or null if the
     *         directory is missing and creation is not allowed
     * @throws IllegalStateException if this dataset is not partitioned
     * @throws NullPointerException if {@code partitionKey} is null
     * @throws DatasetIOException if the directory cannot be checked or created
     */
    @Override // org.kitesdk.data.spi.PartitionedDataset
    @Nullable
    public PartitionedDataset<E> getPartition(PartitionKey partitionKey, boolean z) {
        Preconditions.checkState(this.descriptor.isPartitioned(), "Attempt to get a partition on a non-partitioned dataset (name:%s)", this.name);
        // Same explicit null check as dropPartition, so a null key fails here with a
        // clear message instead of deep inside the directory-name computation.
        Preconditions.checkNotNull(partitionKey, "Partition key may not be null");
        LOG.debug("Loading partition for key {}, allowCreate:{}", partitionKey, Boolean.valueOf(z));
        Path partitionDirectory = this.fileSystem.makeQualified(toDirectoryName(this.directory, partitionKey));
        try {
            if (!this.fileSystem.exists(partitionDirectory)) {
                if (!z) {
                    return null;
                }
                this.fileSystem.mkdirs(partitionDirectory);
                if (this.partitionListener != null) {
                    this.partitionListener.partitionAdded(this.namespace, this.name, toRelativeDirectory(partitionKey).toString());
                }
            }
            // The partition's descriptor points at the partition directory and carries
            // the sub-strategy derived from this dataset's strategy and the key length.
            DatasetDescriptor partitionDescriptor = new DatasetDescriptor.Builder(this.descriptor)
                    .location(partitionDirectory)
                    .partitionStrategy(Accessor.getDefault().getSubpartitionStrategy(this.partitionStrategy, partitionKey.getLength()))
                    .build();
            return new Builder<E>()
                    .namespace(this.namespace)
                    .name(this.name)
                    .fileSystem(this.fileSystem)
                    .uri(this.uri)
                    .descriptor(partitionDescriptor)
                    .type(this.type)
                    .partitionKey(partitionKey)
                    .partitionListener(this.partitionListener)
                    .build();
        } catch (IOException e) {
            throw new DatasetIOException("Unable to locate or create dataset partition directory " + partitionDirectory, e);
        }
    }

    /**
     * Recursively deletes the directory of the partition identified by the given key.
     *
     * @param partitionKey key identifying the partition; must not be null
     * @throws IllegalStateException if this dataset is not partitioned
     * @throws NullPointerException if {@code partitionKey} is null
     * @throws DatasetIOException if the delete fails or reports that nothing was deleted
     */
    @Override // org.kitesdk.data.spi.PartitionedDataset
    public void dropPartition(PartitionKey partitionKey) {
        Preconditions.checkState(descriptor.isPartitioned(), "Attempt to drop a partition on a non-partitioned dataset (name:%s)", name);
        Preconditions.checkNotNull(partitionKey, "Partition key may not be null");
        LOG.debug("Dropping partition with key:{} dataset:{}", partitionKey, name);

        Path partitionDirectory = toDirectoryName(directory, partitionKey);
        try {
            boolean deleted = fileSystem.delete(partitionDirectory, true);
            if (!deleted) {
                // Thrown inside the try on purpose so it is wrapped like any other I/O failure.
                throw new IOException("Partition directory " + partitionDirectory + " for key " + partitionKey + " does not exist");
            }
        } catch (IOException e) {
            throw new DatasetIOException("Unable to locate or drop dataset partition directory " + partitionDirectory, e);
        }
    }

    /**
     * Lists the immediate partitions of this dataset.
     *
     * <p>Each non-hidden entry directly under the dataset directory becomes a
     * dataset whose key is this dataset's key extended by the value parsed
     * from the entry's name.
     *
     * @return the partitions found under the dataset directory
     * @throws IllegalStateException if this dataset is not partitioned
     * @throws DatasetIOException if the dataset directory cannot be listed
     */
    @Override // org.kitesdk.data.spi.PartitionedDataset
    public Iterable<PartitionedDataset<E>> getPartitions() {
        Preconditions.checkState(descriptor.isPartitioned(), "Attempt to get partitions on a non-partitioned dataset (name:%s)", name);
        List<PartitionedDataset<E>> partitions = Lists.newArrayList();
        try {
            FileStatus[] children = fileSystem.listStatus(directory, PathFilters.notHidden());
            for (FileStatus child : children) {
                Path childDirectory = fileSystem.makeQualified(child.getPath());
                DatasetDescriptor childDescriptor = new DatasetDescriptor.Builder(descriptor)
                        .location(childDirectory)
                        .partitionStrategy(Accessor.getDefault().getSubpartitionStrategy(partitionStrategy, 1))
                        .build();
                FileSystemDataset<E> partition = new Builder<E>()
                        .namespace(namespace)
                        .name(name)
                        .fileSystem(fileSystem)
                        .uri(uri)
                        .descriptor(childDescriptor)
                        .type(type)
                        .partitionKey(keyFromDirectory(childDirectory.getName()))
                        .partitionListener(partitionListener)
                        .build();
                partitions.add(partition);
            }
        } catch (IOException e) {
            throw new DatasetIOException("Unable to list partition directory for directory " + directory, e);
        }
        return partitions;
    }

    /**
     * Returns a debug representation listing the name, descriptor, directory
     * (reported under both "directory" and "dataDirectory") and partition key.
     */
    public String toString() {
        return Objects.toStringHelper(this)
                .add("name", name)
                .add("descriptor", descriptor)
                .add("directory", directory)
                .add("dataDirectory", directory)
                .add("partitionKey", partitionKey)
                .toString();
    }

    /**
     * Moves every file of the given dataset into this dataset.
     *
     * <p>Each source path is re-rooted under this dataset's directory, keeping
     * its position relative to the source dataset's directory. Missing parent
     * directories are created and the file is renamed into place. For a
     * partitioned dataset with a listener, the listener is notified once per
     * distinct destination parent directory.
     *
     * @param fileSystemDataset the dataset whose files are moved into this one
     * @throws DatasetIOException if a directory cannot be created or a rename fails
     */
    @Override // org.kitesdk.data.spi.Mergeable
    public void merge(FileSystemDataset<E> fileSystemDataset) {
        Compatibility.checkCompatible(fileSystemDataset.getDescriptor(), descriptor);
        HashSet<String> notifiedPartitions = Sets.newHashSet();
        for (Path source : fileSystemDataset.pathIterator()) {
            URI relative = fileSystemDataset.getDirectory().toUri().relativize(source.toUri());
            Path target;
            if (relative.toString().isEmpty()) {
                target = directory;
            } else {
                target = new Path(directory, new Path(relative));
            }
            Path targetParent = target.getParent();
            try {
                if (!fileSystem.exists(targetParent)) {
                    fileSystem.mkdirs(targetParent);
                }
                LOG.debug("Renaming {} to {}", source, target);
                boolean renamed = fileSystem.rename(source, target);
                if (!renamed) {
                    throw new IOException("Dataset merge failed during rename of " + source + " to " + target);
                }
                if (descriptor.isPartitioned() && partitionListener != null) {
                    // NOTE(review): this passes the full parent path, whereas getPartition
                    // notifies with a relative directory - confirm this is intended.
                    String partition = targetParent.toString();
                    if (!notifiedPartitions.contains(partition)) {
                        partitionListener.partitionAdded(namespace, name, partition);
                        notifiedPartitions.add(partition);
                    }
                }
            } catch (IOException e) {
                throw new DatasetIOException("Dataset merge failed", e);
            }
        }
    }

    /**
     * Creates an input format for this dataset.
     *
     * @param configuration configuration handed to the input format
     * @return a new {@code FileSystemViewKeyInputFormat} for this dataset
     */
    @Override // org.kitesdk.data.spi.InputFormatAccessor
    public InputFormat<E, Void> getInputFormat(Configuration configuration) {
        return new FileSystemViewKeyInputFormat<E>(this, configuration);
    }

    /**
     * Builds the directory path for a partition key by appending one directory
     * name per key value to the given base path.
     *
     * @param path base path to extend, or null to build a relative path
     * @param partitionKey key whose values are turned into directory names
     * @return the resulting path; {@code path} itself when the key has no values
     */
    private Path toDirectoryName(@Nullable Path path, PartitionKey partitionKey) {
        Path result = path;
        int length = 0;
        while (length < partitionKey.getLength()) {
            FieldPartitioner partitioner = partitionStrategy.getFieldPartitioners().get(length);
            String dirname = PathConversion.dirnameForValue(partitioner, partitionKey.get(length));
            if (result == null) {
                result = new Path(dirname);
            } else {
                result = new Path(result, dirname);
            }
            length++;
        }
        return result;
    }

    /** Builds the partition directory for the key without a base path, i.e. as a relative path. */
    private Path toRelativeDirectory(PartitionKey partitionKey) {
        Path noBase = null;
        return toDirectoryName(noBase, partitionKey);
    }

    /**
     * Builds the key for an immediate child directory: this dataset's key values
     * (if any) followed by the value parsed from the directory name using the
     * first field partitioner.
     *
     * @param str name of the child directory
     * @return the partition key for that directory
     */
    private PartitionKey keyFromDirectory(String str) {
        FieldPartitioner firstPartitioner = partitionStrategy.getFieldPartitioners().get(0);
        List<Object> values = Lists.newArrayList();
        if (partitionKey != null) {
            values.addAll(partitionKey.getValues());
        }
        Object parsed = convert.valueForDirname(firstPartitioner, str);
        values.add(parsed);
        return new PartitionKey(values.toArray());
    }

    /**
     * Builds the partition key for a directory located under this dataset's directory.
     *
     * <p>The path is relativized against the dataset directory and split into
     * its components. Each component is converted with the field partitioner at
     * the same position. This dataset's own key values (if any) precede the
     * converted values.
     *
     * @param path a partition directory below the dataset directory
     * @return the partition key for that directory
     * @throws IllegalStateException if the path is not relative to the dataset
     *         directory, or has more components than there are field partitioners
     */
    public PartitionKey keyFromDirectory(Path path) {
        URI relativeUri = directory.toUri().relativize(path.toUri());
        Path relative = null;
        if (!relativeUri.toString().isEmpty()) {
            relative = new Path(relativeUri);
            // relativize() hands back its argument unchanged when it is not below the base.
            Preconditions.checkState(!relative.equals(path), "Partition directory %s is not relative to dataset directory %s", path, directory);
        }

        // Walk from the leaf up to the root, prepending so the names end up top-down.
        List<String> dirNames = Lists.newArrayList();
        for (Path cursor = relative; cursor != null && !cursor.getName().equals(""); cursor = cursor.getParent()) {
            dirNames.add(0, cursor.getName());
        }

        List<FieldPartitioner> partitioners = partitionStrategy.getFieldPartitioners();
        Preconditions.checkState(dirNames.size() <= partitioners.size(), "Number of components in partition directory %s (%s) exceeds number of field partitioners %s", path, dirNames, partitionStrategy);

        List<Object> values = Lists.newArrayList();
        if (partitionKey != null) {
            values.addAll(partitionKey.getValues());
        }
        int position = 0;
        for (String dirName : dirNames) {
            values.add(convert.valueForDirname(partitioners.get(position), dirName));
            position++;
        }
        return new PartitionKey(values.toArray());
    }

    /**
     * Sums the lengths of all entries listed in each directory returned by
     * {@link #dirIterator()}.
     *
     * @return the total length, or 0 if there is nothing to list
     * @throws DatasetIOException if a directory cannot be listed
     */
    @Override // org.kitesdk.data.spi.SizeAccessor
    public long getSize() {
        long total = 0;
        for (Iterator<Path> dirs = dirIterator(); dirs.hasNext(); ) {
            Path dir = dirs.next();
            try {
                FileStatus[] entries = fileSystem.listStatus(dir);
                for (FileStatus entry : entries) {
                    total += entry.getLen();
                }
            } catch (IOException e) {
                throw new DatasetIOException("Cannot find size of " + dir, e);
            }
        }
        return total;
    }

    /**
     * Finds the most recent modification time among all entries listed in each
     * directory returned by {@link #dirIterator()}.
     *
     * @return the largest modification time seen, or -1 if no entry was listed
     * @throws DatasetIOException if a directory cannot be listed
     */
    @Override // org.kitesdk.data.spi.LastModifiedAccessor
    public long getLastModified() {
        long lastModified = -1;
        Iterator<Path> dirs = dirIterator();
        while (dirs.hasNext()) {
            Path dir = dirs.next();
            try {
                for (FileStatus status : this.fileSystem.listStatus(dir)) {
                    lastModified = Math.max(lastModified, status.getModificationTime());
                }
            } catch (IOException e) {
                // Message previously read "... time of of <dir>"; the duplicated word is removed.
                throw new DatasetIOException("Cannot find last modified time of " + dir, e);
            }
        }
        return lastModified;
    }

    /** Reports whether the unbounded view of this dataset is empty. */
    @Override // org.kitesdk.data.View
    public boolean isEmpty() {
        return unbounded.isEmpty();
    }
}
