package com.twitter.hraven.etl;

import com.twitter.hraven.util.BatchUtil;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.TimeZone;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/* loaded from: input_file:com/twitter/hraven/etl/JobFilePartitioner.class */
/**
 * Command line tool that places job files from an input directory into a
 * {@code <output>/yyyy/MM/dd} directory tree, where the date components are
 * derived (in UTC) from each file's modification time.
 *
 * <p>Input may be a directory on the configured file system (files are copied
 * or moved) or a local {@code file://} directory (files are copied only).
 */
public class JobFilePartitioner extends Configured implements Tool {
    /** Simple class name, used in usage and error output. */
    static final String NAME = JobFilePartitioner.class.getSimpleName();
    private static final Log LOG = LogFactory.getLog(JobFilePartitioner.class);

    // NOTE: SimpleDateFormat is not thread-safe. These shared instances are only
    // safe as long as this tool partitions files from a single thread.
    /** Formats the four digit year component of a target directory (UTC). */
    public static final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy");
    /** Formats the two digit month component of a target directory (UTC). */
    public static final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("MM");
    /** Formats the two digit day component of a target directory (UTC). */
    public static final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("dd");

    static {
        // Must stay after the format fields above: static initializers run in textual order.
        TimeZone utc = TimeZone.getTimeZone("UTC");
        YEAR_FORMAT.setTimeZone(utc);
        MONTH_FORMAT.setTimeZone(utc);
        DAY_FORMAT.setTimeZone(utc);
    }

    /** Configuration obtained from {@link #getConf()} at the start of {@link #run(String[])}. */
    Configuration myConf;
    /** Raw value of the {@code -i} option. */
    String input;
    /** When copying, leave files alone that already exist in the target directory ({@code -s}). */
    boolean skipExisting = true;
    /** Move files instead of copying them ({@code -m}). */
    boolean moveFiles = true;
    /** Value of the {@code -x} option; {@link Integer#MAX_VALUE} when the option is absent. */
    int maXretention = Integer.MAX_VALUE;
    /** File system obtained from the configuration; used for both source and target paths. */
    FileSystem hdfs;
    /** Root of the date-partitioned output tree ({@code -o}). */
    Path outputPath;

    /**
     * Parses the tool specific command line options.
     *
     * <p>On a parse error the message and usage are printed to stderr and the
     * JVM exits with status -1.
     *
     * @param args arguments left over after generic Hadoop options were removed
     * @return the parsed command line
     * @throws ParseException declared for callers; parse errors are handled here
     */
    private static CommandLine parseArgs(String[] args) throws ParseException {
        Options options = new Options();

        Option inputOption = new Option("i", "input", true, "input directory as hdfs path, or local as file://");
        inputOption.setArgName("input-path");
        inputOption.setRequired(true);
        options.addOption(inputOption);

        Option outputOption = new Option("o", "output", true, "output directory");
        outputOption.setArgName("output-path");
        outputOption.setRequired(true);
        options.addOption(outputOption);

        Option skipOption = new Option("s", "skipExisting", false, "skip existing files. Cannot be used together with m for move.");
        skipOption.setRequired(false);
        options.addOption(skipOption);

        Option retentionOption = new Option("x", "maXretention", true, "The maximum number of the most recent files to retain in the input directory after processing. Can be used by HDFS input paths only. Mutually exclusive with m (move), but can be used in combination with s (skipExisting)");
        retentionOption.setRequired(false);
        options.addOption(retentionOption);

        Option moveOption = new Option("m", "move", false, "move all files rather than copying. Delete source if target already exists. Can be used with HDFS input paths only.  Mutually exclusive with s (skipExisting)");
        moveOption.setRequired(false);
        options.addOption(moveOption);

        // The debug flag is accepted here, but nothing in this class reads it.
        options.addOption("d", "debug", false, "switch on DEBUG log level");

        CommandLine commandLine = null;
        try {
            commandLine = new PosixParser().parse(options, args);
        } catch (ParseException e) {
            System.err.println("ERROR: " + e.getMessage() + "\n");
            new HelpFormatter().printHelp(NAME + " ", options, true);
            System.exit(-1);
        }
        return commandLine;
    }

    /**
     * Validates the options, partitions the input files and logs the file
     * system statistics.
     *
     * @param args command line arguments, including generic Hadoop options
     * @return 0 when processing completed
     * @throws IllegalArgumentException for conflicting or invalid options, or an unsupported input scheme
     * @throws IOException when the output or input is not a directory, or a file system operation fails
     */
    @Override
    public int run(String[] args) throws Exception {
        this.myConf = getConf();
        this.hdfs = FileSystem.get(this.myConf);

        String[] toolArgs = new GenericOptionsParser(this.myConf, args).getRemainingArgs();
        CommandLine commandLine = parseArgs(toolArgs);

        this.input = commandLine.getOptionValue("i");
        LOG.info("input=" + this.input);
        String output = commandLine.getOptionValue("o");
        LOG.info("output=" + output);
        this.skipExisting = commandLine.hasOption("s");
        LOG.info("skipExisting=" + this.skipExisting);
        this.moveFiles = commandLine.hasOption("m");
        LOG.info("moveFiles=" + this.moveFiles);
        if (this.skipExisting && this.moveFiles) {
            throw new IllegalArgumentException("Cannot use both options skipExisting and move simultaneously.");
        }

        if (commandLine.hasOption("x")) {
            String rawRetention = commandLine.getOptionValue("x");
            try {
                this.maXretention = Integer.parseInt(rawRetention);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("maXretention option -x is not a valid number: " + rawRetention, e);
            }
            if (this.maXretention < 0) {
                throw new IllegalArgumentException("Cannot retain less than 0 files. Specified maXretention option -x is: " + rawRetention);
            }
            LOG.info("maXretention=" + this.maXretention);
            if (this.moveFiles) {
                throw new IllegalArgumentException("Cannot use both options maXretention and move simultaneously.");
            }
        } else {
            this.maXretention = Integer.MAX_VALUE;
        }

        this.outputPath = new Path(output);
        FileStatus outputStatus = this.hdfs.getFileStatus(this.outputPath);
        if (!outputStatus.isDir()) {
            throw new IOException("Output is not a directory: " + outputStatus.getPath().getName());
        }

        Path inputPath = new Path(this.input);
        URI inputUri = inputPath.toUri();
        String scheme = inputUri.getScheme();
        LOG.info("input scheme is: " + scheme);
        // No scheme, or the scheme of the configured file system: the input lives on that file system.
        if (scheme == null || this.hdfs.getUri().getScheme().equals(scheme)) {
            processHDFSSources(inputPath);
        } else {
            if (!scheme.equals("file")) {
                throw new IllegalArgumentException("Cannot process files from this URI scheme: " + scheme);
            }
            if (this.moveFiles) {
                throw new IllegalArgumentException("Cannot move files that are not already in hdfs. Input is not HDFS: " + this.input);
            }
            processPlainFileSources(inputUri);
        }

        FileSystem.Statistics statistics = FileSystem.getStatistics(this.outputPath.toUri().getScheme(), this.hdfs.getClass());
        if (statistics != null) {
            LOG.info("HDFS bytes read: " + statistics.getBytesRead());
            LOG.info("HDFS bytes written: " + statistics.getBytesWritten());
            LOG.info("HDFS read ops: " + statistics.getReadOps());
            LOG.info("HDFS large read ops: " + statistics.getLargeReadOps());
            LOG.info("HDFS write ops: " + statistics.getWriteOps());
        }
        return 0;
    }

    /**
     * Lists the job files under an input directory on the configured file
     * system, sorts them with {@link FileStatusModificationComparator} and
     * processes each one.
     *
     * @param inputPath directory to read job files from
     * @throws IOException if the input is not a directory or a file cannot be processed
     */
    private void processHDFSSources(Path inputPath) throws IOException {
        if (!this.hdfs.getFileStatus(inputPath).isDir()) {
            throw new IOException("Input is not a directory in HDFS: " + this.input);
        }
        JobFileModifiedRangePathFilter jobFileFilter = new JobFileModifiedRangePathFilter(this.myConf, 0L);
        LOG.info("Listing / filtering (" + this.hdfs.getContentSummary(inputPath).getFileCount() + ") files in: " + inputPath);
        FileStatus[] jobFiles = FileLister.listFiles(true, this.hdfs, inputPath, jobFileFilter);
        LOG.info("Sorting " + jobFiles.length + " job files.");
        Arrays.sort(jobFiles, new FileStatusModificationComparator());

        int processed = 0;
        try {
            for (int index = 0; index < jobFiles.length; index++) {
                // Whether this file stays in the input directory is decided by BatchUtil
                // from its position in the sorted list and the -x value.
                boolean retain = BatchUtil.shouldRetain(index, this.maXretention, jobFiles.length);
                processHDFSSource(this.hdfs, jobFiles[index], this.outputPath, this.myConf, this.skipExisting, retain);
                processed++;
                // Report on the number of files done, not on the loop index.
                if (processed % 1000 == 0) {
                    LOG.info("Processed " + processed + " files.");
                }
            }
        } finally {
            // Logged on success and on failure, so a partial run still reports its progress.
            LOG.info("Processed " + processed + " files.");
        }
    }

    /**
     * Copies every regular file directly inside a local directory into the
     * partitioned output tree. Sub-directories are not descended into.
     *
     * @param inputUri {@code file://} URI of the local input directory
     * @throws IOException if the input is not a directory, cannot be listed, or a copy fails
     */
    private void processPlainFileSources(URI inputUri) throws IOException {
        LOG.info("Scheme specific part is: " + inputUri.getSchemeSpecificPart());
        File inputDirectory = new File(inputUri.getSchemeSpecificPart());
        if (!inputDirectory.isDirectory()) {
            throw new IOException("Input is not a regular directory: " + this.input);
        }
        // File.listFiles() returns null (rather than throwing) when an I/O error occurs.
        File[] candidates = inputDirectory.listFiles();
        if (candidates == null) {
            throw new IOException("Unable to list files in input directory: " + this.input);
        }

        int processed = 0;
        try {
            for (File candidate : candidates) {
                if (!candidate.isFile()) {
                    continue;
                }
                processPlainFile(this.hdfs, candidate, this.outputPath, this.skipExisting);
                processed++;
                if (processed % 1000 == 0) {
                    LOG.info("Processed " + processed + " files.");
                }
            }
        } finally {
            // Logged on success and on failure, so a partial run still reports its progress.
            LOG.info("Processed " + processed + " files.");
        }
    }

    /**
     * Resolves the {@code <root>/yyyy/MM/dd} directory for a timestamp and
     * creates it when it does not exist yet.
     *
     * @param fileSystem file system holding the output tree
     * @param root root of the output tree
     * @param fileModTime timestamp in milliseconds since the epoch
     * @return the target directory for that day
     * @throws IOException if the directory cannot be created
     */
    private Path getTargetDirectory(FileSystem fileSystem, Path root, long fileModTime) throws IOException {
        Date modified = new Date(fileModTime);
        Path yearDirectory = new Path(root, YEAR_FORMAT.format(modified));
        Path monthDirectory = new Path(yearDirectory, MONTH_FORMAT.format(modified));
        Path dayDirectory = new Path(monthDirectory, DAY_FORMAT.format(modified));

        if (!fileSystem.exists(dayDirectory)) {
            if (!fileSystem.mkdirs(dayDirectory)) {
                throw new IOException("Unable to create target directory with date: " + dayDirectory.getName());
            }
            LOG.info("Created: " + dayDirectory.toString());
        }
        return dayDirectory;
    }

    /**
     * Copies one local file into the target directory for its last-modified date.
     *
     * @param fileSystem file system holding the output tree
     * @param localFile local file to copy
     * @param root root of the output tree
     * @param skipExistingFile when true, a file already present in the target directory is left untouched
     * @throws IOException if the target directory cannot be created or the copy fails
     */
    private void processPlainFile(FileSystem fileSystem, File localFile, Path root, boolean skipExistingFile) throws IOException {
        Path targetDirectory = getTargetDirectory(fileSystem, root, localFile.lastModified());
        Path source = new Path(localFile.getPath());
        if (skipExistingFile && fileSystem.exists(new Path(targetDirectory, source.getName()))) {
            return;
        }
        fileSystem.copyFromLocalFile(source, targetDirectory);
    }

    /**
     * Moves or copies one job file into the target directory for its
     * modification date.
     *
     * <p>When moving is enabled, or the file is not to be retained in the input
     * directory, the source is renamed into the target directory, or deleted if
     * a file of the same name is already there. Otherwise the file is copied,
     * unless it already exists in the target and {@code skipExistingFile} is set.
     *
     * @param fileSystem file system holding both source and target
     * @param jobFile status of the source file
     * @param root root of the output tree
     * @param configuration configuration handed to the copy
     * @param skipExistingFile when copying, do not overwrite an existing target file
     * @param retain whether the source file should remain in the input directory
     * @throws IOException if a file system operation fails
     */
    private void processHDFSSource(FileSystem fileSystem, FileStatus jobFile, Path root, Configuration configuration, boolean skipExistingFile, boolean retain) throws IOException {
        Path source = jobFile.getPath();
        Path targetDirectory = getTargetDirectory(fileSystem, root, jobFile.getModificationTime());
        boolean targetExists = fileSystem.exists(new Path(targetDirectory, source.getName()));

        if (this.moveFiles || !retain) {
            // rename/delete report failure through their return value; surface it
            // instead of silently leaving the source file behind.
            if (targetExists) {
                if (!fileSystem.delete(source, false)) {
                    LOG.warn("Unable to delete " + source + " although it already exists in " + targetDirectory);
                }
            } else if (!fileSystem.rename(source, targetDirectory)) {
                LOG.warn("Unable to move " + source + " to " + targetDirectory);
            }
            return;
        }

        if (targetExists && skipExistingFile) {
            // Target is already there and we were asked to skip existing files.
            return;
        }
        copy(fileSystem, jobFile, configuration, targetDirectory);
    }

    /**
     * Copies a file within one file system, keeping the source and overwriting
     * the target, and warns when the copy took longer than three seconds.
     *
     * @param fileSystem file system holding both source and target
     * @param jobFile status of the source file
     * @param configuration configuration handed to {@link FileUtil}
     * @param targetDirectory directory to copy into
     * @throws IOException if the copy fails
     */
    private void copy(FileSystem fileSystem, FileStatus jobFile, Configuration configuration, Path targetDirectory) throws IOException {
        long startNanos = System.nanoTime();
        // deleteSource=false, overwrite=true
        FileUtil.copy(fileSystem, jobFile.getPath(), fileSystem, targetDirectory, false, true, configuration);
        long elapsedMillis = (System.nanoTime() - startNanos) / 1000000;
        if (elapsedMillis > 3000) {
            LOG.warn("It took " + (elapsedMillis / 1000) + " seconds to copy " + jobFile.getPath().getName() + " of " + jobFile.getLen() + " bytes.");
        }
    }

    /**
     * Command line entry point. Returns normally on success; exits the JVM with
     * a non-zero status when the tool fails, so that calling scripts can detect
     * the failure.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {
        int exitCode;
        try {
            exitCode = ToolRunner.run(new JobFilePartitioner(), args);
        } catch (Exception e) {
            LOG.error("Problem running: " + NAME, e);
            exitCode = 1;
        }
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }
}
