package com.twitter.hraven.etl;

import com.twitter.hraven.datasource.ProcessingException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/* loaded from: input_file:com/twitter/hraven/etl/FileLister.class */
/**
 * Static helpers that list files under a path (optionally recursing into
 * sub-directories), keep those accepted by a
 * {@link JobFileModifiedRangePathFilter}, and then drop files whose length
 * exceeds a caller-supplied maximum.
 */
public class FileLister {
    private static final Log LOG = LogFactory.getLog(FileLister.class);

    /**
     * Recursively walks {@code path}, appending to {@code list} every
     * non-directory entry whose path is accepted by the filter.
     *
     * @param list accumulator that receives the accepted file statuses
     * @param fileSystem file system used to list directory contents
     * @param path directory to walk
     * @param jobFileModifiedRangePathFilter filter applied to each file path
     * @throws IOException if listing a directory fails
     */
    private static void traverseDirs(List<FileStatus> list, FileSystem fileSystem, Path path, JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter) throws IOException {
        FileStatus[] children = fileSystem.listStatus(path);
        if (children == null) {
            // getListFilesToProcess already treats a null listing as "no files";
            // do the same here instead of failing with a NullPointerException.
            return;
        }
        for (FileStatus child : children) {
            if (child.isDir()) {
                traverseDirs(list, fileSystem, child.getPath(), jobFileModifiedRangePathFilter);
            } else if (jobFileModifiedRangePathFilter.accept(child.getPath())) {
                list.add(child);
            }
        }
    }

    /**
     * Lists the files under {@code path} that the filter accepts.
     *
     * @param z when {@code true}, sub-directories are walked recursively;
     *          when {@code false}, only {@code path} itself is listed
     * @param fileSystem file system used to list directory contents
     * @param path directory to list
     * @param jobFileModifiedRangePathFilter filter selecting the files to return
     * @return the accepted file statuses; in non-recursive mode this is
     *         whatever {@code FileSystem.listStatus(Path, PathFilter)} returns
     *         (callers in this class guard against {@code null}); in recursive
     *         mode it is never {@code null}
     * @throws IOException if listing a directory fails
     */
    public static FileStatus[] listFiles(boolean z, FileSystem fileSystem, Path path, JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter) throws IOException {
        if (!z) {
            return fileSystem.listStatus(path, jobFileModifiedRangePathFilter);
        }
        List<FileStatus> collected = new ArrayList<FileStatus>();
        traverseDirs(collected, fileSystem, path, jobFileModifiedRangePathFilter);
        return collected.toArray(new FileStatus[collected.size()]);
    }

    /**
     * Lists the files under {@code path} accepted by the filter and prunes
     * the result with {@link #pruneFileListBySize}.
     *
     * @param j maximum file length; longer files are excluded
     * @param z whether to recurse into sub-directories
     * @param fileSystem file system used to list directory contents
     * @param path directory to list
     * @param jobFileModifiedRangePathFilter filter selecting candidate files
     * @return the files to process; an empty array when the listing is
     *         {@code null} or nothing survives pruning
     * @throws IOException if listing a directory fails
     */
    public static FileStatus[] getListFilesToProcess(long j, boolean z, FileSystem fileSystem, Path path, JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter) throws IOException {
        LOG.info(" in getListFilesToProcess maxFileSize=" + j + " inputPath= " + path.toUri());
        FileStatus[] candidates = listFiles(z, fileSystem, path, jobFileModifiedRangePathFilter);
        if (candidates == null) {
            LOG.info(" No files found, orig list returning 0");
            return new FileStatus[0];
        }
        return pruneFileListBySize(j, candidates, fileSystem, path);
    }

    /**
     * Removes files longer than {@code j} from {@code fileStatusArr}, and for
     * each job id that had an oversized file also removes one remaining file
     * carrying the same job id.
     *
     * @param j maximum file length; files with a length above it are dropped
     * @param fileStatusArr the candidate files; must not be {@code null}
     * @param fileSystem not used by this method
     * @param path source path, used only in the log output
     * @return the surviving files, in their original order; an empty array if
     *         none survive
     * @throws ProcessingException if a job id cannot be derived from the name
     *         of an oversized file, or from a kept file examined while
     *         oversized job ids are still pending
     */
    static FileStatus[] pruneFileListBySize(long j, FileStatus[] fileStatusArr, FileSystem fileSystem, Path path) {
        LOG.info("Pruning orig list  of size " + fileStatusArr.length + " for source" + path.toUri());
        List<FileStatus> kept = new ArrayList<FileStatus>();
        Set<String> oversizedJobIds = new HashSet<String>();
        for (FileStatus candidate : fileStatusArr) {
            long len = candidate.getLen();
            if (len <= j) {
                kept.add(candidate);
            } else {
                Path hugeFile = candidate.getPath();
                LOG.info("In getListFilesToProcess filesize " + len + " has exceeded maxFileSize " + j + " for " + hugeFile.toUri());
                // Remember the job so its companion file can be dropped below.
                oversizedJobIds.add(getJobIdFromPath(hugeFile));
            }
        }
        if (kept.isEmpty()) {
            LOG.info("Found no files worth processing. Returning 0 sized array");
            return new FileStatus[0];
        }
        ListIterator<FileStatus> it = kept.listIterator();
        // Stop as soon as every oversized job id has been handled.
        while (it.hasNext() && !oversizedJobIds.isEmpty()) {
            Path current = it.next().getPath();
            String jobId = getJobIdFromPath(current);
            if (oversizedJobIds.contains(jobId)) {
                LOG.info("Removing from prunedList " + current.toUri());
                it.remove();
                // Only one kept file is removed per oversized job id.
                // NOTE(review): presumably a job contributes at most two files,
                // so one companion remains after the oversized one -- confirm.
                oversizedJobIds.remove(jobId);
            }
        }
        return kept.toArray(new FileStatus[kept.size()]);
    }

    /**
     * Extracts the job id from the final component of {@code path} by way of
     * {@link JobFile}.
     *
     * @param path path of a job file
     * @return the job id; never {@code null}
     * @throws ProcessingException if {@link JobFile} yields no job id for the
     *         file name
     */
    static String getJobIdFromPath(Path path) {
        String jobid = new JobFile(path.getName()).getJobid();
        if (jobid == null) {
            throw new ProcessingException("job id is null for " + path.toUri());
        }
        return jobid;
    }
}
