package tech.bsdb.tools;

import com.google.common.base.Strings;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import tech.bsdb.io.Native;
import tech.bsdb.read.SyncReader;
import tech.bsdb.serde.Field;
import tech.bsdb.serde.ParquetSer;
import tech.bsdb.util.Common;
import tech.bsdb.write.BSDBWriter;

/* loaded from: input_file:tech/bsdb/tools/ParquetBuilder.class */
/**
 * Command-line tool that reads key/value records from a Parquet file (or from every
 * {@code *.parquet} file directly inside a directory) and feeds them into a {@link BSDBWriter}.
 *
 * <p>The run has four phases: the value schema of the first input file is serialized into the
 * output directory, a bounded number of records is sampled, all records are loaded in parallel,
 * and (when {@code -v} is given) every record is read back and compared with the input.
 */
public class ParquetBuilder {
    private static final String STORED_ENCODING = "UTF-8";
    private static final String DEFAULT_CHECKSUM_BITS = "4";
    private static final String DEFAULT_PASS_CACHE_SIZE = "1024";
    private static final String DEFAULT_SAMPLE_COUNT = "10000";
    private static final String DEFAULT_DICT_SIZE = "1";
    private static final String DEFAULT_COMPRESS_BLOCK_SIZE = "8192";
    /** Multiplier applied to the options that are documented as being expressed in MB. */
    private static final int BYTES_PER_MB = 1024 * 1024;
    /** Records whose key is longer than this are dropped with a warning. */
    private static final int MAX_KEY_LENGTH = 255;
    /** Records whose value is longer than this are dropped with a warning. */
    private static final int MAX_VALUE_LENGTH = 32510;
    /** Sampling only looks at this many input files at most (the first ones in listing order). */
    private static final int MAX_SAMPLE_FILES = 8;
    static Logger logger = LoggerFactory.getLogger(ParquetBuilder.class);

    /**
     * Entry point. Parses the command line, prepares the output/temp directories and runs the
     * schema, sampling, loading, build and optional verification phases in that order.
     *
     * <p>Exits the JVM with status 1 on invalid arguments or missing input, and with status 2
     * when verification finds a value that differs from the input.
     *
     * @param strArr raw command-line arguments, see {@link #parseArgs(String[])} for the options
     * @throws Exception if any phase fails; {@link ArithmeticException} if a size option given in
     *     MB does not fit its target type once converted to bytes
     */
    public static void main(String[] strArr) throws Exception {
        CommandLine cmd = parseArgs(strArr);
        boolean quiet = cmd.hasOption("q");
        boolean verify = cmd.hasOption("v");
        boolean compact = cmd.hasOption("c");
        boolean compress = cmd.hasOption("z");
        boolean approximate = cmd.hasOption("a");

        String inputArg = cmd.getOptionValue("i");
        String keyField = cmd.getOptionValue("kf");
        if (inputArg == null) {
            logger.error("must specify input file.");
            System.exit(1);
        }
        if (keyField == null) {
            logger.error("must specify parquet field name for database key.");
            System.exit(1);
        }

        String outputArg = cmd.getOptionValue("o", "./rdb/");
        File outputDir = new File(outputArg);
        ensureDirectory(outputDir, "output", outputArg);

        String tempArg = cmd.getOptionValue("temp");
        File tempDir = Strings.isNullOrEmpty(tempArg) ? null : new File(tempArg);
        if (tempDir != null) {
            ensureDirectory(tempDir, "temp", tempArg);
        }

        int checksumBits = Integer.parseInt(cmd.getOptionValue("cb", DEFAULT_CHECKSUM_BITS));
        int sampleCount = Integer.parseInt(cmd.getOptionValue("sc", DEFAULT_SAMPLE_COUNT));
        // multiplyExact: fail loudly instead of silently wrapping around on oversized values.
        int dictSize = Math.multiplyExact(
                Integer.parseInt(cmd.getOptionValue("ds", DEFAULT_DICT_SIZE)), BYTES_PER_MB);
        long passCacheSize = Math.multiplyExact(
                Long.parseLong(cmd.getOptionValue("ps", DEFAULT_PASS_CACHE_SIZE)), (long) BYTES_PER_MB);
        int compressBlockSize = Integer.parseInt(cmd.getOptionValue("bs", DEFAULT_COMPRESS_BLOCK_SIZE));

        AtomicLong readCount = new AtomicLong();
        AtomicLong writeCount = new AtomicLong();
        long startMillis = System.currentTimeMillis();
        BSDBWriter writer = new BSDBWriter(outputDir, tempDir, checksumBits, passCacheSize, compact,
                compress, compressBlockSize, dictSize, approximate);

        Configuration configuration = new Configuration();
        String nameNode = cmd.getOptionValue("nn");
        if (!Strings.isNullOrEmpty(nameNode)) {
            configuration.set("fs.defaultFS", nameNode);
        }
        FileSystem fileSystem = FileSystem.get(configuration);
        List<FileStatus> inputs = listInputFiles(fileSystem, new Path(inputArg), inputArg);

        ParquetSer parquetSer = new ParquetSer();
        // The schema is taken from the first input file only.
        Field[] schema = parquetSer.getSchema(inputs.get(0), keyField);
        writeSchema(outputDir, schema);

        sampleRecords(parquetSer, writer, inputs, keyField, sampleCount, quiet);
        writer.onSampleFinished();

        loadRecords(parquetSer, writer, inputs, keyField, readCount, writeCount, quiet);
        writer.build();
        if (!quiet) {
            logger.info("build cost:{}", System.currentTimeMillis() - startMillis);
            logger.info("read: {}, write: {}", readCount.get(), writeCount.get());
        }

        if (verify) {
            verifyRecords(parquetSer, outputDir, inputs, keyField, quiet);
        }
    }

    /**
     * Makes sure {@code dir} exists as a directory, creating it (and missing parents) if needed.
     * Exits the JVM with status 1 when the path is a regular file or cannot be created.
     *
     * @param dir directory to check or create
     * @param label short name of the directory's role, used in log messages
     * @param path the path exactly as given by the user, used in log messages
     */
    private static void ensureDirectory(File dir, String label, String path) {
        if (dir.exists()) {
            if (dir.isFile()) {
                logger.error("{} directory exist but not as directory: {}", label, path);
                System.exit(1);
            }
            return;
        }
        // mkdirs() also returns false when another process created the directory concurrently,
        // hence the second check before treating it as a failure.
        if (!dir.mkdirs() && !dir.isDirectory()) {
            logger.error("failed to create {} directory: {}", label, path);
            System.exit(1);
        }
    }

    /**
     * Resolves the input path to the list of files to process: the path itself when it is a
     * file, or its direct, non-directory children named {@code *.parquet} when it is a directory.
     * Exits the JVM with status 1 when the path does not exist or yields no file.
     *
     * @param fileSystem file system used to resolve the path
     * @param inputPath path to a file or directory
     * @param inputArg the path exactly as given by the user, used in the error message
     * @return non-empty list of input files
     * @throws IOException if the file system cannot be queried
     */
    private static List<FileStatus> listInputFiles(FileSystem fileSystem, Path inputPath, String inputArg)
            throws IOException {
        FileStatus inputStatus;
        try {
            inputStatus = fileSystem.getFileStatus(inputPath);
        } catch (FileNotFoundException e) {
            // getFileStatus reports a missing path by throwing, not by returning null.
            System.err.println("input file not exist:" + inputArg);
            System.exit(1);
            throw e; // only reached if System.exit is prevented from terminating the JVM
        }

        List<FileStatus> inputs = new ArrayList<>();
        if (inputStatus.isDirectory()) {
            for (FileStatus child : fileSystem.listStatus(inputPath)) {
                if (!child.isDirectory() && child.getPath().getName().endsWith(".parquet")) {
                    inputs.add(child);
                }
            }
        } else {
            inputs.add(inputStatus);
        }
        if (inputs.isEmpty()) {
            System.err.println("no valid input file found.");
            System.exit(1);
        }
        return inputs;
    }

    /**
     * Serializes {@code schema} with Java object serialization into the file named
     * {@code Common.FILE_NAME_VALUE_SCHEMA} inside {@code outputDir}.
     *
     * @throws IOException if the file cannot be written
     */
    private static void writeSchema(File outputDir, Field[] schema) throws IOException {
        try (FileOutputStream fileOut = new FileOutputStream(new File(outputDir, Common.FILE_NAME_VALUE_SCHEMA));
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
            objectOut.writeObject(schema);
        }
    }

    /** Returns whether a record with the given key/value lengths must be dropped. */
    private static boolean exceedsRecordLimits(int keyLength, int valueLength) {
        return keyLength > MAX_KEY_LENGTH || valueLength > MAX_VALUE_LENGTH;
    }

    /**
     * Passes records from the first {@link #MAX_SAMPLE_FILES} input files to
     * {@code writer.sample}, splitting {@code sampleCount} evenly between those files.
     *
     * @throws RuntimeException wrapping any failure raised while reading a file
     */
    private static void sampleRecords(ParquetSer parquetSer, BSDBWriter writer, List<FileStatus> inputs,
            String keyField, int sampleCount, boolean quiet) {
        int sampledFiles = Math.min(MAX_SAMPLE_FILES, inputs.size());
        int perFileLimit = sampleCount / sampledFiles;
        for (int i = 0; i < sampledFiles; i++) {
            FileStatus input = inputs.get(i);
            long started = System.currentTimeMillis();
            try {
                long sampled = parquetSer.readWithLimit(input, keyField, (key, value) -> {
                    if (exceedsRecordLimits(key.length, value.length)) {
                        logger.warn("record too large, dropped");
                    } else {
                        writer.sample(key, value);
                    }
                }, perFileLimit);
                if (!quiet) {
                    logger.info("sampling {} rows of src file {} cost: {}",
                            sampled, input, System.currentTimeMillis() - started);
                }
            } catch (Throwable sampleError) {
                logger.error("error when sampling", sampleError);
                throw new RuntimeException(sampleError);
            }
        }
    }

    /**
     * Reads every input file (one task per file, submitted through {@code Common.runParallel})
     * and puts each record that is within the size limits into {@code writer}.
     *
     * <p>The number of threads comes from the {@code bsdb.build.threads} system property,
     * defaulting to 8.
     *
     * @param readCount incremented for every record read, including dropped ones
     * @param writeCount incremented for every record handed to the writer
     * @throws Exception whatever {@code Common.runParallel} propagates
     */
    private static void loadRecords(ParquetSer parquetSer, BSDBWriter writer, List<FileStatus> inputs,
            String keyField, AtomicLong readCount, AtomicLong writeCount, boolean quiet) throws Exception {
        int threads = Integer.parseInt(System.getProperty("bsdb.build.threads", "8"));
        Common.runParallel(threads, (executorService, list) -> {
            for (FileStatus input : inputs) {
                list.add(executorService.submit(() -> {
                    long started = System.currentTimeMillis();
                    try {
                        long rows = parquetSer.read(input, keyField, (key, value) -> {
                            readCount.getAndIncrement();
                            try {
                                if (exceedsRecordLimits(key.length, value.length)) {
                                    logger.warn("record too large, dropped");
                                } else {
                                    writer.put(key, value);
                                    writeCount.getAndIncrement();
                                }
                            } catch (Throwable putError) {
                                logger.error("error when insert records", putError);
                                throw new RuntimeException("error when inserting record.", putError);
                            }
                        });
                        if (!quiet) {
                            logger.info("read {} rows from src file {} cost: {}",
                                    rows, input, System.currentTimeMillis() - started);
                        }
                    } catch (Throwable readError) {
                        logger.error("error when insert records", readError);
                        throw new RuntimeException(readError);
                    }
                }));
            }
        }, true);
    }

    /**
     * Re-reads every input file and compares each record within the size limits against the
     * value returned by a {@link SyncReader} opened on {@code outputDir}. The first mismatch is
     * logged and terminates the JVM with status 2.
     *
     * @throws Exception whatever the reader construction or {@code Common.runParallel} propagates
     */
    private static void verifyRecords(ParquetSer parquetSer, File outputDir, List<FileStatus> inputs,
            String keyField, boolean quiet) throws Exception {
        if (!quiet) {
            logger.info("Verifying data integrity...");
        }
        long started = System.currentTimeMillis();
        SyncReader reader = new SyncReader(outputDir, false, false, false, false);
        Common.runParallel(Runtime.getRuntime().availableProcessors(), (executorService, list) -> {
            for (FileStatus input : inputs) {
                list.add(executorService.submit(() -> {
                    try {
                        parquetSer.read(input, keyField, (key, value) -> {
                            if (exceedsRecordLimits(key.length, value.length)) {
                                // Such records were dropped while loading, so there is nothing to compare.
                                logger.warn("record too large, dropped");
                                return;
                            }
                            try {
                                byte[] stored = reader.getAsBytes(key);
                                if (!Common.bytesEquals(value, stored)) {
                                    logger.error("read out value not match input, verify failed.");
                                    System.exit(2);
                                }
                            } catch (Exception e) {
                                logger.error("error when query", e);
                                throw new RuntimeException(e);
                            }
                        });
                    } catch (Throwable queryError) {
                        logger.error("error when query", queryError);
                        throw new RuntimeException(queryError);
                    }
                }));
            }
        }, true);
        if (!quiet) {
            logger.info("Integrity check done, cost: {}", System.currentTimeMillis() - started);
        }
    }

    /**
     * Declares the supported options and parses {@code strArr} against them. On a parse error
     * the error is logged and the JVM exits with status 1.
     *
     * @param strArr raw command-line arguments
     * @return the parsed command line
     */
    private static CommandLine parseArgs(String[] strArr) {
        Options options = new Options();
        options.addOption("q", "quiet", false, "Quiet mode, disable all prints");
        options.addOption("i", "input", true, "Specify input file or directory");
        options.addOption("kf", "key-name", true, "Specify parquet field name which will use as for database key");
        options.addOption("c", "compact", false, "Use compact record layout on disk");
        options.addOption("z", "zstd", false, "Compression the db record file");
        options.addOption("o", "output", true, "Specify output directory, default to ./rdb");
        options.addOption("cb", "checksum-bits", true, "Specify checksum bit length, default to 4");
        options.addOption("v", "verify", false, "Verify integrity of generated index");
        options.addOption("a", "approximate", false, "Approximate mode, keys will not be stored, choosing proper checksum bits to meet false-positive query rate");
        options.addOption("ps", "pass-size", true, "Memory cache size in MB used when generation index, in approximate mode memory cost doubles");
        // Help text kept in sync with DEFAULT_COMPRESS_BLOCK_SIZE.
        options.addOption("bs", "compress-block-size", true, "Block size for compression, default 8192");
        options.addOption("sc", "sample-count", true, "Sample record count, default 10000");
        options.addOption("ds", "dictionary-size", true, "Shared dictionary size in MB, default 1");
        options.addOption("temp", "temp-dir", true, "temp directory to store hash keys");
        options.addOption("nn", "name-node", true, "name node url, e.g. hdfs://localhost:9000");
        try {
            return new DefaultParser().parse(options, strArr);
        } catch (ParseException e) {
            logger.error("Error parsing input args: ", e);
            System.exit(1);
            return null;
        }
    }
}
