package water.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import water.DKV;
import water.H2O;
import water.Iced;
import water.Key;
import water.MRTask;
import water.api.schemas3.ParseSetupV3;
import water.exceptions.H2OIllegalArgumentException;
import water.fvec.ByteVec;
import water.fvec.FileVec;
import water.fvec.Frame;
import water.fvec.UploadFileVec;
import water.fvec.Vec;
import water.parser.ParseDataset;
import water.parser.ParseWriter;
import water.parser.Parser;
import water.util.ArrayUtils;
import water.util.FileUtils;
import water.util.Log;

/* loaded from: input_file:water/parser/ParseSetup.class */
public class ParseSetup extends Iced {
    // Sentinel separator value (-1). unifyColumnSeparators lets the other file's separator win over -1.
    public static final byte GUESS_SEP = -1;
    // Header flags. The values match the literals used by unifyCheckHeader: 0 is rejected as
    // undecided, and 1 wins over -1 when two setups are merged.
    public static final int NO_HEADER = -1;
    public static final int GUESS_HEADER = 0;
    public static final int HAS_HEADER = 1;
    // Sentinel column count (-1); presumably means "not yet known" -- confirm against the parser providers.
    public static final int GUESS_COL_CNT = -1;
    // Parser descriptor; used to look up the ParserProvider in parser(), getFinalSetup() and guessSetup().
    ParserInfo _parse_type;
    byte _separator;
    boolean _single_quotes;
    int _check_header;
    int _number_columns;
    String[] _column_names;
    // One type code per column; getColumnTypeStrings() maps each code through Vec.TYPE_STR.
    byte[] _column_types;
    String[][] _domains;
    String[][] _na_strings;
    // Preview rows; mergeSetups grows this to at most 10 rows when combining files.
    String[][] _data;
    // Constructors initialise this to {"unknown"}; setFileName overwrites slot 0.
    String[] _fileNames;
    // Errors collected while guessing; mergeSetups keeps at most 20 of them.
    public ParseWriter.ParseErr[] _errs;
    // Set by guessSetup(Key[], ParseSetup) once the guess task has finished.
    public int _chunk_size;
    // Per-column preview statistics; postGlobal uses them to guess column types and NA strings.
    PreviewParseWriter _column_previews;

    /* loaded from: input_file:water/parser/ParseSetup$GuessSetupTsk.class */
    public static class GuessSetupTsk extends MRTask<GuessSetupTsk> {
        // Setup supplied by the caller; passed to ParseSetup.guessSetup in map and consulted for NA strings in postGlobal.
        final ParseSetup _userSetup;
        // Stays true until map sees non-empty data or reduce adopts the setup of a non-empty task.
        boolean _empty = true;
        // The guessed (and, after reduce, merged) setup; read by ParseSetup.guessSetup(Key[], ParseSetup).
        public ParseSetup _gblSetup;
        // Accumulated size estimate of the inputs; fed to FileVec.calcOptimalChunkSize.
        public long _totalParseSize;
        // Longest line seen in the sampled bytes; reduce keeps the maximum across tasks.
        public long _maxLineLength;
        // String form of the key handled by map; used when tagging parse errors.
        String _file;
        // Decompiled form of the compiler-generated assertion switch; set in the static initializer.
        static final /* synthetic */ boolean $assertionsDisabled;

        /**
         * Creates a guessing task.
         *
         * @param parseSetup the user-provided setup that constrains the guess
         */
        public GuessSetupTsk(ParseSetup parseSetup) {
            this._userSetup = parseSetup;
        }

        /**
         * Guesses a parse setup for the data stored under one key.
         *
         * <p>Looks the key up in the DKV, takes the first unzipped bytes of its ByteVec and, when
         * there are any, updates the size estimate, validates the encoding, measures the longest
         * line and delegates to {@link ParseSetup#guessSetup(ByteVec, byte[], ParseSetup)}.
         * Inputs without any bytes leave the task marked as empty so that {@code reduce} can skip them.
         *
         * @param key key of a ByteVec, or of a Frame whose first vec is a ByteVec
         * @throws H2OIllegalArgumentException if nothing is stored under the key or the line
         *         length cannot be determined
         * @throws ParseDataset.H2OParseException if guessing fails; the message is suffixed with the key
         */
        @Override // water.MRTask
        public void map(Key key) {
            this._file = key.toString();
            Iced get = DKV.getGet(key);
            if (get == null) {
                throw new H2OIllegalArgumentException("Missing data", "Did not find any data under key " + key);
            }
            ByteVec byteVec = (ByteVec) (get instanceof ByteVec ? get : ((Frame) get).vecs()[0]);
            byte[] firstUnzippedBytes = ZipUtil.getFirstUnzippedBytes(byteVec);
            if (firstUnzippedBytes == null || firstUnzippedBytes.length == 0) {
                // Nothing to guess from: no setup is produced, so there is nothing to name either.
                return;
            }
            this._empty = false;
            // Capture the ratio once; compressed inputs are scaled up to their estimated unpacked size.
            float decompressionRatio = ZipUtil.decompressionRatio(byteVec);
            if (decompressionRatio > 1.0d) {
                this._totalParseSize += byteVec.length() * decompressionRatio;
            } else {
                this._totalParseSize += byteVec.length();
            }
            ParseSetup.checkEncoding(firstUnzippedBytes);
            this._maxLineLength = ParseSetup.maxLineLength(firstUnzippedBytes);
            if (this._maxLineLength == -1) {
                throw new H2OIllegalArgumentException("The first 4MB of the data don't contain any line breaks. Cannot parse.");
            }
            try {
                this._gblSetup = ParseSetup.guessSetup(byteVec, firstUnzippedBytes, this._userSetup);
                for (ParseWriter.ParseErr parseErr : this._gblSetup._errs) {
                    // Fold the chunk index into the byte offset and tag the error with this file.
                    parseErr._byteOffset += parseErr._cidx * Parser.StreamData.bufSz;
                    parseErr._cidx = 0;
                    parseErr._file = this._file;
                }
            } catch (ParseDataset.H2OParseException e) {
                throw e.resetMsg(e.getMessage() + " for " + key);
            }
            this._gblSetup.setFileName(FileUtils.keyToFileName(key));
        }

        /**
         * Folds the result of another task into this one.
         *
         * <p>Empty tasks are ignored. If this task has no setup yet it adopts the other task's
         * setup and file name; otherwise the two setups are merged. In both cases the other
         * task's size estimate and maximum line length are accumulated, so an empty receiver
         * no longer discards them.
         *
         * @param guessSetupTsk the task to merge into this one
         */
        @Override // water.MRTask
        public void reduce(GuessSetupTsk guessSetupTsk) {
            if (guessSetupTsk._empty) {
                return;
            }
            if (this._gblSetup == null) {
                this._empty = false;
                this._gblSetup = guessSetupTsk._gblSetup;
                // Keep the file label in step with the setup it describes (used in merge error reports).
                this._file = guessSetupTsk._file;
                if (!$assertionsDisabled && this._gblSetup == null) {
                    throw new AssertionError();
                }
            } else {
                this._gblSetup = mergeSetups(this._gblSetup, guessSetupTsk._gblSetup, this._file, guessSetupTsk._file);
            }
            // An empty receiver holds zeros here, so accumulating is correct on both paths.
            this._totalParseSize += guessSetupTsk._totalParseSize;
            this._maxLineLength = Math.max(this._maxLineLength, guessSetupTsk._maxLineLength);
        }

        /**
         * Finalises the merged setup once all tasks have been reduced.
         *
         * <p>For non-ARFF setups that carry column previews, the column types are guessed from
         * the previews and the NA strings are either guessed or taken from the user setup.
         * Every collected parse error is logged as a warning.
         *
         * @throws ParseDataset.H2OParseException if no task produced a setup, i.e. none of the
         *         inputs contained any data
         */
        @Override // water.MRTask
        public void postGlobal() {
            if (this._gblSetup == null) {
                // Every input was empty; fail with a readable message instead of a bare NullPointerException.
                throw new ParseDataset.H2OParseException("Cannot determine file type. None of the input files contain any data.");
            }
            if (this._gblSetup._column_previews != null && !this._gblSetup._parse_type.equals(DefaultParserProviders.ARFF_INFO)) {
                this._gblSetup._column_types = this._gblSetup._column_previews.guessTypes();
                if (this._userSetup._na_strings == null) {
                    this._gblSetup._na_strings = this._gblSetup._column_previews.guessNAStrings(this._gblSetup._column_types);
                } else {
                    this._gblSetup._na_strings = this._userSetup._na_strings;
                }
            }
            for (ParseWriter.ParseErr parseErr : this._gblSetup._errs) {
                Log.warn("ParseSetup: " + parseErr.toString());
            }
        }

        /**
         * Merges the setup of a second file into the first one, in place.
         *
         * <p>Header flag, separator, parser type, column previews, column names, column count,
         * preview rows, errors and file names are unified in that order. At most 10 preview
         * rows and 20 errors are kept.
         *
         * @param parseSetup  the accumulated setup; modified and returned (may be null)
         * @param parseSetup2 the setup to merge in
         * @param str         file label of the first setup, used in column-count error reports
         * @param str2        file label of the second setup, used in column-count error reports
         * @return {@code parseSetup} after merging, or {@code parseSetup2} if {@code parseSetup} is null
         * @throws ParseDataset.H2OParseException if the two setups cannot be parsed as one dataset
         */
        private ParseSetup mergeSetups(ParseSetup parseSetup, ParseSetup parseSetup2, String str, String str2) {
            if (parseSetup == null) {
                return parseSetup2;
            }
            parseSetup._check_header = unifyCheckHeader(parseSetup._check_header, parseSetup2._check_header);
            parseSetup._separator = unifyColumnSeparators(parseSetup._separator, parseSetup2._separator);
            // ARFF followed by CSV needs no type reconciliation; every other combination does.
            if (!parseSetup._parse_type.equals(DefaultParserProviders.ARFF_INFO) || !parseSetup2._parse_type.equals(DefaultParserProviders.CSV_INFO)) {
                if (parseSetup._parse_type.equals(DefaultParserProviders.CSV_INFO) && parseSetup2._parse_type.equals(DefaultParserProviders.ARFF_INFO)) {
                    // CSV followed by ARFF: the ARFF side dictates parser type and column types.
                    parseSetup._parse_type = DefaultParserProviders.ARFF_INFO;
                    parseSetup._column_types = parseSetup2._column_types;
                } else {
                    if (!parseSetup.isCompatible(parseSetup2)) {
                        throw new ParseDataset.H2OParseException("File type mismatch. Cannot parse files " + parseSetup.file() + " and " + parseSetup2.file() + " of type " + parseSetup._parse_type.name() + " and " + parseSetup2._parse_type.name() + " as one dataset.");
                    }
                    parseSetup._column_previews = PreviewParseWriter.unifyColumnPreviews(parseSetup._column_previews, parseSetup2._column_previews);
                }
            }
            parseSetup._column_names = unifyColumnNames(parseSetup._column_names, parseSetup2._column_names);
            parseSetup._number_columns = parseSetup._parse_type.equals(DefaultParserProviders.CSV_INFO) ? Math.max(parseSetup._number_columns, parseSetup2._number_columns) : unifyColumnCount(parseSetup._number_columns, parseSetup2._number_columns, parseSetup, str, str2);
            String[][] ownRows = parseSetup._data;
            String[][] otherRows = parseSetup2._data;
            // Top up the preview with rows 1.. of the other file (row 0 is skipped, presumably
            // because it mirrors the header -- confirm). Only do so when there is at least one
            // row to copy; otherwise the copy length would be negative.
            if (ownRows != null && otherRows != null && ownRows.length < 10 && otherRows.length > 1) {
                int have = ownRows.length;
                int wanted = Math.min(10, (have + otherRows.length) - 1);
                parseSetup._data = Arrays.copyOf(ownRows, wanted);
                System.arraycopy(otherRows, 1, parseSetup._data, have, wanted - have);
            }
            parseSetup._errs = (ParseWriter.ParseErr[]) ArrayUtils.append(parseSetup._errs, parseSetup2._errs);
            parseSetup._fileNames = ArrayUtils.append(parseSetup._fileNames, parseSetup2._fileNames);
            if (parseSetup._errs.length > 20) {
                parseSetup._errs = (ParseWriter.ParseErr[]) Arrays.copyOf(parseSetup._errs, 20);
            }
            return parseSetup;
        }

        /**
         * Combines the header flags of two files.
         *
         * @param i  header flag of the first file
         * @param i2 header flag of the second file
         * @return 1 if either flag is 1, otherwise -1
         * @throws ParseDataset.H2OParseException if either flag is still 0 (undecided)
         */
        private static int unifyCheckHeader(int i, int i2) {
            boolean undecided = i == 0 || i2 == 0;
            if (undecided) {
                throw new ParseDataset.H2OParseException("Unable to determine header on a file. Not expected.");
            }
            if (i == 1 || i2 == 1) {
                return 1;
            }
            return -1;
        }

        /**
         * Combines the column separators of two files.
         *
         * @param b  separator of the first file, or -1 if unknown
         * @param b2 separator of the second file, or -1 if unknown
         * @return the agreed separator; a known separator wins over -1
         * @throws ParseDataset.H2OParseException if both separators are known and differ
         */
        private static byte unifyColumnSeparators(byte b, byte b2) {
            // Identical values, or only the second one unknown: the first one stands.
            if (b == b2 || b2 == -1) {
                return b;
            }
            if (b == -1) {
                return b2;
            }
            throw new ParseDataset.H2OParseException("Column separator mismatch. One file seems to use \"" + ((char) b) + "\" and the other uses \"" + ((char) b2) + "\".");
        }

        /**
         * Combines the column counts of two files.
         *
         * <p>A count of 0 yields to the other count. When both counts are non-zero and differ,
         * a parse error naming both files is appended to {@code parseSetup._errs} and the larger
         * count is used.
         *
         * @param i          column count of the first file
         * @param i2         column count of the second file
         * @param parseSetup setup that receives the mismatch error, if any
         * @param str        label of the first file
         * @param str2       label of the second file
         * @return the unified column count
         */
        private int unifyColumnCount(int i, int i2, ParseSetup parseSetup, String str, String str2) {
            if (i == i2 || i2 == 0) {
                return i;
            }
            if (i == 0) {
                return i2;
            }
            ParseWriter.ParseErr mismatch = new ParseWriter.ParseErr();
            mismatch._err = "Incompatible number of columns, " + i + " != " + i2;
            mismatch._file = str + ", " + str2;
            parseSetup._errs = (ParseWriter.ParseErr[]) ArrayUtils.append(parseSetup._errs, mismatch);
            return Math.max(i, i2);
        }

        /**
         * Combines the column names of two files.
         *
         * <p>If only one side has names, those are used. Otherwise every name of the first
         * array must equal the name at the same position of the second array.
         *
         * @param strArr  column names of the first file, may be null
         * @param strArr2 column names of the second file, may be null
         * @return the column names to use for the merged setup
         * @throws ParseDataset.H2OParseException if the second array is shorter than the first
         *         or a name differs
         */
        private static String[] unifyColumnNames(String[] strArr, String[] strArr2) {
            if (strArr == null) {
                return strArr2;
            }
            if (strArr2 == null) {
                return strArr;
            }
            for (int i = 0; i < strArr.length; i++) {
                // ">=" (not ">"): index strArr2.length is already out of range.
                if (i >= strArr2.length || !strArr[i].equals(strArr2[i])) {
                    throw new ParseDataset.H2OParseException("Column names do not match between files.");
                }
            }
            return strArr;
        }

        // Decompiled form of the compiler-generated assertion switch: assertions in this class
        // are disabled exactly when ParseSetup's desired assertion status is false.
        static {
            $assertionsDisabled = !ParseSetup.class.desiredAssertionStatus();
        }
    }

    /**
     * Stores the given name in the first slot of the file-name list.
     *
     * @param str the file name to record
     */
    public void setFileName(String str) {
        _fileNames[0] = str;
    }

    /**
     * Copies the parse settings of another setup.
     *
     * <p>The copy starts with an empty error list; file names and column previews are not copied.
     *
     * @param parseSetup the setup to copy from
     */
    public ParseSetup(ParseSetup parseSetup) {
        this(
            parseSetup._parse_type,
            parseSetup._separator,
            parseSetup._single_quotes,
            parseSetup._check_header,
            parseSetup._number_columns,
            parseSetup._column_names,
            parseSetup._column_types,
            parseSetup._domains,
            parseSetup._na_strings,
            parseSetup._data,
            new ParseWriter.ParseErr[0],
            parseSetup._chunk_size);
    }

    /**
     * Builds a setup for the SVMLight parser: separator -1, no single quotes, header flag -1,
     * one column of type code 3, no names, domains, NA strings or preview data, and no errors.
     *
     * @return a new SVMLight parse setup
     */
    public static ParseSetup makeSVMLightSetup() {
        byte[] columnTypes = {3};
        ParseWriter.ParseErr[] noErrors = new ParseWriter.ParseErr[0];
        return new ParseSetup(DefaultParserProviders.SVMLight_INFO, (byte) -1, false, -1, 1, null, columnTypes, (String[][]) null, (String[][]) null, (String[][]) null, noErrors);
    }

    /**
     * Creates a fully specified setup. All other value constructors delegate here.
     *
     * @param parserInfo  parser descriptor
     * @param b           column separator
     * @param z           whether single quotes are used
     * @param i           header flag
     * @param i2          number of columns
     * @param strArr      column names
     * @param bArr        column type codes
     * @param strArr2     domains
     * @param strArr3     NA strings
     * @param strArr4     preview data rows
     * @param parseErrArr parse errors
     * @param i3          chunk size
     */
    public ParseSetup(ParserInfo parserInfo, byte b, boolean z, int i, int i2, String[] strArr, byte[] bArr, String[][] strArr2, String[][] strArr3, String[][] strArr4, ParseWriter.ParseErr[] parseErrArr, int i3) {
        // Field defaults first, exactly as in the no-arg constructor.
        _fileNames = new String[]{"unknown"};
        _chunk_size = FileVec.DFLT_CHUNK_SIZE;
        _column_previews = null;

        // Format description.
        _parse_type = parserInfo;
        _separator = b;
        _single_quotes = z;
        _check_header = i;

        // Column description.
        _number_columns = i2;
        _column_names = strArr;
        _column_types = bArr;
        _domains = strArr2;
        _na_strings = strArr3;

        // Preview, sizing and diagnostics.
        _data = strArr4;
        _chunk_size = i3;
        _errs = parseErrArr;
    }

    /**
     * Creates a setup from its REST schema.
     *
     * <p>A null parse type falls back to {@code DefaultParserProviders.GUESS_INFO} and a
     * separator of 0 is replaced by -1. The column count is -1, there are no domains or
     * preview data, and the error list is empty.
     *
     * @param parseSetupV3 the schema object to read the settings from
     */
    public ParseSetup(ParseSetupV3 parseSetupV3) {
        this(
            parseSetupV3.parse_type == null
                ? DefaultParserProviders.GUESS_INFO
                : ParserService.INSTANCE.getByName(parseSetupV3.parse_type).info(),
            parseSetupV3.separator == 0 ? (byte) -1 : parseSetupV3.separator,
            parseSetupV3.single_quotes,
            parseSetupV3.check_header,
            -1,
            parseSetupV3.column_names,
            strToColumnTypes(parseSetupV3.column_types),
            (String[][]) null,
            parseSetupV3.na_strings,
            (String[][]) null,
            new ParseWriter.ParseErr[0],
            parseSetupV3.chunk_size);
    }

    /**
     * Creates a setup with an empty error list and the default chunk size
     * ({@code FileVec.DFLT_CHUNK_SIZE}).
     */
    public ParseSetup(ParserInfo parserInfo, byte b, boolean z, int i, int i2, String[] strArr, byte[] bArr, String[][] strArr2, String[][] strArr3, String[][] strArr4) {
        this(
            parserInfo, b, z, i, i2,
            strArr, bArr, strArr2, strArr3, strArr4,
            new ParseWriter.ParseErr[0],
            FileVec.DFLT_CHUNK_SIZE);
    }

    /**
     * Creates a setup with the given errors and the default chunk size
     * ({@code FileVec.DFLT_CHUNK_SIZE}).
     */
    public ParseSetup(ParserInfo parserInfo, byte b, boolean z, int i, int i2, String[] strArr, byte[] bArr, String[][] strArr2, String[][] strArr3, String[][] strArr4, ParseWriter.ParseErr[] parseErrArr) {
        this(
            parserInfo, b, z, i, i2,
            strArr, bArr, strArr2, strArr3, strArr4,
            parseErrArr,
            FileVec.DFLT_CHUNK_SIZE);
    }

    /**
     * Creates a setup that carries only preview data and errors: column names, column types,
     * domains and NA strings are null and the chunk size is {@code FileVec.DFLT_CHUNK_SIZE}.
     */
    public ParseSetup(ParserInfo parserInfo, byte b, boolean z, int i, int i2, String[][] strArr, ParseWriter.ParseErr[] parseErrArr) {
        this(
            parserInfo, b, z, i, i2,
            null, null, (String[][]) null, (String[][]) null,
            strArr,
            parseErrArr,
            FileVec.DFLT_CHUNK_SIZE);
    }

    /**
     * Creates a setup holding only the defaults: file name "unknown", the default chunk
     * size and no column previews. All other fields keep their Java default values.
     */
    public ParseSetup() {
        _column_previews = null;
        _chunk_size = FileVec.DFLT_CHUNK_SIZE;
        _fileNames = new String[]{"unknown"};
    }

    /** @return the column names array held by this setup (not a copy) */
    public String[] getColumnNames() {
        return _column_names;
    }

    /** @return the preview data rows held by this setup (not a copy) */
    public String[][] getData() {
        return _data;
    }

    /**
     * Translates every column type code into its name via {@code Vec.TYPE_STR}.
     *
     * @return a new array with one type name per column
     */
    public String[] getColumnTypeStrings() {
        final byte[] typeCodes = _column_types;
        String[] typeNames = new String[typeCodes.length];
        int col = 0;
        for (byte code : typeCodes) {
            typeNames[col++] = Vec.TYPE_STR[code];
        }
        return typeNames;
    }

    /** @return the column type codes held by this setup (not a copy) */
    public byte[] getColumnTypes() {
        return _column_types;
    }

    /* JADX WARN: Removed duplicated region for block: B:48:0x0184  */
    /* JADX WARN: Removed duplicated region for block: B:51:0x018b  */
    /* JADX WARN: Removed duplicated region for block: B:53:0x0192  */
    /* JADX WARN: Removed duplicated region for block: B:55:0x0199  */
    /* JADX WARN: Removed duplicated region for block: B:57:0x01a0  */
    /* JADX WARN: Removed duplicated region for block: B:59:0x01a7  */
    /* JADX WARN: Removed duplicated region for block: B:61:0x01ae A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder left by the decompiler: the real body could not be recovered, so every
     * call throws {@link UnsupportedOperationException}.
     *
     * <p>NOTE(review): the {@code ParseSetup(ParseSetupV3)} constructor calls this method, so
     * that constructor cannot complete until the body is restored from the original sources.
     * Presumably this maps column type names to type codes (the inverse of
     * {@code getColumnTypeStrings}) -- confirm against the original implementation.
     *
     * @param r6 column type names
     * @return never returns normally in this decompiled form
     * @throws UnsupportedOperationException always
     */
    public static byte[] strToColumnTypes(java.lang.String[] r6) {
        /*
            Method dump skipped, instructions count: 476
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: water.parser.ParseSetup.strToColumnTypes(java.lang.String[]):byte[]");
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Creates a parser for this setup through the provider registered for its parse type.
     *
     * @param key key handed to the provider's {@code createParser}
     * @return the parser created by the provider
     * @throws H2OIllegalArgumentException if no provider is registered for the parse type
     */
    public Parser parser(Key key) {
        ParserProvider provider = ParserService.INSTANCE.getByInfo(this._parse_type);
        if (provider == null) {
            throw new H2OIllegalArgumentException("Unknown file type.  Parse cannot be completed.", "Attempted to invoke a parser for ParseType:" + this._parse_type + ", which doesn't exist.");
        }
        return provider.createParser(this, key);
    }

    /**
     * Asks the provider registered for this setup's parse type to build the final setup.
     *
     * @param keyArr     input keys passed to the provider
     * @param parseSetup requested setup passed to the provider
     * @return the setup returned by the provider's {@code createParserSetup}
     * @throws H2OIllegalArgumentException if no provider is registered for the parse type
     */
    public final ParseSetup getFinalSetup(Key[] keyArr, ParseSetup parseSetup) {
        ParserProvider provider = ParserService.INSTANCE.getByInfo(this._parse_type);
        if (provider == null) {
            throw new H2OIllegalArgumentException("Unknown parser configuration! Configuration=" + this);
        }
        return provider.createParserSetup(keyArr, parseSetup);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Finds column names that occur more than once.
     *
     * @return the set of duplicated names; empty if there are no column names or no duplicates
     */
    public HashSet<String> checkDupColumnNames() {
        HashSet<String> duplicates = new HashSet<>();
        if (this._column_names == null) {
            return duplicates;
        }
        // Typed set (the decompiled code used a raw HashSet): add() returns false on a repeat.
        HashSet<String> seen = new HashSet<>();
        for (String name : this._column_names) {
            if (!seen.add(name)) {
                duplicates.add(name);
            }
        }
        return duplicates;
    }

    /**
     * Describes this setup by its parse type.
     *
     * @return the string form of the parse type, or {@code "null"} when no parse type has
     *         been set (for example right after the no-arg constructor)
     */
    @Override
    public String toString() {
        // String.valueOf is null-safe; toString must not throw on a half-initialised setup.
        return String.valueOf(this._parse_type);
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Checks whether none of the given tokens looks like a number, a time or a UUID.
     *
     * @param strArr tokens to inspect
     * @return false as soon as a token parses as a double, or is recognised by
     *         {@code ParseTime.isTime} or {@code ParseUUID.isUUID}; true otherwise
     */
    public static boolean allStrings(String[] strArr) {
        BufferedString scratch = new BufferedString();
        for (int idx = 0; idx < strArr.length; idx++) {
            String token = strArr[idx];
            boolean numeric;
            try {
                Double.parseDouble(token);
                numeric = true;
            } catch (NumberFormatException e) {
                numeric = false;
            }
            if (numeric) {
                return false;
            }
            scratch.set(token);
            if (ParseTime.isTime(scratch)) {
                return false;
            }
            if (ParseUUID.isUUID(scratch)) {
                return false;
            }
        }
        return true;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Decides whether the first line is a header: it must consist of strings only while
     * the second line must not.
     *
     * @param strArr  tokens of the first line
     * @param strArr2 tokens of the second line
     * @return true if only the first line passes {@code allStrings}
     */
    public static boolean hasHeader(String[] strArr, String[] strArr2) {
        if (!allStrings(strArr)) {
            return false;
        }
        return !allStrings(strArr2);
    }

    /**
     * Guesses the setup for the given keys, starting from a setup that fixes only the
     * quote style and header flag (parser type GUESS_INFO, separator -1, column count -1).
     *
     * @param keyArr keys of the data to inspect
     * @param z      whether single quotes are used
     * @param i      header flag
     * @return the guessed setup
     */
    public static ParseSetup guessSetup(Key[] keyArr, boolean z, int i) {
        ParseWriter.ParseErr[] noErrors = new ParseWriter.ParseErr[0];
        ParseSetup userSetup = new ParseSetup(DefaultParserProviders.GUESS_INFO, (byte) -1, z, i, -1, (String[][]) null, noErrors);
        return guessSetup(keyArr, userSetup);
    }

    /**
     * Runs a {@code GuessSetupTsk} over the given keys and fills in the chunk size.
     *
     * <p>When the first key holds a Frame whose first vec is an {@code UploadFileVec}, the
     * default chunk size is used; otherwise it is computed by
     * {@code FileVec.calcOptimalChunkSize} from the task's size and line-length estimates.
     *
     * @param keyArr     keys of the data to inspect
     * @param parseSetup user-provided setup that constrains the guess
     * @return the guessed setup
     */
    public static ParseSetup guessSetup(Key[] keyArr, ParseSetup parseSetup) {
        GuessSetupTsk task = new GuessSetupTsk(parseSetup);
        task.doAll(keyArr).getResult();
        Iced first = DKV.getGet(keyArr[0]);
        boolean uploaded = (first instanceof Frame) && (((Frame) first).vec(0) instanceof UploadFileVec);
        ParseSetup guessed = task._gblSetup;
        guessed._chunk_size = uploaded
            ? FileVec.DFLT_CHUNK_SIZE
            : FileVec.calcOptimalChunkSize(task._totalParseSize, guessed._number_columns, task._maxLineLength, Runtime.getRuntime().availableProcessors(), H2O.getCloudSize(), false, true);
        return guessed;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Renders the file names of this setup for messages, showing at most the first five.
     *
     * @return the {@code Arrays.toString} form of up to five file names
     */
    public String file() {
        String[] shown = _fileNames.length > 5 ? Arrays.copyOf(_fileNames, 5) : _fileNames;
        return Arrays.toString(shown);
    }

    /**
     * Checks whether another setup can be merged with this one.
     *
     * @param parseSetup the other setup
     * @return true if both setups have equal parse types and the same number of columns
     */
    protected boolean isCompatible(ParseSetup parseSetup) {
        if (!this._parse_type.equals(parseSetup._parse_type)) {
            return false;
        }
        return this._number_columns == parseSetup._number_columns;
    }

    /**
     * Guesses the setup for one file, taking parser type, separator, quote style, header
     * flag, column names and column types from the user setup. The column count is passed
     * as -1, and domains and NA strings as null.
     *
     * @param byteVec    the vec the bytes were read from
     * @param bArr       the sampled bytes
     * @param parseSetup user-provided setup
     * @return the guessed setup
     */
    public static ParseSetup guessSetup(ByteVec byteVec, byte[] bArr, ParseSetup parseSetup) {
        return guessSetup(
            byteVec,
            bArr,
            parseSetup._parse_type,
            parseSetup._separator,
            -1,
            parseSetup._single_quotes,
            parseSetup._check_header,
            parseSetup._column_names,
            parseSetup._column_types,
            (String[][]) null,
            (String[][]) null);
    }

    /**
     * Delegates setup guessing to the provider registered for the given parser type.
     *
     * @return the setup guessed by the provider
     * @throws ParseDataset.H2OParseException if no provider is registered for {@code parserInfo}
     */
    public static ParseSetup guessSetup(ByteVec byteVec, byte[] bArr, ParserInfo parserInfo, byte b, int i, boolean z, int i2, String[] strArr, byte[] bArr2, String[][] strArr2, String[][] strArr3) {
        ParserProvider provider = ParserService.INSTANCE.getByInfo(parserInfo);
        if (provider == null) {
            throw new ParseDataset.H2OParseException("Cannot determine file type.");
        }
        return provider.guessSetup(byteVec, bArr, b, i, z, i2, strArr, bArr2, strArr2, strArr3);
    }

    /**
     * Derives a DKV key name ending in ".hex" from a file name.
     *
     * <p>The directory part is dropped, known trailing extensions (zip, gz, csv, xls, txt,
     * svm, orc, arff) are stripped repeatedly, an "X" is prefixed when the remainder is empty
     * or does not start with a Java identifier start character, and every later character
     * that is not a Java identifier part becomes '_'. If the resulting key already exists in
     * the DKV, a counter is inserted before ".hex" until the name is unused.
     *
     * @param str file name or path
     * @return a key name that is not present in the DKV at the time of the check
     */
    public static String createHexName(String str) {
        int sepIdx = str.lastIndexOf(File.separatorChar);
        if (sepIdx > 0) {
            str = str.substring(sepIdx + 1);
        }
        int dotIdx = str.lastIndexOf('.');
        while (dotIdx > 0 && (str.endsWith("zip") || str.endsWith("gz") || str.endsWith("csv") || str.endsWith("xls") || str.endsWith("txt") || str.endsWith("svm") || str.endsWith("orc") || str.endsWith("arff"))) {
            str = str.substring(0, dotIdx);
            dotIdx = str.lastIndexOf('.');
        }
        // A path ending in the separator leaves an empty name; guard before charAt(0).
        if (str.isEmpty() || !Character.isJavaIdentifierStart(str.charAt(0))) {
            str = "X" + str;
        }
        char[] chars = str.toCharArray();
        for (int pos = 1; pos < chars.length; pos++) {
            if (!Character.isJavaIdentifierPart(chars[pos])) {
                chars[pos] = '_';
            }
        }
        String base = new String(chars);
        String candidate = base + ".hex";
        // Append 1, 2, 3, ... until the name does not clash with an existing key.
        for (int suffix = 1; DKV.get(Key.make(candidate)) != null; suffix++) {
            candidate = base + suffix + ".hex";
        }
        return candidate;
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Rejects data that starts with a UTF-16 byte order mark (bytes FF FE or FE FF).
     *
     * @param bArr the first bytes of the data
     * @throws ParseDataset.H2OParseException if a UTF-16 byte order mark is found
     */
    public static final void checkEncoding(byte[] bArr) {
        if (bArr.length < 2) {
            return;
        }
        byte first = bArr[0];
        byte second = bArr[1];
        boolean ffFe = first == -1 && second == -2;
        boolean feFf = first == -2 && second == -1;
        if (ffFe || feFf) {
            throw new ParseDataset.H2OParseException("UTF16 encoding detected, but is not supported.");
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Returns the length, in characters, of the longest line in the given bytes.
     *
     * <p>The bytes are decoded with the platform default charset and split into lines by
     * {@link BufferedReader#readLine()}.
     *
     * @param bArr the bytes to scan
     * @return the longest line length, or -1 if fewer than two bytes are given or reading fails
     */
    public static final long maxLineLength(byte[] bArr) {
        if (bArr.length < 2) {
            return -1L;
        }
        long longest = 0;
        try (BufferedReader reader = new BufferedReader(new StringReader(new String(bArr)))) {
            String line;
            // Keep the line that was read; the decompiled code referenced an undefined register here.
            while ((line = reader.readLine()) != null) {
                longest = Math.max(line.length(), longest);
            }
            return longest;
        } catch (IOException e) {
            return -1L;
        }
    }

    /** @return the parser descriptor of this setup */
    public ParserInfo getParseType() {
        return _parse_type;
    }

    /**
     * Sets the parser descriptor.
     *
     * @param parserInfo the new parser descriptor
     * @return this setup, so calls can be chained
     */
    public ParseSetup setParseType(ParserInfo parserInfo) {
        _parse_type = parserInfo;
        return this;
    }

    /**
     * Sets the column separator.
     *
     * @param b the new separator byte
     * @return this setup, so calls can be chained
     */
    public ParseSetup setSeparator(byte b) {
        _separator = b;
        return this;
    }

    /**
     * Sets the single-quotes flag.
     *
     * @param z the new flag value
     * @return this setup, so calls can be chained
     */
    public ParseSetup setSingleQuotes(boolean z) {
        _single_quotes = z;
        return this;
    }

    /**
     * Sets the header flag.
     *
     * @param i the new header flag
     * @return this setup, so calls can be chained
     */
    public ParseSetup setCheckHeader(int i) {
        _check_header = i;
        return this;
    }

    /**
     * Sets the number of columns.
     *
     * @param i the new column count
     * @return this setup, so calls can be chained
     */
    public ParseSetup setNumberColumns(int i) {
        _number_columns = i;
        return this;
    }

    /**
     * Sets the column names; the array is stored as given, not copied.
     *
     * @param strArr the new column names
     * @return this setup, so calls can be chained
     */
    public ParseSetup setColumnNames(String[] strArr) {
        _column_names = strArr;
        return this;
    }

    /**
     * Sets the column type codes; the array is stored as given, not copied.
     *
     * @param bArr the new column type codes
     * @return this setup, so calls can be chained
     */
    public ParseSetup setColumnTypes(byte[] bArr) {
        _column_types = bArr;
        return this;
    }

    /**
     * Sets the domains; the array is stored as given, not copied.
     *
     * @param strArr the new domains
     * @return this setup, so calls can be chained
     */
    public ParseSetup setDomains(String[][] strArr) {
        _domains = strArr;
        return this;
    }

    /**
     * Sets the NA strings; the array is stored as given, not copied.
     *
     * @param strArr the new NA strings
     * @return this setup, so calls can be chained
     */
    public ParseSetup setNAStrings(String[][] strArr) {
        _na_strings = strArr;
        return this;
    }

    /**
     * Sets the chunk size.
     *
     * @param i the new chunk size
     * @return this setup, so calls can be chained
     */
    public ParseSetup setChunkSize(int i) {
        _chunk_size = i;
        return this;
    }
}
