package org.apache.hyracks.examples.text;

import java.io.DataOutput;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
import org.apache.hyracks.dataflow.common.data.parsers.IValueParser;
import org.apache.hyracks.dataflow.common.data.parsers.UTF8StringParserFactory;
import org.apache.hyracks.dataflow.std.file.ITupleParser;
import org.apache.hyracks.dataflow.std.file.ITupleParserFactory;

/* loaded from: input_file:org/apache/hyracks/examples/text/WordTupleParserFactory.class */
public class WordTupleParserFactory implements ITupleParserFactory {
    private static final long serialVersionUID = 1;

    /* loaded from: input_file:org/apache/hyracks/examples/text/WordTupleParserFactory$WordCursor.class */
    private static class WordCursor {
        private static final int INITIAL_BUFFER_SIZE = 4096;
        private static final int INCREMENT = 4096;
        private char[] buffer = new char[4096];
        private int start = 0;
        private int end = 0;
        private boolean eof = false;
        private int fStart;
        private int fEnd;
        private Reader in;

        public WordCursor(Reader reader) {
            this.in = reader;
        }

        public boolean nextWord() throws IOException {
            if (this.eof) {
                return false;
            }
            boolean z = false;
            int i = this.start;
            while (true) {
                if (i >= this.end) {
                    int i2 = this.start;
                    this.eof = !readMore();
                    if (this.eof) {
                        return true;
                    }
                    i -= i2 - this.start;
                }
                if (isNonWordChar(this.buffer[i])) {
                    this.fStart = this.start;
                    this.fEnd = i;
                    this.start = i + 1;
                    if (z) {
                        return true;
                    }
                } else {
                    z = true;
                }
                i++;
            }
        }

        private boolean isNonWordChar(char c) {
            switch (c) {
                case '!':
                case '\"':
                case '#':
                case '$':
                case '%':
                case '&':
                case '\'':
                case '(':
                case ')':
                case '*':
                case '+':
                case ',':
                case '.':
                case '/':
                case ':':
                case ';':
                case '<':
                case '=':
                case '>':
                case '?':
                case '@':
                case '[':
                case '\\':
                case ']':
                case '^':
                case '`':
                case '{':
                case '|':
                case '}':
                case '~':
                    return true;
                case '-':
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                case 'A':
                case 'B':
                case 'C':
                case 'D':
                case 'E':
                case 'F':
                case 'G':
                case 'H':
                case 'I':
                case 'J':
                case 'K':
                case 'L':
                case 'M':
                case 'N':
                case 'O':
                case 'P':
                case 'Q':
                case 'R':
                case 'S':
                case 'T':
                case 'U':
                case 'V':
                case 'W':
                case 'X':
                case 'Y':
                case 'Z':
                case '_':
                case 'a':
                case 'b':
                case 'c':
                case 'd':
                case 'e':
                case 'f':
                case 'g':
                case 'h':
                case 'i':
                case 'j':
                case 'k':
                case 'l':
                case 'm':
                case 'n':
                case 'o':
                case 'p':
                case 'q':
                case 'r':
                case 's':
                case 't':
                case 'u':
                case 'v':
                case 'w':
                case 'x':
                case 'y':
                case 'z':
                default:
                    return Character.isWhitespace(c);
            }
        }

        private boolean readMore() throws IOException {
            if (this.start > 0) {
                System.arraycopy(this.buffer, this.start, this.buffer, 0, this.end - this.start);
            }
            this.end -= this.start;
            this.start = 0;
            if (this.end == this.buffer.length) {
                this.buffer = Arrays.copyOf(this.buffer, this.buffer.length + 4096);
            }
            int read = this.in.read(this.buffer, this.end, this.buffer.length - this.end);
            if (read < 0) {
                return false;
            }
            this.end += read;
            return true;
        }
    }

    public ITupleParser createTupleParser(final IHyracksTaskContext iHyracksTaskContext) {
        return new ITupleParser() { // from class: org.apache.hyracks.examples.text.WordTupleParserFactory.1
            public void parse(InputStream inputStream, IFrameWriter iFrameWriter) throws HyracksDataException {
                try {
                    FrameTupleAppender frameTupleAppender = new FrameTupleAppender(new VSizeFrame(iHyracksTaskContext));
                    ArrayTupleBuilder arrayTupleBuilder = new ArrayTupleBuilder(1);
                    DataOutput dataOutput = arrayTupleBuilder.getDataOutput();
                    IValueParser createValueParser = UTF8StringParserFactory.INSTANCE.createValueParser();
                    WordCursor wordCursor = new WordCursor(new InputStreamReader(inputStream));
                    while (wordCursor.nextWord()) {
                        arrayTupleBuilder.reset();
                        createValueParser.parse(wordCursor.buffer, wordCursor.fStart, wordCursor.fEnd - wordCursor.fStart, dataOutput);
                        arrayTupleBuilder.addFieldEndOffset();
                        FrameUtils.appendToWriter(iFrameWriter, frameTupleAppender, arrayTupleBuilder.getFieldEndOffsets(), arrayTupleBuilder.getByteArray(), 0, arrayTupleBuilder.getSize());
                    }
                    frameTupleAppender.flush(iFrameWriter, true);
                } catch (IOException e) {
                    throw new HyracksDataException(e);
                }
            }
        };
    }
}
