/*
 * Decompiled with CFR 0.152.
 */
package org.terrier.indexing;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Stack;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.terrier.indexing.Tokenizer;
import org.terrier.indexing.tokenisation.TokenStream;
import org.terrier.indexing.tokenisation.Tokeniser;
import org.terrier.utility.ApplicationSetup;
import org.terrier.utility.TagSet;

/**
 * Character-level tokenizer for tagged (TREC-style) text read from a
 * {@link BufferedReader}.
 *
 * <p>The tokenizer reads one character at a time, recognises opening tags
 * ({@code <NAME ...>}), closing tags ({@code </NAME>}), {@code <!...>} sections
 * and {@code &...;} entities, and keeps a stack of the currently open tags that
 * the configured {@link TagSet} marks as "to process" or "to skip". Text found
 * between tags is either returned verbatim (id tags and exact-match tags) or
 * handed to the configured {@link Tokeniser}, whose tokens are then returned one
 * by one from {@link #nextToken()}.
 *
 * <p><b>Shared state:</b> the tag stack ({@link #stk}) and the look-ahead
 * character ({@link #lastChar}) are {@code static}, i.e. shared by every
 * instance of this class in the JVM. They are kept static because they are part
 * of the class's visible (public/protected) interface; using several instances
 * at the same time will make them interfere with each other.
 */
public class TRECFullTokenizer
implements Tokenizer {
    protected static final Logger logger = LoggerFactory.getLogger(TRECFullTokenizer.class);

    /** When true, mismatched closing tags are repaired silently (no warning is logged). */
    protected boolean ignoreMissingClosingTags = false;

    /**
     * Look-ahead: the last character read by the previous scan step, or -1.
     * Only {@code '<'} and {@code '&'} are ever re-used by {@link #nextToken()}.
     * NOTE: static, hence shared between all instances.
     */
    public static int lastChar = -1;

    /** Not updated anywhere in this class. */
    public int number_of_terms = 0;

    /** Set once the underlying reader has returned -1. */
    public boolean EOF;

    /** Set when the tag stack becomes empty after a closing tag, or at end of file. */
    public boolean EOD;

    /** Set when a {@code '>'} is met while skipping leading whitespace; reset on every scan step. */
    public boolean error;

    /** The character source. May be null until {@link #setInput(BufferedReader)} is called. */
    public BufferedReader br;

    /** Running count maintained alongside the {@code read()} calls on {@link #br}. */
    public long counter = 0L;

    /**
     * Stack of the (upper-cased) names of the currently open tags that the tag
     * set marks as to-process or to-skip.
     * NOTE: static, hence shared between all instances.
     */
    protected static Stack<String> stk = new Stack<String>();

    /** Tags to process / to skip / id tags. */
    protected TagSet tagSet;

    /** Tags whose text content is returned as a single token, without tokenisation. */
    protected TagSet exactTagSet;

    /** Maximum accepted term length, taken from {@code ApplicationSetup.MAX_TERM_LENGTH}. */
    protected static final int tokenMaximumLength = ApplicationSetup.MAX_TERM_LENGTH;

    /** Value of the {@code lowercase} property (default {@code "true"}). */
    protected static final boolean lowercase = Boolean.parseBoolean(ApplicationSetup.getProperty("lowercase", "true"));

    /** True while the innermost open tag is a tag to process. */
    public boolean inTagToProcess = false;

    /** True while the innermost open tag is a tag to skip. */
    public boolean inTagToSkip = false;

    /** Not updated anywhere in this class; see {@link #inDocnoTag()} for the computed value. */
    public boolean inDocnoTag = false;

    /** Reusable buffer for the text between two tags. */
    protected final StringBuilder sw = new StringBuilder(tokenMaximumLength);

    /** Reusable buffer for the name of the tag currently being read. */
    protected final StringBuilder tagNameSB = new StringBuilder(10);

    private Tokeniser tokeniser = Tokeniser.getTokeniser();

    /** Tokens of the most recently tokenised text chunk that have not been returned yet. */
    private TokenStream currentTokenStream = Tokeniser.EMPTY_STREAM;

    /**
     * Creates a tokenizer with the default tag sets and no input; call
     * {@link #setInput(BufferedReader)} before reading tokens.
     */
    public TRECFullTokenizer() {
        this(new TagSet("TrecDocTags"), new TagSet("TrecExactDocTags"), null);
    }

    /**
     * Creates a tokenizer with the default tag sets.
     *
     * @param _br the reader to tokenize
     */
    public TRECFullTokenizer(BufferedReader _br) {
        this(new TagSet("TrecDocTags"), new TagSet("TrecExactDocTags"), _br);
    }

    /**
     * Creates a tokenizer with the given tag sets and no input; call
     * {@link #setInput(BufferedReader)} before reading tokens.
     *
     * @param _tagSet   tags to process / skip / treat as document id
     * @param _exactSet tags whose content is returned untokenised
     */
    public TRECFullTokenizer(TagSet _tagSet, TagSet _exactSet) {
        this(_tagSet, _exactSet, null);
    }

    /**
     * Creates a tokenizer with the given tag sets and input.
     *
     * @param _ts       tags to process / skip / treat as document id
     * @param _exactSet tags whose content is returned untokenised
     * @param _br       the reader to tokenize
     */
    public TRECFullTokenizer(TagSet _ts, TagSet _exactSet, BufferedReader _br) {
        this.br = _br;
        this.tagSet = _ts;
        this.exactTagSet = _exactSet;
        this.EOD = false;
        this.EOF = false;
    }

    /**
     * Filters out "strange" terms.
     *
     * <p>A term is rejected (null is returned) when it is null, empty, longer
     * than {@link #tokenMaximumLength}, contains more than three consecutive
     * identical characters, or contains more than four ASCII digits. Terms read
     * inside an exact-match tag only undergo the null/length checks.
     *
     * <p>The thresholds are evaluated after every character. The previous
     * implementation stopped scanning as soon as it saw three identical
     * characters in a row, which made its "more than three" test unreachable and
     * also left any digits after that point uncounted.
     *
     * @param s the candidate term, may be null
     * @return {@code s} itself if it is accepted, otherwise null
     */
    protected String check(String s) {
        if (s == null) {
            return null;
        }
        final int length = s.length();
        if (length == 0 || length > tokenMaximumLength) {
            return null;
        }
        if (!stk.empty() && this.exactTagSet.isTagToProcess(stk.peek())) {
            return s;
        }
        int runLength = 0;
        int digitCount = 0;
        int previous = -1;
        for (int i = 0; i < length; i++) {
            final int current = s.charAt(i);
            if (current >= '0' && current <= '9') {
                ++digitCount;
            }
            runLength = current == previous ? runLength + 1 : 1;
            previous = current;
            if (runLength > 3 || digitCount > 4) {
                return null;
            }
        }
        return s;
    }

    /**
     * Closes the underlying reader. I/O failures are logged, not thrown.
     * Does nothing when no reader has been set.
     */
    public void close() {
        this.closeReaderQuietly();
    }

    /**
     * Closes the underlying reader. I/O failures are logged, not thrown.
     * Does nothing when no reader has been set.
     */
    public void closeBufferedReader() {
        this.closeReaderQuietly();
    }

    /** Closes {@link #br} if there is one, logging (not propagating) any I/O failure. */
    private void closeReaderQuietly() {
        if (this.br == null) {
            return;
        }
        try {
            this.br.close();
        }
        catch (IOException ioe) {
            logger.warn("Error while closing the buffered reader in TRECTokenizer", ioe);
        }
    }

    /**
     * Returns the name of the innermost open tag.
     *
     * @return the tag on top of the tag stack
     * @throws java.util.EmptyStackException if no tag is currently open
     */
    public String currentTag() {
        return stk.peek();
    }

    /** @return true if a tag is open and the innermost one is an id tag of the tag set */
    public boolean inDocnoTag() {
        return !stk.isEmpty() && this.tagSet.isIdTag(stk.peek());
    }

    /** @return true if a tag is open and the innermost one is a tag to process */
    public boolean inTagToProcess() {
        return !stk.isEmpty() && this.tagSet.isTagToProcess(stk.peek());
    }

    /** @return true if a tag is open and the innermost one is a tag to skip */
    public boolean inTagToSkip() {
        return !stk.isEmpty() && this.tagSet.isTagToSkip(stk.peek());
    }

    /** @return true once the end of the current document (or of the input) has been reached */
    public boolean isEndOfDocument() {
        return this.EOD;
    }

    /** @return true once the underlying reader is exhausted */
    public boolean isEndOfFile() {
        return this.EOF;
    }

    /** Clears the end-of-document flag so that {@link #nextToken()} resumes scanning. */
    public void nextDocument() {
        this.EOD = false;
    }

    /**
     * Returns the next token of the current document.
     *
     * <p>Tokens left over from the previously tokenised text chunk are returned
     * first. Otherwise the input is scanned, consuming any number of tags, until
     * one chunk of text has been read, the end of the document is detected, or
     * the input is exhausted. The text chunk is then
     * <ul>
     *   <li>dropped, if a whitelist is configured and the innermost tag is not a
     *       tag to process, or if the innermost tag is a tag to skip;</li>
     *   <li>returned unchanged, if the innermost tag is an id tag;</li>
     *   <li>returned as one token (lower-cased if {@link #lowercase} is set), if
     *       the innermost tag is an exact-match tag;</li>
     *   <li>tokenised otherwise, its first token being returned.</li>
     * </ul>
     *
     * @return the next token, or null when this call produced none (callers
     *         should consult {@link #isEndOfDocument()} / {@link #isEndOfFile()}),
     *         or when an I/O error occurred (which is logged)
     */
    public String nextToken() {
        if (this.currentTokenStream.hasNext()) {
            return (String)this.currentTokenStream.next();
        }
        String s = null;
        String tagName = null;
        // true while the last thing scanned was a tag rather than text
        boolean btag = true;
        int ch = 0;
        while (btag && ch != -1 && !this.EOD) {
            boolean tag_close = false;
            boolean tag_open = false;
            this.error = false;
            try {
                // Resume from the delimiter that ended the previous scan step.
                if (lastChar == '<' || lastChar == '&') {
                    ch = lastChar;
                    lastChar = -1;
                }
                // Skip whitespace in front of the next tag / entity / text.
                while (ch != -1 && ch != '<' && ch != '&' && Character.isWhitespace((char)ch)) {
                    ch = this.br.read();
                    ++this.counter;
                    if (ch == '>') {
                        this.error = true;
                    }
                }
                if (ch == '<') {
                    ch = this.br.read();
                    ++this.counter;
                    if (ch == '/') {
                        ch = this.br.read();
                        ++this.counter;
                        tag_close = true;
                    } else if (ch == '!') {
                        // "<!" section: not a tag, skip ahead.
                        ++this.counter;
                        ch = this.br.read();
                        if (ch == '[') {
                            // "<![": skip up to the next '['
                            ++this.counter;
                            while ((ch = this.br.read()) != '[' && ch != -1) {
                                ++this.counter;
                            }
                        } else {
                            while ((ch = this.br.read()) != '>' && ch != '<' && ch != -1) {
                                ++this.counter;
                            }
                            ++this.counter;
                        }
                    } else {
                        tag_open = true;
                    }
                }
                if (ch == '&') {
                    // Entity reference: skip up to its terminator.
                    while ((ch = this.br.read()) != '>' && ch != '<' && ch != ' ' && ch != ';' && ch != -1) {
                        ++this.counter;
                    }
                    ++this.counter;
                }
                btag = tag_close || tag_open;
                if (btag) {
                    // Read the tag up to '>' (or '<' / EOF); the tag name is the
                    // part before the first whitespace character.
                    boolean endOfTagName = false;
                    while (ch != -1 && ch != '<' && ch != '>') {
                        if (!endOfTagName) {
                            this.tagNameSB.append((char)ch);
                        }
                        ch = this.br.read();
                        ++this.counter;
                        if (!endOfTagName && Character.isWhitespace((char)ch)) {
                            endOfTagName = true;
                            tagName = this.tagNameSB.toString();
                        }
                    }
                    if (!endOfTagName) {
                        tagName = this.tagNameSB.toString();
                    }
                    // Always reset the buffer. It used to be left untouched for tags
                    // containing whitespace (e.g. attributes), so the next tag name
                    // was appended to the previous one.
                    this.tagNameSB.setLength(0);
                } else {
                    // Text: collect everything up to the next '<' or '&'.
                    if ((char)ch == '>') {
                        ++this.counter;
                        ch = this.br.read();
                    }
                    while (ch != -1 && ch != '<' && ch != '&') {
                        this.sw.append((char)ch);
                        ch = this.br.read();
                        ++this.counter;
                    }
                }
                lastChar = ch;
                s = this.sw.toString();
                this.sw.setLength(0);
                if (tag_open && (this.tagSet.isTagToProcess(tagName) || this.tagSet.isTagToSkip(tagName)) && !tagName.equals("")) {
                    // A known tag has been opened: it becomes the innermost tag.
                    stk.push(tagName.toUpperCase());
                    if (this.tagSet.isTagToProcess(tagName)) {
                        this.inTagToProcess = true;
                        this.inTagToSkip = false;
                    } else {
                        this.inTagToSkip = true;
                        this.inTagToProcess = false;
                    }
                } else if (tag_close && (this.tagSet.isTagToProcess(tagName) || this.tagSet.isTagToSkip(tagName)) && !tagName.equals("")) {
                    // A known tag has been closed: the flags now reflect the
                    // enclosing tag, if there is one.
                    this.processEndOfTag(tagName.toUpperCase());
                    if (stk.isEmpty()) {
                        this.inTagToProcess = false;
                        this.inTagToSkip = false;
                    } else if (this.tagSet.isTagToProcess(stk.peek())) {
                        this.inTagToProcess = true;
                        this.inTagToSkip = false;
                    } else {
                        this.inTagToProcess = false;
                        this.inTagToSkip = true;
                    }
                }
            }
            catch (IOException ioe) {
                logger.warn("Input/Output exception while reading tokens", ioe);
                return null;
            }
        }
        if (ch == -1) {
            this.EOF = true;
            this.EOD = true;
        }
        boolean hasWhitelist = this.tagSet.hasWhitelist();
        if (!btag && (!hasWhitelist || this.inTagToProcess) && !this.inTagToSkip) {
            if (!stk.empty() && this.tagSet.isIdTag(stk.peek())) {
                return s;
            }
            if (!stk.empty() && this.exactTagSet.isTagToProcess(stk.peek())) {
                return lowercase ? s.toLowerCase() : s;
            }
            this.currentTokenStream = this.tokeniser.tokenise((Reader)new StringReader(s));
            if (this.currentTokenStream.hasNext()) {
                return (String)this.currentTokenStream.next();
            }
        }
        return null;
    }

    /**
     * Updates the tag stack for a closing tag.
     *
     * <p>If the tag matches the innermost open tag, that tag is popped.
     * Otherwise a warning is logged (unless suppressed through
     * {@link #setIgnoreMissingClosingTags(boolean)}) and tags are popped up to
     * and including the nearest open tag with that name; if no such tag is
     * open, the stack is left untouched. When the stack ends up empty, the end
     * of the document is flagged. Nothing happens if no tag is open.
     *
     * @param tag the upper-cased name of the tag being closed
     */
    protected void processEndOfTag(String tag) {
        if (stk.empty()) {
            return;
        }
        if (tag.equals(stk.peek())) {
            stk.pop();
        } else {
            if (!this.ignoreMissingClosingTags) {
                logger.warn("<" + stk.peek() + "> has no closing tag");
                logger.warn("<" + tag + "> not expected");
            }
            // Stack.search gives the 1-based distance from the top, or -1 if absent.
            final int depth = stk.search(tag);
            for (int popped = 0; !stk.empty() && popped < depth; popped++) {
                stk.pop();
            }
        }
        if (stk.empty()) {
            this.EOD = true;
        }
    }

    /**
     * Controls whether warnings about mismatched closing tags are logged.
     *
     * @param toIgnore true to suppress the warnings
     */
    public void setIgnoreMissingClosingTags(boolean toIgnore) {
        this.ignoreMissingClosingTags = toIgnore;
    }

    /**
     * Returns the current value of {@link #counter}.
     *
     * <p>NOTE(review): the counter is incremented next to {@code read()} calls
     * on a character reader, so it appears to track characters rather than
     * bytes; confirm before relying on it as a byte offset for multi-byte
     * encodings.
     *
     * @return the current value of the read counter
     */
    public long getByteOffset() {
        return this.counter;
    }

    /**
     * Replaces the reader tokens are read from. The end-of-document /
     * end-of-file flags and the shared tag stack are not reset.
     *
     * @param _br the new reader
     */
    public void setInput(BufferedReader _br) {
        this.br = _br;
    }
}

