package org.wikibrain.parser;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.compress.archivers.ArchiveException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.wikibrain.utils.WpIOUtils;

/* loaded from: input_file:org/wikibrain/parser/DumpSplitter.class */
/**
 * Splits a dump file into individual articles.
 *
 * <p>Each element produced by {@link #iterator()} is the text between a line that
 * (after trimming) equals {@link #ARTICLE_BEGIN} and the next line that (after
 * trimming) equals {@link #ARTICLE_END}, both inclusive, with every line
 * terminated by {@code "\n"}. Lines outside of such a pair are skipped.
 *
 * <p>Every call to {@link #iterator()} opens the file anew; the underlying reader
 * is closed once the end of the file is reached or an I/O error occurs.
 */
public class DumpSplitter implements Iterable<String> {
    public static final String ARTICLE_BEGIN = "<page>";
    public static final String ARTICLE_END = "</page>";

    /** Upper bound (in characters) on a single article; longer articles are truncated. */
    private static final int MAX_ARTICLE_LENGTH = 10000000;

    private static final Logger LOG = LoggerFactory.getLogger(DumpSplitter.class);

    /** The dump file being split. */
    private final File path;

    /**
     * Creates a splitter for the given dump file. The file is not opened until
     * {@link #iterator()} is called.
     *
     * @param file the dump file to split
     */
    public DumpSplitter(File file) {
        this.path = file;
    }

    /**
     * @return the dump file this splitter reads from
     */
    public File getPath() {
        return this.path;
    }

    /**
     * Opens the dump file and returns an iterator over its articles.
     *
     * @return a fresh iterator positioned at the start of the file
     * @throws RuntimeException wrapping the underlying exception if the file cannot be opened
     */
    @Override
    public Iterator<String> iterator() {
        try {
            return new ArticleIterator(this.path);
        } catch (IOException e) {
            throw constructionFailed(e);
        } catch (ArchiveException e) {
            throw constructionFailed(e);
        } catch (XMLStreamException e) {
            throw constructionFailed(e);
        }
    }

    /** Logs an iterator construction failure and wraps it in an unchecked exception. */
    private static RuntimeException constructionFailed(Exception cause) {
        LOG.error("article iterator construction failed", cause);
        return new RuntimeException(cause);
    }

    /**
     * Iterator that lazily reads one article at a time from the dump file.
     *
     * <p>Not safe for use by multiple threads: it keeps unsynchronized mutable state.
     */
    public class ArticleIterator implements Iterator<String> {
        private final BufferedReader reader;

        /** The next article to hand out, or {@code null} if none has been read ahead. */
        private String buffer = null;

        /** Number of lines successfully read so far; used in error messages. */
        private int lineNum = 0;

        /** Set once the reader has been closed (end of file or I/O failure). */
        private boolean closed = false;

        /**
         * Opens the given file for reading via {@code WpIOUtils.openBufferedReader}.
         *
         * @param file the dump file to read
         * @throws IOException if opening the file fails
         * @throws ArchiveException if raised by the underlying opener
         * @throws XMLStreamException if raised by the underlying opener
         */
        public ArticleIterator(File file) throws IOException, ArchiveException, XMLStreamException {
            this.reader = WpIOUtils.openBufferedReader(file);
        }

        /**
         * Reads the next article into {@link #buffer} unless one is already buffered
         * or the reader is closed. An I/O error is logged and ends the iteration.
         */
        private void fillBuffer() {
            if (this.closed || this.buffer != null) {
                return;
            }
            try {
                String opening = readToArticleBegin();
                if (opening != null) {
                    this.buffer = readToArticleClose(opening);
                }
            } catch (IOException e) {
                logParseError("parser failed", e);
                // The stream is in an unknown state; release it instead of leaking
                // the handle and re-reading a broken stream on the next call.
                closeReader();
            }
        }

        /**
         * Skips lines until one that, trimmed, equals {@link DumpSplitter#ARTICLE_BEGIN}.
         *
         * @return that line followed by {@code "\n"}, or {@code null} at end of file
         */
        private String readToArticleBegin() throws IOException {
            String line;
            while ((line = readLine()) != null) {
                if (line.trim().equals(DumpSplitter.ARTICLE_BEGIN)) {
                    return line + "\n";
                }
            }
            return null;
        }

        /**
         * Accumulates lines up to and including the line that, trimmed, equals
         * {@link DumpSplitter#ARTICLE_END}.
         *
         * <p>If the file ends first, or appending the next line would exceed
         * {@link DumpSplitter#MAX_ARTICLE_LENGTH}, the problem is logged and a
         * closing {@code </page>} line is appended in place of the missing or
         * oversized content.
         *
         * @param str the already-read opening of the article
         * @return the article text
         */
        private String readToArticleClose(String str) throws IOException {
            StringBuilder article = new StringBuilder(str);
            while (true) {
                String line = readLine();
                if (line == null) {
                    logParseError("reached eof in middle of article");
                    article.append("</page>\n");
                    break;
                }
                if (article.length() + line.length() > DumpSplitter.MAX_ARTICLE_LENGTH) {
                    logParseError("truncating overly long article");
                    article.append("</page>\n");
                    break;
                }
                article.append(line).append('\n');
                if (line.trim().equals(DumpSplitter.ARTICLE_END)) {
                    break;
                }
            }
            return article.toString();
        }

        /** Builds the common error text: file, current line number and message. */
        private String describeError(String str) {
            return "parsing " + DumpSplitter.this.path + " failed in line " + this.lineNum + ": " + str;
        }

        /** Logs a parse problem at the current line. */
        private void logParseError(String str) {
            DumpSplitter.LOG.error(describeError(str));
        }

        /** Logs a parse problem at the current line together with its cause. */
        private void logParseError(String str, Exception exc) {
            DumpSplitter.LOG.error(describeError(str), exc);
        }

        /**
         * Reads one line, counting it, and closes the reader at end of file.
         *
         * @return the next line, or {@code null} if the reader is closed or exhausted
         */
        private String readLine() throws IOException {
            if (this.closed) {
                return null;
            }
            String line = this.reader.readLine();
            if (line == null) {
                closeReader();
                return null;
            }
            this.lineNum++;
            return line;
        }

        /**
         * Closes the reader once and marks the iterator as closed. A failure while
         * closing is logged rather than propagated so that already-read content is
         * not discarded.
         */
        private void closeReader() {
            if (this.closed) {
                return;
            }
            this.closed = true;
            try {
                this.reader.close();
            } catch (IOException e) {
                DumpSplitter.LOG.warn("closing " + DumpSplitter.this.path + " failed", e);
            }
        }

        /**
         * @return {@code true} if another article is available
         */
        @Override
        public boolean hasNext() {
            fillBuffer();
            return this.buffer != null;
        }

        /**
         * Returns the next article.
         *
         * <p>NOTE: when no article is left this returns {@code null} instead of
         * throwing {@code NoSuchElementException}; this historical behaviour is
         * kept so existing callers are not broken.
         *
         * @return the next article, or {@code null} if the iteration is exhausted
         */
        @Override
        public String next() {
            fillBuffer();
            String article = this.buffer;
            this.buffer = null;
            return article;
        }

        /**
         * Removal is not supported.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}
