package nlp4j.wiki;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.util.zip.GZIPInputStream;
import nlp4j.Document;
import nlp4j.impl.DefaultDocument;

/* loaded from: input_file:nlp4j/wiki/WikiAbstractReader.class */
/**
 * Line-oriented reader for a gzip-compressed, UTF-8 encoded abstract dump.
 *
 * <p>The reader does not parse XML. It walks the file line by line with a small
 * state machine ({@link Status}) and relies on every element occupying exactly
 * one line, in a fixed order. For each record it builds a {@link Document}
 * carrying the attributes {@code "title"}, {@code "url"} and (when present)
 * {@code "abstract"}, and passes it to the registered {@link DocumentHandler}.
 *
 * <p>NOTE(review): the expected layout is presumably
 * {@code <feed> / <doc> / <title> / <url> / <abstract> / <links> / <sublink>* /
 * </links> / </doc>}; only {@code <abstract>} and {@code <sublink} are actually
 * checked against the line content, the other elements are recognised purely by
 * position.
 */
public class WikiAbstractReader {

    // Tag pairs whose lengths determine how many characters are stripped from
    // the start and the end of a line to obtain the element text.
    private static final String TITLE_OPEN = "<title>";
    private static final String TITLE_CLOSE = "</title>";
    private static final String URL_OPEN = "<url>";
    private static final String URL_CLOSE = "</url>";
    private static final String ABSTRACT_OPEN = "<abstract>";
    private static final String ABSTRACT_CLOSE = "</abstract>";
    private static final String SUBLINK_PREFIX = "<sublink";

    private final File file;
    private DocumentHandler handler;

    /** Document currently being assembled; replaced each time a new record starts. */
    Document doc;

    /** Which line of a record the reader expects to see next. */
    enum Status {
        FEED,
        DOC,
        TITLE,
        URL,
        ABSTRACT,
        LINKS,
        SUBLINK,
        CLOSE_DOC
    }

    /**
     * Creates a reader for the given file. The file is not opened until
     * {@link #read()} is called.
     *
     * @param file gzip-compressed dump file to read
     */
    public WikiAbstractReader(File file) {
        this.file = file;
    }

    /**
     * Reads the whole file and hands every completed document to the handler.
     *
     * <p>If no handler has been set the file is still scanned, but the documents
     * are discarded. If the handler throws {@code BreakException}, reading stops
     * and the method returns normally.
     *
     * @throws FileNotFoundException if the file is {@code null}, does not exist
     *                               or is not readable
     * @throws IOException           if the file cannot be read or decompressed,
     *                               or if a title, URL or abstract line is too
     *                               short to contain its enclosing tags
     */
    public void read() throws IOException {
        if (this.file == null || !this.file.exists() || !this.file.canRead()) {
            throw new FileNotFoundException(this.file == null
                    ? "file is null"
                    : "File not found or not readable: " + this.file.getAbsolutePath());
        }
        // Each stream is declared as its own resource so that everything opened
        // so far is closed even when a later constructor (e.g. the gzip header
        // check) fails.
        try (InputStream rawStream = Files.newInputStream(this.file.toPath());
                GZIPInputStream gzipStream = new GZIPInputStream(rawStream);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(gzipStream, StandardCharsets.UTF_8))) {
            Status status = Status.FEED;
            String line;
            while ((line = reader.readLine()) != null) {
                switch (status) {
                    case FEED:
                        // First line of the file: skipped without inspection.
                        status = Status.DOC;
                        break;
                    case DOC:
                        // Record opening line: start a fresh document.
                        this.doc = new DefaultDocument();
                        status = Status.TITLE;
                        break;
                    case TITLE:
                        this.doc.putAttribute("title", elementText(line, TITLE_OPEN, TITLE_CLOSE));
                        status = Status.URL;
                        break;
                    case URL:
                        this.doc.putAttribute("url", elementText(line, URL_OPEN, URL_CLOSE));
                        status = Status.ABSTRACT;
                        break;
                    case ABSTRACT:
                        // The abstract is optional: a line that does not start
                        // with the tag is consumed without setting the attribute.
                        if (line.startsWith(ABSTRACT_OPEN)) {
                            this.doc.putAttribute("abstract",
                                    elementText(line, ABSTRACT_OPEN, ABSTRACT_CLOSE));
                        }
                        status = Status.LINKS;
                        break;
                    case LINKS:
                        // Line opening the link list: skipped.
                        status = Status.SUBLINK;
                        break;
                    case SUBLINK:
                        // Stay here while sublink lines keep coming; the first
                        // other line ends the link list.
                        if (!line.startsWith(SUBLINK_PREFIX)) {
                            status = Status.CLOSE_DOC;
                        }
                        break;
                    case CLOSE_DOC:
                        // Record closing line: the document is complete.
                        status = Status.DOC;
                        if (this.handler != null) {
                            try {
                                this.handler.read(this.doc);
                            } catch (BreakException stopRequested) {
                                // NOTE(review): treated as the handler's request
                                // to stop reading early (inferred from the
                                // exception name) - confirm against callers.
                                return;
                            }
                        }
                        break;
                    default:
                        break;
                }
            }
        }
    }

    /**
     * Registers the handler that receives each completed document.
     *
     * @param documentHandler handler to notify, or {@code null} to discard documents
     */
    public void setHandler(DocumentHandler documentHandler) {
        this.handler = documentHandler;
    }

    /**
     * Strips {@code openTag.length()} characters from the start of the line and
     * {@code closeTag.length()} characters from its end. The tags themselves are
     * not compared with the line content; only their lengths are used.
     *
     * @throws IOException if the line is too short to hold both tags
     */
    private static String elementText(String line, String openTag, String closeTag)
            throws IOException {
        int start = openTag.length();
        int end = line.length() - closeTag.length();
        if (end < start) {
            throw new IOException("Malformed " + openTag + " line: " + line);
        }
        return line.substring(start, end);
    }
}
