package edu.psu.seersuite.extractors.tableextractor.extraction;

import edu.psu.seersuite.extractors.tableextractor.Config;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;

/* loaded from: input_file:edu/psu/seersuite/extractors/tableextractor/extraction/BatchExtractor.class */
/**
 * Command-line driver that runs table extraction over every PDF in a directory.
 *
 * <p>For each PDF accepted by {@code PdfFileFilter} it invokes {@code TableExtractor}, writes the
 * resulting XML document to {@code <output-dir>/xml/<name>.xml}, appends one tab-separated row per
 * extracted table to {@code <output-dir>/mysql-infile}, and finally writes run totals to
 * {@code <output-dir>/summary}.
 */
public class BatchExtractor {
    /** Number of documents processed before stopping when {@code Config.SMALL_TEST} is set. */
    private static final int SMALL_TEST_LIMIT = 10;

    /** File-name suffix that is swapped for {@code .xml} when naming the per-document output. */
    private static final String PDF_SUFFIX = ".pdf";

    /**
     * Placeholder written to the page-number column when a table has no {@code pageNumInDoc}.
     * NOTE(review): presumably the MySQL "NULL" marker for LOAD DATA INFILE, given the output
     * file name - confirm against the loader.
     */
    private static final String NULL_COLUMN = "\\N";

    /**
     * Entry point.
     *
     * @param args {@code <pdf-dir-path> <output-dir-path> <parser>} optionally followed by
     *             {@code --debug} or {@code --small}; any other argument count prints the usage
     * @throws IOException propagated from {@link #extractTables(String, String, String)}
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 3 && args.length != 4) {
            showUsage();
            return;
        }
        if (args.length == 4) {
            String option = args[3];
            if ("--debug".equals(option)) {
                Config.DEBUG_MODE = true;
            } else if ("--small".equals(option)) {
                Config.SMALL_TEST = true;
            } else {
                // Still run (as before), but no longer ignore the typo silently.
                System.out.printf("[Warning] unknown option %s ignored\n", option);
            }
        }
        extractTables(args[0], args[1], args[2]);
    }

    /**
     * Extracts tables from every PDF in {@code pdfDirPath} and writes the results under
     * {@code outputDirPath}.
     *
     * <p>All inputs are validated before anything is created on disk. A failure on a single
     * document is reported and counted, and processing continues with the next document.
     *
     * @param pdfDirPath    directory containing the PDF documents
     * @param outputDirPath directory that receives {@code xml/}, {@code mysql-infile} and
     *                      {@code summary}; created if missing
     * @param parserName    parser to use; only {@code pdfbox} (case-insensitive) is accepted
     * @throws IOException declared for compatibility with existing callers
     */
    private static void extractTables(String pdfDirPath, String outputDirPath, String parserName) throws IOException {
        File pdfDir = new File(pdfDirPath);
        if (!pdfDir.exists()) {
            System.out.printf("[Error] %s does not exist\n", pdfDirPath);
            return;
        }
        if (!pdfDir.isDirectory()) {
            System.out.printf("[Error] %s is not a directory\n", pdfDirPath);
            return;
        }
        // Validate the parser name before touching the file system.
        if (!"pdfbox".equalsIgnoreCase(parserName)) {
            System.out.printf("[Error] %s is not a correct parser name\n", parserName);
            return;
        }
        File outputDir = new File(outputDirPath);
        if (!ensureDirectory(outputDir, "output")) {
            return;
        }
        File xmlDir = new File(outputDir, "xml");
        if (!ensureDirectory(xmlDir, "xml")) {
            return;
        }

        TableExtractor extractor = new TableExtractor();
        try {
            extractor.setParser(new PdfBoxParser());
            File[] pdfFiles = pdfDir.listFiles(new PdfFileFilter());
            if (pdfFiles == null) {
                // listFiles returns null (not an empty array) when the directory cannot be read.
                System.out.printf("[Error] %s cannot be listed\n", pdfDirPath);
                return;
            }
            System.out.printf("[Info] %d PDF documents found\n", pdfFiles.length);

            int successCount = 0;
            int failCount = 0;
            int tableCount = 0;
            int processed = 0;
            XMLOutputter prettyOutputter = new XMLOutputter(Format.getPrettyFormat());
            XMLOutputter compactOutputter = new XMLOutputter(Format.getCompactFormat());

            try (PrintWriter infileWriter =
                    new PrintWriter(new FileOutputStream(new File(outputDir, "mysql-infile")))) {
                for (File pdfFile : pdfFiles) {
                    try {
                        if (extractor.extract(pdfFile, outputDirPath) != null) {
                            Document xmlDoc = extractor.getXMLDoc();
                            writeXml(prettyOutputter, xmlDoc, new File(xmlDir, toXmlFileName(pdfFile.getName())));
                            writeInfileRows(infileWriter, compactOutputter, xmlDoc);
                            // Count the document only once every output for it has been produced,
                            // so a late failure is not reported as both success and failure.
                            tableCount += extractor.getDocInfo().getTableNum();
                            successCount++;
                        } else {
                            failCount++;
                        }
                    } catch (Exception e) {
                        System.out.printf("[Error] unhandled exception of %s\n", pdfFile.getName());
                        System.err.printf("----- %s -----\n", pdfFile.getName());
                        System.err.printf("%s\n", e.getMessage());
                        e.printStackTrace();
                        failCount++;
                    }
                    processed++;
                    if (Config.SMALL_TEST && processed >= SMALL_TEST_LIMIT) {
                        break;
                    }
                }
                // PrintWriter never throws on write; surface any swallowed error explicitly.
                if (infileWriter.checkError()) {
                    System.out.printf("[Error] failed while writing %s\n", new File(outputDir, "mysql-infile"));
                }
            }

            writeSummary(outputDir, pdfFiles.length, successCount, failCount, tableCount);

            System.out.printf("--------------------\n");
            System.out.printf("success=%d\n", successCount);
            System.out.printf("fail=%d\n", failCount);
            System.out.printf("--------------------\n");
        } catch (IOException e) {
            // Reached when the parser set-up or the opening of mysql-infile fails.
            System.out.printf("[Error] PDFBox parser or output file cannot be created: %s\n", e.getMessage());
        }
    }

    /**
     * Makes sure {@code dir} exists as a directory, creating it (and parents) when necessary.
     *
     * @param dir   directory to check or create
     * @param label short name used in the console message ({@code "output"} or {@code "xml"})
     * @return {@code true} if the directory is usable, {@code false} if it could not be created
     */
    private static boolean ensureDirectory(File dir, String label) {
        if (dir.isDirectory()) {
            return true;
        }
        if (dir.mkdirs()) {
            System.out.printf("[Info] %s dir %s created\n", label, dir);
            return true;
        }
        System.out.printf("[Error] %s dir %s cannot be created\n", label, dir);
        return false;
    }

    /**
     * Derives the XML output file name from a PDF file name by replacing a trailing
     * {@code .pdf} (any case) with {@code .xml}; names without that suffix get {@code .xml}
     * appended.
     */
    private static String toXmlFileName(String pdfFileName) {
        int stemLength = pdfFileName.length() - PDF_SUFFIX.length();
        // regionMatches returns false for a negative offset, so short names fall through safely.
        if (pdfFileName.regionMatches(true, stemLength, PDF_SUFFIX, 0, PDF_SUFFIX.length())) {
            return pdfFileName.substring(0, stemLength) + ".xml";
        }
        return pdfFileName + ".xml";
    }

    /**
     * Serializes {@code doc} to {@code target}. Writing to the byte stream lets the outputter
     * encode with the charset of its own format, so the bytes agree with the XML declaration.
     */
    private static void writeXml(XMLOutputter outputter, Document doc, File target) throws IOException {
        try (FileOutputStream out = new FileOutputStream(target)) {
            outputter.output(doc, out);
        }
    }

    /**
     * Appends one tab-separated row per child of the document's {@code tables} element.
     * Columns: {@code <docId>-<tableId>}, caption, compact table XML, footnote, reference text,
     * document id, page number. Does nothing when the document has no {@code tables} element.
     */
    private static void writeInfileRows(PrintWriter writer, XMLOutputter compactOutputter, Document doc) {
        Element root = doc.getRootElement();
        Element tables = root.getChild("tables");
        if (tables == null) {
            return;
        }
        String docId = root.getAttributeValue("id");
        for (Element tableEntry : tables.getChildren()) {
            String rowId = docId + "-" + tableEntry.getAttributeValue("id");
            Element tableElement = tableEntry.getChild("table");
            String tableXml = tableElement != null ? cleanString(compactOutputter.outputString(tableElement)) : "";
            String caption = cleanChildText(tableEntry, "caption");
            String footnote = cleanChildText(tableEntry, "footnote");
            String referenceText = cleanChildText(tableEntry, "referenceText");
            Element pageElement = tableEntry.getChild("pageNumInDoc");
            String pageNumber = pageElement != null ? pageElement.getText() : NULL_COLUMN;
            writer.write(rowId + "\t" + caption + "\t" + tableXml + "\t" + footnote + "\t"
                    + referenceText + "\t" + docId + "\t" + pageNumber + "\n");
        }
    }

    /** Returns the cleaned text of the named child element, or an empty string if it is absent. */
    private static String cleanChildText(Element parent, String childName) {
        Element child = parent.getChild(childName);
        return child != null ? cleanString(child.getText()) : "";
    }

    /**
     * Writes the run totals to {@code <outputDir>/summary} as {@code key=value} lines.
     * A failure to open the file is reported on the console instead of being dropped.
     */
    private static void writeSummary(File outputDir, int total, int success, int fail, int tables) {
        File summaryFile = new File(outputDir, "summary");
        try (PrintWriter summaryWriter = new PrintWriter(new FileOutputStream(summaryFile))) {
            summaryWriter.write("total=" + total + "\n");
            summaryWriter.write("success=" + success + "\n");
            summaryWriter.write("fail=" + fail + "\n");
            summaryWriter.write("table=" + tables + "\n");
        } catch (FileNotFoundException e) {
            System.out.printf("[Error] summary file %s cannot be written: %s\n", summaryFile, e.getMessage());
        }
    }

    /** Prints the supported command lines. */
    private static void showUsage() {
        System.out.println("BatchExtractor <pdf-dir-path> <output-dir-path> <parser>");
        System.out.println("BatchExtractor <pdf-dir-path> <output-dir-path> <parser> --debug");
        System.out.println("BatchExtractor <pdf-dir-path> <output-dir-path> <parser> --small");
        System.out.println("\tparser= pdfbox | tet");
    }

    /**
     * Removes carriage returns, line feeds and tabs so the value cannot break the
     * one-row-per-line, tab-separated layout of the infile.
     *
     * @param str text to clean; {@code null} is treated as empty
     * @return the text without {@code \r}, {@code \n} and {@code \t}
     */
    private static String cleanString(String str) {
        if (str == null) {
            return "";
        }
        return str.replace("\r", "").replace("\n", "").replace("\t", "");
    }
}
