package org.archive.crawler.extractor;

import com.lowagie.text.pdf.PRIndirectReference;
import com.lowagie.text.pdf.PdfArray;
import com.lowagie.text.pdf.PdfDictionary;
import com.lowagie.text.pdf.PdfName;
import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.pdf.PdfReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/extractor/PDFParser.class */
/**
 * Walks the object graph of a PDF document and collects the values stored
 * under the dictionary keys {@code /URI} and {@code /URL}.
 *
 * <p>The document is held fully in memory as a byte array and parsed with
 * iText's {@link PdfReader}. Traversal starts at the document catalog and
 * follows dictionaries, arrays and indirect references; every indirect
 * reference is followed at most once so that cyclic graphs terminate.
 *
 * <p>Instances keep mutable state and perform no synchronization.
 */
public class PDFParser {
    /** Default input used by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_MAIN_INPUT = "/home/parkert/files/pdfspec.pdf";

    /** String forms of the values found so far under {@code /URI} or {@code /URL}. */
    ArrayList<String> foundURIs;

    /**
     * Indirect references already followed: the outer index is the reference's
     * generation number, the inner list holds the object numbers seen for it.
     */
    ArrayList<ArrayList<Integer>> encounteredReferences;

    /** Reader over {@link #document}; created by {@link #initialize()}. */
    PdfReader documentReader;

    /** Raw bytes of the PDF being parsed. */
    byte[] document;

    /** Catalog of the current document; the root of the traversal. */
    PdfDictionary catalog;

    /**
     * Creates a parser for the PDF stored in the named file.
     *
     * @param str path of the PDF file to load
     * @throws IOException if the file cannot be read or the PDF cannot be opened
     */
    public PDFParser(String str) throws IOException {
        resetState();
        getInFromFile(str);
        initialize();
    }

    /**
     * Creates a parser for a PDF that is already in memory.
     *
     * @param bArr bytes of the PDF document
     * @throws IOException if {@code bArr} is {@code null} or the PDF cannot be opened
     */
    public PDFParser(byte[] bArr) throws IOException {
        resetState();
        this.document = bArr;
        initialize();
    }

    /** Discards all per-document state so the instance can be reused. */
    protected void resetState() {
        this.foundURIs = new ArrayList<>();
        // Generation buckets are created lazily by generationBucket(int).
        this.encounteredReferences = new ArrayList<>();
        this.documentReader = null;
        this.document = null;
        this.catalog = null;
    }

    /**
     * Resets this parser and points it at a new in-memory document.
     *
     * @param bArr bytes of the PDF document
     * @throws IOException if {@code bArr} is {@code null} or the PDF cannot be opened
     */
    public void resetState(byte[] bArr) throws IOException {
        resetState();
        this.document = bArr;
        initialize();
    }

    /**
     * Resets this parser and points it at a new file.
     *
     * @param str path of the PDF file to load
     * @throws IOException if the file cannot be read or the PDF cannot be opened
     */
    public void resetState(String str) throws IOException {
        resetState();
        getInFromFile(str);
        initialize();
    }

    /**
     * Loads the whole file into {@link #document}.
     *
     * @param str path of the file to read
     * @throws IOException if the file cannot be opened, is too large to fit in
     *         a single byte array, or ends before its reported length is read
     */
    protected void getInFromFile(String str) throws IOException {
        File file = new File(str);
        long length = file.length();
        if (length > Integer.MAX_VALUE) {
            throw new IOException("File too large to load into memory (" + length + " bytes): " + str);
        }
        byte[] buffer = new byte[(int) length];
        // try-with-resources guarantees the stream is closed, also on failure.
        try (FileInputStream in = new FileInputStream(file)) {
            int offset = 0;
            // A single read() may return fewer bytes than requested; keep
            // reading until the buffer is full.
            while (offset < buffer.length) {
                int count = in.read(buffer, offset, buffer.length - offset);
                if (count < 0) {
                    throw new IOException("Unexpected end of file after " + offset + " of "
                            + buffer.length + " bytes: " + str);
                }
                offset += count;
            }
        }
        this.document = buffer;
    }

    /**
     * Returns the list of object numbers recorded for the given generation,
     * growing {@link #encounteredReferences} with empty lists as needed.
     */
    private ArrayList<Integer> generationBucket(int generation) {
        while (this.encounteredReferences.size() <= generation) {
            this.encounteredReferences.add(new ArrayList<Integer>());
        }
        return this.encounteredReferences.get(generation);
    }

    /**
     * Tells whether an indirect reference has already been recorded with
     * {@link #markAsSeen(int, int)}.
     *
     * @param i generation number of the reference
     * @param i2 object number of the reference
     * @return {@code true} if the pair was recorded before, {@code false} otherwise
     */
    protected boolean haveSeen(int i, int i2) {
        return generationBucket(i).contains(Integer.valueOf(i2));
    }

    /**
     * Records an indirect reference as visited.
     *
     * @param i generation number of the reference
     * @param i2 object number of the reference
     */
    protected void markAsSeen(int i, int i2) {
        generationBucket(i).add(Integer.valueOf(i2));
    }

    /**
     * Returns the live list of values collected so far; each element is a
     * {@link String}.
     *
     * @return the internal list of found URI strings (not a copy)
     */
    public ArrayList getURIs() {
        return this.foundURIs;
    }

    /**
     * Opens a {@link PdfReader} over {@link #document} (when set) and caches
     * the document catalog.
     *
     * @throws IOException if no reader is available because no document bytes
     *         were supplied, or if the reader cannot be created
     */
    protected void initialize() throws IOException {
        if (this.document != null) {
            this.documentReader = new PdfReader(this.document);
        }
        if (this.documentReader == null) {
            // Fail with a descriptive checked exception instead of a bare
            // NullPointerException on the getCatalog() call below.
            throw new IOException("No PDF document available to parse");
        }
        this.catalog = this.documentReader.getCatalog();
    }

    /**
     * Traverses the document starting at the catalog and returns everything
     * collected so far.
     *
     * @return the same list as {@link #getURIs()}
     */
    public ArrayList extractURIs() {
        extractURIs(this.catalog);
        return getURIs();
    }

    /**
     * Recursively scans {@code pdfObject} for {@code /URI} and {@code /URL}
     * dictionary entries and appends their string form to {@link #foundURIs}.
     *
     * <p>Dictionaries and arrays are descended into; an indirect reference is
     * resolved and descended into the first time it is met and ignored
     * afterwards. Any other object kind is ignored.
     *
     * @param pdfObject object to scan; {@code null} is ignored
     */
    protected void extractURIs(PdfObject pdfObject) {
        if (pdfObject == null) {
            // Missing catalog or an indirect reference that resolved to nothing.
            return;
        }
        if (pdfObject.isDictionary()) {
            PdfDictionary dictionary = (PdfDictionary) pdfObject;
            // Iterate as Object so this works whether getKeys() is declared
            // with or without a type parameter.
            for (Object key : dictionary.getKeys()) {
                PdfName name = (PdfName) key;
                PdfObject value = dictionary.get(name);
                if (value == null) {
                    continue;
                }
                String keyText = name.toString();
                if (keyText.equals("/URI") || keyText.equals("/URL")) {
                    this.foundURIs.add(value.toString());
                } else {
                    extractURIs(value);
                }
            }
            return;
        }
        if (pdfObject.isArray()) {
            for (Object element : ((PdfArray) pdfObject).getArrayList()) {
                extractURIs((PdfObject) element);
            }
            return;
        }
        if (pdfObject.getClass() == PRIndirectReference.class) {
            PRIndirectReference reference = (PRIndirectReference) pdfObject;
            int generation = reference.getGeneration();
            int number = reference.getNumber();
            if (haveSeen(generation, number)) {
                return;
            }
            // Mark before recursing so a cycle back to this object stops here.
            markAsSeen(generation, number);
            extractURIs(PdfReader.getPdfObject(reference));
        }
    }

    /**
     * Prints every URI found in a PDF file to standard output.
     *
     * @param strArr optional; the first element is the path of the PDF to
     *        scan, and a built-in default path is used when it is absent
     */
    public static void main(String[] strArr) {
        String path = (strArr != null && strArr.length > 0) ? strArr[0] : DEFAULT_MAIN_INPUT;
        try {
            for (Object uri : new PDFParser(path).extractURIs()) {
                System.out.println("got uri: " + uri);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
