package org.jpedal.objects.structuredtext;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.jpedal.io.ObjectStore;
import org.jpedal.io.PdfObjectReader;
import org.jpedal.objects.PdfPageData;
import org.jpedal.objects.PdfResources;
import org.jpedal.objects.layers.PdfLayerList;
import org.jpedal.objects.raw.MCObject;
import org.jpedal.objects.raw.PageObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.parser.PdfStreamDecoder;
import org.jpedal.render.SwingDisplay;
import org.jpedal.utils.LogWriter;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/* loaded from: input_file:org/jpedal/objects/structuredtext/MarkedContentGenerator.class */
/**
 * Builds a DOM {@link Document} from the structure information of a PDF by walking
 * the structure objects reachable from {@code pdfResources.getPdfObject(3)} and, for
 * each page referenced, decoding the page content to collect marked-content text.
 *
 * <p>When {@link #isHTML} is set no DOM document is created by this class; the walk
 * then only fills {@link #reverseLookup}.
 *
 * <p>NOTE(review): {@link #debug} and {@link #indent} are static and therefore shared
 * by all instances; debug output from concurrent instances will interleave.
 */
public class MarkedContentGenerator {
    private PdfObjectReader currentPdfFile;
    private DocumentBuilder db;
    private Document doc;
    private Element root;
    private PdfResources res;
    private PdfLayerList layers;
    private PdfPageData pdfPageData;
    /** Guards against re-entrant page decoding, see {@link #decodePageForMarkedContent}. */
    private boolean isDecoding;
    /** Enables verbose tracing to {@code System.out}. */
    static boolean debug;
    /** Current indentation prefix used by the debug trace. */
    static String indent = "";
    /** When true, no XML tree is built and only {@link #reverseLookup} is populated. */
    boolean isHTML;
    // Only ever cleared in this class; nothing visible here populates it.
    private final Map<Object, Object> pageStreams = new HashMap<Object, Object>();
    /** Maps a K-array value to the dotted path of structure names leading to it (HTML mode only). */
    final Map<String, String> reverseLookup = new HashMap<String, String>();

    /**
     * Creates the marked-content tree for a document.
     *
     * @param pdfResources    source of the structure object and the layer list
     * @param pdfPageData     page data handed to the stream decoder
     * @param pdfObjectReader reader used to resolve and read PDF objects
     * @return the DOM document held by this instance; in HTML mode this is whatever
     *         document (possibly {@code null}) a previous call left behind
     * @throws IllegalStateException if no XML {@link DocumentBuilder} can be created
     */
    public Document getMarkedContentTree(PdfResources pdfResources, PdfPageData pdfPageData, PdfObjectReader pdfObjectReader) {
        // presumably key 3 selects the structure tree root - TODO confirm against PdfResources
        PdfObject structObj = pdfResources.getPdfObject(3);
        this.res = pdfResources;
        this.layers = pdfResources.getPdfLayerList();
        this.pdfPageData = pdfPageData;
        this.currentPdfFile = pdfObjectReader;
        this.currentPdfFile.checkResolved(structObj);
        if (this.isHTML) {
            return this.doc;
        }
        setupTree();
        boolean hasTree = structObj != null && structObj.getDictionary(PdfDictionary.ParentTree) != null;
        if (debug) {
            System.out.println("hastree=" + hasTree);
        }
        if (hasTree) {
            buildTree(structObj);
            this.pageStreams.clear();
        } else {
            // No parent tree: fall back to decoding page 1 straight into the document.
            try {
                decodePageForMarkedContent(1, null, this.doc);
            } catch (Exception e) {
                LogWriter.writeLog("Exception: " + e.getMessage());
            }
        }
        return this.doc;
    }

    /**
     * Creates a fresh DOM document and adds the two header comments.
     *
     * @throws IllegalStateException if the XML parser cannot be configured; previously
     *         this case was logged and then failed with a bare NullPointerException
     */
    private void setupTree() {
        try {
            this.db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LogWriter.writeLog("Exception: " + e.getMessage());
            throw new IllegalStateException("Unable to create XML DocumentBuilder", e);
        }
        this.doc = this.db.newDocument();
        this.doc.appendChild(this.doc.createComment(" Created from JPedal "));
        this.doc.appendChild(this.doc.createComment(" http://www.idrsolutions.com "));
    }

    /** Creates the {@code TaggedPDF-doc} root element and walks the structure below it. */
    private void buildTree(PdfObject pdfObject) {
        this.root = this.doc.createElement("TaggedPDF-doc");
        this.doc.appendChild(this.root);
        traverseContentTree(pdfObject);
    }

    /**
     * Walks the children of {@code pdfObject} found under key 27 (traced as "K" in the
     * debug output): a single dictionary is read as one child node, otherwise the
     * entry is read as an array. A missing entry is ignored.
     *
     * @param pdfObject structure object whose children are read
     */
    public void traverseContentTree(PdfObject pdfObject) {
        PdfObject kDict = pdfObject.getDictionary(27);
        if (kDict != null) {
            if (debug) {
                System.out.println("read child=" + kDict.getObjectRefAsString());
            }
            readChildNode(kDict, this.root, null, "");
            return;
        }
        byte[][] kArray = pdfObject.getStringArray(27);
        if (debug) {
            System.out.println("Karray=");
        }
        readKarray(kArray, this.root, null, "");
        if (debug) {
            System.out.println("Karray read");
        }
    }

    /**
     * Reads one structure node: creates its XML element (unless it is a {@code Span},
     * which reuses the parent), decodes the node's page once if no page content is
     * available yet, then recurses into its children or attaches its text.
     *
     * @param pdfObject node to read
     * @param element   parent XML element, may be {@code null}
     * @param map       marked-content text of the current page keyed by the K value,
     *                  or {@code null} if no page has been decoded yet
     * @param str       dotted path of structure names of the ancestors
     */
    private void readChildNode(PdfObject pdfObject, Element element, Map<String, String> map, String str) {
        if (debug) {
            indent += "   ";
            System.out.println(indent + "read child node " + pdfObject.getObjectRefAsString() + ' ' + pdfObject.getInt(27));
        }
        byte[][] kArray = pdfObject.getStringArray(27);
        int kInt = pdfObject.getInt(27);
        PdfObject kDict = pdfObject.getDictionary(27);
        String lang = pdfObject.getTextStreamValue(PdfDictionary.Lang);
        String name = pdfObject.getName(35);
        String path = str + '.' + name;
        Element node = null;
        if (debug) {
            System.out.println(indent + "S= " + name + ' ');
            if (name == null) {
                System.out.println("S is null in " + pdfObject.getObjectRefAsString());
            }
        }
        if (name != null) {
            if (name.equals("Span")) {
                node = element;
            } else if (this.doc != null) {
                // Only decorate/attach the element when one could actually be created;
                // without a document (HTML mode) the node simply stays null.
                node = this.doc.createElement(cleanName(name));
                if (lang != null) {
                    node.setAttribute("xml:lang", lang);
                }
                if (element != null) {
                    element.appendChild(node);
                }
            }
        }
        Map<String, String> pageContent = map;
        PdfObject page = pdfObject.getDictionary(PdfDictionary.Pg);
        if (page != null && pageContent == null && !this.isHTML) {
            if (debug) {
                System.out.println(indent + "decode page ");
            }
            pageContent = new HashMap<String, String>();
            try {
                decodePageForMarkedContent(-1, page, pageContent);
            } catch (Exception e) {
                LogWriter.writeLog("Exception: " + e.getMessage());
            }
        }
        if (debug) {
            System.out.println(indent + "page decoded karray" + Arrays.toString(kArray) + " Kdict=" + kDict + " kint=" + kInt);
        }
        if (kArray != null) {
            readKarray(kArray, node, pageContent, path);
        } else if (kDict != null) {
            readChildNode(kDict, node, pageContent, path);
        } else if (kInt != -1 && !this.isHTML) {
            addContentToNode(pageContent, String.valueOf(kInt), node);
        } else if (pdfObject.getTextStreamValue(36) == null && debug) {
            System.out.println("unimplemented " + pdfObject.getObjectRefAsString());
        }
        if (debug) {
            System.out.println(indent + "child node read " + pdfObject.getObjectRefAsString());
            // Guard: debug may have been switched on after this call already started.
            indent = indent.substring(0, Math.max(0, indent.length() - 3));
        }
    }

    /**
     * Appends the page text stored under {@code str} as a text node of {@code element}.
     * Does nothing in HTML mode, when no page content is available, when the key has
     * no text, or when there is no document/element to attach to.
     */
    private void addContentToNode(Map<String, String> map, String str, Element element) {
        if (this.isHTML) {
            return;
        }
        String text = (map == null) ? null : map.get(str);
        if (text != null) {
            text = handleXMLCharacters(text);
            if (this.doc != null && element != null) {
                element.appendChild(this.doc.createTextNode(text));
            }
        }
        if (debug) {
            System.out.println(indent + " added " + text);
        }
    }

    /** Turns the entity forms {@code &lt;} and {@code &gt;} back into literal characters. */
    private static String handleXMLCharacters(String str) {
        return str.replace("&lt;", "<").replace("&gt;", ">");
    }

    /**
     * Reads an array of K values. A value followed by a second value and an {@code R}
     * marker is treated as an indirect object reference and read as a child node; any
     * other value is treated as a content key of the current page.
     *
     * @param bArr    raw array entries, {@code null} is treated as empty
     * @param element XML element receiving the content, may be {@code null}
     * @param map     page content keyed by K value, may be {@code null}
     * @param str     dotted path of structure names leading here
     */
    private void readKarray(byte[][] bArr, Element element, Map<String, String> map, String str) {
        if (bArr == null) {
            return;
        }
        final int length = bArr.length;
        int i = 0;
        while (i < length) {
            String value = new String(bArr[i]);
            if (debug) {
                System.out.println(indent + "aK value=" + value);
            }
            if (length - i < 3) {
                // Too few entries left to form "num gen R", so this must be a content key.
                // NOTE(review): only this branch records reverseLookup in HTML mode, the
                // final else-branch below does not - confirm whether that is intended.
                if (this.isHTML && !this.reverseLookup.containsKey(value)) {
                    this.reverseLookup.put(value, str);
                }
                addContentToNode(map, value, element);
            } else if (isReferenceMarker(bArr[i + 2])) {
                MCObject child = new MCObject(value + ' ' + new String(bArr[i + 1]) + " R");
                this.currentPdfFile.readObject(child);
                readChildNode(child, element, map, str);
                i += 2; // skip the generation number and the 'R' marker
            } else {
                addContentToNode(map, value, element);
            }
            i++;
        }
    }

    /** Returns true if the entry starts with {@code 'R'}; null or empty entries are not markers. */
    private static boolean isReferenceMarker(byte[] entry) {
        return entry != null && entry.length > 0 && entry[0] == 'R';
    }

    /**
     * Converts a structure name into a string usable as an XML element name.
     * {@code #xx} hex escapes are decoded (non-alphanumeric results become {@code '-'}),
     * spaces become {@code '-'}, and any character other than letters, digits,
     * {@code '-'} and {@code '_'} is dropped. A malformed or truncated escape drops the
     * {@code '#'} only. If the result is empty or does not start with a letter or
     * underscore, {@code '_'} is prepended, because {@code Document.createElement}
     * rejects such names.
     */
    private static String cleanName(String str) {
        final int length = str.length();
        StringBuilder cleaned = new StringBuilder(length + 1);
        int i = 0;
        while (i < length) {
            char ch = str.charAt(i);
            if (ch == '#') {
                int high = (i + 2 < length) ? Character.digit(str.charAt(i + 1), 16) : -1;
                int low = (i + 2 < length) ? Character.digit(str.charAt(i + 2), 16) : -1;
                if (high < 0 || low < 0) {
                    // Not a valid two-digit hex escape: discard the '#' and carry on.
                    i++;
                    continue;
                }
                i += 2;
                ch = (char) (high * 16 + low);
                if (!Character.isLetterOrDigit(ch)) {
                    ch = '-';
                }
            }
            if (ch == ' ') {
                cleaned.append('-');
            } else if (ch == '-' || ch == '_' || Character.isLetterOrDigit(ch)) {
                cleaned.append(ch);
            }
            i++;
        }
        if (cleaned.length() == 0 || !(Character.isLetter(cleaned.charAt(0)) || cleaned.charAt(0) == '_')) {
            cleaned.insert(0, '_');
        }
        return cleaned.toString();
    }

    /**
     * Decodes the content of one page with marked-content collection enabled and
     * hands {@code obj} to the decoder as the receiver of the result.
     *
     * <p>If a decode is already in progress the call is logged and ignored. Exceptions
     * raised while decoding are logged, not propagated; exceptions raised while
     * resolving the page object are propagated.
     *
     * @param i         page number, used only when {@code pdfObject} is {@code null}
     * @param pdfObject page object, or {@code null} to look the page up by number
     * @param obj       target passed to the decoder (callers in this class pass either
     *                  the DOM document or a map)
     * @throws Exception if resolving the page object fails
     */
    private synchronized void decodePageForMarkedContent(int i, PdfObject pdfObject, Object obj) throws Exception {
        if (this.isDecoding) {
            LogWriter.writeLog("[PDF]WARNING - this file is being decoded already");
            return;
        }
        if (pdfObject == null) {
            pdfObject = new PageObject(this.currentPdfFile.getReferenceforPage(i));
            this.currentPdfFile.readObject(pdfObject);
        } else {
            i = this.currentPdfFile.convertObjectToPageNumber(new String(pdfObject.getUnresolvedData()));
            this.currentPdfFile.checkResolved(pdfObject);
        }
        this.isDecoding = true;
        try {
            ObjectStore objectStore = new ObjectStore();
            PdfStreamDecoder decoder = new PdfStreamDecoder(this.currentPdfFile, this.layers);
            decoder.setParameters(true, false, 0, 7, false, false);
            decoder.setXMLExtraction(false);
            decoder.setObjectValue(-9, "markedContent");
            decoder.setObjectValue(-8, objectStore);
            decoder.setObjectValue(-3, null);
            decoder.setObjectValue(-18, this.pdfPageData);
            decoder.setIntValue(-10, i);
            decoder.setRenderer(new SwingDisplay(i, objectStore, false));
            this.res.setupResources(decoder, false, pdfObject.getDictionary(PdfDictionary.Resources), i, this.currentPdfFile);
            decoder.setObjectValue(-5, obj);
            if (debug) {
                System.out.println(indent + " about to decode page " + pdfObject.getObjectRefAsString());
            }
            decoder.decodePageContent(pdfObject);
            objectStore.flush();
        } catch (Exception e) {
            LogWriter.writeLog("Exception: " + e.getMessage());
        } finally {
            // Always release the guard, including when an Error escapes.
            this.isDecoding = false;
        }
    }
}
