package org.apache.tika.parser.pdf;

import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.tika.Tika;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.AccessPermissionException;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.ZeroByteFileException;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.metadata.Font;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLCore;
import org.apache.tika.metadata.PDF;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.XMP;
import org.apache.tika.metadata.XMPMM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.pdf.PDFParserConfig;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ContentHandlerDecorator;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/parser/pdf/PDFParserTest.class */
public class PDFParserTest extends TikaTest {
    /**
     * Level of the {@code org.apache.pdfbox} logger as it was before the tests
     * silenced it; overwritten in {@code setup()} and written back to the logger
     * in {@code tearDown()}. Starts out as {@code Level.INFO}.
     */
    public static Level PDFBOX_LOG_LEVEL = Level.INFO;

    /* loaded from: input_file:org/apache/tika/parser/pdf/PDFParserTest$AvoidInlineSelector.class */
    private class AvoidInlineSelector implements DocumentSelector {
        private AvoidInlineSelector() {
        }

        public boolean select(Metadata metadata) {
            String str = metadata.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
            return str == null || !str.equals(TikaCoreProperties.EmbeddedResourceType.INLINE.toString());
        }
    }

    /* loaded from: input_file:org/apache/tika/parser/pdf/PDFParserTest$EventCountingHandler.class */
    private class EventCountingHandler extends ContentHandlerDecorator {
        private int endDocument;

        private EventCountingHandler() {
            this.endDocument = 0;
        }

        public void endDocument() {
            this.endDocument++;
        }

        public int getEndDocument() {
            return this.endDocument;
        }
    }

    /**
     * Remembers the level currently assigned to the {@code org.apache.pdfbox}
     * logger and then switches that logger off.
     */
    @BeforeClass
    public static void setup() {
        Logger pdfBoxLogger = Logger.getLogger("org.apache.pdfbox");
        PDFBOX_LOG_LEVEL = pdfBoxLogger.getLevel();
        pdfBoxLogger.setLevel(Level.OFF);
    }

    /**
     * Puts the level stored in {@code PDFBOX_LOG_LEVEL} back on the
     * {@code org.apache.pdfbox} logger.
     */
    @AfterClass
    public static void tearDown() {
        Logger pdfBoxLogger = Logger.getLogger("org.apache.pdfbox");
        pdfBoxLogger.setLevel(PDFBOX_LOG_LEVEL);
    }

    /**
     * Counts how often {@code str} occurs inside {@code str2}. Overlapping
     * occurrences are counted individually, e.g. {@code "aa"} occurs three times
     * in {@code "aaaa"}.
     *
     * @param str  the text to look for (the needle)
     * @param str2 the text to search in (the haystack)
     * @return the number of occurrences; {@code 0} when {@code str} is empty
     */
    private static int substringCount(String str, String str2) {
        // An empty needle matches at every index and indexOf never reports -1
        // for it, so without this guard the scan below would never terminate.
        if (str.isEmpty()) {
            return 0;
        }
        int count = 0;
        // Resume one character past each hit so overlapping matches are still found.
        for (int at = str2.indexOf(str); at != -1; at = str2.indexOf(str, at + 1)) {
            count++;
        }
        return count;
    }

    /**
     * Parses {@code testPDF.pdf} and checks the content type, creator, creator
     * tool and title metadata, a handful of expected phrases, and that headline
     * and paragraph boundaries did not get glued together.
     */
    @Test
    public void testPdfParsing() throws Exception {
        TikaTest.XMLResult result = getXML("testPDF.pdf");
        Metadata meta = result.metadata;
        String content = result.xml;

        Assert.assertEquals("application/pdf", meta.get("Content-Type"));
        Assert.assertEquals("Bertrand Delacrétaz", meta.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Firefox", meta.get(TikaCoreProperties.CREATOR_TOOL));
        Assert.assertEquals("Apache Tika - Apache Tika", meta.get(TikaCoreProperties.TITLE));

        assertContains("Apache Tika", content);
        assertContains("Tika - Content Analysis Toolkit", content);
        assertContains("incubator", content);
        assertContains("Apache Software Foundation", content);

        Assert.assertFalse("should have word boundary after headline", content.contains("ToolkitApache"));
        Assert.assertFalse("should have word boundary between paragraphs", content.contains("libraries.Apache"));
    }

    /**
     * With font-name extraction switched on, the font metadata of
     * {@code testPDFVarious.pdf} must mention {@code ABCDEE+Calibri}.
     */
    @Test
    public void testFontNameExtraction() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractFontNames(true);

        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        Metadata meta = getXML("testPDFVarious.pdf", context).metadata;
        assertContains("ABCDEE+Calibri", meta.get(Font.FONT_NAME));
    }

    /**
     * Checks only the metadata (content type, creator, creator tool, title)
     * produced for {@code testPDF.pdf}.
     */
    @Test
    public void testPdfParsingMetadataOnly() throws Exception {
        TikaTest.XMLResult result = getXML("testPDF.pdf");
        Metadata meta = result.metadata;

        Assert.assertEquals("application/pdf", meta.get("Content-Type"));
        Assert.assertEquals("Bertrand Delacrétaz", meta.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Firefox", meta.get(TikaCoreProperties.CREATOR_TOOL));
        Assert.assertEquals("Apache Tika - Apache Tika", meta.get(TikaCoreProperties.TITLE));
    }

    /**
     * Checks standard and custom metadata of {@code testPDF-custommetadata.pdf},
     * including a two-valued custom array property, and that the body text was
     * extracted.
     */
    @Test
    public void testCustomMetadata() throws Exception {
        TikaTest.XMLResult result = getXML("testPDF-custommetadata.pdf");
        Metadata meta = result.metadata;

        Assert.assertEquals("application/pdf", meta.get("Content-Type"));
        Assert.assertEquals("Document author", meta.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Document title", meta.get(TikaCoreProperties.TITLE));
        Assert.assertEquals("Custom Value", meta.get("Custom Property"));

        // get() on a multi-valued property is expected to yield the first entry.
        Assert.assertEquals("Array Entry 1", meta.get("Custom Array"));
        String[] customArray = meta.getValues("Custom Array");
        Assert.assertEquals(2L, customArray.length);
        Assert.assertEquals("Array Entry 1", customArray[0]);
        Assert.assertEquals("Array Entry 2", customArray[1]);

        assertContains("Hello World!", result.xml);
    }

    /**
     * Exercises {@code testPDF_protected.pdf} three ways: without a password
     * provider, with a provider returning the empty password, and with a provider
     * returning a wrong password. The first two must yield full metadata and
     * text; the last must fail with {@link EncryptedDocumentException} and leave
     * only minimal metadata and no text.
     */
    @Test
    public void testProtectedPDF() throws Exception {
        // No password provider registered at all.
        assertProtectedPdfContent(getXML("testPDF_protected.pdf"));

        // Provider that supplies the empty password.
        ParseContext parseContext = new ParseContext();
        parseContext.set(PasswordProvider.class, new PasswordProvider() {
            @Override
            public String getPassword(Metadata metadata) {
                return "";
            }
        });
        assertProtectedPdfContent(getXML("testPDF_protected.pdf", parseContext));

        // Provider that supplies a wrong password.
        parseContext.set(PasswordProvider.class, new PasswordProvider() {
            @Override
            public String getPassword(Metadata metadata) {
                return "WRONG!!!!";
            }
        });
        boolean sawEncryptionException = false;
        BodyContentHandler handler = new BodyContentHandler();
        Metadata wrongPasswordMetadata = new Metadata();
        // try-with-resources: the parse is expected to throw, and the stream must
        // still be closed on that path.
        try (InputStream stream = getResourceAsStream("/test-documents/testPDF_protected.pdf")) {
            AUTO_DETECT_PARSER.parse(stream, handler, wrongPasswordMetadata, parseContext);
        } catch (EncryptedDocumentException expected) {
            sawEncryptionException = true;
        }
        Assert.assertTrue("encryption exception", sawEncryptionException);
        Assert.assertEquals("application/pdf", wrongPasswordMetadata.get("Content-Type"));
        Assert.assertEquals("true", wrongPasswordMetadata.get("pdf:encrypted"));
        Assert.assertEquals("very little metadata should be parsed", 3L, wrongPasswordMetadata.names().length);
        Assert.assertEquals(0L, handler.toString().length());
    }

    /** Asserts the metadata and text expected from a successful parse of the protected PDF. */
    private void assertProtectedPdfContent(TikaTest.XMLResult result) {
        Metadata meta = result.metadata;
        Assert.assertEquals("true", meta.get("pdf:encrypted"));
        Assert.assertEquals("application/pdf", meta.get("Content-Type"));
        Assert.assertEquals("The Bank of England", meta.get(TikaCoreProperties.CREATOR));
        Assert.assertEquals("Speeches by Andrew G Haldane", meta.get(OfficeOpenXMLCore.SUBJECT));
        Assert.assertEquals("Rethinking the Financial Network, Speech by Andrew G Haldane, Executive Director, Financial Stability delivered at the Financial Student Association, Amsterdam on 28 April 2009", meta.get(TikaCoreProperties.TITLE));
        assertContains("RETHINKING THE FINANCIAL NETWORK", result.xml);
        assertContains("On 16 November 2002", result.xml);
        assertContains("In many important respects", result.xml);
    }

    /**
     * Text of the two side-by-side text boxes in {@code testPDFTwoTextBoxes.pdf}
     * must come out box by box (left box fully, then right box) with the default
     * parser settings.
     */
    @Test
    public void testTwoTextBoxes() throws Exception {
        String text;
        // try-with-resources replaces the hand-rolled close logic, whose dead
        // branches dereferenced a Throwable that was always null.
        try (InputStream stream = getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf")) {
            text = getText(stream, AUTO_DETECT_PARSER);
        }
        // Collapse whitespace runs so line breaks do not affect the comparison.
        String collapsed = text.replaceAll("\\s+", " ");
        assertContains("Left column line 1 Left column line 2 Right column line 1 Right column line 2", collapsed);
    }

    /**
     * Extracts {@code testPDFVarious.pdf} and checks that footnotes, header and
     * footer, text box, formatting samples, citation, caption, tables, hyperlink,
     * lists, keyword and subject metadata, and Japanese text are all present.
     */
    @Test
    public void testVarious() throws Exception {
        Metadata metadata = new Metadata();
        String text;
        // try-with-resources replaces the hand-rolled close logic, whose dead
        // branches dereferenced a Throwable that was always null.
        try (InputStream stream = getResourceAsStream("/test-documents/testPDFVarious.pdf")) {
            text = getText(stream, AUTO_DETECT_PARSER, metadata);
        }
        // Whitespace-collapsed copy for assertions that span table cells.
        String collapsed = text.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", text);
        assertContains("This is a footnote.", text);
        assertContains("This is the header text.", text);
        assertContains("This is the footer text.", text);
        assertContains("Here is a text box", text);
        assertContains("Bold", text);
        assertContains("italic", text);
        assertContains("underline", text);
        assertContains("superscript", text);
        assertContains("subscript", text);
        assertContains("Here is a citation:", text);
        assertContains("Figure 1 This is a caption for Figure 1", text);
        assertContains("(Kramer)", text);
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", collapsed);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", collapsed);
        assertContains("This is a hyperlink", text);

        assertContains("Here is a list:", text);
        for (int bullet = 1; bullet <= 3; bullet++) {
            assertContains("Bullet " + bullet, text);
        }
        assertContains("Here is a numbered list:", text);
        for (int number = 1; number <= 3; number++) {
            assertContains(number + ") Number bullet " + number, text);
        }
        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, text);
            }
        }

        assertContains("Keyword1 Keyword2", text);
        Assert.assertEquals("Keyword1 Keyword2", metadata.get(Office.KEYWORDS));
        assertContains("Subject is here", text);
        Assert.assertEquals("Subject is here", metadata.get(OfficeOpenXMLCore.SUBJECT));

        assertContains("Suddenly some Japanese text:", text);
        assertContains("（ＧＨＱ）", text);
        assertContains("ゾルゲと尾崎、淡々と最期", text);
        assertContains("And then some Gothic text:", text);
    }

    /**
     * Annotation text of {@code testAnnotations.pdf} must be extracted by
     * default and must disappear when annotation extraction is disabled, either
     * directly on a {@code PDFParser} or through a {@code PDFParserConfig} in the
     * parse context. Finally checks that the XHTML output has as many opening as
     * closing {@code p} tags.
     */
    @Test
    public void testAnnotations() throws Exception {
        final String resource = "/test-documents/testAnnotations.pdf";
        // Whitespace including the non-breaking space, written as an escape so
        // the character stays visible in the source.
        final String whitespace = "[\\s\u00a0]+";

        // Default behaviour: annotation text is part of the output.
        String content;
        try (InputStream stream = getResourceAsStream(resource)) {
            content = getText(stream, AUTO_DETECT_PARSER);
        }
        content = content.replaceAll(whitespace, " ");
        assertContains("Here is some text", content);
        assertContains("Here is a comment", content);

        // Annotation extraction disabled directly on the parser.
        PDFParser pdfParser = new PDFParser();
        pdfParser.getPDFParserConfig().setExtractAnnotationText(false);
        try (InputStream stream = getResourceAsStream(resource)) {
            content = getText(stream, pdfParser);
        }
        content = content.replaceAll(whitespace, " ");
        assertContains("Here is some text", content);
        assertNotContained("Here is a comment", content);

        // Annotation extraction disabled through the parse context.
        ParseContext parseContext = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractAnnotationText(false);
        parseContext.set(PDFParserConfig.class, config);
        try (InputStream stream = getResourceAsStream(resource)) {
            content = getText(stream, AUTO_DETECT_PARSER, parseContext);
        }
        content = content.replaceAll(whitespace, " ");
        assertContains("Here is some text", content);
        assertNotContained("Here is a comment", content);

        // Paragraph tags in the XHTML output must be balanced.
        String xml = getXML("testAnnotations.pdf").xml;
        Assert.assertEquals(substringCount("<p>", xml), substringCount("</p>", xml));
    }

    /**
     * The XHTML output for {@code testPopupAnnotation.pdf} must contain both
     * the strings {@code "this is the note"} and {@code "igalsh"}.
     */
    @Test
    public void testPopupAnnotation() throws Exception {
        String content = getXML("testPopupAnnotation.pdf").xml;
        assertContains("this is the note", content);
        assertContains("igalsh", content);
    }

    /**
     * The output for {@code testPDFPackage.pdf} must contain the markers
     * {@code PDF1} and {@code PDF2}.
     */
    @Test
    public void testEmbeddedPDFs() throws Exception {
        TikaTest.XMLResult result = getXML("testPDFPackage.pdf");
        assertContains("PDF1", result.xml);
        assertContains("PDF2", result.xml);
    }

    /**
     * After stripping all whitespace, the XHTML for {@code testPageNumber.pdf}
     * must contain a paragraph holding just {@code 1}.
     */
    @Test
    public void testPageNumber() throws Exception {
        String xml = getXML("testPageNumber.pdf").xml;
        String withoutWhitespace = xml.replaceAll("\\s+", "");
        assertContains("<p>1</p>", withoutWhitespace);
    }

    /**
     * The hyperlink in {@code testPDFVarious.pdf} must be rendered as an anchor
     * inside an annotation {@code div}.
     */
    @Test
    public void testLinks() throws Exception {
        String xml = getXML("testPDFVarious.pdf").xml;
        assertContains("<div class=\"annotation\"><a href=\"http://tika.apache.org/\">http://tika.apache.org/</a></div>", xml);
    }

    /**
     * For {@code testExtraSpaces.pdf} the phrase "Here is some formatted text"
     * must be found only when auto-space is disabled, whether that is configured
     * on the {@code PDFParser} itself or through a {@code PDFParserConfig} in the
     * parse context.
     */
    @Test
    public void testDisableAutoSpace() throws Exception {
        // Whitespace including the non-breaking space, written as an escape so
        // the character stays visible in the source.
        final String whitespace = "[\\s\u00a0]+";
        final String phrase = "Here is some formatted text";

        // Configured on the parser: disabled, then enabled.
        PDFParser pdfParser = new PDFParser();
        pdfParser.getPDFParserConfig().setEnableAutoSpace(false);
        assertContains(phrase, getXML("testExtraSpaces.pdf", pdfParser).xml.replaceAll(whitespace, " "));

        pdfParser.getPDFParserConfig().setEnableAutoSpace(true);
        assertNotContained(phrase, getXML("testExtraSpaces.pdf", pdfParser).xml.replaceAll(whitespace, " "));

        // Configured through the parse context: default config, then disabled.
        ParseContext parseContext = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        parseContext.set(PDFParserConfig.class, config);
        assertNotContained(phrase, getXML("testExtraSpaces.pdf", parseContext).xml.replaceAll(whitespace, " "));

        config.setEnableAutoSpace(false);
        assertContains(phrase, getXML("testExtraSpaces.pdf", pdfParser, parseContext).xml.replaceAll(whitespace, " "));
    }

    /**
     * For {@code testOverlappingText.pdf} the overlapping text must appear twice
     * by default and only once when duplicate suppression is enabled; checked
     * both with the setting on the parser and with it in the parse context.
     */
    @Test
    public void testDuplicateOverlappingText() throws Exception {
        // Setting applied on the parser itself.
        PDFParser pdfParser = new PDFParser();
        String xml = getXML("testOverlappingText.pdf", pdfParser).xml;
        assertContains("Text the first timeText the second time", xml);

        pdfParser.getPDFParserConfig().setSuppressDuplicateOverlappingText(true);
        xml = getXML("testOverlappingText.pdf", pdfParser).xml;
        assertContains("Text the first timesecond time", xml);

        // Setting applied through the parse context.
        PDFParserConfig config = new PDFParserConfig();
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);
        xml = getXML("testOverlappingText.pdf", context).xml;
        assertContains("Text the first timeText the second time", xml);

        config.setSuppressDuplicateOverlappingText(true);
        xml = getXML("testOverlappingText.pdf", context).xml;
        assertContains("Text the first timesecond time", xml);
    }

    /**
     * Text of {@code testPDFTwoTextBoxes.pdf} must come out box by box by
     * default and line by line across both boxes when sort-by-position is on;
     * checked with the setting on the parser and with it in the parse context.
     */
    @Test
    public void testSortByPosition() throws Exception {
        final String resource = "/test-documents/testPDFTwoTextBoxes.pdf";
        final String boxByBox = "Left column line 1 Left column line 2 Right column line 1 Right column line 2";
        final String byPosition = "Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2";

        // Setting applied on the parser itself.
        PDFParser pdfParser = new PDFParser();
        pdfParser.getPDFParserConfig().setEnableAutoSpace(false);
        String text;
        // Streams are closed explicitly here, as the other tests in this class do.
        try (InputStream stream = getResourceAsStream(resource)) {
            text = getText(stream, pdfParser);
        }
        assertContains(boxByBox, text.replaceAll("\\s+", " "));

        pdfParser.setSortByPosition(true);
        try (InputStream stream = getResourceAsStream(resource)) {
            text = getText(stream, pdfParser);
        }
        assertContains(byPosition, text.replaceAll("\\s+", " "));

        // Setting applied through the parse context. The config object is
        // registered once; later changes to it are seen through the same reference.
        ParseContext parseContext = new ParseContext();
        PDFParserConfig config = new PDFParserConfig();
        parseContext.set(PDFParserConfig.class, config);
        text = getText("testPDFTwoTextBoxes.pdf", new Metadata(), parseContext);
        assertContains(boxByBox, text.replaceAll("\\s+", " "));

        config.setSortByPosition(true);
        text = getText("testPDFTwoTextBoxes.pdf", new Metadata(), parseContext);
        assertContains(byPosition, text.replaceAll("\\s+", " "));
    }

    /**
     * The bookmark text of {@code testPDF_bookmarks.pdf} must be present in the
     * XHTML output and must be located before the closing {@code body} tag.
     */
    @Test
    public void testBookmarks() throws Exception {
        String xml = getXML("testPDF_bookmarks.pdf").xml;
        int bookmarkAt = xml.indexOf("Denmark bookmark is here");
        int bodyEndAt = xml.indexOf("</body>");

        Assert.assertTrue(bookmarkAt != -1);
        Assert.assertTrue(bodyEndAt != -1);
        Assert.assertTrue(bookmarkAt < bodyEndAt);
    }

    /**
     * With bookmark text extraction disabled through the parse context, the
     * bookmark text of {@code testPDF_bookmarks.pdf} must not appear in the output.
     */
    @Test
    public void testTurningOffBookmarks() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractBookmarksText(false);

        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        String xml = getXML("testPDF_bookmarks.pdf", context).xml;
        assertNotContained("Denmark bookmark is here", xml);
    }

    /**
     * For {@code testPDF_acroform3.pdf} checks the XMP / AcroForm / XFA flags in
     * the metadata and that the text field value shows up as a list item.
     */
    @Test
    public void testSignatureInAcroForm() throws Exception {
        TikaTest.XMLResult result = getXML("testPDF_acroform3.pdf");
        Metadata meta = result.metadata;

        Assert.assertEquals("true", meta.get(PDF.HAS_XMP));
        Assert.assertEquals("true", meta.get(PDF.HAS_ACROFORM_FIELDS));
        Assert.assertEquals("false", meta.get(PDF.HAS_XFA));

        boolean hasTextField = result.xml.contains("<li>aTextField: TIKA-1226</li>");
        Assert.assertTrue("found", hasTextField);
    }

    /**
     * Parsing {@code testPDFTripleLangTitle.pdf} must deliver exactly one
     * {@code endDocument()} event to the content handler.
     */
    @Test
    public void testSingleCloseDoc() throws Exception {
        Metadata metadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        // Keep the concrete type so the counter can be read without a cast.
        EventCountingHandler handler = new EventCountingHandler();
        // try-with-resources replaces the hand-rolled close logic, whose dead
        // branches dereferenced a Throwable that was always null.
        try (InputStream stream = getResourceAsStream("/test-documents/testPDFTripleLangTitle.pdf")) {
            AUTO_DETECT_PARSER.parse(stream, handler, metadata, parseContext);
        }
        Assert.assertEquals(1L, handler.getEndDocument());
    }

    /**
     * For a series of {@code testPDF_Version.*.pdf} files checks the
     * {@code dc:format}, {@code pdf:PDFVersion} and, where one is listed,
     * {@code pdf:PDFExtensionVersion} metadata, then checks the PDF/A-1b file
     * for its complete set of formats and its {@code pdfaid} properties.
     */
    @Test
    public void testVersions() throws Exception {
        // File-name version tag -> expected dc:format value.
        Map<String, String> dcFormat = new HashMap<>();
        dcFormat.put("4.x", "application/pdf; version=1.3");
        dcFormat.put("5.x", "application/pdf; version=1.4");
        dcFormat.put("6.x", "application/pdf; version=1.5");
        dcFormat.put("7.x", "application/pdf; version=1.6");
        dcFormat.put("8.x", "application/pdf; version=1.7");
        dcFormat.put("9.x", "application/pdf; version=1.7");
        dcFormat.put("10.x", "application/pdf; version=1.7");
        dcFormat.put("11.x.PDFA-1b", "application/pdf; version=1.7");

        // File-name version tag -> expected pdf:PDFVersion value.
        Map<String, String> pdfVersion = new HashMap<>();
        pdfVersion.put("4.x", "1.3");
        pdfVersion.put("5.x", "1.4");
        pdfVersion.put("6.x", "1.5");
        pdfVersion.put("7.x", "1.6");
        pdfVersion.put("8.x", "1.7");
        pdfVersion.put("9.x", "1.7");
        pdfVersion.put("10.x", "1.7");
        pdfVersion.put("11.x.PDFA-1b", "1.7");

        // File-name version tag -> expected pdf:PDFExtensionVersion value (only some files).
        Map<String, String> pdfExtensionVersion = new HashMap<>();
        pdfExtensionVersion.put("9.x", "1.7 Adobe Extension Level 3");
        pdfExtensionVersion.put("10.x", "1.7 Adobe Extension Level 8");
        pdfExtensionVersion.put("11.x.PDFA-1b", "1.7 Adobe Extension Level 8");

        for (Map.Entry<String, String> entry : dcFormat.entrySet()) {
            String versionTag = entry.getKey();
            String expectedFormat = entry.getValue();
            TikaTest.XMLResult result = getXML("testPDF_Version." + versionTag + ".pdf");

            // dc:format is multi-valued; the expected value only has to be among them.
            boolean formatFound = Arrays.asList(result.metadata.getValues("dc:format")).contains(expectedFormat);
            Assert.assertTrue("dc:format ::" + expectedFormat, formatFound);

            String expectedExtension = pdfExtensionVersion.get(versionTag);
            if (expectedExtension != null) {
                Assert.assertEquals("pdf:PDFExtensionVersion :: " + expectedExtension, expectedExtension, result.metadata.get("pdf:PDFExtensionVersion"));
            }
            Assert.assertEquals("pdf:PDFVersion", pdfVersion.get(versionTag), result.metadata.get("pdf:PDFVersion"));
        }

        TikaTest.XMLResult pdfA = getXML("testPDF_Version.11.x.PDFA-1b.pdf");
        List<String> pdfAFormats = Arrays.asList(pdfA.metadata.getValues("dc:format"));
        for (String expected : new String[]{"application/pdf; version=1.7", "application/pdf; version=\"A-1b\"", "application/pdf; version=\"1.7 Adobe Extension Level 8\""}) {
            Assert.assertTrue(expected, pdfAFormats.contains(expected));
        }
        // JUnit order is (message, expected, actual); the expected literal goes
        // first so failure messages report the values the right way round.
        Assert.assertEquals("pdfaid:conformance", "B", pdfA.metadata.get("pdfaid:conformance"));
        Assert.assertEquals("pdfaid:part", "1", pdfA.metadata.get("pdfaid:part"));
    }

    /**
     * Both authors of {@code testPDF_twoAuthors.pdf} must be reported as
     * creator values.
     */
    @Test
    public void testMultipleAuthors() throws Exception {
        Metadata meta = getXML("testPDF_twoAuthors.pdf").metadata;
        List<String> creators = Arrays.asList(meta.getValues(TikaCoreProperties.CREATOR));
        assertContains("Sample Author 1", creators);
        assertContains("Sample Author 2", creators);
    }

    /**
     * {@code dc:title} of {@code testPDFTripleLangTitle.pdf} must resolve to
     * {@code "Hello World"}.
     */
    @Test
    public void testMultipleTitles() throws Exception {
        Metadata meta = getXML("testPDFTripleLangTitle.pdf").metadata;
        Assert.assertEquals("Hello World", meta.get("dc:title"));
    }

    /**
     * With inline image extraction enabled, {@code testPDF_childAttachments.pdf}
     * must yield two inline and two attachment resources; once an
     * {@code AvoidInlineSelector} is registered, the inline ones must be gone
     * while the attachments remain.
     */
    @Test
    public void testInlineSelector() throws Exception {
        final String inlineType = TikaCoreProperties.EmbeddedResourceType.INLINE.toString();
        final String attachmentType = TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString();

        PDFParserConfig config = new PDFParserConfig();
        config.setExtractInlineImages(true);
        config.setExtractUniqueInlineImagesOnly(false);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        // Without a selector: count each kind of embedded resource.
        int inlineCount = 0;
        int attachmentCount = 0;
        for (Metadata embedded : getRecursiveMetadata("testPDF_childAttachments.pdf", context)) {
            String type = embedded.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
            if (inlineType.equals(type)) {
                inlineCount++;
            } else if (attachmentType.equals(type)) {
                attachmentCount++;
            }
        }
        Assert.assertEquals(2L, inlineCount);
        Assert.assertEquals(2L, attachmentCount);

        // With the selector registered: count again.
        context.set(DocumentSelector.class, new AvoidInlineSelector());
        int selectedInlineCount = 0;
        int selectedAttachmentCount = 0;
        for (Metadata embedded : getRecursiveMetadata("testPDF_childAttachments.pdf", context)) {
            String type = embedded.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
            if (inlineType.equals(type)) {
                selectedInlineCount++;
            } else if (attachmentType.equals(type)) {
                selectedAttachmentCount++;
            }
        }
        Assert.assertEquals(0L, selectedInlineCount);
        Assert.assertEquals(2L, selectedAttachmentCount);
    }

    /**
     * With default settings {@code testPDF_childAttachments.pdf} must yield no
     * inline resources and two attachments; with inline image extraction
     * enabled it must yield two of each.
     */
    @Test
    public void testInlineConfig() throws Exception {
        final String inlineType = TikaCoreProperties.EmbeddedResourceType.INLINE.toString();
        final String attachmentType = TikaCoreProperties.EmbeddedResourceType.ATTACHMENT.toString();

        // Default configuration.
        int defaultInlineCount = 0;
        int defaultAttachmentCount = 0;
        for (Metadata embedded : getRecursiveMetadata("testPDF_childAttachments.pdf")) {
            String type = embedded.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
            if (inlineType.equals(type)) {
                defaultInlineCount++;
            } else if (attachmentType.equals(type)) {
                defaultAttachmentCount++;
            }
        }
        Assert.assertEquals(0L, defaultInlineCount);
        Assert.assertEquals(2L, defaultAttachmentCount);

        // Inline image extraction enabled through the parse context.
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractInlineImages(true);
        config.setExtractUniqueInlineImagesOnly(false);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        int inlineCount = 0;
        int attachmentCount = 0;
        for (Metadata embedded : getRecursiveMetadata("testPDF_childAttachments.pdf", context)) {
            String type = embedded.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE);
            if (inlineType.equals(type)) {
                inlineCount++;
            } else if (attachmentType.equals(type)) {
                attachmentCount++;
            }
        }
        Assert.assertEquals(2L, inlineCount);
        Assert.assertEquals(2L, attachmentCount);
    }

    /**
     * Checks that testPDF_multiFormatEmbFiles.pdf yields five metadata entries
     * and that the entry at index 1 carries the resource name "Test.txt".
     */
    @Test
    public void testEmbeddedFileNameExtraction() throws Exception {
        List<Metadata> metadataList = getRecursiveMetadata("testPDF_multiFormatEmbFiles.pdf");
        Assert.assertEquals("metadata size", 5, metadataList.size());

        Metadata firstAttachment = metadataList.get(1);
        Assert.assertEquals("attachment file name", "Test.txt", firstAttachment.get("resourceName"));
    }

    /**
     * Verifies the XHTML markup emitted for embedded files: an attachment div and
     * an inline image tag for testPDF_childAttachments.pdf (parsed with inline
     * image extraction enabled), and an annotation div for
     * testPDFFileEmbInAnnotation.pdf (parsed with defaults).
     */
    @Test
    public void testEmbeddedFileMarkup() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractInlineImages(true);
        config.setExtractUniqueInlineImagesOnly(false);

        ParseContext context = new ParseContext();
        context.set(Parser.class, AUTO_DETECT_PARSER);
        context.set(PDFParserConfig.class, config);

        String attachmentsXml = getXML("testPDF_childAttachments.pdf", context).xml;
        assertContains("<div source=\"attachment\" class=\"embedded\" id=\"Unit10.doc\" />", attachmentsXml);
        assertContains("<img src=\"embedded:image1.tif\" alt=\"image1.tif\" />", attachmentsXml);

        String annotationXml = getXML("testPDFFileEmbInAnnotation.pdf").xml;
        assertContains("<div source=\"annotation\" class=\"embedded\" id=\"Excel.xlsx\" />", annotationXml);
    }

    /**
     * With no {@link PDFParserConfig} access checker installed, text must be
     * extracted from the "no extract" test documents: the owner_empty files with
     * a default parse, and the owner_user files when the password "user" is
     * supplied through a {@link PasswordProvider}.
     */
    @Test
    public void testLegacyAccessChecking() throws Exception {
        String[] emptyPasswordFiles = {
            "testPDF_no_extract_no_accessibility_owner_empty.pdf",
            "testPDF_no_extract_yes_accessibility_owner_empty.pdf"
        };
        for (String file : emptyPasswordFiles) {
            assertContains("Hello World", getXML(file).xml);
        }

        ParseContext context = new ParseContext();
        context.set(PasswordProvider.class, new PasswordProvider() {
            @Override
            public String getPassword(Metadata metadata) {
                return "user";
            }
        });

        String[] userPasswordFiles = {
            "testPDF_no_extract_no_accessibility_owner_user.pdf",
            "testPDF_no_extract_yes_accessibility_owner_user.pdf"
        };
        for (String file : userPasswordFiles) {
            assertContains("Hello World", getXML(file, context).xml);
        }
    }

    /**
     * Exercises {@link AccessChecker} against the owner_empty documents without
     * supplying a password. With {@code AccessChecker(false)} both files must
     * raise {@link AccessPermissionException}; with {@code AccessChecker(true)}
     * only the "no accessibility" file does, and the "yes accessibility" file
     * yields its text.
     */
    @Test
    public void testAccessCheckingEmptyPassword() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        config.setAccessChecker(new AccessChecker(false));
        String[] ownerEmptyFiles = {
            "testPDF_no_extract_no_accessibility_owner_empty.pdf",
            "testPDF_no_extract_yes_accessibility_owner_empty.pdf"
        };
        for (String file : ownerEmptyFiles) {
            assertException("/test-documents/" + file, AUTO_DETECT_PARSER, context, AccessPermissionException.class);
        }

        config.setAccessChecker(new AccessChecker(true));
        assertException("/test-documents/testPDF_no_extract_no_accessibility_owner_empty.pdf", AUTO_DETECT_PARSER, context, AccessPermissionException.class);
        String accessibleXml = getXML("testPDF_no_extract_yes_accessibility_owner_empty.pdf", context).xml;
        assertContains("Hello World", accessibleXml);
    }

    /**
     * Exercises {@link AccessChecker} while always supplying the password "user".
     * The owner_empty documents must raise {@link EncryptedDocumentException}
     * under both checker settings. For the owner_user documents,
     * {@code AccessChecker(true)} rejects only the "no accessibility" file, while
     * {@code AccessChecker(false)} rejects both with
     * {@link AccessPermissionException}.
     */
    @Test
    public void testAccessCheckingUserPassword() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        ParseContext context = new ParseContext();
        context.set(PasswordProvider.class, new PasswordProvider() {
            @Override
            public String getPassword(Metadata metadata) {
                return "user";
            }
        });
        context.set(PDFParserConfig.class, config);

        String[] ownerEmptyFiles = {
            "testPDF_no_extract_no_accessibility_owner_empty.pdf",
            "testPDF_no_extract_yes_accessibility_owner_empty.pdf"
        };
        String[] ownerUserFiles = {
            "testPDF_no_extract_no_accessibility_owner_user.pdf",
            "testPDF_no_extract_yes_accessibility_owner_user.pdf"
        };

        config.setAccessChecker(new AccessChecker(false));
        for (String file : ownerEmptyFiles) {
            assertException("/test-documents/" + file, AUTO_DETECT_PARSER, context, EncryptedDocumentException.class);
        }

        config.setAccessChecker(new AccessChecker(true));
        for (String file : ownerEmptyFiles) {
            assertException("/test-documents/" + file, AUTO_DETECT_PARSER, context, EncryptedDocumentException.class);
        }
        assertException("/test-documents/testPDF_no_extract_no_accessibility_owner_user.pdf", AUTO_DETECT_PARSER, context, AccessPermissionException.class);
        assertContains("Hello World", getXML("testPDF_no_extract_yes_accessibility_owner_user.pdf", context).xml);

        config.setAccessChecker(new AccessChecker(false));
        for (String file : ownerUserFiles) {
            assertException("/test-documents/" + file, AUTO_DETECT_PARSER, context, AccessPermissionException.class);
        }
    }

    /**
     * Supplies the password "owner" and checks that all four "no extract" test
     * documents yield their text, both with {@code AccessChecker(true)} and with
     * {@code AccessChecker(false)}.
     */
    @Test
    public void testAccessCheckingOwnerPassword() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        ParseContext context = new ParseContext();
        context.set(PasswordProvider.class, new PasswordProvider() {
            @Override
            public String getPassword(Metadata metadata) {
                return "owner";
            }
        });
        context.set(PDFParserConfig.class, config);

        String[] allFiles = {
            "testPDF_no_extract_no_accessibility_owner_user.pdf",
            "testPDF_no_extract_yes_accessibility_owner_user.pdf",
            "testPDF_no_extract_no_accessibility_owner_empty.pdf",
            "testPDF_no_extract_yes_accessibility_owner_empty.pdf"
        };

        config.setAccessChecker(new AccessChecker(true));
        for (String file : allFiles) {
            assertContains("Hello World", getXML(file, context).xml);
        }

        config.setAccessChecker(new AccessChecker(false));
        for (String file : allFiles) {
            assertContains("Hello World", getXML(file, context).xml);
        }
    }

    /** testPDF.pdf must report {@link PDF#HAS_XMP} as "false". */
    @Test
    public void testNoXMP() throws Exception {
        Metadata metadata = getXML("testPDF.pdf").metadata;
        String hasXmp = metadata.get(PDF.HAS_XMP);
        Assert.assertEquals("false", hasXmp);
    }

    /** The title of testPDF_PDFEncodedStringInXMP.pdf must come out as "Microsoft". */
    @Test
    public void testPDFEncodedStringsInXMP() throws Exception {
        Metadata metadata = getXML("testPDF_PDFEncodedStringInXMP.pdf").metadata;
        String title = metadata.get(TikaCoreProperties.TITLE);
        Assert.assertEquals("Microsoft", title);
    }

    /**
     * Parses testPDF_XFA_govdocs1_258578.pdf with defaults and checks the
     * XFA / AcroForm / XMP flags plus two expected fragments of the XHTML output.
     */
    @Test
    public void testXFAExtractionBasic() throws Exception {
        TikaTest.XMLResult result = getXML("testPDF_XFA_govdocs1_258578.pdf");

        Assert.assertEquals("true", result.metadata.get(PDF.HAS_XFA));
        Assert.assertEquals("true", result.metadata.get(PDF.HAS_ACROFORM_FIELDS));
        Assert.assertEquals("true", result.metadata.get(PDF.HAS_XMP));

        String xhtml = result.xml;
        assertContains("Mount Rushmore National Memorial", xhtml);
        assertContains("<li fieldName=\"School_Name\">School Name: my_school</li>", xhtml);
    }

    /**
     * With {@code setIfXFAExtractOnlyXFA(true)} the output for
     * testPDF_XFA_govdocs1_258578.pdf must start its body with the xfa_content
     * div, contain the Room_1 field, and omit the regular page text.
     */
    @Test
    public void testXFAOnly() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setIfXFAExtractOnlyXFA(true);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        String xhtml = getXML("testPDF_XFA_govdocs1_258578.pdf", context).xml;

        assertContains("<body><div class=\"xfa_content\">", xhtml);
        assertContains("<li fieldName=\"Room_1\">Room [1]: my_room1</li>", xhtml);
        assertNotContained("Mount Rushmore National Memorial", xhtml);
    }

    /**
     * Checks the XMP Media Management document id for two test files and, for
     * testPDF_Version.11.x.PDFA-1b.pdf, the seven history entries (instance id,
     * action, software agent and timestamp).
     */
    @Test
    public void testXMPMM() throws Exception {
        Metadata twoAuthors = getXML("testPDF_twoAuthors.pdf").metadata;
        Assert.assertEquals("uuid:0e46913c-72b9-40c0-8232-69e362abcd1e", twoAuthors.get(XMPMM.DOCUMENTID));

        Metadata pdfa = getXML("testPDF_Version.11.x.PDFA-1b.pdf").metadata;
        Assert.assertEquals("uuid:cccee1fc-51b3-4b52-ac86-672af3974d25", pdfa.get(XMPMM.DOCUMENTID));

        String[] expectedInstanceIds = {
            "uuid:0313504b-a0b0-4dac-a9f0-357221f2eadf",
            "uuid:edc4279e-0d5f-465e-b13e-1298402fd11c",
            "uuid:f565b775-43f3-4a9a-8541-e98c4115db6d",
            "uuid:9fd5e0a8-14a5-4920-ad7f-870c0b8ee65f",
            "uuid:09b6cfba-efde-4e07-a77f-70de858cc0aa",
            "uuid:1e4ffbd7-dabc-4aae-801c-15b3404ade36",
            "uuid:c1669773-a6ca-4bdd-aade-519030d0af00"
        };
        Assert.assertArrayEquals(expectedInstanceIds, pdfa.getValues(XMPMM.HISTORY_EVENT_INSTANCEID));

        String[] expectedActions = {
            "converted", "converted", "converted", "converted", "converted", "converted", "converted"
        };
        Assert.assertArrayEquals(expectedActions, pdfa.getValues(XMPMM.HISTORY_ACTION));

        String[] expectedAgents = {
            "Preflight", "Preflight", "Preflight", "Preflight", "Preflight", "Preflight", "Preflight"
        };
        Assert.assertArrayEquals(expectedAgents, pdfa.getValues(XMPMM.HISTORY_SOFTWARE_AGENT));

        String[] expectedTimestamps = {
            "2014-03-04T23:50:41Z",
            "2014-03-04T23:50:42Z",
            "2014-03-04T23:51:34Z",
            "2014-03-04T23:51:36Z",
            "2014-03-04T23:51:37Z",
            "2014-03-04T23:52:22Z",
            "2014-03-04T23:54:48Z"
        };
        Assert.assertArrayEquals(expectedTimestamps, pdfa.getValues(XMPMM.HISTORY_WHEN));
    }

    /**
     * Parses testPDF_bad_page_303226.pdf twice and checks how a bad page is
     * reported.
     *
     * <p>Pass 1 uses the default configuration: the parse must end in a
     * {@link TikaException}, exactly one exception warning containing
     * "Unknown dir" must be recorded in the metadata, and the text collected so
     * far must contain "1309.61".
     *
     * <p>Pass 2 sets {@code setCatchIntermediateIOExceptions(false)}: the parse
     * must still end in a {@link TikaException}, but no warning is recorded and
     * "1309.61" must not appear in the collected text.
     *
     * <p>Both passes use try-with-resources so the stream is closed on every
     * path, and the {@code TikaException} catch wraps the parse call itself so
     * the "exception was thrown" flag can actually be set before it is asserted.
     */
    @Test
    public void testSkipBadPage() throws Exception {
        ParseContext parseContext = new ParseContext();

        // Pass 1: default config.
        BodyContentHandler defaultHandler = new BodyContentHandler(-1);
        Metadata defaultMetadata = new Metadata();
        boolean defaultThrew = false;
        try (InputStream stream = getResourceAsStream("/test-documents/testPDF_bad_page_303226.pdf")) {
            AUTO_DETECT_PARSER.parse(stream, defaultHandler, defaultMetadata, parseContext);
        } catch (TikaException expected) {
            // The exception is the expected outcome; content and metadata are inspected below.
            defaultThrew = true;
        }
        String defaultContent = defaultHandler.toString();
        Assert.assertTrue("Should have thrown exception", defaultThrew);
        Assert.assertEquals(1L, defaultMetadata.getValues(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING).length);
        assertContains("Unknown dir", defaultMetadata.get(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING));
        assertContains("1309.61", defaultContent);

        // Pass 2: intermediate IOExceptions are no longer caught by the parser.
        PDFParserConfig pDFParserConfig = new PDFParserConfig();
        pDFParserConfig.setCatchIntermediateIOExceptions(false);
        parseContext.set(PDFParserConfig.class, pDFParserConfig);

        BodyContentHandler strictHandler = new BodyContentHandler(-1);
        Metadata strictMetadata = new Metadata();
        boolean strictThrew = false;
        try (InputStream stream = getResourceAsStream("/test-documents/testPDF_bad_page_303226.pdf")) {
            AUTO_DETECT_PARSER.parse(stream, strictHandler, strictMetadata, parseContext);
        } catch (TikaException expected) {
            strictThrew = true;
        }
        String strictContent = strictHandler.toString();
        Assert.assertTrue("Should have thrown exception", strictThrew);
        Assert.assertEquals(0L, strictMetadata.getValues(TikaCoreProperties.TIKA_META_EXCEPTION_WARNING).length);
        assertNotContained("1309.61", strictContent);
    }

    /**
     * Builds an {@link AutoDetectParser} from
     * /org/apache/tika/parser/pdf/tika-config.xml and checks that the
     * whitespace-normalized text of testPDFTwoTextBoxes.pdf contains the
     * expected interleaved column lines.
     *
     * <p>The config stream is managed with try-with-resources so it is closed on
     * every path and a failure while closing cannot replace the primary
     * exception.
     */
    @Test
    public void testInitializationViaConfig() throws Exception {
        try (InputStream configStream = getResourceAsStream("/org/apache/tika/parser/pdf/tika-config.xml")) {
            Assert.assertNotNull(configStream);
            AutoDetectParser parser = new AutoDetectParser(new TikaConfig(configStream));
            String text = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), parser);
            // Collapse all whitespace runs so the comparison ignores layout differences.
            String normalized = text.replaceAll("\\s+", " ");
            assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", normalized);
        }
    }

    /**
     * Builds an {@link AutoDetectParser} from
     * /org/apache/tika/parser/pdf/tika-config-non-primitives.xml, digs out the
     * parser registered for application/pdf, and checks that it is a
     * {@link PDFParser} whose config has OCR strategy {@code OCR_ONLY} and OCR
     * image type {@code RGB}.
     *
     * <p>The config stream is managed with try-with-resources. The parser lookup
     * goes through explicit {@code CompositeParser} / {@code PDFParser} casts so
     * the local variable types match the values assigned to them.
     */
    @Test
    public void testInitializationOfNonPrimitivesViaConfig() throws Exception {
        try (InputStream configStream = getResourceAsStream("/org/apache/tika/parser/pdf/tika-config-non-primitives.xml")) {
            Assert.assertNotNull(configStream);
            AutoDetectParser autoDetectParser = new AutoDetectParser(new TikaConfig(configStream));

            // Two-level lookup: the top-level entry for application/pdf is itself a composite.
            Parser composite = autoDetectParser.getParsers().get(MediaType.application("pdf"));
            Parser pdfParser = ((org.apache.tika.parser.CompositeParser) composite)
                    .getParsers().get(MediaType.application("pdf"));

            // Assert the concrete class before casting so a mismatch fails as an assertion.
            Assert.assertEquals("org.apache.tika.parser.pdf.PDFParser", pdfParser.getClass().getName());
            PDFParserConfig config = ((PDFParser) pdfParser).getPDFParserConfig();
            Assert.assertEquals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY, config.getOcrStrategy());
            Assert.assertEquals(ImageType.RGB, config.getOcrImageType());
        }
    }

    /**
     * For testPDF_diffTitles.pdf the document-info title and the core title
     * property must carry their two different expected values.
     */
    @Test
    public void testDiffTitles() throws Exception {
        TikaTest.XMLResult result = getXML("testPDF_diffTitles.pdf");
        String docInfoTitle = result.metadata.get(PDF.DOC_INFO_TITLE);
        String coreTitle = result.metadata.get(TikaCoreProperties.TITLE);

        Assert.assertEquals("this is a new title", docInfoTitle);
        Assert.assertEquals("Sample Title", coreTitle);
    }

    /**
     * Parses testPDF.pdf through the {@link Tika} facade with a maximum length
     * of 100 and checks that exactly 100 characters come back and that they
     * contain "Tika - Content".
     */
    @Test
    public void testMaxLength() throws Exception {
        InputStream stream = getResourceAsStream("/test-documents/testPDF.pdf");
        String text = new Tika().parseToString(stream, new Metadata(), 100);
        // assertEquals (rather than assertTrue on ==) reports the actual length on failure.
        Assert.assertEquals(100, text.length());
        assertContains("Tika - Content", text);
    }

    /**
     * Builds an {@link AutoDetectParser} from
     * /org/apache/tika/parser/pdf/tika-inline-config.xml, checks that testOCR.pdf
     * yields two metadata entries with it, and then verifies the individual
     * {@link PDFParserConfig} values held by the configured PDF parser.
     *
     * <p>The config stream is managed with try-with-resources. The parser lookup
     * goes through an explicit {@code CompositeParser} cast and keeps the result
     * typed as {@link Parser} until the {@code instanceof} assertion has run.
     */
    @Test
    public void testConfiguringMoreParams() throws Exception {
        try (InputStream configStream = getResourceAsStream("/org/apache/tika/parser/pdf/tika-inline-config.xml")) {
            Assert.assertNotNull(configStream);
            AutoDetectParser autoDetectParser = new AutoDetectParser(new TikaConfig(configStream));
            Assert.assertEquals(2L, getRecursiveMetadata("testOCR.pdf", autoDetectParser).size());

            // Two-level lookup: the top-level entry for application/pdf is itself a composite.
            Parser composite = autoDetectParser.getParsers().get(MediaType.application("pdf"));
            Parser pdfParser = ((org.apache.tika.parser.CompositeParser) composite)
                    .getParsers().get(MediaType.application("pdf"));
            Assert.assertTrue(pdfParser instanceof PDFParser);

            PDFParserConfig pDFParserConfig = ((PDFParser) pdfParser).getPDFParserConfig();
            Assert.assertEquals(new AccessChecker(true), pDFParserConfig.getAccessChecker());
            Assert.assertTrue(pDFParserConfig.isExtractInlineImages());
            Assert.assertFalse(pDFParserConfig.isExtractUniqueInlineImagesOnly());
            Assert.assertEquals(314L, pDFParserConfig.getOcrDPI());
            Assert.assertEquals(2.1f, pDFParserConfig.getOcrImageQuality(), 0.01f);
            Assert.assertEquals("jpeg", pDFParserConfig.getOcrImageFormatName());
            Assert.assertEquals(524288000L, pDFParserConfig.getMaxMainMemoryBytes());
            Assert.assertFalse(pDFParserConfig.isCatchIntermediateIOExceptions());
        }
    }

    /**
     * Asserts that extracting text from the given classpath resource fails with
     * exactly the given exception class.
     *
     * @param str          classpath location of the document to parse
     * @param parser       parser handed to {@code getText}
     * @param parseContext parse context handed to {@code getText}
     * @param cls          exact class the thrown exception must have
     */
    private void assertException(String str, Parser parser, ParseContext parseContext, Class cls) {
        boolean parsedWithoutError = false;
        InputStream stream = getResourceAsStream(str);
        try {
            getText(stream, parser, parseContext);
            parsedWithoutError = true;
        } catch (Exception e) {
            Assert.assertEquals("Not the right exception: " + str, cls, e.getClass());
        } finally {
            // Close on every path, including when the assertion above fails.
            IOUtils.closeQuietly(stream);
        }
        Assert.assertFalse(str + " should have thrown exception", parsedWithoutError);
    }

    /** Checks the language metadata value reported for two test documents. */
    @Test
    public void testLanguageMetadata() throws Exception {
        Metadata customMetadata = getXML("testPDF-custommetadata.pdf").metadata;
        Assert.assertEquals("de-CH", customMetadata.get(TikaCoreProperties.LANGUAGE));

        Metadata annotationMetadata = getXML("testPDFFileEmbInAnnotation.pdf").metadata;
        Assert.assertEquals("zh-CN", annotationMetadata.get(TikaCoreProperties.LANGUAGE));
    }

    /**
     * Parses testPDF_angles.pdf with angle detection enabled and checks that the
     * output has exactly one page div and contains three expected phrases.
     */
    @Test
    public void testAngles() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setDetectAngles(true);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        String xhtml = getXML("testPDF_angles.pdf", context).xml;

        assertContainsCount("<div class=\"page\">", xhtml, 1);
        String[] expectedPhrases = {"IN-DEMAND", "natural underground", "transport mined materials"};
        for (String phrase : expectedPhrases) {
            assertContains(phrase, xhtml);
        }
    }

    /**
     * Reads the per-page character and unmapped-unicode counts for
     * testPDF_bad_page_303226.pdf, once with defaults and once with angle
     * detection enabled. The values at index 15 are only checked when the
     * character count at that index is positive.
     */
    @Test
    public void testUnmappedUnicodeStats() throws Exception {
        Metadata defaultMetadata = getRecursiveMetadata("testPDF_bad_page_303226.pdf", true).get(0);
        int[] defaultChars = defaultMetadata.getIntValues(PDF.CHARACTERS_PER_PAGE);
        int[] defaultUnmapped = defaultMetadata.getIntValues(PDF.UNMAPPED_UNICODE_CHARS_PER_PAGE);
        if (defaultChars[15] > 0) {
            Assert.assertEquals(3805, defaultChars[15]);
            Assert.assertEquals(120, defaultUnmapped[15]);
        }

        PDFParserConfig config = new PDFParserConfig();
        config.setDetectAngles(true);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        Metadata angleMetadata = getRecursiveMetadata("testPDF_bad_page_303226.pdf", context, true).get(0);
        int[] angleChars = angleMetadata.getIntValues(PDF.CHARACTERS_PER_PAGE);
        int[] angleUnmapped = angleMetadata.getIntValues(PDF.UNMAPPED_UNICODE_CHARS_PER_PAGE);
        if (angleChars[15] > 0) {
            Assert.assertEquals(3805, angleChars[15]);
            Assert.assertEquals(120, angleUnmapped[15]);
        }
    }

    /**
     * Calls {@code toString}, {@code hashCode} and {@code equals} on a
     * default-constructed {@link PDFParserConfig}; the test passes as long as
     * none of them throws.
     */
    @Test
    public void testNPEInPDFParserConfig() {
        PDFParserConfig config = new PDFParserConfig();
        PDFParserConfig other = new PDFParserConfig();
        // Return values are deliberately ignored; only the absence of an exception matters.
        config.toString();
        config.hashCode();
        config.equals(other);
    }

    /**
     * Parses testPDF_PDFBOX-52.pdf with inline image extraction enabled and
     * checks the number of metadata entries and the highest number that follows
     * "image" in any resource name. Currently ignored.
     */
    @Test
    @Ignore("turn back on if we add file from PDFBOX-52")
    public void testPDFBox52() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractInlineImages(true);
        config.setExtractUniqueInlineImagesOnly(false);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        List<Metadata> metadataList = getRecursiveMetadata("testPDF_PDFBOX-52.pdf", context);

        // One matcher is reused and reset for every resource name.
        Matcher imageNumber = Pattern.compile("image(\\d+)").matcher("");
        int highestImageNumber = 0;
        for (Metadata entry : metadataList) {
            String resourceName = entry.get("resourceName");
            if (resourceName == null || !imageNumber.reset(resourceName).find()) {
                continue;
            }
            highestImageNumber = Math.max(highestImageNumber, Integer.parseInt(imageNumber.group(1)));
        }

        Assert.assertEquals(37, metadataList.size());
        Assert.assertEquals(35, highestImageNumber);
    }

    /**
     * Checks the XMP creator tool and create date on the first metadata entry of
     * testPDF_XMPBasicSchema.pdf.
     */
    @Test
    public void testXMPBasicSchema() throws Exception {
        List<Metadata> metadataList = getRecursiveMetadata("testPDF_XMPBasicSchema.pdf");
        Metadata container = metadataList.get(0);

        Assert.assertEquals("Hewlett-Packard MFP", container.get(XMP.CREATOR_TOOL));
        Assert.assertEquals("1998-08-29T13:53:15Z", container.get(XMP.CREATE_DATE));
    }

    /** testPopupAnnotation.pdf must report "IBM Lotus Symphony 3.0" as its producer. */
    @Test
    public void testXMPPDFSchema() throws Exception {
        Metadata metadata = getXML("testPopupAnnotation.pdf").metadata;
        String producer = metadata.get(PDF.PRODUCER);
        Assert.assertEquals("IBM Lotus Symphony 3.0", producer);
    }

    /**
     * Parses testOCR.pdf with {@code setExtractInlineImageMetadataOnly(true)} and
     * checks the metadata of the single embedded image: content type, embedded
     * path, dimensions and resource name. Also checks that the parse context
     * holds no {@code IgnoreZeroByteFileException} entry after the parse.
     */
    @Test
    public void testExtractInlineImageMetadata() throws Exception {
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractInlineImageMetadataOnly(true);
        ParseContext context = new ParseContext();
        context.set(PDFParserConfig.class, config);

        List<Metadata> metadataList = getRecursiveMetadata("testOCR.pdf", context);
        Assert.assertNull(context.get(ZeroByteFileException.IgnoreZeroByteFileException.class));
        Assert.assertEquals(2, metadataList.size());

        // Index 0 is the container document; index 1 is the inline image.
        Metadata image = metadataList.get(1);
        Assert.assertEquals("image/png", image.get("Content-Type"));
        Assert.assertEquals("/image0.png", image.get(RecursiveParserWrapperHandler.EMBEDDED_RESOURCE_PATH));
        Assert.assertEquals(261, image.getInt(Metadata.IMAGE_LENGTH).intValue());
        Assert.assertEquals(934, image.getInt(Metadata.IMAGE_WIDTH).intValue());
        Assert.assertEquals("image0.png", image.get("resourceName"));
    }
}
