package org.apache.tika.parser.pdf;

import java.util.List;
import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.RecursiveParserWrapperHandler;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/* loaded from: input_file:org/apache/tika/parser/pdf/PDFMarkedContent2XHTMLTest.class */
/**
 * Checks the XHTML emitted for PDF fixtures when the parser is configured with
 * {@code PDFParserConfig.setExtractMarkedContent(true)}.
 *
 * <p>Each test parses a fixture using the shared {@link #MARKUP_CONTEXT} and asserts that
 * specific markup fragments are present in the resulting output.
 */
public class PDFMarkedContent2XHTMLTest extends TikaTest {
    /** Parse context shared by every test in this class; configured once in {@link #setUp()}. */
    static ParseContext MARKUP_CONTEXT = new ParseContext();

    /**
     * Registers a {@link PDFParserConfig} with marked-content extraction enabled
     * on {@link #MARKUP_CONTEXT} before any test runs.
     */
    @BeforeClass
    public static void setUp() {
        PDFParserConfig config = new PDFParserConfig();
        config.setExtractMarkedContent(true);
        MARKUP_CONTEXT.set(PDFParserConfig.class, config);
    }

    /**
     * Verifies that heading and table markup appears in the output for
     * {@code testJournalParser.pdf}.
     *
     * @throws Exception if the fixture cannot be parsed
     */
    @Test
    public void testJournal() throws Exception {
        String xml = getXML("testJournalParser.pdf", MARKUP_CONTEXT).xml;
        assertContains("<h1>I. INTRODUCTION</h1>", xml);
        assertContains("<table><tr>\t<td><p />", xml);
        assertContains("</td>\t<td><p>NHG</p>", xml);
        assertContains("</td>\t<td><p>STRING</p>", xml);
    }

    /**
     * Verifies that text box, footnote, list, table, citation, hyperlink, header and
     * footer content appears in the output for {@code testPDFVarious.pdf}.
     *
     * @throws Exception if the fixture cannot be parsed
     */
    @Test
    public void testVarious() throws Exception {
        String xml = getXML("testPDFVarious.pdf", MARKUP_CONTEXT).xml;
        assertContains("<div class=\"textbox\"><p>Here is a text box</p>", xml);
        assertContains("<div class=\"footnote\"><p>1 This is a footnote.</p>", xml);
        assertContains("<ul>\t<li>Bullet 1</li>", xml);
        assertContains("<table><tr>\t<td><p>Row 1 Col 1</p>", xml);
        assertContains("<p>Here is a citation:</p>", xml);
        assertContains("a href=\"http://tika.apache.org/\">This is a hyperlink</a>", xml);
        assertContains("This is the header text.", xml);
        assertContains("This is the footer text.", xml);
    }

    /**
     * Verifies that recursive parsing of {@code testPDF_childAttachments.pdf} yields three
     * metadata entries and that the content of the first entry contains the expected links.
     *
     * @throws Exception if the fixture cannot be parsed
     */
    @Test
    public void testChildAttachments() throws Exception {
        // Typed list instead of a raw List: no element cast is needed and misuse is a compile error.
        List<Metadata> metadataList =
                getRecursiveMetadata("testPDF_childAttachments.pdf", MARKUP_CONTEXT);
        Assert.assertEquals(3L, metadataList.size());

        // NOTE(review): index 0 is presumably the container PDF itself - confirm against TikaTest.
        String content = metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT);
        assertContains("<a href=\"http://www.irs.gov\">IRS.gov</a>", content);
        assertContains("<a href=\"http://www.irs.gov/pub15\">www.irs.gov/pub15</a>", content);
    }
}
