package org.apache.tika.parser.microsoft.ooxml;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormatSymbols;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.transform.sax.SAXTransformerFactory;
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.util.LocaleUtil;
import org.apache.tika.MultiThreadedTikaTest;
import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Office;
import org.apache.tika.metadata.OfficeOpenXMLExtended;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.parser.RecursiveParserWrapper;
import org.apache.tika.parser.microsoft.OfficeParser;
import org.apache.tika.parser.microsoft.OfficeParserConfig;
import org.apache.tika.parser.microsoft.OfficeParserTest;
import org.apache.tika.sax.BodyContentHandler;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.class */
public class OOXMLParserTest extends MultiThreadedTikaTest {
    private static Locale USER_LOCALE = null;

    /**
     * Remembers the POI user locale that is active before any test runs, so it
     * can be put back once the whole class has finished.
     */
    @BeforeAll
    public static void setUp() {
        final Locale localeBeforeTests = LocaleUtil.getUserLocale();
        USER_LOCALE = localeBeforeTests;
    }

    /**
     * Re-installs the saved locale as both the POI user locale and the JVM
     * default locale after all tests in this class have run.
     */
    @AfterAll
    public static void tearDown() {
        final Locale saved = USER_LOCALE;
        LocaleUtil.setUserLocale(saved);
        Locale.setDefault(saved);
    }

    /** Pins the POI user locale and the JVM default locale to US before every test. */
    @BeforeEach
    public void beforeEach() {
        final Locale us = Locale.US;
        LocaleUtil.setUserLocale(us);
        Locale.setDefault(us);
    }

    /**
     * Extracts {@code testEXCEL.xlsx} with a US locale and checks the reported
     * content type, title, creator, a few cell values and the "protected" flag.
     */
    @Test
    public void testExcel() throws Exception {
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        Metadata meta = new Metadata();
        String content = getText("testEXCEL.xlsx", meta, context);

        // Document-level metadata
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                meta.get("Content-Type"));
        Assertions.assertEquals("Simple Excel document", meta.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));

        // Sheet text
        assertContains("Sample Excel Worksheet", content);
        assertContains("Numbers and their Squares", content);

        // Whole numbers must not be rendered with a trailing ".0"
        assertContains("9", content);
        assertNotContained("9.0", content);
        assertContains("196", content);
        assertNotContained("196.0", content);

        Assertions.assertEquals("false", meta.get("protected"));
    }

    /**
     * Extracts {@code testEXCEL-formats.xlsx} with a US locale and checks that
     * formatted cell values (number, currency, scientific, percentage, time,
     * date and fraction) show up in the extracted text.
     *
     * <p>The former {@code java.version} "1.5" fallback (expecting "2%") was
     * dropped: this class runs on JUnit 5, so that branch could never execute.
     * The repeated currency assertions were folded into a single pair.
     */
    @Test
    public void testExcelFormats() throws Exception {
        Metadata metadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);
        String text = getText("testEXCEL-formats.xlsx", metadata, parseContext);
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                metadata.get("Content-Type"));

        // Number
        assertContains("1,599.99", text);
        assertContains("-1,599.99", text);

        // Currency, plain and parenthesised
        assertContains("$1,599.99", text);
        assertContains("($1,599.99)", text);

        // Scientific: either exponent rendering is accepted
        Assertions.assertTrue(text.contains("1.98E08") || text.contains("1.98E+08"));
        Assertions.assertTrue(text.contains("-1.98E08") || text.contains("-1.98E+08"));

        // Percentage
        assertContains("2.50%", text);
        assertContains("3%", text);

        // Time
        assertContains("6:15", text);
        assertContains("18:15", text);

        // Date
        assertContains("17-May-07", text);

        // Fraction
        assertContains("2 1/2", text);
    }

    /**
     * Currently disabled. Extracts {@code testEXCEL.strict.xlsx} with a US locale
     * and checks content type, title, creator, description, some cell text and
     * the "protected" flag.
     */
    @Disabled("OOXML-Strict not currently supported by POI, see #57699")
    @Test
    public void testExcelStrict() throws Exception {
        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        Metadata meta = new Metadata();
        String content = getText("testEXCEL.strict.xlsx", meta, context);

        // Document-level metadata
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                meta.get("Content-Type"));
        Assertions.assertEquals("Sample Spreadsheet", meta.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("Nick Burch", meta.get(TikaCoreProperties.CREATOR));
        Assertions.assertEquals("Spreadsheet for testing", meta.get(TikaCoreProperties.DESCRIPTION));

        // Sheet text
        assertContains("Test spreadsheet", content);
        assertContains("This one is red", content);

        // Numeric cells must not carry a trailing ".0"
        assertContains("cb=10", content);
        assertNotContained("10.0", content);
        assertContains("cb=sum", content);
        assertNotContained("13.0", content);

        Assertions.assertEquals("false", meta.get("protected"));
    }

    /**
     * Extracts text from the {@code testPPT.*} sample in each supported
     * PowerPoint flavour and checks the detected content type, title, creator
     * and a fixed set of text snippets.
     *
     * <p>The two arrays below are parallel: {@code mimeTypes[i]} is the type
     * expected for {@code extensions[i]}.
     */
    @Test
    public void testPowerPoint() throws Exception {
        String[] extensions = {"pptx", "pptm", "ppsm", "ppsx", "potm"};
        String[] mimeTypes = {
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint.presentation.macroenabled.12",
            "application/vnd.ms-powerpoint.slideshow.macroenabled.12",
            "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
            "application/vnd.ms-powerpoint.template.macroenabled.12"
        };
        String[] expectedSnippets = {
            "Attachment Test",
            "This is a test file data with the same content",
            "content parsing",
            "Different words to test against",
            "Mystery"
        };

        for (int i = 0; i < extensions.length; i++) {
            String fileName = "testPPT." + extensions[i];
            Metadata metadata = new Metadata();
            String text = getText(fileName, metadata, new ParseContext());

            Assertions.assertEquals(mimeTypes[i], metadata.get("Content-Type"),
                    "Mime-type checking for " + fileName);
            Assertions.assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
            Assertions.assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));

            // No "thmx" special case: that extension is not in the list above,
            // so every file here is expected to yield the full text.
            for (String snippet : expectedSnippets) {
                Assertions.assertTrue(text.contains(snippet),
                        "Text missing for " + fileName + "\n" + text);
            }
        }
    }

    /**
     * Checks that content type, title and creator are already present in the
     * {@link Metadata} object when the handler's {@code startDocument()} is
     * called, for each supported PowerPoint flavour of the {@code testPPT.*}
     * sample.
     */
    @Test
    public void testPowerPointMetadataEarly() throws Exception {
        String[] extensions = {"pptx", "pptm", "ppsm", "ppsx", "potm"};
        final String[] mimeTypes = {
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint.presentation.macroenabled.12",
            "application/vnd.ms-powerpoint.slideshow.macroenabled.12",
            "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
            "application/vnd.ms-powerpoint.template.macroenabled.12"
        };

        for (int i = 0; i < extensions.length; i++) {
            final String fileName = "testPPT." + extensions[i];
            final Metadata metadata = new Metadata();
            // The loop counter is not effectively final, so copy it for the anonymous class.
            final int index = i;

            BodyContentHandler handler = new BodyContentHandler() {
                @Override
                public void startDocument() {
                    Assertions.assertEquals(mimeTypes[index], metadata.get("Content-Type"),
                            "Mime-type checking for " + fileName);
                    Assertions.assertEquals("Attachment Test", metadata.get(TikaCoreProperties.TITLE));
                    Assertions.assertEquals("Rajiv", metadata.get(TikaCoreProperties.CREATOR));
                }
            };

            ParseContext parseContext = new ParseContext();
            // try-with-resources closes the stream exactly once, also on failure.
            try (InputStream stream = getResourceAsStream("/test-documents/" + fileName)) {
                AUTO_DETECT_PARSER.parse(stream, handler, metadata, parseContext);
            }
        }
    }

    /**
     * Runs the {@code testPPT.xps} and {@code testPPT.thmx} samples through
     * {@code getXML} (with the resource name pre-set in the metadata) and checks
     * the content type reported for each.
     */
    @Test
    public void testUnsupportedPowerPoint() throws Exception {
        String[] extensions = {"xps", "thmx"};
        String[] expectedTypes = {
            "application/vnd.ms-xpsdocument",
            "application/vnd.openxmlformats-officedocument"
        };

        int index = 0;
        for (String extension : extensions) {
            String fileName = "testPPT." + extension;
            Metadata meta = new Metadata();
            meta.set("resourceName", fileName);
            getXML(fileName, meta);
            Assertions.assertEquals(expectedTypes[index], meta.get("Content-Type"),
                    "Mime-type checking for " + fileName);
            index++;
        }
    }

    /**
     * Extracts {@code testWORD.docx} and checks content type, title, creator and
     * that the title text appears in the body.
     */
    @Test
    public void testWord() throws Exception {
        Metadata meta = new Metadata();
        ParseContext context = new ParseContext();
        String content = getText("testWORD.docx", meta, context);

        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                meta.get("Content-Type"));
        Assertions.assertEquals("Sample Word Document", meta.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));

        boolean titleInBody = content.contains("Sample Word Document");
        Assertions.assertTrue(titleInBody);
    }

    /**
     * Parses {@code footnotes.docx} and checks the content type and that the
     * string "snoska" is present in the XHTML output.
     */
    @Test
    public void testWordFootnote() throws Exception {
        TikaTest.XMLResult result = getXML("footnotes.docx");
        String contentType = result.metadata.get("Content-Type");
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                contentType);
        boolean footnoteFound = result.xml.contains("snoska");
        Assertions.assertTrue(footnoteFound);
    }

    /**
     * Checks the XHTML produced for several Word samples: headings, inline
     * formatting, tables, links and bookmarks in {@code testWORD.docx}; embedded
     * image references in {@code testWORD_3imgs.docx}; and contiguous bold runs
     * in the two {@code testWORD_bold_character_runs*.docx} files.
     */
    @Test
    public void testWordHTML() throws Exception {
        TikaTest.XMLResult mainResult = getXML("testWORD.docx");
        String html = mainResult.xml;
        Metadata meta = mainResult.metadata;

        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                meta.get("Content-Type"));
        Assertions.assertEquals("Sample Word Document", meta.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("Keith Bennett", meta.get(TikaCoreProperties.CREATOR));

        // Markup fragments expected in the main document, checked in this order
        String[] expectedFragments = {
            "Sample Word Document",
            "<h1 class=\"title\">",
            "<h1>Heading Level 1</h1>",
            "<h2>Heading Level 2</h2>",
            "<h3><a name=\"OnLevel3\" />Heading Level 3</h3>",
            "<b>BOLD</b>",
            "<i>ITALIC</i>",
            "<table>",
            "<td>",
            "<a href=\"http://tika.apache.org/\">Tika</a>"
        };
        for (String fragment : expectedFragments) {
            Assertions.assertTrue(html.contains(fragment));
        }
        assertContains("<a href=\"#OnMainHeading\">The Main Heading Bookmark</a>", html);
        Assertions.assertTrue(html.contains("<p class=\"signature\">This one"));

        // Embedded images are referenced by name, followed by the closing paragraph
        String imagesHtml = getXML("testWORD_3imgs.docx").xml;
        String[] imageNames = {"image2.png", "image3.jpeg", "image4.png"};
        for (String imageName : imageNames) {
            String imgTag = "<img src=\"embedded:" + imageName + "\" alt=\"A description...\" />";
            Assertions.assertTrue(imagesHtml.contains(imgTag), "Image not found in:\n" + imagesHtml);
        }
        Assertions.assertTrue(imagesHtml.contains("<p>The end!</p>"));

        // Bold character runs must be emitted as contiguous <b> elements
        String[] boldRunFiles = {
            "testWORD_bold_character_runs.docx",
            "testWORD_bold_character_runs2.docx"
        };
        for (String boldRunFile : boldRunFiles) {
            String boldHtml = getXML(boldRunFile).xml;
            Assertions.assertTrue(boldHtml.contains("F<b>oob</b>a<b>r</b>"),
                    "Bold text wasn't contiguous: " + boldHtml);
        }
    }

    /**
     * Recursively parses {@code headerPic.docx} and checks that exactly two
     * metadata entries come back, and that the first one is a Word document
     * whose content contains an {@code <img} tag.
     */
    @Test
    public void testWordPicturesInHeader() throws Exception {
        // Typed list instead of a raw List, so no cast is needed on access.
        List<Metadata> metadataList = getRecursiveMetadata("headerPic.docx");
        Assertions.assertEquals(2, metadataList.size());

        Metadata container = metadataList.get(0);
        String content = container.get(TikaCoreProperties.TIKA_CONTENT);
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                container.get("Content-Type"));
        Assertions.assertTrue(content.contains("<img"));
    }

    /**
     * Currently disabled. Recursively parses {@code testWORD_embedded_pics.docx}
     * and checks that the container content names pictures from the headers,
     * footers, body and several other document parts, with 14 {@code <img src=}
     * occurrences in total.
     */
    @Disabled("need to add links in xhtml")
    @Test
    public void testPicturesInVariousPlaces() throws Exception {
        // Typed list instead of a raw List, so no cast is needed on access.
        List<Metadata> metadataList = getRecursiveMetadata("testWORD_embedded_pics.docx");
        Assertions.assertEquals(3, metadataList.size());

        String content = metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT);
        for (int i = 1; i < 4; i++) {
            assertContains("header" + i + "_pic", content);
            assertContains("footer" + i + "_pic", content);
        }
        assertContains("body_pic.jpg", content);
        assertContains("sdt_pic.jpg", content);
        assertContains("deeply_embedded_pic", content);
        assertContains("deleted_pic", content);
        assertContains("footnotes_pic", content);
        assertContains("comments_pic", content);
        assertContains("endnotes_pic", content);
        assertContainsCount("<img src=", content, 14);
    }

    /**
     * Parses {@code protectedSheets.xlsx} and checks the content type and that
     * the "protected" metadata value is "true".
     */
    @Test
    public void testProtectedExcelSheets() throws Exception {
        TikaTest.XMLResult result = getXML("protectedSheets.xlsx");
        Metadata meta = result.metadata;
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                meta.get("Content-Type"));
        Assertions.assertEquals("true", meta.get("protected"));
    }

    /**
     * Parses {@code protectedFile.xlsx} and checks the content type, that the
     * "protected" metadata value is "true", and that the output contains "Office".
     */
    @Test
    public void testProtectedExcelFile() throws Exception {
        TikaTest.XMLResult result = getXML("protectedFile.xlsx");
        Metadata meta = result.metadata;
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                meta.get("Content-Type"));
        Assertions.assertEquals("true", meta.get("protected"));
        assertContains("Office", result.xml);
    }

    /** Parses {@code NullHeader.docx} and checks that the XHTML output is not empty. */
    @Test
    public void testNullHeaders() throws Exception {
        String xhtml = getXML("NullHeader.docx").xml;
        boolean nothingExtracted = xhtml.isEmpty();
        Assertions.assertFalse(nothingExtracted, "Should have found some text");
    }

    /**
     * Checks that bold, italic, underline and strikethrough runs in
     * {@code testWORD_various.docx} are emitted as the matching XHTML elements.
     */
    @Test
    public void testTextDecoration() throws Exception {
        String xhtml = getXML("testWORD_various.docx").xml;
        String[] decoratedRuns = {
            "<b>Bold</b>",
            "<i>italic</i>",
            "<u>underline</u>",
            "<s>strikethrough</s>"
        };
        for (String run : decoratedRuns) {
            assertContains(run, xhtml);
        }
    }

    /**
     * Checks nested decoration markup in the XHTML for
     * {@code testWORD_various.docx}, and that the plain-text output keeps the
     * decorated words whole (no "ita " or "unde " fragments).
     */
    @Test
    public void testTextDecorationNested() throws Exception {
        final String sample = "testWORD_various.docx";

        // XHTML view: nested elements
        String xhtml = getXML(sample).xml;
        assertContains("<i>ita<s>li</s>c</i>", xhtml);
        assertContains("<i>ita<s>l<u>i</u></s>c</i>", xhtml);
        assertContains("<i><u>unde<s>r</s>line</u></i>", xhtml);

        // Plain-text view: words are not split at decoration boundaries
        String plain = getText(sample);
        assertContainsCount("italic", plain, 3);
        assertNotContained("ita ", plain);
        assertContainsCount("underline", plain, 2);
        assertNotContained("unde ", plain);
    }

    /**
     * Extracts plain text from {@code testWORD_various.docx} and checks a broad
     * range of content: footnote, header and footer, text box, inline formatting,
     * citation and caption, tables, hyperlink, bulleted and numbered lists,
     * keyword and subject metadata, and Japanese and Gothic text.
     */
    @Test
    public void testVarious() throws Exception {
        Metadata metadata = new Metadata();
        String text = getText("testWORD_various.docx", metadata);
        // Whitespace-collapsed copy used for the table checks; computed once.
        String normalized = text.replaceAll("\\s+", " ");

        assertContains("Footnote appears here", text);
        assertContains("This is a footnote.", text);
        assertContains("This is the header text.", text);
        assertContains("This is the footer text.", text);
        assertContains("Here is a text box", text);
        assertContains("Bold", text);
        assertContains("italic", text);
        assertContains("underline", text);
        assertContains("superscript", text);
        assertContains("subscript", text);
        assertContains("Here is a citation:", text);
        assertContains("Figure 1 This is a caption for Figure 1", text);
        assertContains("(Kramer)", text);

        // Tables
        assertContains("Row 1 Col 1 Row 1 Col 2 Row 1 Col 3 Row 2 Col 1 Row 2 Col 2 Row 2 Col 3", normalized);
        assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", normalized);

        assertContains("This is a hyperlink", text);

        // Lists
        assertContains("Here is a list:", text);
        for (int row = 1; row <= 3; row++) {
            assertContains("Bullet " + row, text);
        }
        assertContains("Here is a numbered list:", text);
        for (int row = 1; row <= 3; row++) {
            assertContains("Number bullet " + row, text);
        }

        // Individual table cells
        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, text);
            }
        }

        // Keywords and subject, in the text and in the metadata
        assertContains("Keyword1 Keyword2", text);
        Assertions.assertEquals("Keyword1 Keyword2", metadata.get(Office.KEYWORDS));
        assertContains("Keyword1 Keyword2", Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
        assertContains("Subject is here", text);
        assertContains("Subject is here", Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));

        // Non-Latin text
        assertContains("Suddenly some Japanese text:", text);
        assertContains("（ＧＨＱ）", text);
        assertContains("ゾルゲと尾崎、淡々と最期", text);
        assertContains("And then some Gothic text:", text);
        // The literal here had decayed into twelve U+FFFD replacement characters,
        // which can never match. Restored as six supplementary-plane Gothic letters
        // (U+10332 U+1033F U+10344 U+10339 U+10343 U+1033A) written as surrogate-pair
        // escapes so the source stays ASCII-safe.
        // NOTE(review): exact letters are assumed; confirm against the fixture document.
        assertContains("\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", text);
    }

    /**
     * Checks that header and footer text of {@code testWORD_various.docx} is
     * left out of the XHTML, first when header/footer extraction is switched off
     * through an {@link OfficeParserConfig} in the parse context, and then when
     * the parser is built from {@code tika-config-headers-footers.xml}.
     */
    @Test
    public void testDOCXHeaderFooterNotExtraction() throws Exception {
        // 1) Switched off through the ParseContext
        ParseContext parseContext = new ParseContext();
        OfficeParserConfig officeParserConfig = new OfficeParserConfig();
        officeParserConfig.setIncludeHeadersAndFooters(false);
        parseContext.set(OfficeParserConfig.class, officeParserConfig);
        String contextXml = getXML("testWORD_various.docx", parseContext).xml;
        assertNotContained("This is the header text.", contextXml);
        assertNotContained("This is the footer text.", contextXml);

        // 2) Parser built from a TikaConfig file; try-with-resources closes the
        //    config stream exactly once, whether or not construction succeeds.
        AutoDetectParser configuredParser;
        try (InputStream configStream =
                OfficeParserTest.class.getResourceAsStream("tika-config-headers-footers.xml")) {
            configuredParser = new AutoDetectParser(new TikaConfig(configStream));
        }
        String configXml = getXML("testWORD_various.docx", configuredParser).xml;
        assertNotContained("This is the header text.", configXml);
        assertNotContained("This is the footer text.", configXml);
    }

    /**
     * Parses {@code testPPT_various.pptx} to XHTML and checks a broad range of
     * content: footnote, header and footer, text box, inline formatting, citation
     * and caption, table markup, hyperlink, bulleted and numbered lists, keyword
     * and subject metadata, and Japanese and Gothic text.
     */
    @Test
    public void testVariousPPTX() throws Exception {
        Metadata metadata = new Metadata();
        String xhtml = getXML("testPPT_various.pptx", metadata).xml;

        assertContains("<p>Footnote appears here", xhtml);
        assertContains("<p>[1] This is a footnote.", xhtml);
        assertContains("<p>This is the header text.</p>", xhtml);
        assertContains("<p>This is the footer text.</p>", xhtml);
        assertContains("<p>Here is a text box</p>", xhtml);
        assertContains("<p>Bold", xhtml);
        assertContains("italic underline superscript subscript", xhtml);
        assertContains("<p>Here is a citation:", xhtml);
        assertContains("Figure 1 This is a caption for Figure 1", xhtml);
        assertContains("(Kramer)", xhtml);

        // Tables
        assertContains("<table><tr>\t<td>Row 1 Col 1</td>", xhtml);
        assertContains("<td>Row 2 Col 2</td>\t<td>Row 2 Col 3</td></tr>", xhtml);
        assertContains("<p>Row 1 column 1</p>", xhtml);
        assertContains("<p>Row 2 column 2</p>", xhtml);

        assertContains("<p><a href=\"http://tika.apache.org/\">This is a hyperlink</a>", xhtml);

        // Lists
        assertContains("<p>Here is a list:", xhtml);
        for (int row = 1; row <= 3; row++) {
            assertContains("<p>Bullet " + row, xhtml);
        }
        assertContains("Here is a numbered list:", xhtml);
        for (int row = 1; row <= 3; row++) {
            assertContains("<p>Number bullet " + row, xhtml);
        }

        // Individual table cells
        for (int row = 1; row <= 2; row++) {
            for (int col = 1; col <= 3; col++) {
                assertContains("Row " + row + " Col " + col, xhtml);
            }
        }

        // Keywords and subject, in the output and in the metadata
        assertContains("Keyword1 Keyword2", xhtml);
        Assertions.assertEquals("Keyword1 Keyword2", metadata.get(Office.KEYWORDS));
        assertContains("Subject is here", xhtml);
        Assertions.assertEquals("Subject is here", metadata.get(DublinCore.SUBJECT));
        assertContains("Keyword1 Keyword2", Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
        assertContains("Subject is here", Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));

        // Non-Latin text
        assertContains("Suddenly some Japanese text:", xhtml);
        assertContains("（ＧＨＱ）", xhtml);
        assertContains("ゾルゲと尾崎、淡々と最期", xhtml);
        assertContains("And then some Gothic text:", xhtml);
        // The literal here had decayed into twelve U+FFFD replacement characters,
        // which can never match. Restored as six supplementary-plane Gothic letters
        // (U+10332 U+1033F U+10344 U+10339 U+10343 U+1033A) written as surrogate-pair
        // escapes so the source stays ASCII-safe.
        // NOTE(review): exact letters are assumed; confirm against the fixture document.
        assertContains("\uD800\uDF32\uD800\uDF3F\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", xhtml);
    }

    /**
     * Checks that the header text of {@code testPPT_various.pptx} is absent from
     * the XHTML when header/footer extraction is switched off.
     */
    @Test
    public void testSkipHeaderFooter() throws Exception {
        OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeHeadersAndFooters(false);

        ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        String xhtml = getXML("testPPT_various.pptx", context).xml;
        assertNotContained("This is the header text", xhtml);
    }

    /**
     * Checks that the slide comment in {@code testPPT_comment.pptx} is emitted
     * as a {@code slide-comment} paragraph starting with the author in bold.
     */
    @Test
    public void testCommentPPTX() throws Exception {
        String xhtml = getXML("testPPT_comment.pptx").xml;
        String expectedStart = "<p class=\"slide-comment\"><b>Allison, Timothy B. (ATB)";
        assertContains(expectedStart, xhtml);
    }

    /** Checks that the text of {@code testPPT_masterFooter.pptx} includes "Master footer is here". */
    @Test
    public void testMasterFooter() throws Exception {
        String content = getText("testPPT_masterFooter.pptx");
        assertContains("Master footer is here", content);
    }

    /**
     * Currently disabled. Checks that "Master footer" is absent from the XHTML
     * of {@code testPPT_masterFooter.pptx} when slide master content is
     * switched off.
     */
    @Disabled("can't tell why this isn't working")
    @Test
    public void testTurningOffMasterContent() throws Exception {
        OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeSlideMasterContent(false);

        ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        String xhtml = getXML("testPPT_masterFooter.pptx", context).xml;
        assertNotContained("Master footer", xhtml);
    }

    /**
     * Checks that master-slide text of {@code testPPT_masterText.pptx} is
     * extracted by default and is absent once slide master content is
     * switched off.
     */
    @Test
    public void testMasterText() throws Exception {
        final String sample = "testPPT_masterText.pptx";

        // Default behaviour: master text is part of the output
        String defaultText = getText(sample);
        assertContains("Text that I added to the master slide", defaultText);

        // With master content switched off
        OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeSlideMasterContent(false);
        ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);
        String xhtml = getXML(sample, context).xml;
        assertNotContained("Text that I added", xhtml);
    }

    /**
     * Checks that master-slide text of {@code testPPT_masterText2.pptx} is
     * extracted by default and is absent once slide master content is
     * switched off.
     */
    @Test
    public void testMasterText2() throws Exception {
        final String sample = "testPPT_masterText2.pptx";

        // Default behaviour: master text is part of the output
        String defaultText = getText(sample);
        assertContains("Text that I added to the master slide", defaultText);

        // With master content switched off
        OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeSlideMasterContent(false);
        ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);
        String xhtml = getXML(sample, context).xml;
        assertNotContained("Text that I added", xhtml);
    }

    /** Checks that the text of {@code testWordArt.pptx} includes "Here is some red word Art". */
    @Test
    public void testWordArt() throws Exception {
        String content = getText("testWordArt.pptx");
        assertContains("Here is some red word Art", content);
    }

    /**
     * Parses {@code testEXCEL_custom_props.xlsx} with a US locale and checks the
     * content type, that creator and modifier are unset, the created/modified
     * timestamps, the application name and the {@code custom:*} properties.
     */
    @Test
    public void testExcelCustomProperties() throws Exception {
        Metadata metadata = new Metadata();
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);
        getXML("testEXCEL_custom_props.xlsx", metadata, parseContext);

        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                metadata.get("Content-Type"));
        // assertNull states the intent directly, instead of assertEquals with a cast null.
        Assertions.assertNull(metadata.get(TikaCoreProperties.CREATOR));
        Assertions.assertNull(metadata.get(TikaCoreProperties.MODIFIER));
        Assertions.assertEquals("2006-09-12T15:06:44Z", metadata.get(TikaCoreProperties.CREATED));
        Assertions.assertEquals("2011-08-22T14:24:38Z", metadata.get(TikaCoreProperties.MODIFIED));
        Assertions.assertEquals("Microsoft Excel", metadata.get(OfficeOpenXMLExtended.APPLICATION));

        // Custom document properties
        Assertions.assertEquals("true", metadata.get("custom:myCustomBoolean"));
        Assertions.assertEquals("3", metadata.get("custom:myCustomNumber"));
        Assertions.assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
        Assertions.assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
        Assertions.assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
    }

    /**
     * Parses {@code testWORD_custom_props.docx} directly with an
     * {@code OOXMLParser} (US locale) and checks the core, extended and
     * {@code custom:*} metadata properties.
     */
    @Test
    public void testWordCustomProperties() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler(-1);
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);

        // try-with-resources closes the stream exactly once, also when parsing fails.
        try (InputStream stream = getResourceAsStream("/test-documents/testWORD_custom_props.docx")) {
            new OOXMLParser().parse(stream, handler, metadata, parseContext);
        }

        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                metadata.get("Content-Type"));
        Assertions.assertEquals("EJ04325S", metadata.get(TikaCoreProperties.CREATOR));
        Assertions.assertEquals("Etienne Jouvin", metadata.get(TikaCoreProperties.MODIFIER));
        Assertions.assertEquals("2011-07-29T16:52:00Z", metadata.get(TikaCoreProperties.CREATED));
        Assertions.assertEquals("2012-01-03T22:14:00Z", metadata.get(TikaCoreProperties.MODIFIED));
        Assertions.assertEquals("Microsoft Office Word", metadata.get(OfficeOpenXMLExtended.APPLICATION));
        Assertions.assertEquals("1", metadata.get(Office.PAGE_COUNT));
        Assertions.assertEquals("2", metadata.get(Office.WORD_COUNT));
        Assertions.assertEquals("My Title", metadata.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("My Keyword", metadata.get(Office.KEYWORDS));
        assertContains("My Keyword", Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT)));
        Assertions.assertEquals("Normal.dotm", metadata.get(OfficeOpenXMLExtended.TEMPLATE));
        Assertions.assertEquals("My subject", metadata.get(DublinCore.SUBJECT));
        Assertions.assertEquals("EDF-DIT", metadata.get(TikaCoreProperties.PUBLISHER));

        // Custom document properties
        Assertions.assertEquals("true", metadata.get("custom:myCustomBoolean"));
        Assertions.assertEquals("3", metadata.get("custom:myCustomNumber"));
        Assertions.assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
        Assertions.assertEquals("2010-12-30T23:00:00Z", metadata.get("custom:MyCustomDate"));
        Assertions.assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
    }

    /**
     * Parses {@code testPPT_custom_props.pptx} directly with an
     * {@code OOXMLParser} (US locale) and checks the core, count and
     * {@code custom:*} metadata properties.
     */
    @Test
    public void testPowerPointCustomProperties() throws Exception {
        Metadata metadata = new Metadata();
        BodyContentHandler handler = new BodyContentHandler(-1);
        ParseContext parseContext = new ParseContext();
        parseContext.set(Locale.class, Locale.US);

        // try-with-resources closes the stream exactly once, also when parsing fails.
        try (InputStream stream = getResourceAsStream("/test-documents/testPPT_custom_props.pptx")) {
            new OOXMLParser().parse(stream, handler, metadata, parseContext);
        }

        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                metadata.get("Content-Type"));
        Assertions.assertEquals("JOUVIN ETIENNE", metadata.get(TikaCoreProperties.CREATOR));
        Assertions.assertEquals("EJ04325S", metadata.get(TikaCoreProperties.MODIFIER));
        Assertions.assertEquals("2011-08-22T13:30:53Z", metadata.get(TikaCoreProperties.CREATED));
        Assertions.assertEquals("2011-08-22T13:32:49Z", metadata.get(TikaCoreProperties.MODIFIED));
        Assertions.assertEquals("1", metadata.get(Office.SLIDE_COUNT));
        Assertions.assertEquals("3", metadata.get(Office.WORD_COUNT));
        Assertions.assertEquals("Test extraction properties pptx", metadata.get(TikaCoreProperties.TITLE));

        // Custom document properties
        Assertions.assertEquals("true", metadata.get("custom:myCustomBoolean"));
        Assertions.assertEquals("3", metadata.get("custom:myCustomNumber"));
        Assertions.assertEquals("MyStringValue", metadata.get("custom:MyCustomString"));
        Assertions.assertEquals("2010-12-30T22:00:00Z", metadata.get("custom:MyCustomDate"));
        Assertions.assertEquals("2010-12-29T22:00:00Z", metadata.get("custom:myCustomSecondDate"));
    }

    /**
     * Parses {@code testWORD_embedded_pdf.docx} into an XML serialisation and
     * checks that the two embedded-object placeholders ({@code rId5},
     * {@code rId6}) appear in document order between the surrounding text.
     */
    @Test
    public void testEmbeddedPDF() throws Exception {
        Metadata metadata = new Metadata();
        StringWriter writer = new StringWriter();

        // Serialise SAX events as non-indented XML into the writer.
        TransformerHandler handler =
                ((SAXTransformerFactory) SAXTransformerFactory.newInstance()).newTransformerHandler();
        handler.getTransformer().setOutputProperty("method", "xml");
        handler.getTransformer().setOutputProperty("indent", "no");
        handler.setResult(new StreamResult(writer));

        // try-with-resources closes the stream exactly once, also when parsing fails.
        try (InputStream stream = getResourceAsStream("/test-documents/testWORD_embedded_pdf.docx")) {
            new OOXMLParser().parse(stream, handler, metadata, new ParseContext());
        }

        String xml = writer.toString();
        int introPos = xml.indexOf("Here is the pdf file:");
        int firstEmbedPos = xml.indexOf("<div class=\"embedded\" id=\"rId5\"/>");
        int byePos = xml.indexOf("Bye Bye");
        int secondEmbedPos = xml.indexOf("<div class=\"embedded\" id=\"rId6\"/>");
        int finalPos = xml.indexOf("Bye for real.");

        // Every marker must be present...
        Assertions.assertTrue(introPos != -1);
        Assertions.assertTrue(firstEmbedPos != -1);
        Assertions.assertTrue(byePos != -1);
        Assertions.assertTrue(secondEmbedPos != -1);
        Assertions.assertTrue(finalPos != -1);

        // ...and in document order.
        Assertions.assertTrue(introPos < firstEmbedPos);
        Assertions.assertTrue(firstEmbedPos < byePos);
        Assertions.assertTrue(byePos < secondEmbedPos);
        Assertions.assertTrue(secondEmbedPos < finalPos);
    }

    /**
     * Checks that the XML produced for {@code test_embedded_zip.pptx} contains two embedded-object
     * placeholders and two text snippets, in a fixed relative order.
     */
    @Test
    public void testEmbeddedZipInPPTX() throws Exception {
        final String xml = getXML("test_embedded_zip.pptx").xml;

        // Positions of the markers, listed in the order they are expected to occur.
        final int[] positions = {
            xml.indexOf("<div class=\"embedded\" id=\"slide1_rId3\" />"),
            xml.indexOf("Send me a note"),
            xml.indexOf("<div class=\"embedded\" id=\"slide2_rId4\" />"),
            xml.indexOf("<p>No title</p>"),
        };

        // Every marker must be found ...
        for (int position : positions) {
            Assertions.assertTrue(position >= 0);
        }
        // ... and each one must come strictly before its successor.
        for (int k = 1; k < positions.length; k++) {
            Assertions.assertTrue(positions[k] > positions[k - 1]);
        }
    }

    /**
     * Checks that the XML produced for {@code testWORD_null_style.docx} contains the expected text.
     */
    @Test
    public void testWordNullStyle() throws Exception {
        final String xml = getXML("testWORD_null_style.docx").xml;
        assertContains("Test av styrt dokument", xml);
    }

    /**
     * Parses {@code testWORD_no_format.docx} with {@link OOXMLParser} and checks that the extracted
     * body text contains the expected sentence.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testNoFormat() throws Exception {
        BodyContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();

        // try-with-resources closes the stream exactly once, whether or not parsing throws.
        try (InputStream input = getResourceAsStream("/test-documents/testWORD_no_format.docx")) {
            new OOXMLParser().parse(input, handler, metadata, new ParseContext());
        }

        assertContains("This is a piece of text that causes an exception", handler.toString());
    }

    /**
     * Checks that the XML produced for {@code testWORD_text_box.docx} contains the plain body text
     * as well as the text-box sentences for body, header and footer.
     */
    @Test
    public void testTextInsideTextBox() throws Exception {
        final String xml = getXML("testWORD_text_box.docx").xml;
        final String[] expectedSentences = {
            "This text is directly in the body of the document.",
            "This text is inside of a text box in the body of the document.",
            "This text is inside of a text box in the header of the document.",
            "This text is inside of a text box in the footer of the document.",
        };
        for (String sentence : expectedSentences) {
            assertContains(sentence, xml);
        }
    }

    /**
     * Checks that the content-control text of {@code testWORD_sdtInTextBox.docx} is present and
     * that the fragment {@code inside-text} occurs exactly once in the XML.
     */
    @Test
    public void testSDTInTextBox() throws Exception {
        final String xml = getXML("testWORD_sdtInTextBox.docx").xml;
        assertContains("rich-text-content-control_inside-text-box", xml);
        // A count of one guards against the same text being emitted more than once.
        assertContainsCount("inside-text", xml, 1);
    }

    /**
     * Checks that with {@code setIncludeShapeBasedContent(false)} the XML for
     * {@code testWORD_text_box.docx} keeps the plain body text but omits the three text-box sentences.
     */
    @Test
    public void testTurningOffTextBoxExtraction() throws Exception {
        final OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeShapeBasedContent(false);
        final ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        final String xml = getXML("testWORD_text_box.docx", context).xml;

        assertContains("This text is directly in the body of the document.", xml);
        final String[] suppressedSentences = {
            "This text is inside of a text box in the body of the document.",
            "This text is inside of a text box in the header of the document.",
            "This text is inside of a text box in the footer of the document.",
        };
        for (String sentence : suppressedSentences) {
            assertNotContained(sentence, xml);
        }
    }

    /**
     * Checks that the XML for {@code testPPT_embedded_two_slides.pptx} contains an embedded-object
     * placeholder with relationship id {@code rId7} for both slide 1 and slide 2.
     */
    @Test
    public void testEmbeddedPPTXTwoSlides() throws Exception {
        final String xml = getXML("testPPT_embedded_two_slides.pptx").xml;
        final String[] placeholders = {
            "<div class=\"embedded\" id=\"slide1_rId7\" />",
            "<div class=\"embedded\" id=\"slide2_rId7\" />",
        };
        for (String placeholder : placeholders) {
            assertContains(placeholder, xml);
        }
    }

    /**
     * Checks the reported content type of {@code testWORD_missing_text.docx} and that three
     * expected text fragments are present in its XML.
     */
    @Test
    public void testMissingText() throws Exception {
        final TikaTest.XMLResult result = getXML("testWORD_missing_text.docx");
        final String contentType = result.metadata.get("Content-Type");
        Assertions.assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", contentType);

        for (String fragment : new String[]{"BigCompany", "Seasoned", "Rich_text_in_cell"}) {
            assertContains(fragment, result.xml);
        }
    }

    /**
     * Checks that the XML for {@code testEXCEL_textbox.xlsx} contains the text {@code some autoshape}.
     */
    @Test
    public void testExcelTextBox() throws Exception {
        final String xml = getXML("testEXCEL_textbox.xlsx").xml;
        assertContains("some autoshape", xml);
    }

    /**
     * Checks that with {@code setIncludeShapeBasedContent(false)} the XML for
     * {@code testEXCEL_textbox.xlsx} no longer contains the text {@code autoshape}.
     */
    @Test
    public void testTurningOffTextBoxExtractionExcel() throws Exception {
        final OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeShapeBasedContent(false);
        final ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        final String xml = getXML("testEXCEL_textbox.xlsx", context).xml;
        assertNotContained("autoshape", xml);
    }

    /**
     * Parses {@code testWORD_missing_ooxml_bean1.docx} while {@code System.err} is redirected to an
     * in-memory buffer and asserts that nothing was written to it.
     *
     * <p>The original {@code System.err} is restored in a {@code finally} block so that a parse
     * failure cannot leave the redirected stream installed for later tests.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testWordMissingOOXMLBeans() throws Exception {
        PrintStream originalErr = System.err;
        for (String fileName : new String[]{"testWORD_missing_ooxml_bean1.docx"}) {
            ByteArrayOutputStream captured = new ByteArrayOutputStream();
            System.setErr(new PrintStream(captured, true, StandardCharsets.UTF_8.name()));
            try {
                getXML(fileName);
            } finally {
                System.setErr(originalErr);
            }
            // Comparing against "" (rather than checking the length) shows the captured text on failure.
            Assertions.assertEquals("", captured.toString(StandardCharsets.UTF_8.name()),
                    "nothing should be written to System.err while parsing " + fileName);
        }
    }

    /**
     * Checks that the XML for {@code testPPT_autodate.pptx} contains the paragraph {@code Now}
     * directly followed by the paragraph holding the fixed date string.
     */
    @Test
    public void testPPTXAutodate() throws Exception {
        final String xml = getXML("testPPT_autodate.pptx").xml;
        final String expected = "<p>Now</p>\n<p>2011-12-19 10:20:04 AM</p>\n";
        assertContains(expected, xml);
    }

    /**
     * Checks that the XML for {@code testDOCX_Thumbnail.docx} contains the document text and,
     * after it, the embedded placeholder for {@code /docProps/thumbnail.emf}.
     */
    @Test
    public void testDOCXThumbnail() throws Exception {
        final String xml = getXML("testDOCX_Thumbnail.docx").xml;
        final int textAt = xml.indexOf("This file contains a thumbnail");
        final int thumbnailAt = xml.indexOf("<div class=\"embedded\" id=\"/docProps/thumbnail.emf\" />");

        // Both markers must exist, and the text must come first.
        Assertions.assertTrue(textAt >= 0);
        Assertions.assertTrue(thumbnailAt >= 0);
        Assertions.assertTrue(thumbnailAt > textAt);
    }

    /**
     * Checks that the XML for {@code testXLSX_Thumbnail.xlsx} contains the sheet text and,
     * after it, the embedded placeholder for {@code /docProps/thumbnail.wmf}.
     */
    @Test
    public void testXLSXThumbnail() throws Exception {
        final String xml = getXML("testXLSX_Thumbnail.xlsx").xml;
        final int textAt = xml.indexOf("This file contains an embedded thumbnail by default");
        final int thumbnailAt = xml.indexOf("<div class=\"embedded\" id=\"/docProps/thumbnail.wmf\" />");

        // Both markers must exist, and the text must come first.
        Assertions.assertTrue(textAt >= 0);
        Assertions.assertTrue(thumbnailAt >= 0);
        Assertions.assertTrue(thumbnailAt > textAt);
    }

    /**
     * Checks that the XML for {@code testPPTX_Thumbnail.pptx} opens its body with the slide text
     * and, after it, contains the embedded placeholder for {@code /docProps/thumbnail.jpeg}.
     */
    @Test
    public void testPPTXThumbnail() throws Exception {
        final String xml = getXML("testPPTX_Thumbnail.pptx").xml;
        final int slideTextAt = xml.indexOf("<body><div class=\"slide-content\"><p>This file contains an embedded thumbnail");
        final int thumbnailAt = xml.indexOf("<div class=\"embedded\" id=\"/docProps/thumbnail.jpeg\" />");

        // Both markers must exist, and the slide text must come first.
        Assertions.assertTrue(slideTextAt >= 0);
        Assertions.assertTrue(thumbnailAt >= 0);
        Assertions.assertTrue(thumbnailAt > slideTextAt);
    }

    /**
     * Exercises four password-protected OOXML files in two passes.
     *
     * <p>With a {@link PasswordProvider} returning {@code "tika"} in the parse context, each file
     * must yield XML containing its expected text. Parsed without any password provider, each file
     * must fail with an {@link EncryptedDocumentException}.
     *
     * @throws Exception if parsing with the password fails
     */
    @Test
    public void testEncrypted() throws Exception {
        Map<String, String> expectedTextByFile = new HashMap<>();
        expectedTextByFile.put("testWORD_protected_passtika.docx", "This is an encrypted Word 2007 File");
        expectedTextByFile.put("testPPT_protected_passtika.pptx", "This is an encrypted PowerPoint 2007 slide.");
        expectedTextByFile.put("testEXCEL_protected_passtika.xlsx", "This is an Encrypted Excel spreadsheet.");
        expectedTextByFile.put("testEXCEL_protected_passtika_2.xlsx", "This is an Encrypted Excel spreadsheet with a ChunkedCipherInputStream.");

        PasswordProvider passwordProvider = new PasswordProvider() {
            @Override
            public String getPassword(Metadata metadata) {
                return "tika";
            }
        };
        ParseContext contextWithPassword = new ParseContext();
        contextWithPassword.set(PasswordProvider.class, passwordProvider);

        for (Map.Entry<String, String> entry : expectedTextByFile.entrySet()) {
            assertContains(entry.getValue(), getXML(entry.getKey(), contextWithPassword).xml);
        }

        // Without a password every file must be rejected as encrypted.
        for (String fileName : expectedTextByFile.keySet()) {
            Assertions.assertThrows(EncryptedDocumentException.class, () -> getXML(fileName),
                    "expected EncryptedDocumentException for " + fileName);
        }
    }

    /**
     * Checks that the XML for {@code testWORD_numbered_list.docx} contains each of the expected
     * list-number-plus-text fragments.
     */
    @Test
    public void testDOCXParagraphNumbering() throws Exception {
        final String xml = getXML("testWORD_numbered_list.docx").xml;
        // Checked in this order; the first missing fragment fails the test.
        final String[] expectedFragments = {
            "1) This",
            "a) Is",
            "i) A multi",
            "ii) Level",
            "1. Within cell 1",
            "b. Cell b",
            "iii) List",
            "2) foo",
            "ii) baz",
            "ii) foo",
            "II. bar",
            "6. six",
            "7. seven",
            "a. seven a",
            "e. seven e",
            "2. A ii 2",
            "3. page break list 3",
            "Some-1-CrazyFormat Greek numbering with crazy format - alpha",
            "1.1.1. 1.1.1",
            "1.1. 1.2-&gt;1.1  //set the value",
        };
        for (String fragment : expectedFragments) {
            assertContains(fragment, xml);
        }
    }

    /**
     * Checks that the XML for {@code testWORD_override_list_numbering.docx} contains each of the
     * expected fragments, in which the rendered list number is followed by the same string as text.
     */
    @Test
    public void testDOCXOverrideParagraphNumbering() throws Exception {
        final String xml = getXML("testWORD_override_list_numbering.docx").xml;
        // Checked in this order; the first missing fragment fails the test.
        final String[] expectedFragments = {
            "<p>1.1.1.1...1 1.1.1.1...1</p>",
            "1st.2.3someText 1st.2.3someText",
            "1st.2.2someOtherText.1 1st.2.2someOtherText.1",
            "5th 5th",
            "1.a.I 1.a.I",
            "<p>1.b.III 1.b.III</p>",
            "2.a.I 2.a.I",
            "<p>2.b 2.b</p>",
            "(1)) (1))",
            "2.17 2.17",
            "2.18.2.1 2.18.2.1",
            "<p>2 2</p>",
            "<p>1 1</p>",
            "<p>A A</p>",
            "<p>B B</p>",
            "<p>C C</p>",
            "<p>4 4</p>",
            ">00 00",
            ">01 01",
            ">01. 01.",
            ">01..1 01..1",
            ">02 02",
        };
        for (String fragment : expectedFragments) {
            assertContains(fragment, xml);
        }
    }

    /**
     * Checks content type, title and creator metadata of {@code testEXCEL_headers_footers.xlsx},
     * and that its XML contains the cell text together with the header and footer strings.
     */
    @Test
    public void testExcelHeaderAndFooterExtraction() throws Exception {
        final TikaTest.XMLResult result = getXML("testEXCEL_headers_footers.xlsx");

        Assertions.assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", result.metadata.get("Content-Type"));
        Assertions.assertEquals("Internal spreadsheet", result.metadata.get(TikaCoreProperties.TITLE));
        Assertions.assertEquals("Aeham Abushwashi", result.metadata.get(TikaCoreProperties.CREATOR));

        final String xml = result.xml;
        final String[] expectedFragments = {
            "John Smith1",
            "John Smith50",
            "1 Corporate HQ",
            "Header - Corporate Spreadsheet",
            "Header - For Internal Use Only",
            "Header - Author: John Smith",
            "Footer - Corporate Spreadsheet",
            "Footer - For Internal Use Only",
            "Footer - Author: John Smith",
        };
        for (String fragment : expectedFragments) {
            assertContains(fragment, xml);
        }
    }

    /**
     * Checks that header and footer strings of {@code testEXCEL_headers_footers.xlsx} are omitted
     * when header/footer extraction is switched off, in two ways.
     *
     * <p>First an {@link OfficeParserConfig} with {@code setIncludeHeadersAndFooters(false)} is
     * passed in the parse context. Then a parser is built from
     * {@code tika-config-headers-footers.xml}; in that case the cell text must still be present.
     *
     * @throws Exception if the configuration cannot be loaded or parsing fails
     */
    @Test
    public void testExcelHeaderAndFooterNotExtraction() throws Exception {
        ParseContext context = new ParseContext();
        OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeHeadersAndFooters(false);
        context.set(OfficeParserConfig.class, config);

        String xml = getXML("testEXCEL_headers_footers.xlsx", context).xml;
        assertNotContained("Header - Corporate Spreadsheet", xml);
        assertNotContained("Header - For Internal Use Only", xml);
        assertNotContained("Header - Author: John Smith", xml);
        assertNotContained("Footer - Corporate Spreadsheet", xml);
        assertNotContained("Footer - For Internal Use Only", xml);
        assertNotContained("Footer - Author: John Smith", xml);

        // The config stream is only needed while the parser is being built, so it is closed
        // (exactly once) before the document is parsed.
        AutoDetectParser configuredParser;
        try (InputStream configStream = OfficeParserTest.class.getResourceAsStream("tika-config-headers-footers.xml")) {
            configuredParser = new AutoDetectParser(new TikaConfig(configStream));
        }

        String configuredXml = getXML("testEXCEL_headers_footers.xlsx", configuredParser).xml;
        assertContains("John Smith1", configuredXml);
        assertContains("John Smith50", configuredXml);
        assertContains("1 Corporate HQ", configuredXml);
        assertNotContained("Header - Corporate Spreadsheet", configuredXml);
        assertNotContained("Header - For Internal Use Only", configuredXml);
        assertNotContained("Header - Author: John Smith", configuredXml);
        assertNotContained("Footer - Corporate Spreadsheet", configuredXml);
        assertNotContained("Footer - For Internal Use Only", configuredXml);
        assertNotContained("Footer - Author: John Smith", configuredXml);
    }

    /**
     * Checks that {@code testWORD_multi_authors.docx} reports three creator values (the second
     * being {@code author2}) and exactly the two managers {@code manager1} and {@code manager2}.
     */
    @Test
    public void testMultiAuthorsManagers() throws Exception {
        final Metadata metadata = getXML("testWORD_multi_authors.docx").metadata;

        final String[] creators = metadata.getValues(TikaCoreProperties.CREATOR);
        Assertions.assertEquals(3, creators.length);
        Assertions.assertEquals("author2", creators[1]);

        final String[] managers = metadata.getValues(OfficeOpenXMLExtended.MANAGER);
        Assertions.assertEquals(2, managers.length);
        Assertions.assertEquals("manager1", managers[0]);
        Assertions.assertEquals("manager2", managers[1]);
    }

    /**
     * Checks that the XML for {@code testEXCEL_hyperlinks.xlsx} contains an anchor tag for each of
     * the four expected link targets.
     */
    @Test
    public void testHyperlinksInXLSX() throws Exception {
        final String xml = getXML("testEXCEL_hyperlinks.xlsx").xml;
        final String[] expectedAnchors = {
            "<a href=\"http://tika.apache.org/\">",
            "<a href=\"mailto:user@tika.apache.org?subject=help\">",
            "<a href=\"linked_file.txt.htm\">",
            "<a href=\"http://tika.apache.org/1.12/gettingstarted.html\">",
        };
        for (String anchor : expectedAnchors) {
            assertContains(anchor, xml);
        }
    }

    /**
     * Checks that the third metadata object (index 2) from the recursive parse of
     * {@code test_recursive_embedded.docx} lists both expected original resource paths.
     */
    @Test
    public void testOrigSourcePath() throws Exception {
        final Metadata embedded = (Metadata) getRecursiveMetadata("test_recursive_embedded.docx").get(2);
        final String[] originalNames = embedded.getValues(TikaCoreProperties.ORIGINAL_RESOURCE_NAME);

        assertContains("C:\\Users\\tallison\\AppData\\Local\\Temp\\embed1.zip", Arrays.asList(originalNames));
        assertContains("C:\\Users\\tallison\\Desktop\\tmp\\New folder (2)\\embed1.zip", Arrays.asList(originalNames));
    }

    /**
     * Checks how large integers in {@code testEXCEL_big_numbers.xlsx} are rendered: two values
     * appear as plain digits, and two adjacent cells appear in scientific notation using the
     * decimal separator of the current POI user locale.
     */
    @Test
    public void testBigIntegersWGeneralFormat() throws Exception {
        final String xml = getXML("testEXCEL_big_numbers.xlsx").xml;
        assertContains("123456789012345", xml);
        assertContains("123456789012346", xml);

        // The separator is locale dependent, so the expected cell text is built at run time.
        final DecimalFormatSymbols symbols = new DecimalFormatSymbols(LocaleUtil.getUserLocale());
        final StringBuilder expectedCells = new StringBuilder();
        expectedCells.append("1").append(symbols.getDecimalSeparator()).append("23456789012345E+15</td>\t<td>1");
        expectedCells.append(symbols.getDecimalSeparator()).append("23456789012345E+15");
        assertContains(expectedCells.toString(), xml);
    }

    /**
     * Repeats the big-number rendering checks for {@code testEXCEL_big_numbers.xlsx} with the POI
     * user locale set to {@link Locale#ITALIAN}.
     *
     * <p>The locale is reset to {@code USER_LOCALE} in a {@code finally} block, so the global
     * setting is restored on success and on failure alike.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testBigIntegersWGeneralFormatWLocaleIT() throws Exception {
        LocaleUtil.setUserLocale(Locale.ITALIAN);
        try {
            String xml = getXML("testEXCEL_big_numbers.xlsx").xml;
            assertContains("123456789012345", xml);
            assertContains("123456789012346", xml);

            // Build the expected cells with whatever separator the active user locale uses.
            char separator = new DecimalFormatSymbols(LocaleUtil.getUserLocale()).getDecimalSeparator();
            assertContains("1" + separator + "23456789012345E+15</td>\t<td>1" + separator + "23456789012345E+15", xml);
        } finally {
            LocaleUtil.setUserLocale(USER_LOCALE);
        }
    }

    /**
     * Checks that bold runs inside hyperlinks of {@code testWORD_boldHyperlink.docx} are emitted
     * as {@code <b>} elements nested inside the anchor.
     */
    @Test
    public void testBoldHyperlink() throws Exception {
        final String rawXml = getXML("testWORD_boldHyperlink.docx").xml;
        // Collapse every whitespace run to one space so line breaks in the output do not matter.
        final String normalizedXml = rawXml.replaceAll("\\s+", " ");

        assertContains("<a href=\"http://tika.apache.org/\">hyper <b>link</b></a>", normalizedXml);
        assertContains("<a href=\"http://tika.apache.org/\"><b>hyper</b> link</a>; bold", normalizedXml);
    }

    /**
     * Checks that {@code testWORD_totalTimeOutOfRange.docx} parses and that its XML contains the
     * text {@code bold}.
     */
    @Test
    public void testLongForIntExceptionInSummaryDetails() throws Exception {
        // NOTE(review): judging by the names, the file presumably carries an out-of-range
        // total-time property; that cannot be confirmed from this test alone.
        final String xml = getXML("testWORD_totalTimeOutOfRange.docx").xml;
        assertContains("bold", xml);
    }

    /**
     * Checks macro extraction for {@code testWORD_macros.docm} in three configurations.
     *
     * <p>By default no metadata object may have content type {@code text/x-vbasic}. With
     * {@code setExtractMacros(true)} in the parse context, and again with a parser built from
     * {@code tika-config-dom-macros.xml}, the recursive metadata must include an entry matching
     * the expected macro content, content type and embedded resource type.
     *
     * @throws Exception if the configuration cannot be loaded or parsing fails
     */
    @Test
    public void testMacrosInDocm() throws Exception {
        List<Metadata> defaultMetadata = getRecursiveMetadata("testWORD_macros.docm");
        for (Metadata candidate : defaultMetadata) {
            // Constant-first equals: an entry without a Content-Type must not cause an NPE.
            if ("text/x-vbasic".equals(candidate.get("Content-Type"))) {
                Assertions.fail("Shouldn't have extracted macros as default");
            }
        }

        ParseContext context = new ParseContext();
        OfficeParserConfig config = new OfficeParserConfig();
        config.setExtractMacros(true);
        context.set(OfficeParserConfig.class, config);

        Metadata minExpected = new Metadata();
        minExpected.add(TikaCoreProperties.TIKA_CONTENT.getName(), "Sub Embolden()");
        minExpected.add(TikaCoreProperties.TIKA_CONTENT.getName(), "Sub Italicize()");
        minExpected.add("Content-Type", "text/x-vbasic");
        minExpected.add(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
        assertContainsAtLeast(minExpected, getRecursiveMetadata("testWORD_macros.docm", context));

        // try-with-resources closes the config stream exactly once, even if an assertion fails.
        try (InputStream configStream = getResourceAsStream("tika-config-dom-macros.xml")) {
            AutoDetectParser configuredParser = new AutoDetectParser(new TikaConfig(configStream));
            assertContainsAtLeast(minExpected, getRecursiveMetadata("testWORD_macros.docm", configuredParser));
        }
    }

    /**
     * Checks macro extraction for {@code testPPT_macros.pptm} in three configurations.
     *
     * <p>By default no metadata object may have content type {@code text/x-vbasic}. With
     * {@code setExtractMacros(true)} in the parse context, and again with a parser built from
     * {@code tika-config-dom-macros.xml}, the recursive metadata must include an entry matching
     * the expected macro content, content type and embedded resource type.
     *
     * @throws Exception if the configuration cannot be loaded or parsing fails
     */
    @Test
    public void testMacrosInPptm() throws Exception {
        List<Metadata> defaultMetadata = getRecursiveMetadata("testPPT_macros.pptm");
        for (Metadata candidate : defaultMetadata) {
            // Constant-first equals: an entry without a Content-Type must not cause an NPE.
            if ("text/x-vbasic".equals(candidate.get("Content-Type"))) {
                Assertions.fail("Shouldn't have extracted macros as default");
            }
        }

        ParseContext context = new ParseContext();
        OfficeParserConfig config = new OfficeParserConfig();
        config.setExtractMacros(true);
        context.set(OfficeParserConfig.class, config);

        Metadata minExpected = new Metadata();
        minExpected.add(TikaCoreProperties.TIKA_CONTENT.getName(), "Sub Embolden()");
        minExpected.add(TikaCoreProperties.TIKA_CONTENT.getName(), "Sub Italicize()");
        minExpected.add("Content-Type", "text/x-vbasic");
        minExpected.add(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
        assertContainsAtLeast(minExpected, getRecursiveMetadata("testPPT_macros.pptm", context));

        // try-with-resources closes the config stream exactly once, even if an assertion fails.
        try (InputStream configStream = getResourceAsStream("tika-config-dom-macros.xml")) {
            AutoDetectParser configuredParser = new AutoDetectParser(new TikaConfig(configStream));
            assertContainsAtLeast(minExpected, getRecursiveMetadata("testPPT_macros.pptm", configuredParser));
        }
    }

    /**
     * Checks macro extraction for {@code testEXCEL_macro.xlsm} in three configurations.
     *
     * <p>By default no metadata object may have content type {@code text/x-vbasic}. With
     * {@code setExtractMacros(true)} in the parse context, and again with a parser built from
     * {@code tika-config-dom-macros.xml}, the recursive metadata must include an entry matching
     * the expected macro content, content type and embedded resource type.
     *
     * @throws Exception if the configuration cannot be loaded or parsing fails
     */
    @Test
    public void testMacroinXlsm() throws Exception {
        List<Metadata> defaultMetadata = getRecursiveMetadata("testEXCEL_macro.xlsm");
        for (Metadata candidate : defaultMetadata) {
            // Constant-first equals: an entry without a Content-Type must not cause an NPE.
            if ("text/x-vbasic".equals(candidate.get("Content-Type"))) {
                Assertions.fail("Shouldn't have extracted macros as default");
            }
        }

        ParseContext context = new ParseContext();
        OfficeParserConfig config = new OfficeParserConfig();
        config.setExtractMacros(true);
        context.set(OfficeParserConfig.class, config);

        Metadata minExpected = new Metadata();
        minExpected.add(TikaCoreProperties.TIKA_CONTENT.getName(), "Sub Dirty()");
        minExpected.add(TikaCoreProperties.TIKA_CONTENT.getName(), "dirty dirt dirt");
        minExpected.add("Content-Type", "text/x-vbasic");
        minExpected.add(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.MACRO.toString());
        assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xlsm", context));

        // try-with-resources closes the config stream exactly once, even if an assertion fails.
        try (InputStream configStream = getResourceAsStream("tika-config-dom-macros.xml")) {
            AutoDetectParser configuredParser = new AutoDetectParser(new TikaConfig(configStream));
            assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xlsm", configuredParser));
        }
    }

    /**
     * Manual timing run: parses every {@code .docx} file under {@code /test-documents} 100 times
     * with the SAX docx extractor enabled and an {@link EmptyParser} registered as the embedded
     * parser, then prints the elapsed time and the number of exceptions seen.
     *
     * <p>This method carries no {@code @Test} annotation, so JUnit does not run it automatically.
     *
     * @throws Exception if the test-documents directory cannot be resolved
     */
    public void testBatch() throws Exception {
        OfficeParserConfig config = new OfficeParserConfig();
        config.setUseSAXDocxExtractor(true);
        long start = System.currentTimeMillis();
        int exceptionCount = 0;
        for (int pass = 0; pass < 100; pass++) {
            File[] candidates = getResourceAsFile("/test-documents").listFiles();
            // listFiles() returns null when the path is not a readable directory.
            Assertions.assertNotNull(candidates, "could not list /test-documents");
            for (File file : candidates) {
                if (!file.getName().endsWith(".docx")) {
                    continue;
                }
                try (TikaInputStream stream = TikaInputStream.get(file)) {
                    ParseContext context = new ParseContext();
                    context.set(OfficeParserConfig.class, config);
                    context.set(Parser.class, new EmptyParser());
                    getXML(stream, AUTO_DETECT_PARSER, new Metadata(), context);
                } catch (Exception e) {
                    // Failures are tallied and reported in the summary line instead of aborting the run.
                    exceptionCount++;
                }
            }
        }
        System.out.println("elapsed: " + (System.currentTimeMillis() - start) + " with " + exceptionCount + " exceptions");
    }

    /**
     * Builds an {@link AutoDetectParser} from {@code tika-config-sax-docx.xml} and checks that
     * parsing {@code testWORD_2006ml.docx} with it yields XML containing {@code engaging title}.
     *
     * @throws Exception if the configuration cannot be loaded or parsing fails
     */
    @Test
    public void testInitializationViaConfig() throws Exception {
        // try-with-resources closes the config stream exactly once, even if an assertion fails.
        try (InputStream configStream = getResourceAsStream("/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml")) {
            Assertions.assertNotNull(configStream);
            AutoDetectParser configuredParser = new AutoDetectParser(new TikaConfig(configStream));
            assertContains("engaging title", getXML("testWORD_2006ml.docx", configuredParser, new Metadata()).xml);
        }
    }

    /**
     * Checks detection and parser routing for {@code testEXCEL.xlsb}.
     *
     * <p>The {@link DefaultDetector} must report the xlsb media type, {@link OfficeParser} must
     * not list that type as supported while {@link OOXMLParser} must, and the extracted text
     * (parsed with {@link Locale#US} in the context) must contain the expected sentence.
     *
     * @throws Exception if detection or parsing fails
     */
    @Test
    public void testExcelXLSB() throws Exception {
        DefaultDetector detector = new DefaultDetector();
        Metadata detectionMetadata = new Metadata();
        detectionMetadata.add("resourceName", "excel.xlsb");

        // The stream is only needed for detection; try-with-resources closes it exactly once.
        MediaType detected;
        try (InputStream input = getResourceAsStream("/test-documents/testEXCEL.xlsb")) {
            detected = detector.detect(input, detectionMetadata);
        }
        Assertions.assertEquals("application/vnd.ms-excel.sheet.binary.macroenabled.12", detected.toString());

        Assertions.assertFalse(new OfficeParser().getSupportedTypes(new ParseContext()).contains(detected));
        Assertions.assertTrue(new OOXMLParser().getSupportedTypes(new ParseContext()).contains(detected));

        ParseContext context = new ParseContext();
        context.set(Locale.class, Locale.US);
        assertContains("This is an example spreadsheet", getText("testEXCEL.xlsb", new Metadata(), context));
    }

    /**
     * Parses {@code testEXCEL_various.xlsb} recursively with macro extraction enabled, expects
     * four metadata objects, and checks that the content of the first one contains the expected
     * cell values, comments, shapes text, hyperlinks and header/footer strings.
     */
    @Test
    public void testXLSBVarious() throws Exception {
        final OfficeParserConfig config = new OfficeParserConfig();
        config.setExtractMacros(true);
        final ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        final List metadataList = getRecursiveMetadata("testEXCEL_various.xlsb", context);
        Assertions.assertEquals(4, metadataList.size());

        final Metadata container = (Metadata) metadataList.get(0);
        final String content = container.get(TikaCoreProperties.TIKA_CONTENT);
        // Checked in this order; the first missing fragment fails the test.
        final String[] expectedFragments = {
            "<td>13</td>",
            "<td>13.1211231321</td>",
            "<td>$   3.03</td>",
            "<td>20%</td>",
            "<td>13.12</td>",
            "<td>123456789012345</td>",
            "<td>1.23456789012345E+15</td>",
            "test comment2",
            "comment4 (end of row)",
            "<td>1/4</td>",
            "<td>3/9/17</td>",
            "<td>4</td>",
            "<td>2</td>",
            "<td>   46/1963</td>",
            "<td>  3/128</td>",
            "test textbox",
            "test WordArt",
            "<a href=\"http://lucene.apache.org/\">http://lucene.apache.org/</a>",
            "<a href=\"http://tika.apache.org/\">http://tika.apache.org/</a>",
            "OddLeftHeader OddCenterHeader OddRightHeader",
            "EvenLeftHeader EvenCenterHeader EvenRightHeader",
            "FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader",
            "OddLeftFooter OddCenterFooter OddRightFooter",
            "EvenLeftFooter EvenCenterFooter EvenRightFooter",
            "FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter",
        };
        for (String fragment : expectedFragments) {
            assertContains(fragment, content);
        }
    }

    /**
     * Checks that with {@code setIncludeHeadersAndFooters(false)} none of the six header/footer
     * strings appear in the XML for {@code testEXCEL_various.xlsb}.
     */
    @Test
    public void testXLSBNoHeaderFooters() throws Exception {
        final OfficeParserConfig config = new OfficeParserConfig();
        config.setIncludeHeadersAndFooters(false);
        final ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        final String xml = getXML("testEXCEL_various.xlsb", context).xml;
        final String[] suppressed = {
            "OddLeftHeader OddCenterHeader OddRightHeader",
            "EvenLeftHeader EvenCenterHeader EvenRightHeader",
            "FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader",
            "OddLeftFooter OddCenterFooter OddRightFooter",
            "EvenLeftFooter EvenCenterFooter EvenRightFooter",
            "FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter",
        };
        for (String headerOrFooter : suppressed) {
            assertNotContained(headerOrFooter, xml);
        }
    }

    /**
     * Checks that every {@code <h1>SheetN</h1>} heading in the XML for
     * {@code testEXCEL_poi-61034.xlsx} occurs at most once.
     *
     * @throws Exception if parsing fails
     */
    @Test
    public void testPOI61034() throws Exception {
        Matcher sheetHeadings = Pattern.compile("<h1>(Sheet\\d+)</h1>").matcher(getXML("testEXCEL_poi-61034.xlsx").xml);
        HashSet<String> seenSheets = new HashSet<>();
        while (sheetHeadings.find()) {
            String sheetName = sheetHeadings.group(1);
            // add() returns false when the name is already present, i.e. the heading is a duplicate.
            if (!seenSheets.add(sheetName)) {
                Assertions.fail("Should only see each sheet once: " + sheetName);
            }
        }
    }

    /**
     * Checks the {@code ORIGINAL_RESOURCE_NAME} metadata value reported for
     * {@code testEXCEL_diagramData.xlsb}.
     */
    @Test
    public void testXLSBOriginalPath() throws Exception {
        final Metadata metadata = getXML("testEXCEL_diagramData.xlsb").metadata;
        final String originalPath = metadata.get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME);
        Assertions.assertEquals("C:\\Users\\tallison\\Desktop\\working\\TIKA-1945\\", originalPath);
    }

    /**
     * Checks that the original resource name recorded in the metadata of
     * {@code testEXCEL_diagramData.xlsx} is the expected Windows directory path.
     */
    @Test
    public void testXLSXOriginalPath() throws Exception {
        String originalPath = getXML("testEXCEL_diagramData.xlsx").metadata
                .get(TikaCoreProperties.ORIGINAL_RESOURCE_NAME);
        Assertions.assertEquals("C:\\Users\\tallison\\Desktop\\working\\TIKA-1945\\", originalPath);
    }

    /** Checks that the XML for {@code testEXCEL_diagramData.xlsb} contains the text "SmartArt". */
    @Test
    public void testXLSBDiagramData() throws Exception {
        String xml = getXML("testEXCEL_diagramData.xlsb").xml;
        assertContains("SmartArt", xml);
    }

    /** Checks that the XML for {@code testEXCEL_diagramData.xlsx} contains the text "SmartArt". */
    @Test
    public void testXLSXDiagramData() throws Exception {
        String xml = getXML("testEXCEL_diagramData.xlsx").xml;
        assertContains("SmartArt", xml);
    }

    /** Checks that the XML for {@code testWORD_diagramData.docx} contains the text "From here". */
    @Test
    public void testDOCXDiagramData() throws Exception {
        String xml = getXML("testWORD_diagramData.docx").xml;
        assertContains("From here", xml);
    }

    /** Checks that the XML for {@code testPPT_diagramData.pptx} contains the text "President". */
    @Test
    public void testPPTXDiagramData() throws Exception {
        String xml = getXML("testPPT_diagramData.pptx").xml;
        assertContains("President", xml);
    }

    /**
     * Checks that the XML for {@code testEXCEL_charts.xlsx} contains the chart text
     * ("peach", "March\tApril") and does not contain the string "chartSpace".
     */
    @Test
    public void testXLSXChartData() throws Exception {
        String xml = getXML("testEXCEL_charts.xlsx").xml;
        for (String expected : new String[] {"peach", "March\tApril"}) {
            assertContains(expected, xml);
        }
        assertNotContained("chartSpace", xml);
    }

    /**
     * Checks that the XML for {@code testEXCEL_charts.xlsb} contains the chart text
     * ("peach", "March\tApril") and does not contain the string "chartSpace".
     */
    @Test
    public void testXLSBChartData() throws Exception {
        String xml = getXML("testEXCEL_charts.xlsb").xml;
        for (String expected : new String[] {"peach", "March\tApril"}) {
            assertContains(expected, xml);
        }
        assertNotContained("chartSpace", xml);
    }

    /**
     * Checks that the XML for {@code testWORD_charts.docx} contains the chart text
     * ("peach", "March\tApril") and does not contain the string "chartSpace".
     */
    @Test
    public void testDOCXChartData() throws Exception {
        String xml = getXML("testWORD_charts.docx").xml;
        for (String expected : new String[] {"peach", "March\tApril"}) {
            assertContains(expected, xml);
        }
        assertNotContained("chartSpace", xml);
    }

    /**
     * Checks that the XML for {@code testPPT_charts.pptx} contains the chart text
     * ("peach", "March\tApril") and does not contain the string "chartSpace".
     */
    @Test
    public void testPPTXChartData() throws Exception {
        String xml = getXML("testPPT_charts.pptx").xml;
        for (String expected : new String[] {"peach", "March\tApril"}) {
            assertContains(expected, xml);
        }
        assertNotContained("chartSpace", xml);
    }

    /**
     * Recursively parses {@code testPPT_groups.pptx} and checks that three metadata objects
     * are returned, that the content of the first one contains the expected text-box,
     * WordArt, hyperlink and title strings, and that the second and third carry the expected
     * embedded resource paths.
     */
    @Test
    public void testPPTXGroups() throws Exception {
        List<Metadata> metadataList = getRecursiveMetadata("testPPT_groups.pptx");
        Assertions.assertEquals(3, metadataList.size());

        String content = metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT);
        assertContains("WordArt1", content);
        assertContains("WordArt2", content);
        // This text must appear exactly once in the content.
        assertContainsCount("Ungrouped text box", content, 1);
        assertContains("Text box1", content);
        assertContains("Text box2", content);
        assertContains("Text box3", content);
        assertContains("Text box4", content);
        assertContains("Text box5", content);
        assertContains("href=\"http://tika.apache.org", content);
        assertContains("smart1", content);
        assertContains("MyTitle", content);

        Assertions.assertEquals("/image1.jpg",
                metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH));
        Assertions.assertEquals("/thumbnail.jpeg",
                metadataList.get(2).get(TikaCoreProperties.EMBEDDED_RESOURCE_PATH));
    }

    /**
     * Checks phonetic-run handling for {@code testEXCEL_phonetic.xlsx}: with the default
     * parse the concatenated phonetic string is present; it is absent when
     * {@code concatenatePhoneticRuns} is disabled through the {@link ParseContext}, and also
     * absent when parsing with a parser built from {@code tika-config-exclude-phonetic.xml}.
     */
    @Test
    public void testXLSXPhoneticStrings() throws Exception {
        assertContains("日本オラクル ニホン", getXML("testEXCEL_phonetic.xlsx").xml);

        // Disable phonetic concatenation programmatically.
        OfficeParserConfig officeParserConfig = new OfficeParserConfig();
        officeParserConfig.setConcatenatePhoneticRuns(false);
        ParseContext parseContext = new ParseContext();
        parseContext.set(OfficeParserConfig.class, officeParserConfig);
        assertNotContained("日本オラクル ニホン", getXML("testEXCEL_phonetic.xlsx", parseContext).xml);

        // Same check using a parser built from the config file; the config stream is
        // closed by try-with-resources instead of being left open.
        try (InputStream configStream =
                OfficeParser.class.getResourceAsStream("tika-config-exclude-phonetic.xml")) {
            AutoDetectParser configuredParser = new AutoDetectParser(new TikaConfig(configStream));
            assertNotContained("日本オラクル ニホン",
                    getXML("testEXCEL_phonetic.xlsx", configuredParser).xml);
        }
    }

    /**
     * Checks phonetic-run handling for {@code testWORD_phonetic.docx}: the default parse
     * contains the base text followed by its phonetic reading in parentheses, while a parse
     * with {@code concatenatePhoneticRuns} disabled keeps the base text but not the reading.
     */
    @Test
    public void testDOCXPhoneticStrings() throws Exception {
        String defaultXml = getXML("testWORD_phonetic.docx").xml;
        assertContains("東京 (とうきょう)", defaultXml);

        OfficeParserConfig config = new OfficeParserConfig();
        config.setConcatenatePhoneticRuns(false);
        ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        String xmlWithoutPhonetics = getXML("testWORD_phonetic.docx", context).xml;
        assertContains("東京", xmlWithoutPhonetics);
        assertNotContained("と", xmlWithoutPhonetics);
    }

    /**
     * Recursively parses {@code testPPT_embeddedMP3.pptx} and checks that four metadata
     * objects are returned with the expected {@code Content-Type} values, in order:
     * the presentation itself, an MPEG audio file, a PNG image and a JPEG image.
     */
    @Test
    public void testEmbeddedMedia() throws Exception {
        List<Metadata> metadataList = getRecursiveMetadata("testPPT_embeddedMP3.pptx");
        Assertions.assertEquals(4, metadataList.size());
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                metadataList.get(0).get("Content-Type"));
        Assertions.assertEquals("audio/mpeg", metadataList.get(1).get("Content-Type"));
        Assertions.assertEquals("image/png", metadataList.get(2).get("Content-Type"));
        Assertions.assertEquals("image/jpeg", metadataList.get(3).get("Content-Type"));
    }

    /**
     * Recursively parses {@code testPPT_oleWorkbook.pptx} and checks that four metadata
     * objects are returned and that the third one is an XLSX workbook whose content
     * contains the "Sheet1" heading and a cell with the value 1.
     */
    @Test
    public void testEmbeddedXLSInOLEObject() throws Exception {
        List<Metadata> metadataList = getRecursiveMetadata("testPPT_oleWorkbook.pptx");
        Assertions.assertEquals(4, metadataList.size());

        Metadata workbook = metadataList.get(2);
        assertContains("<h1>Sheet1</h1>", workbook.get(TikaCoreProperties.TIKA_CONTENT));
        assertContains("<td>1</td>", workbook.get(TikaCoreProperties.TIKA_CONTENT));
        Assertions.assertEquals(
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                workbook.get("Content-Type"));
    }

    /**
     * Checks that the {@code HAS_SIGNATURE} metadata value is "true" for the signed DOCX,
     * XLSX and PPTX test files.
     */
    @Test
    public void testSigned() throws Exception {
        String[] signedFiles = {"testWORD_signed.docx", "testEXCEL_signed.xlsx", "testPPT_signed.pptx"};
        for (String signedFile : signedFiles) {
            String hasSignature = getXML(signedFile).metadata.get(TikaCoreProperties.HAS_SIGNATURE);
            Assertions.assertEquals("true", hasSignature);
        }
    }

    /**
     * Checks that recursively parsing {@code testWORD_truncated.docx} with the SAX docx
     * extractor enabled throws a {@link TikaException}.
     */
    @Test
    public void testTruncatedSAXDocx() throws Exception {
        OfficeParserConfig config = new OfficeParserConfig();
        config.setUseSAXDocxExtractor(true);
        ParseContext context = new ParseContext();
        context.set(OfficeParserConfig.class, config);

        Assertions.assertThrows(TikaException.class,
                () -> getRecursiveMetadata("testWORD_truncated.docx", context));
    }

    /**
     * Parses {@code testEXCEL_dateFormats.xlsx} with a parser built from
     * {@code tika-config-custom-date-override.xml} and checks that the two expected
     * ISO-style dates appear in the XML output.
     */
    @Test
    public void testDateFormat() throws Exception {
        // try-with-resources closes the config stream on success and on failure,
        // replacing the hand-written close/addSuppressed boilerplate.
        try (InputStream configStream = getResourceAsStream("tika-config-custom-date-override.xml")) {
            AutoDetectParser parser = new AutoDetectParser(new TikaConfig(configStream));
            String xml = getXML("testEXCEL_dateFormats.xlsx", parser).xml;
            assertContains("2018-09-20", xml);
            assertContains("1996-08-10", xml);
        }
    }

    /**
     * Checks the {@code DOC_SECURITY_STRING} metadata value of the container document:
     * "PasswordProtected" for {@code protectedFile.xlsx} and "ReadOnlyEnforced" for
     * {@code testWORD_docSecurity.docx}.
     */
    @Test
    public void testDocSecurity() throws Exception {
        List<Metadata> protectedXlsx = getRecursiveMetadata("protectedFile.xlsx");
        Assertions.assertEquals("PasswordProtected",
                protectedXlsx.get(0).get(OfficeOpenXMLExtended.DOC_SECURITY_STRING));

        List<Metadata> readOnlyDocx = getRecursiveMetadata("testWORD_docSecurity.docx");
        Assertions.assertEquals("ReadOnlyEnforced",
                readOnlyDocx.get(0).get(OfficeOpenXMLExtended.DOC_SECURITY_STRING));
    }

    /**
     * Runs the inherited multi-threaded test harness with a {@link RecursiveParserWrapper}
     * around {@code AUTO_DETECT_PARSER}, five separate {@link ParseContext} instances, and a
     * file filter that accepts only files whose lower-cased extension is one of
     * .pptx, .docx, .xlsx, .ppt, .doc or .xls.
     */
    @Test
    public void testMultiThreaded() throws Exception {
        // Each slot gets its own ParseContext instance.
        ParseContext[] parseContexts = new ParseContext[5];
        for (int i = 0; i < parseContexts.length; i++) {
            parseContexts[i] = new ParseContext();
        }

        HashSet<String> officeExtensions = new HashSet<>(
                Arrays.asList(".pptx", ".docx", ".xlsx", ".ppt", ".doc", ".xls"));

        // NOTE(review): the two 5s are presumably thread and iteration counts -- confirm
        // against the testMultiThreaded helper's signature.
        testMultiThreaded(new RecursiveParserWrapper(AUTO_DETECT_PARSER), parseContexts, 5, 5, file -> {
            String fileName = file.getName().toLowerCase(Locale.ENGLISH);
            int dotIndex = fileName.lastIndexOf('.');
            // Files without a dot map to "" which is never in the set.
            String extension = dotIndex > -1 ? fileName.substring(dotIndex) : "";
            return officeExtensions.contains(extension);
        });
    }
}
