package org.apache.tika.parser.html;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.apache.tika.metadata.Metadata;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/tika/parser/html/HtmlEncodingDetectorTest.class */
/**
 * Tests for {@link HtmlEncodingDetector}.
 *
 * <p>Each test feeds a small HTML snippet (encoded as UTF-8 bytes) to the detector and checks
 * which charset it reports. Most snippets contain one genuine {@code <meta charset=...>}
 * declaration for WINDOWS-1252, optionally preceded by markup that mentions UTF-8 but is
 * expected to be ignored.
 *
 * <p>Several tests are {@code @Disabled}; the annotation text records why (either the detector
 * would need a full parse, or it has not been shown that the lenient behavior harms detection).
 */
public class HtmlEncodingDetectorTest {
    /** Expected charset for most snippets; resolved once rather than on every assertion. */
    private static final Charset WINDOWS_1252 = Charset.forName("WINDOWS-1252");

    /** A plain {@code <meta charset>} declaration is detected. */
    @Test
    public void basic() throws IOException {
        assertWindows1252("<meta charset='WINDOWS-1252'>");
    }

    /**
     * A declared UTF-16 charset is expected to be reported as UTF-8, following the HTML
     * Standard's encoding prescan rule for UTF-16 declarations.
     */
    @Disabled("until we can prove this harms detection")
    @Test
    public void utf16() throws IOException {
        assertCharset("<meta charset='UTF-16BE'>", StandardCharsets.UTF_8);
    }

    /**
     * A declared {@code x-user-defined} charset is expected to be reported as WINDOWS-1252,
     * following the HTML Standard's encoding prescan rule for that label.
     */
    @Test
    public void xUserDefined() throws IOException {
        assertWindows1252("<meta charset='x-user-defined'>");
    }

    /** A slash between the tag name and the attribute does not hide the declaration. */
    @Test
    public void withSlash() throws IOException {
        assertWindows1252("<meta/charset='WINDOWS-1252'>");
    }

    /** Text that merely looks like a charset declaration inside an attribute value is ignored. */
    @Disabled("until we do a full parse")
    @Test
    public void insideTag() throws IOException {
        assertWindows1252(
                "<meta name='description'"
                        + "content='If I write charset=UTF-8 here, it doesnt mean the page is in UTF-8'/>"
                        + "<meta charset='WINDOWS-1252'>");
    }

    /** A {@code content='charset=...'} attribute on a meta tag without http-equiv is ignored. */
    @Disabled("until we do a full parse")
    @Test
    public void missingAttribute() throws IOException {
        assertWindows1252(
                "<meta content='charset=UTF-8'>" // no http-equiv attribute on this tag
                        + "<meta charset='WINDOWS-1252'>"); // the declaration expected to win
    }

    /** A meta tag nested directly after {@code <?}, {@code <!} or {@code </} is ignored. */
    @Disabled("until we do a full parse")
    @Test
    public void insideSpecialTag() throws IOException {
        for (char opener : "?!/".toCharArray()) {
            assertWindows1252(
                    "<" + opener // opens the special construct
                            + "<meta charset='UTF-8'>" // nested inside it
                            + "<meta charset='WINDOWS-1252'>"); // the declaration expected to win
        }
    }

    /** A tag written with a space after {@code <} is not treated as a meta declaration. */
    @Disabled("until we can prove this harms detection")
    @Test
    public void spaceBeforeTag() throws IOException {
        assertWindows1252(
                "< meta charset='UTF-8'>" // space between '<' and the tag name
                        + "<meta charset='WINDOWS-1252'>"); // the declaration expected to win
    }

    /** An attribute whose name only ends in "charset" is not mistaken for the real one. */
    @Test
    public void invalidAttribute() throws IOException {
        assertWindows1252(
                "<meta "
                        + "badcharset='UTF-8' " // not a charset attribute
                        + "charset='WINDOWS-1252'>"); // the declaration expected to win
    }

    /** A charset value with an unbalanced quote inside the content attribute is ignored. */
    @Disabled("until we can prove this harms detection")
    @Test
    public void unmatchedQuote() throws IOException {
        assertWindows1252(
                "<meta http-equiv='content-type' content='charset=\"UTF-8'>" // lone double quote
                        + "<meta charset='WINDOWS-1252'>"); // the declaration expected to win
    }

    /**
     * A declaration inside a regular comment is ignored, and the compact {@code <!-->} form that
     * follows does not swallow the genuine declaration after it.
     */
    @Disabled("until we do a full parse")
    @Test
    public void withCompactComment() throws IOException {
        assertWindows1252(
                "<!--" // comment start
                        + "<meta charset='UTF-8'>" // inside the comment
                        + "-->" // comment end
                        + "<!-->" // compact comment
                        + "<meta charset='WINDOWS-1252'>"); // outside any comment
    }

    /**
     * Asserts that the detector reports WINDOWS-1252 for the given markup.
     *
     * @param str HTML snippet to run detection on
     * @throws IOException if the detector fails while reading the input
     */
    private void assertWindows1252(String str) throws IOException {
        assertCharset(str, WINDOWS_1252);
    }

    /**
     * Asserts that the detector reports {@code charset} for the given markup.
     *
     * @param str HTML snippet to run detection on
     * @param charset charset the detector is expected to return
     * @throws IOException if the detector fails while reading the input
     */
    private void assertCharset(String str, Charset charset) throws IOException {
        Assertions.assertEquals(charset, detectCharset(str), str + " should be detected as " + charset);
    }

    /**
     * Runs a fresh {@link HtmlEncodingDetector} over the UTF-8 bytes of {@code str} with an
     * empty {@link Metadata} instance.
     *
     * @param str HTML snippet to run detection on
     * @return whatever charset the detector returns for the snippet
     * @throws IOException if the detector fails while reading the input
     */
    private Charset detectCharset(String str) throws IOException {
        Metadata metadata = new Metadata();
        byte[] html = str.getBytes(StandardCharsets.UTF_8);
        return new HtmlEncodingDetector().detect(new ByteArrayInputStream(html), metadata);
    }
}
