package com.qwazr.utils;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.commons.text.WordUtils;
import org.w3c.dom.Node;

/* loaded from: input_file:com/qwazr/utils/HtmlUtils.class */
/**
 * Static helpers for turning HTML markup or DOM trees into plain text and for
 * inserting soft-hyphen break opportunities into long strings.
 *
 * <p>All compiled {@link Pattern} instances are immutable and safe to share between
 * threads, so no locking is performed around them.</p>
 */
public class HtmlUtils {

    /**
     * Names of the elements whose end marks a sentence boundary. This single list feeds
     * both the end-tag regular expressions and {@link #sentenceTagSet}.
     */
    private static final String[] BLOCK_TAGS = {
            "p", "td", "div", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "li", "option", "pre", "select",
            "table", "tbody", "textarea", "tfoot", "thead", "th", "title", "tr", "ul"
    };

    /** {@link #BLOCK_TAGS} joined as a regular expression alternation. */
    private static final String BLOCK_TAG_ALTERNATION = String.join("|", BLOCK_TAGS);

    /** Unicode SOFT HYPHEN, used as the temporary segment separator in {@link #htmlWrapReduce}. */
    private static final char SOFT_HYPHEN = '\u00ad';

    /** Unicode HORIZONTAL ELLIPSIS, inserted by {@link #htmlWrapReduce} after the first segment. */
    private static final char ELLIPSIS = '\u2026';

    /** Matches any tag-like sequence {@code <...>}. */
    private static final Pattern removeTagPattern = Pattern.compile("<[^>]*>");

    /** A period, optional white space, then a {@code <br>} tag. */
    private static final Pattern removeBrPattern1 =
            Pattern.compile("\\.\\p{Space}*<br\\p{Space}*/?>", Pattern.CASE_INSENSITIVE);

    /** A period, optional white space, then the end tag of a block element. */
    private static final Pattern removeEndTagBlockPattern1 =
            Pattern.compile("\\.\\p{Space}*</(" + BLOCK_TAG_ALTERNATION + ")>", Pattern.CASE_INSENSITIVE);

    /** The end tag of a block element. */
    private static final Pattern removeEndTagBlockPattern2 =
            Pattern.compile("</(" + BLOCK_TAG_ALTERNATION + ")>", Pattern.CASE_INSENSITIVE);

    /** A {@code <br>} tag. */
    private static final Pattern removeBrPattern2 =
            Pattern.compile("<br\\p{Space}*/?>", Pattern.CASE_INSENSITIVE);

    /**
     * A whole {@code script}, {@code object} or {@code style} element including its content.
     * The content is matched lazily with DOTALL so that bodies containing {@code <} or line
     * breaks are still removed; the back-reference forces the end tag to match the start tag.
     */
    private static final Pattern removeScriptObjectStylePattern =
            Pattern.compile("<(script|object|style)\\b[^>]*>.*?</\\1\\s*>",
                    Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Lower-case element names after which the accumulated text is flushed to the consumer. */
    private static final Set<String> sentenceTagSet = new HashSet<>(Arrays.asList(BLOCK_TAGS));

    /** Lower-case element names whose whole subtree is ignored by the DOM text extractor. */
    private static final Set<String> excludedTagSet = new HashSet<>(Arrays.asList("script", "style", "object"));

    /** Recursion budget used by {@link #domTextExtractor(Node, Consumer)}. */
    public static final int DEFAULT_MAX_RECURSION = 256;

    /**
     * Strips all markup from an HTML fragment.
     *
     * <p>Script, object and style elements are removed together with their content. Line breaks
     * and block-level end tags are turned into {@code ". "} so that the text of consecutive
     * blocks does not run together; a period that already precedes such a tag is not doubled.
     * All remaining tags are then deleted. {@code StringUtils.replaceConsecutiveSpaces} is
     * applied before and after the tag processing.</p>
     *
     * @param str the HTML text, may be {@code null} or empty
     * @return the text without tags, or {@code str} itself when it is {@code null} or empty
     */
    public static final String removeTag(String str) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        String text = StringUtils.replaceConsecutiveSpaces(str, " ");
        text = removeScriptObjectStylePattern.matcher(text).replaceAll("");
        // A period followed by a break or block end is first normalised to "</p>" (dropping
        // the period) so that the next step emits a single ". " instead of ".. ".
        text = removeBrPattern1.matcher(text).replaceAll("</p>");
        text = removeEndTagBlockPattern1.matcher(text).replaceAll("</p>");
        text = removeEndTagBlockPattern2.matcher(text).replaceAll(". ");
        text = removeBrPattern2.matcher(text).replaceAll(". ");
        text = removeTagPattern.matcher(text).replaceAll("");
        return StringUtils.replaceConsecutiveSpaces(text, " ");
    }

    /**
     * Removes every tag from the text except those listed in {@code strArr}.
     *
     * <p>A tag is kept only when its complete text (for example {@code "<b>"} or
     * {@code "</b>"}) is equal to one of the entries. When {@code strArr} is {@code null}
     * every tag is removed and {@code StringUtils.replaceConsecutiveSpaces} is applied to the
     * input first.</p>
     *
     * @param str    the HTML text, may be {@code null} or empty
     * @param strArr the exact tag strings to preserve, or {@code null} to remove all tags
     * @return the filtered text, or {@code str} itself when it is {@code null} or empty
     */
    public static final String removeTag(String str, String[] strArr) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        if (strArr == null) {
            str = StringUtils.replaceConsecutiveSpaces(str, " ");
        }
        // StringBuffer is kept because Matcher.appendReplacement(StringBuilder, ...) needs Java 9+.
        final StringBuffer result = new StringBuffer();
        final Matcher matcher = removeTagPattern.matcher(str);
        while (matcher.find()) {
            final String tag = matcher.group();
            // The kept tag must be quoted: '$' and '\' are otherwise interpreted as
            // group references / escapes by appendReplacement.
            matcher.appendReplacement(result, isAllowedTag(tag, strArr) ? Matcher.quoteReplacement(tag) : "");
        }
        matcher.appendTail(result);
        return result.toString();
    }

    /** Tells whether {@code tag} is equal to one of the entries of {@code allowedTags}. */
    private static boolean isAllowedTag(String tag, String[] allowedTags) {
        if (allowedTags == null) {
            return false;
        }
        for (String allowedTag : allowedTags) {
            if (tag.equals(allowedTag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Inserts {@code &shy;} entities as break opportunities into a long text.
     *
     * <p>Existing {@code &shy;} entities are removed first, then the text is wrapped with
     * {@code WordUtils.wrap} at {@code i} characters (long words are broken) using
     * {@code &shy;} as the line separator.</p>
     *
     * @param str the text, may be {@code null} or empty
     * @param i   the wrap length
     * @return the wrapped text, or {@code str} unchanged when it is empty or shorter than {@code i}
     */
    public static final String htmlWrap(String str, int i) {
        if (StringUtils.isEmpty(str) || str.length() < i) {
            return str;
        }
        return WordUtils.wrap(StringUtils.replace(str, "&shy;", ""), i, "&shy;", true);
    }

    /**
     * Shortens a long text to its first wrapped segment, an ellipsis, and as many trailing
     * segments as fit.
     *
     * <p>The text (without {@code &shy;} entities) is wrapped at {@code i} characters and
     * split into segments. The first segment is always kept. Segments are then taken from the
     * end, walking backwards, while the running total of the first segment plus the visited
     * segments stays below {@code i2}; the segment that reaches the limit is dropped together
     * with everything before it. The result is {@code first + '…' + keptTail}.</p>
     *
     * @param str the text, may be {@code null} or empty
     * @param i   the wrap length used to cut the text into segments
     * @param i2  the length from which the text is reduced, also the budget for kept segments
     * @return the reduced text, or {@code str} unchanged when it is empty, shorter than
     *         {@code i2}, or yields no segment at all
     */
    public static final String htmlWrapReduce(String str, int i, int i2) {
        if (StringUtils.isEmpty(str) || str.length() < i2) {
            return str;
        }
        final String wrapped =
                WordUtils.wrap(StringUtils.replace(str, "&shy;", ""), i, String.valueOf(SOFT_HYPHEN), true);
        final String[] segments = StringUtils.split(wrapped, SOFT_HYPHEN);
        if (segments == null || segments.length == 0) {
            // Nothing left to reduce (e.g. the input only held soft hyphens).
            return str;
        }
        final StringBuilder tail = new StringBuilder();
        int total = segments[0].length();
        for (int idx = segments.length - 1; idx > 0; idx--) {
            final String segment = segments[idx];
            total += segment.length();
            if (total >= i2) {
                break;
            }
            tail.insert(0, segment);
        }
        return segments[0] + ELLIPSIS + tail;
    }

    /**
     * Recursive worker of the DOM text extraction.
     *
     * <p>Comment nodes and the subtrees of {@code script}, {@code style} and {@code object}
     * elements are skipped. The content of text nodes is normalised and appended to
     * {@code stringBuffer}, separated by a single space. After the children of a
     * sentence-level element (see {@link #sentenceTagSet}) have been visited, the non-empty
     * buffer is passed to {@code consumer} and reset.</p>
     *
     * @param node         the node to visit
     * @param i            the remaining recursion budget; it is decremented per level and only
     *                     the exact value {@code 0} aborts, so a negative value never triggers
     *                     the check in practice
     * @param stringBuffer the accumulator shared along the recursion
     * @param consumer     receives each completed chunk of text
     * @throws IllegalStateException when the recursion budget reaches {@code 0}
     */
    /* JADX INFO: Access modifiers changed from: private */
    public static void domTextExtractor(Node node, int i, StringBuffer stringBuffer, Consumer<String> consumer) {
        if (i == 0) {
            throw new IllegalStateException("Max recursion reached (getTextContent)");
        }
        final short nodeType = node.getNodeType();
        if (nodeType == Node.COMMENT_NODE) {
            return;
        }
        final String nodeName = node.getNodeName();
        // Locale.ROOT keeps the lookup independent of the default locale (e.g. Turkish dotless i).
        final String lowerCase = nodeName == null ? null : nodeName.toLowerCase(Locale.ROOT);
        if (excludedTagSet.contains(lowerCase)) {
            return;
        }
        if (nodeType == Node.TEXT_NODE) {
            appendText(node.getTextContent(), stringBuffer);
        }
        DomUtils.iterator(node.getChildNodes()).forEach(child -> {
            domTextExtractor(child, i - 1, stringBuffer, consumer);
        });
        if (lowerCase != null && stringBuffer.length() > 0 && sentenceTagSet.contains(lowerCase)) {
            consumer.accept(stringBuffer.toString());
            stringBuffer.setLength(0);
        }
    }

    /**
     * Normalises the content of a text node (line breaks to spaces, space runs collapsed via
     * {@code StringUtils.replaceConsecutiveSpaces}, trimmed, HTML4 entities unescaped) and
     * appends it to the buffer, preceded by a space when the buffer is not empty.
     */
    private static void appendText(String textContent, StringBuffer stringBuffer) {
        if (textContent == null || textContent.isEmpty()) {
            return;
        }
        final String normalized = StringUtils
                .replaceConsecutiveSpaces(textContent.replace('\r', ' ').replace('\n', ' '), " ")
                .trim();
        if (normalized.isEmpty()) {
            return;
        }
        final String unescaped = StringEscapeUtils.unescapeHtml4(normalized);
        if (stringBuffer.length() > 0) {
            stringBuffer.append(' ');
        }
        stringBuffer.append(unescaped);
    }

    /**
     * Extracts the text of a DOM subtree, handing it to {@code consumer} chunk by chunk.
     * Text still buffered after the traversal is delivered as a final chunk.
     *
     * @param node     the root of the subtree
     * @param i        the maximum recursion depth
     * @param consumer receives each chunk of text
     * @throws IllegalStateException when the recursion budget reaches {@code 0}
     */
    public static void domTextExtractor(Node node, int i, Consumer<String> consumer) {
        final StringBuffer buffer = new StringBuffer();
        domTextExtractor(node, i, buffer, consumer);
        if (buffer.length() > 0) {
            consumer.accept(buffer.toString());
        }
    }

    /**
     * Extracts the text of a DOM subtree using {@link #DEFAULT_MAX_RECURSION} as depth limit.
     *
     * @param node     the root of the subtree
     * @param consumer receives each chunk of text
     */
    public static void domTextExtractor(Node node, Consumer<String> consumer) {
        domTextExtractor(node, DEFAULT_MAX_RECURSION, consumer);
    }
}
