/*
 * Decompiled with CFR 0.152.
 */
package com.qwazr.utils;

import com.qwazr.utils.DomUtils;
import com.qwazr.utils.StringUtils;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.commons.lang3.text.WordUtils;
import org.w3c.dom.Node;

/**
 * Static helpers for turning HTML into plain text: regex based tag stripping,
 * soft-hyphen wrapping of long strings, and sentence extraction from a DOM tree.
 *
 * <p>The class holds no mutable state. The compiled {@link Pattern} constants are
 * immutable and safe to share between threads, so no locking is used.
 */
public class HtmlUtils {
    /**
     * Regex alternation of the element names whose closing tag is treated as the end
     * of a sentence. Shared by the closing-tag patterns and {@link #sentenceTagSet}
     * so the two cannot drift apart.
     */
    private static final String BLOCK_TAGS =
            "p|td|div|h1|h2|h3|h4|h5|h6|hr|li|option|pre|select|table|tbody|textarea|tfoot|thead|th|title|tr|ul";

    /** Matches any tag-like sequence {@code <...>}. */
    private static final Pattern removeTagPattern = Pattern.compile("<[^>]*>");

    /**
     * A period, optional whitespace, then a {@code <br>} tag. The whitespace is optional
     * so that {@code "end.<br>"} does not later become {@code "end.. "}.
     */
    private static final Pattern removeBrPattern1 =
            Pattern.compile("\\.\\p{Space}*<br\\p{Space}*/?>", Pattern.CASE_INSENSITIVE);

    /** A period, optional whitespace, then the closing tag of a block element. */
    private static final Pattern removeEndTagBlockPattern1 =
            Pattern.compile("\\.\\p{Space}*</(" + BLOCK_TAGS + ")>", Pattern.CASE_INSENSITIVE);

    /** The closing tag of a block element. */
    private static final Pattern removeEndTagBlockPattern2 =
            Pattern.compile("</(" + BLOCK_TAGS + ")>", Pattern.CASE_INSENSITIVE);

    /** A {@code <br>} tag, with or without the self-closing slash. */
    private static final Pattern removeBrPattern2 =
            Pattern.compile("<br\\p{Space}*/?>", Pattern.CASE_INSENSITIVE);

    /**
     * A whole {@code script}, {@code object} or {@code style} element including its body.
     * The body is matched lazily with DOTALL up to the matching closing tag, so a body
     * that itself contains {@code <} (for instance {@code a < b} in a script) is still removed.
     */
    private static final Pattern removeScriptObjectStylePattern =
            Pattern.compile("<(script|object|style)\\b[^>]*>.*?</\\1\\s*>",
                    Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Lower-case element names after which the DOM extractor emits the buffered text. */
    private static final Set<String> sentenceTagSet = new HashSet<>(Arrays.asList(BLOCK_TAGS.split("\\|")));

    /** Lower-case element names whose whole subtree is skipped by the DOM extractor. */
    private static final Set<String> excludedTagSet = new HashSet<>(Arrays.asList("script", "style", "object"));

    /** Recursion budget used by {@link #domTextExtractor(Node, Consumer)}. */
    public static final int DEFAULT_MAX_RECURSION = 256;

    /**
     * Strips all markup from an HTML fragment and returns plain text.
     *
     * <p>Script, object and style elements are dropped with their content. Each
     * {@code <br>} and each block-level closing tag becomes {@code ". "}; a period
     * already preceding such a tag is folded into it so it is not doubled. Every
     * remaining tag is removed. The text goes through
     * {@code StringUtils.replaceConsecutiveSpaces(text, " ")} before and after
     * (presumably collapsing whitespace runs to one space; confirm against that helper).
     *
     * @param text the HTML to clean, may be {@code null} or empty
     * @return the text without tags, or {@code text} itself when it is {@code null} or empty
     */
    public static final String removeTag(String text) {
        if (StringUtils.isEmpty(text)) {
            return text;
        }
        String result = StringUtils.replaceConsecutiveSpaces(text, " ");
        result = removeScriptObjectStylePattern.matcher(result).replaceAll("");
        // Fold "period + tag" into a bare </p> first; the next two steps then turn every
        // sentence boundary into exactly one ". ".
        result = removeBrPattern1.matcher(result).replaceAll("</p>");
        result = removeEndTagBlockPattern1.matcher(result).replaceAll("</p>");
        result = removeEndTagBlockPattern2.matcher(result).replaceAll(". ");
        result = removeBrPattern2.matcher(result).replaceAll(". ");
        result = removeTagPattern.matcher(result).replaceAll("");
        return StringUtils.replaceConsecutiveSpaces(result, " ");
    }

    /**
     * Removes every tag from {@code text} except those listed in {@code allowedTags}.
     *
     * <p>A tag is kept only when the complete matched sequence, angle brackets included,
     * equals one of the entries exactly (for example {@code "<b>"} and {@code "</b>"}).
     * Kept tags are copied literally, so {@code $} and {@code \} inside them are safe.
     * When {@code allowedTags} is {@code null} the text is additionally passed through
     * {@code StringUtils.replaceConsecutiveSpaces(text, " ")} before stripping.
     *
     * @param text        the HTML to clean, may be {@code null} or empty
     * @param allowedTags the exact tag strings to keep, or {@code null} to remove all tags
     * @return the filtered text, or {@code text} itself when it is {@code null} or empty
     */
    public static final String removeTag(String text, String[] allowedTags) {
        if (StringUtils.isEmpty(text)) {
            return text;
        }
        if (allowedTags == null) {
            text = StringUtils.replaceConsecutiveSpaces(text, " ");
        }
        final Matcher matcher = removeTagPattern.matcher(text);
        // StringBuffer is kept because Matcher.appendReplacement(StringBuilder, ...) needs Java 9+.
        final StringBuffer sb = new StringBuffer(text.length());
        while (matcher.find()) {
            final String group = matcher.group();
            // quoteReplacement: the tag is literal text, not a replacement template.
            final String replacement = isAllowedTag(group, allowedTags) ? Matcher.quoteReplacement(group) : "";
            matcher.appendReplacement(sb, replacement);
        }
        matcher.appendTail(sb);
        return sb.toString();
    }

    /** Tells whether {@code tag} equals one of the entries of {@code allowedTags}. */
    private static boolean isAllowedTag(String tag, String[] allowedTags) {
        if (allowedTags == null) {
            return false;
        }
        for (String allowed : allowedTags) {
            if (tag.equals(allowed)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Wraps {@code text} with {@code WordUtils.wrap}, using the {@code &shy;} entity as the
     * line separator and allowing long words to be broken. Existing {@code &shy;}
     * entities are removed first.
     *
     * @param text       the text to wrap, may be {@code null} or empty
     * @param wrapLength the column passed to {@code WordUtils.wrap}
     * @return the wrapped text, or {@code text} unchanged when it is {@code null}, empty or
     *         shorter than {@code wrapLength}
     */
    public static final String htmlWrap(String text, int wrapLength) {
        if (StringUtils.isEmpty(text)) {
            return text;
        }
        if (text.length() < wrapLength) {
            return text;
        }
        text = StringUtils.replace(text, "&shy;", "");
        return WordUtils.wrap(text, wrapLength, "&shy;", true);
    }

    /**
     * Shortens a long text to its first fragment, an ellipsis (U+2026) and as many
     * trailing fragments as fit.
     *
     * <p>The text is wrapped at {@code wrapLength} with a soft hyphen (U+00AD) as the
     * separator and split on it. Trailing fragments are added from the end for as long as
     * the accumulated fragment length, which starts at the first fragment's length, stays
     * below {@code maxSize}.
     *
     * @param text       the text to shorten, may be {@code null} or empty
     * @param wrapLength the column passed to {@code WordUtils.wrap}
     * @param maxSize    texts shorter than this are returned unchanged; also the budget for
     *                   the kept fragments
     * @return the shortened text; {@code text} unchanged when it is {@code null}, empty or
     *         shorter than {@code maxSize}; an empty string when no fragment is left once
     *         the soft hyphens are removed
     */
    public static final String htmlWrapReduce(String text, int wrapLength, int maxSize) {
        if (StringUtils.isEmpty(text)) {
            return text;
        }
        if (text.length() < maxSize) {
            return text;
        }
        text = StringUtils.replace(text, "&shy;", "");
        text = WordUtils.wrap(text, wrapLength, "\u00ad", true);
        final String[] frags = StringUtils.split(text, '\u00ad');
        // Nothing but soft hyphens in the input: avoid reading frags[0] of an empty array.
        if (frags == null || frags.length == 0) {
            return "";
        }
        final StringBuilder tail = new StringBuilder();
        int length = frags[0].length();
        for (int i = frags.length - 1; i > 0; i--) {
            length += frags[i].length();
            if (length >= maxSize) {
                break;
            }
            tail.insert(0, frags[i]);
        }
        return frags[0] + '\u2026' + tail;
    }

    /**
     * Depth-first walk that gathers text nodes into {@code buffer} and hands the buffer to
     * {@code output} each time an element of {@link #sentenceTagSet} has been fully visited.
     *
     * @param node      the node to visit
     * @param recursion the remaining depth budget, decremented for each level
     * @param buffer    accumulates the text of the sentence in progress
     * @param output    receives each completed sentence
     * @throws IllegalStateException when the depth budget reaches zero
     */
    private static void domTextExtractor(Node node, int recursion, StringBuilder buffer, Consumer<String> output) {
        if (recursion == 0) {
            throw new IllegalStateException("Max recursion reached (getTextContent)");
        }
        final short nodeType = node.getNodeType();
        if (nodeType == Node.COMMENT_NODE) {
            return;
        }
        final String rawName = node.getNodeName();
        // Locale.ROOT: with the default locale, Turkish or Azeri settings lower-case 'I' to a
        // dotless i, and names such as DIV, LI, TITLE or SCRIPT would miss the sets below.
        final String nodeName = rawName == null ? null : rawName.toLowerCase(Locale.ROOT);
        if (excludedTagSet.contains(nodeName)) {
            return;
        }
        if (nodeType == Node.TEXT_NODE) {
            appendText(node.getTextContent(), buffer);
        }
        DomUtils.iterator(node.getChildNodes())
                .forEach(child -> HtmlUtils.domTextExtractor(child, recursion - 1, buffer, output));
        if (nodeName != null && buffer.length() > 0 && sentenceTagSet.contains(nodeName)) {
            output.accept(buffer.toString());
            buffer.setLength(0);
        }
    }

    /**
     * Normalizes the content of a text node and appends it to {@code buffer}, separated
     * from any previous content by one space. Line breaks become spaces, the result is
     * passed through {@code StringUtils.replaceConsecutiveSpaces}, trimmed and
     * HTML-unescaped. Nothing is appended when the text is {@code null} or ends up empty.
     */
    private static void appendText(String text, StringBuilder buffer) {
        if (text == null || text.isEmpty()) {
            return;
        }
        String normalized = text.replace('\r', ' ').replace('\n', ' ');
        normalized = StringUtils.replaceConsecutiveSpaces(normalized, " ").trim();
        if (normalized.isEmpty()) {
            return;
        }
        if (buffer.length() > 0) {
            buffer.append(' ');
        }
        buffer.append(StringEscapeUtils.unescapeHtml4(normalized));
    }

    /**
     * Extracts the text under {@code node} sentence by sentence.
     *
     * <p>Comments and the subtrees of script, style and object elements are ignored. The
     * consumer is called once for each block-level element that ends with pending text,
     * and once more at the end if text remains in the buffer.
     *
     * @param node         the root of the subtree to read
     * @param maxRecursion the depth budget; reaching zero raises an exception. A negative
     *                     value never reaches zero and therefore does not limit the depth
     * @param consumer     receives each extracted sentence
     * @throws IllegalStateException when the tree is deeper than {@code maxRecursion}
     */
    public static void domTextExtractor(Node node, int maxRecursion, Consumer<String> consumer) {
        final StringBuilder buffer = new StringBuilder();
        HtmlUtils.domTextExtractor(node, maxRecursion, buffer, consumer);
        if (buffer.length() > 0) {
            consumer.accept(buffer.toString());
        }
    }

    /**
     * Extracts the text under {@code node} sentence by sentence, using
     * {@link #DEFAULT_MAX_RECURSION} as the depth budget.
     *
     * @param node     the root of the subtree to read
     * @param consumer receives each extracted sentence
     * @throws IllegalStateException when the tree is deeper than the default budget
     */
    public static void domTextExtractor(Node node, Consumer<String> consumer) {
        HtmlUtils.domTextExtractor(node, DEFAULT_MAX_RECURSION, consumer);
    }
}

