package org.opensextant.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.lang.Character;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringEscapeUtils;
import org.joda.time.Instant;
import org.opensextant.annotations.AnnotationHelper;
import org.opensextant.data.Language;
import org.opensextant.extractors.xcoord.DMSOrdinate;
import org.supercsv.cellprocessor.Optional;
import org.supercsv.cellprocessor.constraint.NotNull;
import org.supercsv.cellprocessor.ift.CellProcessor;
import org.supercsv.io.CsvListReader;
import org.supercsv.prefs.CsvPreference;

/* loaded from: input_file:org/opensextant/util/TextUtils.class */
/**
 * Static helpers for inspecting, cleaning and normalizing text: ASCII/Latin/CJK
 * detection, diacritic handling, whitespace and punctuation clean-up, letter-case
 * statistics, hashing, GZIP (de)compression and a lookup table of languages.
 *
 * <p>Unless a method says otherwise, passing {@code null} for a text argument
 * results in a {@link NullPointerException}.
 *
 * <p>The language table is a plain {@link HashMap} filled during class
 * initialization; this class adds no synchronization around it.
 */
public class TextUtils {
    /** ASCII replacements; position {@code n} replaces position {@code n} of {@link #ALPHAMAP_UNICODE}. */
    private static final String ALPHAMAP_PLAIN_ASCII = "AaEeIiOoUuAaEeIiOoUuYyAaEeIiOoUuYyAaOoNnAaEeIiOoUuYyAaCcOoUuOoEeEeEeEeGgGgGgGgAaEeIiOoUuAaBbMmNnDdRr";
    /** Accented letters recognized by {@link #hasDiacritics(String)} and {@link #replaceDiacriticsOriginal(String)}. */
    private static final String ALPHAMAP_UNICODE = "ÀàÈèÌìÒòÙùÁáÉéÍíÓóÚúÝýÂâÊêÎîÔôÛûŶŷÃãÕõÑñÄäËëÏïÖöÜüŸÿÅåÇçŐőŰűØøĔĕĖėĘęĚěĜĝĞğĠġĢģĀāĒēĪīŌōŪūḀḁḂḃṀṁṄṅḎḏṞṟ";
    /** Quote-like marks that {@link #hasDiacritics(String)} also treats as diacritics. */
    private static final String COMMON_DIACRITC_HASHMARKS = "\"'`´‘’";
    /** Highest ASCII code point. */
    private static final int ASCII_END = 127;
    public static final char NL = '\n';
    public static final char CR = '\r';
    public static final char SP = ' ';
    public static final char TAB = '\t';
    public static final char DEL = 127;
    /** Mode for {@link #checkCase(String, int)}: text must contain no upper-case letters. */
    public static final int CASE_LOWER = 1;
    /** Mode for {@link #checkCase(String, int)}: text must contain no lower-case letters. */
    public static final int CASE_UPPER = 2;
    public static final String arabicLang = "ar";
    public static final String bahasaLang = "id";
    public static final String chineseLang = "zh";
    public static final String chineseTradLang = "zt";
    public static final String englishLang = "en";
    public static final String farsiLang = "fa";
    public static final String frenchLang = "fr";
    public static final String germanLang = "de";
    public static final String italianLang = "it";
    public static final String japaneseLang = "ja";
    public static final String koreanLang = "ko";
    public static final String portugueseLang = "pt";
    public static final String russianLang = "ru";
    public static final String spanishLang = "es";
    public static final String turkishLang = "tr";
    public static final String thaiLang = "th";
    public static final String vietnameseLang = "vi";
    public static final String romanianLang = "ro";
    /** Characters below this value are never tested for CJK membership. */
    private static final int LATIN1_END = 254;
    /** Buffer size used when inflating GZIP data. */
    private static final int ONEKB = 1024;
    private static final Pattern SCRUB_SYM = Pattern.compile("\\p{block=Miscellaneous Symbols And Pictographs}+");
    private static final Pattern SCRUB_SYM2 = Pattern.compile("\\p{block=Transport and Map Symbols}+");
    private static final Pattern SCRUB_EMOTICONS = Pattern.compile("\\p{block=Emoticons}+");
    private static final Pattern SCRUB_ALPHASUP = Pattern.compile("\\p{block=Enclosed Alphanumeric Supplement}+");
    private static final Pattern SCRUB_TILES1 = Pattern.compile("\\p{block=Mahjong Tiles}+");
    private static final Pattern SCRUB_TILES2 = Pattern.compile("\\p{block=Domino Tiles}+");
    private static final Pattern SCRUB_SYM_MISC = Pattern.compile("\\p{block=Miscellaneous Symbols}+");
    private static final Pattern SCRUB_PLAYCARDS = Pattern.compile("\\p{block=Playing Cards}+");
    /** Matches bracketed hashtags of the form {@code #[some words]}. */
    public static final Pattern hashtagPattern1 = Pattern.compile("(#\\[\\w[\\d\\s\\w]+\\])", Pattern.UNICODE_CHARACTER_CLASS);
    /** Matches plain hashtags of the form {@code #word}. */
    public static final Pattern hashtagPattern2 = Pattern.compile("(#\\w[\\d\\w]+)", Pattern.UNICODE_CHARACTER_CLASS);
    static Pattern urlHTTPPattern = Pattern.compile("https?://[!-\u007f]+", Pattern.CASE_INSENSITIVE);
    static final Pattern commonPunct = Pattern.compile("[!$%&#*+;:<>=?/{}|~^\"\\u201D\\u201C\\[\\]]");
    static final Pattern delws = Pattern.compile("\\s+");
    static final Pattern multi_eol = Pattern.compile("(\n[ \t\r]*){3,}");
    static final Pattern multi_eol2 = Pattern.compile("(\n\r?){2,}");
    private static final Pattern wsRedux = Pattern.compile("[-\\s`\"´‘’]");
    private static final Pattern tokenizer = Pattern.compile("\\s+");
    static final Pattern CLEAN_WORD_RIGHT = Pattern.compile("[^\\p{L}\\p{Nd}]+$");
    static final Pattern CLEAN_WORD_LEFT = Pattern.compile("^[^\\p{L}\\p{Nd}]+");
    static final Pattern CLEAN_WORD_PUNCT = Pattern.compile("[\"'.`\\u00B4\\u2018\\u2019]");
    /** Language lookup keyed by the codes and names registered through {@link #addLanguage(Language, boolean)}. */
    private static final Map<String, Language> languageMapISO639 = new HashMap<>();

    /**
     * Tests whether the text contains any character matched by {@link #commonPunct}.
     *
     * @param str text to scan
     * @return {@code true} if at least one such character is present
     */
    public static boolean hasIrregularPunctuation(String str) {
        return commonPunct.matcher(str).find();
    }

    /**
     * Counts the characters matched by {@link #commonPunct}.
     *
     * @param str text to scan
     * @return number of matches
     */
    public static int countIrregularPunctuation(String str) {
        // One matcher for the whole scan: a fresh matcher per iteration would
        // restart at offset 0 and never terminate once a match exists.
        Matcher punct = commonPunct.matcher(str);
        int count = 0;
        while (punct.find()) {
            count++;
        }
        return count;
    }

    /**
     * Tests whether every non-ASCII letter of the text belongs to one of the Latin
     * Unicode blocks (Latin-1 Supplement, Extended A-D, Extended Additional).
     * Non-letters and ASCII characters are ignored.
     *
     * @param str text to scan
     * @return {@code false} as soon as a non-ASCII letter from another block is found
     */
    public static final boolean isLatin(String str) {
        for (char c : str.toCharArray()) {
            if (isASCII(c) || !Character.isLetter(c)) {
                continue;
            }
            Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
            boolean latin = block == Character.UnicodeBlock.LATIN_1_SUPPLEMENT
                    || block == Character.UnicodeBlock.LATIN_EXTENDED_A
                    || block == Character.UnicodeBlock.LATIN_EXTENDED_B
                    || block == Character.UnicodeBlock.LATIN_EXTENDED_C
                    || block == Character.UnicodeBlock.LATIN_EXTENDED_D
                    || block == Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
            if (!latin) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether the text contains one of the accented letters of the internal
     * table or one of the quote-like marks {@code " ' ` ´ ‘ ’}.
     *
     * @param str text to scan
     * @return {@code true} if any such character is present
     */
    public static final boolean hasDiacritics(String str) {
        for (int pos = 0; pos < str.length(); pos++) {
            char c = str.charAt(pos);
            if (ALPHAMAP_UNICODE.indexOf(c) >= 0 || COMMON_DIACRITC_HASHMARKS.indexOf(c) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Replaces diacritics, then deletes hyphens, whitespace and quote-like marks.
     *
     * @param str text to reduce
     * @return the reduced text
     */
    public static String phoneticReduction(String str) {
        return phoneticReduction(str, false);
    }

    /**
     * Deletes hyphens, whitespace and quote-like marks, optionally skipping the
     * diacritic replacement step.
     *
     * @param str text to reduce
     * @param z   {@code true} to skip diacritic replacement, {@code false} to apply
     *            {@link #replaceDiacritics(String)} first
     * @return the reduced text
     */
    public static String phoneticReduction(String str, boolean z) {
        String base = z ? str : replaceDiacritics(str);
        return wsRedux.matcher(base).replaceAll("");
    }

    /**
     * Delegates to {@code Unimap.replaceDiacritics}.
     *
     * @param str text to convert
     * @return whatever {@code Unimap.replaceDiacritics} returns for {@code str}
     */
    public static final String replaceDiacritics(String str) {
        return Unimap.replaceDiacritics(str);
    }

    /**
     * Replaces each accented letter of the internal table with its ASCII counterpart;
     * all other characters are copied unchanged.
     *
     * @param str text to convert, may be {@code null}
     * @return converted text; {@code null} for {@code null}, the same instance for ""
     */
    @Deprecated
    public static String replaceDiacriticsOriginal(String str) {
        if (str == null) {
            return null;
        }
        if ("".equals(str)) {
            return str;
        }
        StringBuilder out = new StringBuilder(str.length());
        for (int pos = 0; pos < str.length(); pos++) {
            char c = str.charAt(pos);
            int mapped = ALPHAMAP_UNICODE.indexOf(c);
            out.append(mapped > -1 ? ALPHAMAP_PLAIN_ASCII.charAt(mapped) : c);
        }
        return out.toString();
    }

    /**
     * Tests whether the character is in the range 1..127. NUL (0) is not accepted.
     *
     * @param c character to test
     * @return {@code true} for 1..127
     */
    public static final boolean isASCII(char c) {
        return c > 0 && c <= ASCII_END;
    }

    /**
     * Tests for {@code a-z} or {@code A-Z}.
     *
     * @param c character to test
     * @return {@code true} for an unaccented Latin letter
     */
    public static final boolean isASCIILetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    /**
     * Tests whether all bytes are in the range 0..127.
     *
     * @param bArr bytes to test
     * @return {@code false} if any byte is negative (high bit set)
     */
    public static boolean isASCII(byte[] bArr) {
        for (byte b : bArr) {
            // A Java byte cannot exceed 127, so only the sign needs checking.
            if (b < 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests whether all characters are at or below code point 127.
     *
     * @param str text to test
     * @return {@code false} if any character is above 127
     */
    public static boolean isASCII(String str) {
        for (int pos = 0; pos < str.length(); pos++) {
            if (str.charAt(pos) > ASCII_END) {
                return false;
            }
        }
        return true;
    }

    /**
     * Counts the bytes in the range 0..127, the same range accepted by
     * {@link #isASCII(byte[])}.
     *
     * @param bArr bytes to scan
     * @return number of non-negative bytes
     */
    public static int countASCIIChars(byte[] bArr) {
        int count = 0;
        for (byte b : bArr) {
            if (b >= 0) {
                count++;
            }
        }
        return count;
    }

    /**
     * Collapses runs of three or more line breaks (each optionally followed by
     * spaces, tabs or carriage returns) into two newlines.
     *
     * @param str text to clean
     * @return text with reduced blank lines
     */
    public static String reduce_line_breaks(String str) {
        return multi_eol.matcher(str).replaceAll("\n\n");
    }

    /**
     * Removes all whitespace.
     *
     * @param str text to clean
     * @return text without whitespace
     */
    public static String delete_whitespace(String str) {
        return delws.matcher(str).replaceAll("");
    }

    /**
     * Replaces each run of whitespace with a single space.
     *
     * @param str text to clean
     * @return text with squeezed whitespace
     */
    public static String squeeze_whitespace(String str) {
        return delws.matcher(str).replaceAll(" ");
    }

    /**
     * Replaces every newline and carriage return with a space.
     *
     * @param str text to clean
     * @return single-line text of the same length
     */
    public static String delete_eol(String str) {
        return str.replace(NL, SP).replace(CR, SP);
    }

    /**
     * Drops control characters below the space character, except tab and newline,
     * and drops DEL (127).
     *
     * @param str text to clean, may be {@code null}
     * @return cleaned text, or {@code null} for {@code null}
     */
    public static String delete_controls(String str) {
        if (str == null) {
            return null;
        }
        StringBuilder kept = new StringBuilder();
        for (char c : str.toCharArray()) {
            boolean printable = c >= SP || c == TAB || c == NL;
            if (printable && c != DEL) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Tests whether the text contains at least one digit.
     *
     * @param str text to scan, may be {@code null}
     * @return {@code true} if a digit is present
     */
    public static boolean hasDigits(String str) {
        return countDigits(str) > 0;
    }

    /**
     * Counts digits; same as {@link #count_digits(String)}.
     *
     * @param str text to scan, may be {@code null}
     * @return number of digits
     */
    public static int countDigits(String str) {
        return count_digits(str);
    }

    /**
     * Counts characters for which {@link Character#isDigit(char)} is true.
     *
     * @param str text to scan, may be {@code null}
     * @return number of digits, 0 for {@code null}
     */
    public static int count_digits(String str) {
        if (str == null) {
            return 0;
        }
        int count = 0;
        for (char c : str.toCharArray()) {
            if (Character.isDigit(c)) {
                count++;
            }
        }
        return count;
    }

    /**
     * Loose numeric test: the first character must be a digit, '.', '-' or '+', and
     * every character must be a digit or one of {@code . , - + e E} or space. This
     * does not guarantee the text parses as a number.
     *
     * @param str text to test, may be {@code null}
     * @return {@code true} if the text looks numeric; {@code false} for {@code null} or ""
     */
    public static final boolean isNumeric(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        char first = str.charAt(0);
        if (!Character.isDigit(first) && first != '.' && first != '-' && first != '+') {
            return false;
        }
        for (char c : str.toCharArray()) {
            boolean allowedSymbol = c == '.' || c == ',' || c == '-' || c == '+' || c == 'e' || c == 'E' || c == ' ';
            if (!allowedSymbol && !Character.isDigit(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Counts whitespace characters.
     *
     * @param str text to scan, may be {@code null}
     * @return number of whitespace characters, 0 for {@code null}
     */
    public static int count_ws(String str) {
        if (str == null) {
            return 0;
        }
        int count = 0;
        for (char c : str.toCharArray()) {
            if (Character.isWhitespace(c)) {
                count++;
            }
        }
        return count;
    }

    /**
     * Counts characters below the space character (tabs, line breaks and other
     * control characters).
     *
     * @param str text to scan, may be {@code null}
     * @return number of such characters, 0 for {@code null}
     */
    public static int countFormattingSpace(String str) {
        if (str == null) {
            return 0;
        }
        int count = 0;
        for (char c : str.toCharArray()) {
            if (c < SP) {
                count++;
            }
        }
        return count;
    }

    /**
     * Tests whether the text has upper-case letters and no lower-case letters.
     *
     * @param str text to test, may be {@code null}
     * @return see {@link #checkCase(String, int)} with {@link #CASE_UPPER}
     */
    public static boolean isUpper(String str) {
        return checkCase(str, CASE_UPPER);
    }

    /**
     * Tests whether the text has lower-case letters and no upper-case letters.
     *
     * @param str text to test, may be {@code null}
     * @return see {@link #checkCase(String, int)} with {@link #CASE_LOWER}
     */
    public static boolean isLower(String str) {
        return checkCase(str, CASE_LOWER);
    }

    /**
     * Tests whether all cased letters of the text have the requested case.
     *
     * @param str text to test, may be {@code null}
     * @param i   {@link #CASE_LOWER} or {@link #CASE_UPPER}
     * @return {@code true} if at least one letter has the requested case and none has
     *         the opposite case; {@code false} for {@code null} or any other mode
     */
    public static boolean checkCase(String str, int i) {
        if (str == null || (i != CASE_LOWER && i != CASE_UPPER)) {
            return false;
        }
        boolean wantLower = i == CASE_LOWER;
        int matching = 0;
        for (char c : str.toCharArray()) {
            if (!Character.isLetter(c)) {
                continue;
            }
            boolean upper = Character.isUpperCase(c);
            boolean lower = Character.isLowerCase(c);
            if (wantLower ? upper : lower) {
                return false;
            }
            if (wantLower ? lower : upper) {
                matching++;
            }
        }
        return matching > 0;
    }

    /**
     * Gathers character statistics for the text.
     *
     * @param str text to measure, may be {@code null}
     * @return five counts: [0] letters, [1] upper-case letters, [2] lower-case
     *         letters, [3] characters that are neither letters nor whitespace,
     *         [4] whitespace; {@code null} for {@code null} input
     */
    public static int[] measureCase(String str) {
        if (str == null) {
            return null;
        }
        int letters = 0;
        int upper = 0;
        int lower = 0;
        int other = 0;
        int spaces = 0;
        for (char c : str.toCharArray()) {
            if (Character.isLetter(c)) {
                letters++;
                if (Character.isUpperCase(c)) {
                    upper++;
                } else if (Character.isLowerCase(c)) {
                    lower++;
                }
            } else if (Character.isWhitespace(c)) {
                spaces++;
            } else {
                other++;
            }
        }
        return new int[]{letters, upper, lower, other, spaces};
    }

    /**
     * Decides whether a document is predominantly upper case. The required share
     * of upper-case letters grows with size: above 0.5 for fewer than 100 letters,
     * above 0.6 for fewer than 500, above 0.75 otherwise.
     *
     * @param iArr counts as returned by {@link #measureCase(String)}
     * @return {@code true} if the upper-case share exceeds the threshold;
     *         {@code false} when there are no letters
     */
    public static boolean isUpperCaseDocument(int[] iArr) {
        int letters = iArr[0];
        if (letters == 0) {
            return false;
        }
        // Cast before dividing; int/int would truncate the ratio to 0 or 1.
        float upperRatio = (float) iArr[1] / letters;
        if (letters < 100) {
            return upperRatio > 0.5d;
        }
        if (letters < 500) {
            return upperRatio > 0.6d;
        }
        return upperRatio > 0.75d;
    }

    /**
     * Decides whether a document is predominantly lower case: the share of
     * lower-case letters must exceed 0.97 for fewer than 100 letters, 0.98 otherwise.
     *
     * @param iArr counts as returned by {@link #measureCase(String)}
     * @return {@code true} if the lower-case share exceeds the threshold;
     *         {@code false} when there are no letters
     */
    public static boolean isLowerCaseDocument(int[] iArr) {
        int letters = iArr[0];
        if (letters == 0) {
            return false;
        }
        // Cast before dividing; int/int would truncate the ratio to 0 or 1.
        float lowerRatio = (float) iArr[2] / letters;
        return letters < 100 ? lowerRatio > 0.97d : lowerRatio > 0.98d;
    }

    /**
     * Computes two windows around a span: one ending just before it and one
     * starting just after it, both clamped to {@code [0, i3]}.
     *
     * @param i  start offset of the span
     * @param i2 length of the span
     * @param i3 upper bound for offsets (presumably the text length)
     * @param i4 width of each window
     * @return {@code {leftStart, leftEnd, rightStart, rightEnd}} where
     *         {@code leftEnd = i - 1} and {@code rightStart = i + i2} before clamping
     */
    public static int[] get_text_window(int i, int i2, int i3, int i4) {
        int leftStart = Math.max(i - i4, 0);
        int leftEnd = Math.max(i - 1, leftStart);
        int rightEnd = Math.min(i + i2 + i4, i3);
        int rightStart = Math.min(i + i2, rightEnd);
        return new int[]{leftStart, leftEnd, rightStart, rightEnd};
    }

    /**
     * Computes a single window centered on an offset, clamped to {@code [0, i2]}.
     *
     * @param i  center offset
     * @param i2 upper bound for offsets (presumably the text length)
     * @param i3 total window width; half of it (integer division) is used per side
     * @return {@code {start, end}}
     */
    public static int[] get_text_window(int i, int i2, int i3) {
        int half = i3 / 2;
        int start = Math.max(i - half, 0);
        int end = Math.min(i + half, i2);
        return new int[]{start, end};
    }

    /**
     * Computes the SHA-1 digest of the UTF-8 bytes of the text, formatted by
     * {@link #b2hex(byte[])}.
     *
     * @param str text to hash, may be {@code null}
     * @return hex digest, or {@code null} for {@code null}
     * @throws NoSuchAlgorithmException     if SHA-1 is unavailable
     * @throws UnsupportedEncodingException declared for compatibility; not thrown by this implementation
     */
    public static String text_id(String str) throws NoSuchAlgorithmException, UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
        sha1.update(str.getBytes(StandardCharsets.UTF_8));
        return b2hex(sha1.digest());
    }

    /**
     * Formats bytes as an unsigned lower-case hexadecimal number, left-padded with
     * zeros to at least 32 digits.
     *
     * <p>NOTE(review): padding is fixed at 32 digits, so an array longer than 16
     * bytes whose leading bytes are zero yields fewer than two digits per byte.
     * Left unchanged because previously generated ids may depend on it.
     *
     * @param bArr bytes to format
     * @return hex string
     */
    public static String b2hex(byte[] bArr) {
        return String.format("%032x", new BigInteger(1, bArr));
    }

    /**
     * Formats each byte as lower-case hex, prefixing values below 16 with
     * {@code GeonamesUtility.COUNTRY_ADM0_NORM}.
     *
     * <p>NOTE(review): the prefix is presumably the string "0" (zero padding);
     * confirm the value of that constant.
     *
     * @param bArr bytes to format
     * @return hex string
     */
    public static String md5_id(byte[] bArr) {
        StringBuilder hex = new StringBuilder(bArr.length * 2);
        for (byte b : bArr) {
            int unsigned = b & 255;
            if (unsigned < 16) {
                hex.append(GeonamesUtility.COUNTRY_ADM0_NORM);
            }
            hex.append(Integer.toHexString(unsigned));
        }
        return hex.toString().toLowerCase();
    }

    /**
     * Splits text on a regular expression, trims each piece and drops empty pieces.
     *
     * @param str  text to split, may be {@code null}
     * @param str2 delimiter, interpreted as a regular expression by {@link String#split(String)}
     * @return trimmed non-empty pieces, or {@code null} for {@code null} text
     */
    public static List<String> string2list(String str, String str2) {
        if (str == null) {
            return null;
        }
        List<String> items = new ArrayList<>();
        for (String piece : str.split(str2)) {
            String trimmed = piece.trim();
            if (!trimmed.isEmpty()) {
                items.add(trimmed);
            }
        }
        return items;
    }

    /**
     * Replaces every character of {@code str} that occurs in {@code str2} with
     * {@code str3}. Same behavior as {@link #replaceAny(String, String, String)}.
     *
     * @param str  text to process
     * @param str2 set of characters to replace
     * @param str3 replacement text
     * @return processed text
     */
    public static String fast_replace(String str, String str2, String str3) {
        return replaceAny(str, str2, str3);
    }

    /**
     * Deletes every character of {@code str} that occurs in {@code str2}.
     *
     * @param str  text to process
     * @param str2 set of characters to delete
     * @return processed text
     */
    public static String removeAny(String str, String str2) {
        StringBuilder kept = new StringBuilder();
        for (char c : str.toCharArray()) {
            if (str2.indexOf(c) < 0) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Replaces every character of {@code str} that occurs in {@code str2} with
     * {@code str3}.
     *
     * @param str  text to process
     * @param str2 set of characters to replace
     * @param str3 replacement text
     * @return processed text
     */
    public static String replaceAny(String str, String str2, String str3) {
        StringBuilder out = new StringBuilder();
        for (char c : str.toCharArray()) {
            if (str2.indexOf(c) >= 0) {
                out.append(str3);
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Strips leading characters that occur in {@code str2}.
     *
     * @param str  text to process
     * @param str2 set of characters to strip from the left
     * @return text starting at the first character not in {@code str2}
     */
    public static String removeAnyLeft(String str, String str2) {
        int start = 0;
        while (start < str.length() && str2.indexOf(str.charAt(start)) >= 0) {
            start++;
        }
        return str.substring(start);
    }

    /**
     * Trims leading and trailing characters that are neither letters nor digits,
     * then squeezes whitespace.
     *
     * @param str text to normalize
     * @return "" for blank input; {@code null} if no letter or digit is found before
     *         the last character (this includes one-character input); "" if the
     *         trimmed span shrinks to a single character; otherwise the trimmed text
     */
    public static String normalizeTextEntity(String str) {
        if (StringUtils.isBlank(str)) {
            return "";
        }
        char[] chars = str.toCharArray();
        int last = chars.length - 1;
        int start = 0;
        int end = last;
        while (start < end && !Character.isLetterOrDigit(chars[start])) {
            start++;
        }
        if (start == end) {
            return null;
        }
        while (end > start && !Character.isLetterOrDigit(chars[end])) {
            end--;
        }
        // Compare with the original last index so that trailing junk is trimmed
        // even when nothing was removed on the left.
        if (start == 0 && end == last) {
            return squeeze_whitespace(str);
        }
        if (end <= start) {
            return "";
        }
        return squeeze_whitespace(str.substring(start, end + 1));
    }

    /**
     * Splits trimmed text on whitespace.
     *
     * @param str text to tokenize
     * @return tokens
     */
    public static String[] tokens(String str) {
        return tokenizer.split(str.trim());
    }

    /**
     * Tokenizes the last segment of the text, where segments are separated by two
     * or more consecutive line breaks.
     *
     * @param str text to tokenize
     * @return tokens of the last segment, or {@code null} if there is none
     */
    public static final String[] tokensRight(String str) {
        if (str.length() == 0) {
            return null;
        }
        String[] segments = multi_eol2.split(str);
        return segments.length == 0 ? null : tokens(segments[segments.length - 1]);
    }

    /**
     * Tokenizes the first segment of the text, where segments are separated by two
     * or more consecutive line breaks.
     *
     * @param str text to tokenize
     * @return tokens of the first segment, or {@code null} if there is none
     */
    public static final String[] tokensLeft(String str) {
        if (str.length() == 0) {
            return null;
        }
        String[] segments = multi_eol2.split(str);
        return segments.length == 0 ? null : tokens(segments[0]);
    }

    /**
     * Removes all periods.
     *
     * @param str abbreviation
     * @return text without '.'
     */
    public static String normalizeAbbreviation(String str) {
        return str.replace(".", "");
    }

    /**
     * Decomposes the text (NFD) and drops all mark characters (non-spacing,
     * enclosing and combining spacing marks).
     *
     * @param str text to process
     * @return text without combining marks
     */
    public static String removeDiacritics(String str) {
        String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
        StringBuilder kept = new StringBuilder(decomposed.length());
        for (char c : decomposed.toCharArray()) {
            int type = Character.getType(c);
            boolean mark = type == Character.NON_SPACING_MARK
                    || type == Character.ENCLOSING_MARK
                    || type == Character.COMBINING_SPACING_MARK;
            if (!mark) {
                kept.append(c);
            }
        }
        return kept.toString();
    }

    /**
     * Returns the NFD-normalized form of the text.
     *
     * @param str text to normalize
     * @return the same instance if already normalized, otherwise the NFD form
     */
    public static String normalizeUnicode(String str) {
        if (Normalizer.isNormalized(str, Normalizer.Form.NFD)) {
            return str;
        }
        return Normalizer.normalize(str, Normalizer.Form.NFD);
    }

    /**
     * Replaces leading and trailing runs of non letter/digit characters, and every
     * quote, period, backtick or acute accent, with a space; then trims.
     *
     * @param str text to clean
     * @return cleaned text
     */
    public static String removePunctuation(String str) {
        String noLeft = CLEAN_WORD_LEFT.matcher(str).replaceAll(" ");
        String noRight = CLEAN_WORD_RIGHT.matcher(noLeft).replaceAll(" ");
        return CLEAN_WORD_PUNCT.matcher(noRight).replaceAll(" ").trim();
    }

    /**
     * Returns the live (mutable) language table.
     *
     * @return the internal map; changes to it affect all lookups
     */
    public static Map<String, Language> getLanguageMap() {
        return languageMapISO639;
    }

    /**
     * Registers a language for every locale reported by
     * {@link Locale#getAvailableLocales()}; existing entries are kept.
     */
    public static void initLanguageData() {
        for (Locale locale : Locale.getAvailableLocales()) {
            addLanguage(new Language(locale.getISO3Language(), locale.getLanguage(), locale.getDisplayLanguage()));
        }
    }

    /**
     * Loads languages from the pipe-delimited classpath resource
     * {@code /ISO-639-2_utf-8.txt}, then registers a fixed set of overrides and
     * aliases (French, Chinese variants, Farsi, Dari, English regional codes).
     *
     * <p>For each row, column 0 and column 2 are passed as the first two
     * {@code Language} constructor arguments and the first item of column 3 as the
     * third. Rows with a blank column 3, the header row ("NAME"), and rows whose
     * column 0 starts with {@code FileUtility.COMMENT_CHAR} are skipped.
     *
     * @throws IOException if the resource is missing or cannot be read
     */
    public static void initLOCLanguageData() throws IOException {
        java.io.InputStream resource = TextUtils.class.getResourceAsStream("/ISO-639-2_utf-8.txt");
        if (resource == null) {
            throw new IOException("Resource not found on classpath: /ISO-639-2_utf-8.txt");
        }
        CsvPreference pipeDelimited = new CsvPreference.Builder('\"', '|', "\n").build();
        CellProcessor[] columns = {new Optional(), new Optional(), new Optional(), new Optional(), new NotNull()};

        // try-with-resources closes the reader even if a row fails to parse.
        try (CsvListReader reader = new CsvListReader(new InputStreamReader(resource, StandardCharsets.UTF_8), pipeDelimited)) {
            List<?> row;
            while ((row = reader.read(columns)) != null) {
                String names = (String) row.get(3);
                if (StringUtils.isBlank(names) || "NAME".equals(names)) {
                    continue;
                }
                List<String> nameList = string2list(names, AnnotationHelper.NUM_SEP);
                String code = (String) row.get(0);
                if (!code.startsWith(FileUtility.COMMENT_CHAR)) {
                    addLanguage(new Language(code, (String) row.get(2), nameList.get(0)));
                }
            }
        }

        addLanguage(new Language("fra", frenchLang, "French"), true);
        addLanguage(new Language("zh-tw", chineseTradLang, "Chinese/Taiwain"), true);
        languageMapISO639.put("zho", new Language("zho", chineseLang, "Chinese"));
        languageMapISO639.put("zh-cn", new Language("chi", chineseLang, "Chinese"));
        languageMapISO639.put("farsi", new Language("per", farsiLang, "Farsi"));

        Language dari = new Language("prs", "dr", "Dari");
        languageMapISO639.put("dari", dari);
        languageMapISO639.put("prs", dari);
        languageMapISO639.put("dr", dari);
        languageMapISO639.put("fa-AF", dari);
        // getLanguage() lower-cases its argument, so the mixed-case key above can
        // only be hit through direct map access; register the lower-case form too.
        languageMapISO639.put("fa-af", dari);

        Language english = new Language("eng", englishLang, "English");
        languageMapISO639.put("en-gb", english);
        languageMapISO639.put("en-us", english);
        languageMapISO639.put("en-au", english);
    }

    /**
     * Registers a language without replacing existing entries.
     *
     * @param language language to register; {@code null} is ignored
     */
    public static void addLanguage(Language language) {
        addLanguage(language, false);
    }

    /**
     * Registers a language under its {@code getCode()}, {@code getISO639_1_Code()}
     * and {@code getNameCode()} keys (each only when non-null).
     *
     * @param language language to register; {@code null} is ignored
     * @param z        {@code true} to overwrite existing entries for the two code
     *                 keys; the name-code key is never overwritten
     */
    public static void addLanguage(Language language, boolean z) {
        if (language == null) {
            return;
        }
        String code = language.getCode();
        if (code != null && (z || !languageMapISO639.containsKey(code))) {
            languageMapISO639.put(code, language);
        }
        String iso1 = language.getISO639_1_Code();
        if (iso1 != null && (z || !languageMapISO639.containsKey(iso1))) {
            languageMapISO639.put(iso1, language);
        }
        String nameCode = language.getNameCode();
        if (nameCode != null && !languageMapISO639.containsKey(nameCode)) {
            languageMapISO639.put(nameCode, language);
        }
    }

    /**
     * Looks up the name of a language.
     *
     * @param str language code or name key, may be {@code null}
     * @return {@code getName()} of the matching language, or {@code null} if unknown
     */
    public static String getLanguageName(String str) {
        Language found = str == null ? null : getLanguage(str);
        return found == null ? null : found.getName();
    }

    /**
     * Looks up a language by key, ignoring case. If the key is not found and it
     * contains an underscore (e.g. {@code en_US}), the part before the first
     * underscore is tried.
     *
     * @param str language key, may be {@code null}
     * @return the language, or {@code null} if unknown
     */
    public static Language getLanguage(String str) {
        if (str == null) {
            return null;
        }
        // Locale.ROOT keeps the mapping stable regardless of the default locale
        // (e.g. Turkish dotless i).
        String key = str.toLowerCase(Locale.ROOT);
        Language found = languageMapISO639.get(key);
        if (found != null) {
            return found;
        }
        int underscore = key.indexOf('_');
        if (underscore < 0) {
            return null;
        }
        return languageMapISO639.get(key.substring(0, underscore));
    }

    /**
     * Looks up the code of a language.
     *
     * @param str language code or name key, may be {@code null}
     * @return {@code getCode()} of the matching language, or {@code null} if unknown
     */
    public static String getLanguageCode(String str) {
        Language found = str == null ? null : getLanguage(str);
        return found == null ? null : found.getCode();
    }

    /** Tests a two-letter code against es, pt, it, fr and ro; {@code null} yields false. */
    private static boolean _isRomanceLanguage(String str) {
        return spanishLang.equals(str)
                || portugueseLang.equals(str)
                || italianLang.equals(str)
                || frenchLang.equals(str)
                || romanianLang.equals(str);
    }

    /**
     * Tests whether the language is a Romance language, German or English.
     *
     * @param str language key
     * @return {@code false} if the language is unknown or is none of those
     */
    public static boolean isEuroLanguage(String str) {
        Language found = getLanguage(str);
        if (found == null) {
            return false;
        }
        String iso1 = found.getISO639_1_Code();
        return _isRomanceLanguage(iso1) || germanLang.equals(iso1) || englishLang.equals(iso1);
    }

    /**
     * Tests whether the language is Spanish, Portuguese, Italian, French or Romanian.
     *
     * @param str language key
     * @return {@code false} if the language is unknown or is none of those
     */
    public static boolean isRomanceLanguage(String str) {
        Language found = getLanguage(str);
        return found != null && _isRomanceLanguage(found.getISO639_1_Code());
    }

    /**
     * Tests whether the language resolves to English.
     *
     * @param str language key
     * @return {@code false} if the language is unknown or not English
     */
    public static boolean isEnglish(String str) {
        Language found = getLanguage(str);
        return found != null && englishLang.equals(found.getISO639_1_Code());
    }

    /**
     * Tests whether the language resolves to Chinese ({@code zh} or {@code zt}).
     *
     * @param str language key
     * @return {@code false} if the language is unknown or not Chinese
     */
    public static boolean isChinese(String str) {
        Language found = getLanguage(str);
        if (found == null) {
            return false;
        }
        String iso1 = found.getISO639_1_Code();
        return chineseLang.equals(iso1) || chineseTradLang.equals(iso1);
    }

    /**
     * Tests whether the language resolves to Korean, Japanese or Chinese.
     *
     * @param str language key
     * @return {@code false} if the language is unknown, has a blank two-letter
     *         code, or is none of those
     */
    public static boolean isCJK(String str) {
        Language found = getLanguage(str);
        if (found == null) {
            return false;
        }
        String iso1 = found.getISO639_1_Code();
        if (StringUtils.isBlank(iso1)) {
            return false;
        }
        return koreanLang.equals(iso1) || japaneseLang.equals(iso1) || chineseLang.equals(iso1) || chineseTradLang.equals(iso1);
    }

    /**
     * Computes the fraction of characters that belong to a CJK Unicode block.
     *
     * @param str text to measure, may be {@code null}
     * @return ratio in 0..1; -1 for {@code null}; 0 for ""
     */
    public static double measureCJKText(String str) {
        if (str == null) {
            return -1.0d;
        }
        if (str.isEmpty()) {
            return 0.0d;
        }
        // Cast before dividing; int/int would truncate the ratio to 0 or 1.
        return (double) countCJKChars(str.toCharArray()) / str.length();
    }

    /**
     * Counts characters that belong to a CJK Unicode block.
     *
     * @param cArr characters to scan
     * @return number of CJK characters
     */
    public static int countCJKChars(char[] cArr) {
        int count = 0;
        for (char c : cArr) {
            // Cheap range test first; the block lookup is only done above Latin-1.
            if (c >= LATIN1_END && isCJK(Character.UnicodeBlock.of(c))) {
                count++;
            }
        }
        return count;
    }

    /**
     * Tests whether the text contains at least one CJK character.
     *
     * @param str text to scan, may be {@code null}
     * @return {@code true} if a CJK character is present; {@code false} for {@code null}
     */
    public static boolean hasCJKText(String str) {
        if (str == null) {
            return false;
        }
        for (int pos = 0; pos < str.length(); pos++) {
            char c = str.charAt(pos);
            if (c >= LATIN1_END && isCJK(Character.UnicodeBlock.of(c))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests whether the block is one of the Chinese, Japanese or Korean blocks
     * recognized by this class.
     *
     * @param unicodeBlock block to test
     * @return {@code true} for a CJK block
     */
    public static boolean isCJK(Character.UnicodeBlock unicodeBlock) {
        return isChinese(unicodeBlock) || isJapanese(unicodeBlock) || isKorean(unicodeBlock);
    }

    /**
     * Tests whether the block is one this class treats as Chinese: CJK unified
     * ideographs and extensions A-D, CJK compatibility blocks, radicals, CJK
     * symbols, Yi, Bopomofo and Kanbun.
     *
     * @param unicodeBlock block to test
     * @return {@code true} for one of those blocks
     */
    public static boolean isChinese(Character.UnicodeBlock unicodeBlock) {
        return unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || unicodeBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS
                || unicodeBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || unicodeBlock == Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT
                || unicodeBlock == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || unicodeBlock == Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS
                || unicodeBlock == Character.UnicodeBlock.KANGXI_RADICALS
                || unicodeBlock == Character.UnicodeBlock.YI_SYLLABLES
                || unicodeBlock == Character.UnicodeBlock.YI_RADICALS
                || unicodeBlock == Character.UnicodeBlock.BOPOMOFO
                || unicodeBlock == Character.UnicodeBlock.BOPOMOFO_EXTENDED
                || unicodeBlock == Character.UnicodeBlock.KANBUN;
    }

    /**
     * Tests whether the block is one of the Hangul blocks.
     *
     * @param unicodeBlock block to test
     * @return {@code true} for a Hangul block
     */
    public static boolean isKorean(Character.UnicodeBlock unicodeBlock) {
        return unicodeBlock == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                || unicodeBlock == Character.UnicodeBlock.HANGUL_JAMO
                || unicodeBlock == Character.UnicodeBlock.HANGUL_SYLLABLES
                || unicodeBlock == Character.UnicodeBlock.HANGUL_JAMO_EXTENDED_A
                || unicodeBlock == Character.UnicodeBlock.HANGUL_JAMO_EXTENDED_B;
    }

    /**
     * Tests whether the block is Hiragana, Katakana or Katakana Phonetic Extensions.
     *
     * @param unicodeBlock block to test
     * @return {@code true} for a kana block
     */
    public static boolean isJapanese(Character.UnicodeBlock unicodeBlock) {
        return unicodeBlock == Character.UnicodeBlock.HIRAGANA
                || unicodeBlock == Character.UnicodeBlock.KATAKANA
                || unicodeBlock == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS;
    }

    /**
     * GZIP-compresses the text using {@code FileUtility.DEFAULT_ENCODING}.
     *
     * @param str text to compress
     * @return compressed bytes
     * @throws IOException on encoding or compression failure
     */
    public static byte[] compress(String str) throws IOException {
        return compress(str, FileUtility.DEFAULT_ENCODING);
    }

    /**
     * GZIP-compresses the text using the given charset.
     *
     * @param str  text to compress
     * @param str2 charset name used to encode the text
     * @return compressed bytes
     * @throws IOException on unsupported charset or compression failure
     */
    public static byte[] compress(String str, String str2) throws IOException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (GZIPOutputStream gzip = new GZIPOutputStream(sink)) {
            gzip.write(str.getBytes(str2));
        }
        // The GZIP trailer is only written on close, so read the buffer afterwards.
        return sink.toByteArray();
    }

    /**
     * Inflates GZIP data and decodes it using {@code FileUtility.DEFAULT_ENCODING}.
     *
     * @param bArr compressed bytes
     * @return decoded text
     * @throws IOException on malformed data or unsupported charset
     */
    public static String uncompress(byte[] bArr) throws IOException {
        return uncompress(bArr, FileUtility.DEFAULT_ENCODING);
    }

    /**
     * Inflates GZIP data and decodes it using the given charset.
     *
     * @param bArr compressed bytes
     * @param str  charset name used to decode the inflated bytes
     * @return decoded text
     * @throws IOException on malformed data or unsupported charset
     */
    public static String uncompress(byte[] bArr, String str) throws IOException {
        try (GZIPInputStream gzip = new GZIPInputStream(new ByteArrayInputStream(bArr));
                ByteArrayOutputStream inflated = new ByteArrayOutputStream()) {
            byte[] buffer = new byte[ONEKB];
            int read;
            while ((read = gzip.read(buffer)) > 0) {
                inflated.write(buffer, 0, read);
            }
            return inflated.toString(str);
        }
    }

    /**
     * Replaces each run of characters from the Emoticons block with {@code {icon}}.
     *
     * @param str text to clean
     * @return cleaned text
     */
    public static String removeEmoticons(String str) {
        return SCRUB_EMOTICONS.matcher(str).replaceAll("{icon}");
    }

    /**
     * Replaces runs of pictographic symbols with placeholder tags: {@code {sym}},
     * {@code {sym2}}, {@code {asup}}, {@code {tile1}}, {@code {tile2}} and
     * {@code {card}}, depending on the Unicode block.
     *
     * @param str text to clean
     * @return cleaned text
     */
    public static String removeSymbols(String str) {
        String text = SCRUB_SYM.matcher(str).replaceAll("{sym}");
        text = SCRUB_SYM2.matcher(text).replaceAll("{sym2}");
        text = SCRUB_ALPHASUP.matcher(text).replaceAll("{asup}");
        text = SCRUB_TILES1.matcher(text).replaceAll("{tile1}");
        text = SCRUB_TILES2.matcher(text).replaceAll("{tile2}");
        text = SCRUB_SYM_MISC.matcher(text).replaceAll("{sym}");
        return SCRUB_PLAYCARDS.matcher(text).replaceAll("{card}");
    }

    /**
     * Counts characters that are neither letters, digits nor whitespace.
     *
     * @param str text to scan
     * @return number of such characters
     */
    public static int countNonText(String str) {
        int count = 0;
        for (char c : str.toCharArray()) {
            if (!Character.isLetter(c) && !Character.isDigit(c) && !Character.isWhitespace(c)) {
                count++;
            }
        }
        return count;
    }

    /**
     * Extracts hashtags, preserving their case.
     *
     * @param str text to scan
     * @return distinct hashtags, or {@code null} if none were found
     */
    public static Set<String> parseHashTags(String str) {
        return parseHashTags(str, false);
    }

    /**
     * Extracts hashtags matched by {@link #hashtagPattern1} and
     * {@link #hashtagPattern2}. Text that does not contain
     * {@code FileUtility.COMMENT_CHAR} is not scanned at all.
     *
     * @param str text to scan
     * @param z   {@code true} to lower-case each tag
     * @return distinct hashtags including the leading '#', or {@code null} if none
     *         were found
     */
    public static Set<String> parseHashTags(String str, boolean z) {
        if (!str.contains(FileUtility.COMMENT_CHAR)) {
            return null;
        }
        Set<String> tags = collectMatches(hashtagPattern1, str, z, null);
        return collectMatches(hashtagPattern2, str, z, tags);
    }

    /** Adds every match of the pattern to the set, creating the set on the first match. */
    private static Set<String> collectMatches(Pattern pattern, String text, boolean lowerCase, Set<String> found) {
        Set<String> result = found;
        Matcher matches = pattern.matcher(text);
        while (matches.find()) {
            if (result == null) {
                result = new HashSet<>();
            }
            String tag = matches.group();
            result.add(lowerCase ? tag.toLowerCase() : tag);
        }
        return result;
    }

    /**
     * Cleans text with every option of
     * {@link #parseNaturalLanguage(String, boolean, boolean, boolean, boolean)} enabled.
     *
     * @param str raw text
     * @return cleaned text
     */
    public static String parseNaturalLanguage(String str) {
        return parseNaturalLanguage(str, true, true, true, true);
    }

    /**
     * Cleans social-media style text. The ellipsis character is always replaced
     * with three periods and whitespace is always squeezed.
     *
     * @param str raw text
     * @param z   {@code true} to resolve HTML entities: {@code &amp;}, {@code &gt;}
     *            (to '}') and {@code &lt;} (to '{'), numeric entities via
     *            {@code StringEscapeUtils.unescapeHtml4}, and any remaining '&amp;'
     *            to {@code DMSOrdinate.POSITIVE}
     * @param z2  {@code true} to replace http/https URLs with a space
     * @param z3  {@code true} to replace {@code FileUtility.COMMENT_CHAR} with a space
     * @param z4  {@code true} to replace '@' with a space
     * @return cleaned text
     */
    public static String parseNaturalLanguage(String str, boolean z, boolean z2, boolean z3, boolean z4) {
        String text = str;
        if (z2) {
            text = urlHTTPPattern.matcher(text).replaceAll(" ");
        }
        if (z) {
            text = text.replace("&amp;", "&").replace("&gt;", "}").replace("&lt;", "{");
            if (text.contains("&#")) {
                text = StringEscapeUtils.unescapeHtml4(text);
            }
            if (text.contains("&")) {
                text = text.replace("&", DMSOrdinate.POSITIVE);
            }
        }
        if (z3) {
            text = text.replace(FileUtility.COMMENT_CHAR, " ");
        }
        if (z4) {
            text = text.replace("@", " ");
        }
        return squeeze_whitespace(text.replace("…", "..."));
    }

    /**
     * Parses a timestamp with Joda-Time's {@code Instant.parse}.
     *
     * @param str timestamp text, may be {@code null}
     * @return the parsed date, or {@code null} for {@code null}
     */
    public static final Date parseDate(String str) {
        if (str == null) {
            return null;
        }
        return Instant.parse(str).toDate();
    }

    static {
        try {
            initLOCLanguageData();
        } catch (Exception e) {
            throw new RuntimeException("Failed to load static resources", e);
        }
    }
}
