package org.opensextant.extractors.langid;

import com.cybozu.labs.langdetect.Detector;
import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import java.io.File;
import java.lang.Character;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.opensextant.ConfigException;
import org.opensextant.data.Language;
import org.opensextant.util.TextUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/opensextant/extractors/langid/LangDetect.class */
/**
 * Language identification built on the Cybozu {@code langdetect} library
 * ({@link DetectorFactory} / {@link Detector}), with character-based fallbacks
 * for text the statistical detector cannot classify.
 *
 * <p>Constructing an instance loads the language profiles into the static
 * {@link DetectorFactory} (see {@link #initLangId()}).
 */
public class LangDetect {
    /** Working sizes strictly between 0 and this value select the {@code profiles.sm} profile set. */
    public static final int DEFAULT_WORKING_SIZE = 180;

    /** Fallback result for text made only of ASCII characters. */
    public static final Language LANGUAGE_ID_GROUP_ENGLISH = new Language("eng", TextUtils.englishLang, "English");

    /** Fallback result for text with a measurable share of CJK characters. */
    public static final Language LANGUAGE_ID_GROUP_CJK = new Language("cjk", "cjk", "Chinese/Japanese/Korean");

    /** Fallback result when no heuristic applies. */
    public static final Language LANGUAGE_ID_GROUP_UNKNOWN = new Language("unk", "unk", "Unknown");

    /** Maps regional codes reported by the detector (e.g. {@code zh-cn}) to the code used for lookup. */
    static final Map<String, String> lookupLanguage = new HashMap<String, String>();

    /**
     * Language codes whose detection is second-guessed by
     * {@link #detectSocialMediaLang(String, String, boolean)}. Only key membership
     * is consulted in this class; the mapped values are not read here.
     */
    static final Map<String, Integer> ignoredLanguage = new HashMap<String, Integer>();

    /** ASCII text shorter than this many characters is not passed to the detector. */
    public static final int MIN_LENGTH_UNK_TEXT_THRESHOLD = 16;

    /**
     * A top-ranked detection must have a probability strictly greater than this
     * value to be reported as confident; otherwise the result is flagged with a
     * {@code ~} prefix.
     */
    public static double MIN_LANG_DETECT_PROBABILITY = 0.6d;

    private final Logger log = LoggerFactory.getLogger(getClass());

    /** Profile location; {@code null} until resolved, in which case the bundled classpath profiles are used. */
    private String profilePath = null;

    /** Expected text size; -1 means "not set". Only read by {@link #initLangId()}. */
    private int workingSize = -1;

    /**
     * Creates a detector using the bundled full-size profiles.
     *
     * @throws ConfigException if the profiles cannot be located or loaded
     */
    public LangDetect() throws ConfigException {
        initLangId();
    }

    /**
     * Creates a detector using profiles from a file-system directory.
     *
     * @param profilePath directory holding the language profiles
     * @throws ConfigException if the profiles cannot be loaded
     */
    public LangDetect(String profilePath) throws ConfigException {
        this.profilePath = profilePath;
        initLangId();
    }

    /**
     * Creates a detector using bundled profiles chosen by working size.
     *
     * @param workingSize expected text size; see {@link #DEFAULT_WORKING_SIZE}
     * @throws ConfigException if the profiles cannot be located or loaded
     */
    public LangDetect(int workingSize) throws ConfigException {
        setWorkingSize(workingSize);
        initLangId();
    }

    /**
     * Creates a detector with both a working size and an explicit profile directory.
     *
     * @param workingSize expected text size; see {@link #DEFAULT_WORKING_SIZE}
     * @param profilePath directory holding the language profiles
     * @throws ConfigException if the profiles cannot be loaded
     */
    public LangDetect(int workingSize, String profilePath) throws ConfigException {
        this.profilePath = profilePath;
        setWorkingSize(workingSize);
        initLangId();
    }

    /**
     * Records the working size. The value is only consulted by
     * {@link #initLangId()}, so it has no effect on profiles already loaded.
     *
     * @param workingSize expected text size
     */
    public void setWorkingSize(int workingSize) {
        this.workingSize = workingSize;
    }

    /**
     * Resolves the profile directory and loads it into {@link DetectorFactory}.
     *
     * <p>When no profile path was supplied, the profiles are looked up on the
     * classpath: {@code /langdetect/profiles.sm} for a working size in
     * {@code (0, DEFAULT_WORKING_SIZE)}, otherwise {@code /langdetect/profiles}.
     * In the small-profile mode the factory seed is additionally fixed to 0.
     *
     * @throws ConfigException if the bundled folder is not on the classpath, or
     *                         if loading the profiles fails for any reason
     */
    public void initLangId() throws ConfigException {
        boolean smallProfiles = this.workingSize > 0 && this.workingSize < DEFAULT_WORKING_SIZE;
        boolean useBundledProfiles = this.profilePath == null;

        URL bundled = null;
        if (useBundledProfiles) {
            this.profilePath = smallProfiles ? "/langdetect/profiles.sm" : "/langdetect/profiles";
            bundled = LangDetect.class.getResource(this.profilePath);
            // Checked outside the try so this specific message reaches the caller
            // instead of being re-wrapped by the generic handler below.
            if (bundled == null) {
                throw new ConfigException("Failed to load profiles -- folder not in CLASSPATH");
            }
        }

        try {
            // toURI() decodes percent-escapes (e.g. %20), which URL.getPath() leaves in place.
            File profileDir = useBundledProfiles ? new File(bundled.toURI()) : new File(this.profilePath);
            DetectorFactory.loadProfile(profileDir);
            if (smallProfiles) {
                DetectorFactory.setSeed(0L);
            }
        } catch (Exception e) {
            throw new ConfigException("Failed to load profiles", e);
        }
    }

    /**
     * Runs the statistical detector and returns its best language code.
     *
     * @param text text to classify
     * @return the language code reported by the detector
     * @throws LangDetectException if the detector cannot process the text
     */
    public String detect(String text) throws LangDetectException {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        return detector.detect();
    }

    /**
     * Runs the statistical detector and returns every candidate with its probability.
     *
     * @param text             text to classify
     * @param withProbabilities not consulted by this implementation; it only
     *                         distinguishes this overload from {@link #detect(String)}
     * @return candidates keyed by language code; the entry matching the
     *         detector's best guess is flagged as primary
     * @throws LangDetectException if the detector cannot process the text
     */
    public Map<String, LangID> detect(String text, boolean withProbabilities) throws LangDetectException {
        Detector detector = DetectorFactory.create();
        detector.append(text);
        String best = detector.detect();

        Map<String, LangID> candidates = new HashMap<String, LangID>();
        for (com.cybozu.labs.langdetect.Language candidate : detector.getProbabilities()) {
            candidates.put(candidate.lang, new LangID(candidate.lang, candidate.prob, best.equals(candidate.lang)));
        }
        return candidates;
    }

    /**
     * Orders language candidates by the reverse of their natural ordering.
     *
     * @param map candidates keyed by language code
     * @return a new list, sorted ascending and then reversed
     */
    public static List<LangID> sort(Map<String, LangID> map) {
        List<LangID> ranked = new ArrayList<LangID>(map.values());
        Collections.sort(ranked);
        Collections.reverse(ranked);
        return ranked;
    }

    /**
     * Best-effort language guess for arbitrary text.
     *
     * <p>The detector's answer is returned whenever detection succeeds: as the
     * known {@link Language} if {@link TextUtils#getLanguage} recognises the
     * code, otherwise as a bare {@code Language} built from the code. Only when
     * detection throws does the character-based {@link #alternativeLangID(String)}
     * fallback apply.
     *
     * @param text text to classify; may be {@code null}
     * @return the guessed language, or {@code null} when {@code text} is {@code null}
     */
    public Language guessLanguage(String text) {
        if (text == null) {
            return null;
        }
        try {
            String code = detect(text);
            Language known = TextUtils.getLanguage(code);
            return known != null ? known : new Language(code, code);
        } catch (Exception e) {
            // Deliberately best-effort: detection failure is not an error for callers.
            this.log.debug("Language detection failed; using character-based fallback", e);
        }
        return alternativeLangID(text);
    }

    /**
     * Character-based fallback: English for pure ASCII, the CJK group when more
     * than 10% of the text measures as CJK, otherwise unknown.
     *
     * @param text text to classify; must not be {@code null}
     * @return one of the {@code LANGUAGE_ID_GROUP_*} constants
     */
    public static Language alternativeLangID(String text) {
        // Test the characters directly; converting through the platform default
        // charset can turn unmappable characters into '?' and look like ASCII.
        if (TextUtils.isASCII(text)) {
            return LANGUAGE_ID_GROUP_ENGLISH;
        }
        if (TextUtils.measureCJKText(text) > 0.1d) {
            return LANGUAGE_ID_GROUP_CJK;
        }
        return LANGUAGE_ID_GROUP_UNKNOWN;
    }

    /**
     * Scores Chinese, Japanese and Korean by counting characters per Unicode block.
     *
     * @param text text to inspect; must not be {@code null}
     * @return scores keyed by language code for each language with at least one
     *         character, or {@code null} when no CJK character was found.
     *         Chinese is flagged primary only when it is the sole language present.
     */
    public static Map<String, LangID> alternativeCJKLangID(String text) {
        int chinese = 0;
        int japanese = 0;
        int korean = 0;
        int nonWhitespace = 0;

        for (int idx = 0; idx < text.length(); idx++) {
            char c = text.charAt(idx);
            if (c <= ' ') {
                continue;
            }
            nonWhitespace++;
            if (c < 254) {
                continue;
            }
            Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
            // Order matters: a block is attributed to the first matching language.
            if (TextUtils.isJapanese(block)) {
                japanese++;
            } else if (TextUtils.isKorean(block)) {
                korean++;
            } else if (TextUtils.isChinese(block)) {
                chinese++;
            }
        }

        if (chinese == 0 && korean == 0 && japanese == 0) {
            return null;
        }

        Map<String, LangID> scores = new HashMap<String, LangID>();
        int cjkTotal = chinese + japanese + korean;
        if (japanese > 0) {
            scores.put(TextUtils.japaneseLang,
                    new LangID(TextUtils.japaneseLang, cjkRatio(nonWhitespace, cjkTotal, japanese), false));
        }
        if (korean > 0) {
            scores.put(TextUtils.koreanLang,
                    new LangID(TextUtils.koreanLang, cjkRatio(nonWhitespace, cjkTotal, korean), false));
        }
        if (chinese > 0) {
            scores.put(TextUtils.chineseLang,
                    new LangID(TextUtils.chineseLang, cjkRatio(nonWhitespace, cjkTotal, chinese),
                            japanese == 0 && korean == 0));
        }
        return scores;
    }

    /**
     * Averages two shares: the language's share of all CJK characters, and the
     * CJK share of all non-whitespace characters.
     *
     * @param total     non-whitespace character count; must be positive
     * @param cjkTotal  CJK character count; must be positive
     * @param langCount characters attributed to the language being scored
     * @return a score in {@code [0, 1]}
     */
    private static double cjkRatio(int total, int cjkTotal, int langCount) {
        // Cast before dividing: int/int would truncate each share to 0 or 1.
        double shareOfCjk = (double) langCount / cjkTotal;
        double cjkShareOfText = (double) cjkTotal / total;
        return (shareOfCjk + cjkShareOfText) / 2.0d;
    }

    /**
     * Same as {@link #detectSocialMediaLang(String, String, boolean)} with the
     * CJK preference switched off.
     *
     * @param lang language hint supplied with the text; may be {@code null}
     * @param text text to classify; must not be {@code null}
     * @return the resolved language
     */
    public Language detectSocialMediaLang(String lang, String text) {
        return detectSocialMediaLang(lang, text, false);
    }

    /**
     * Language identification tuned for short text that may arrive with a
     * language hint.
     *
     * <ol>
     *   <li>ASCII text shorter than {@link #MIN_LENGTH_UNK_TEXT_THRESHOLD} is not
     *       analysed: the hint is returned flagged with {@code ~}, or English
     *       when there is no hint.</li>
     *   <li>Otherwise the statistical detector runs. If {@code findCJK} is set
     *       and the text has CJK characters, a non-CJK answer is discarded.</li>
     *   <li>With no detector answer: English for ASCII text, else the best
     *       per-block CJK score, else the character-based group.</li>
     *   <li>Codes listed in {@code ignoredLanguage} are second-guessed using
     *       case, length and the hint.</li>
     *   <li>Anything else is returned as detected, flagged with {@code ~} when
     *       the probability did not exceed {@link #MIN_LANG_DETECT_PROBABILITY}.</li>
     * </ol>
     *
     * @param lang    language hint supplied with the text; may be {@code null}
     * @param text    text to classify; must not be {@code null}
     * @param findCJK when {@code true}, prefer a CJK answer for text containing CJK characters
     * @return the resolved language
     */
    public Language detectSocialMediaLang(String lang, String text, boolean findCJK) {
        int length = text.length();
        boolean isASCII = TextUtils.isASCII(text);

        if (length < MIN_LENGTH_UNK_TEXT_THRESHOLD && isASCII) {
            if (lang == null) {
                return LANGUAGE_ID_GROUP_ENGLISH;
            }
            // Too little content to analyse: trust the hint, marked as uncertain.
            this.log.debug("Insufficient content: {}\t{}", lang, text);
            return new Language(lang, String.format("~%s", lang));
        }

        boolean expectCJK = findCJK && TextUtils.hasCJKText(text);
        String code = null;
        boolean confident = false;
        try {
            Map<String, LangID> candidates = detect(text, true);
            if (candidates != null && candidates.size() > 0) {
                LangID top = sort(candidates).get(0);
                confident = top.probability > MIN_LANG_DETECT_PROBABILITY;
                code = top.langid;
                if (expectCJK && !TextUtils.isCJK(code)) {
                    code = null;
                    this.log.debug("Nullify langdetect result -- content has CJK chars, you are looking for CJK langid");
                }
            }
        } catch (Exception e) {
            this.log.debug("Failure in lang-id", e);
            code = null;
        }

        if (code == null) {
            if (isASCII) {
                return LANGUAGE_ID_GROUP_ENGLISH;
            }
            Map<String, LangID> cjkScores = alternativeCJKLangID(text);
            if (cjkScores != null) {
                List<LangID> ranked = sort(cjkScores);
                String topCode = ranked.get(0).langid;
                if (ranked.size() == 1) {
                    this.log.debug("One Lang: {}\t{}", topCode, text);
                } else {
                    this.log.debug("Multiple Lang: {}\t{}", topCode, text);
                }
                return TextUtils.getLanguage(topCode);
            }
            code = alternativeLangID(text).getCode();
        }
        if (code == null) {
            return LANGUAGE_ID_GROUP_UNKNOWN;
        }

        if (ignoredLanguage.containsKey(code)) {
            boolean upperCase = TextUtils.isUpperCaseDocument(TextUtils.measureCase(text));
            if (upperCase && isASCII) {
                return LANGUAGE_ID_GROUP_ENGLISH;
            }
            // Upper-case text needs more content before the detection is trusted.
            int minTrustedLength = upperCase ? 200 : 120;
            if (length >= minTrustedLength || lang == null) {
                if (lang == null && isASCII) {
                    return LANGUAGE_ID_GROUP_ENGLISH;
                }
                return TextUtils.getLanguage(code);
            }
            return isASCII ? LANGUAGE_ID_GROUP_ENGLISH : new Language(lang, lang);
        }

        this.log.debug("LangDetect Lang: {}\t{}", code, text);
        if (code.length() > 2) {
            String mapped = lookupLanguage.get(code);
            if (mapped != null) {
                code = mapped;
            }
        }
        if (!confident) {
            return new Language(code, String.format("~%s", code));
        }
        return TextUtils.getLanguage(code);
    }

    static {
        lookupLanguage.put("en-gb", TextUtils.englishLang);
        lookupLanguage.put("zh-cn", TextUtils.chineseLang);
        lookupLanguage.put("zh-tw", TextUtils.chineseLang);

        ignoredLanguage.put("cjk", 0);
        ignoredLanguage.put("unk", 0);
        ignoredLanguage.put("tl", -1);
        ignoredLanguage.put(TextUtils.romanianLang, -1);
        ignoredLanguage.put("ca", -1);
        ignoredLanguage.put(TextUtils.italianLang, -1);
        ignoredLanguage.put(TextUtils.frenchLang, -1);
        ignoredLanguage.put(TextUtils.spanishLang, -1);
        ignoredLanguage.put(TextUtils.germanLang, -1);
        ignoredLanguage.put("sv", -1);
        ignoredLanguage.put("da", -1);
        ignoredLanguage.put("~en", -1);
    }
}
