package ai.platon.pulsar.boilerpipe.utils;

import java.util.Locale;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:ai/platon/pulsar/boilerpipe/utils/PageCategory.class */
/**
 * Coarse category of a web page, plus heuristics that guess the category from
 * a page URL and from two numeric page measurements.
 *
 * <p>Only {@link #INDEX}, {@link #DETAIL}, {@link #SEARCH}, {@link #MEDIA} and
 * {@link #UNKNOWN} are ever produced by the {@code sniff} methods of this enum;
 * the remaining constants are only available for callers to use.
 */
public enum PageCategory {
    /** Produced for short or directory-like URLs and URLs matching {@link #INDEX_PAGE_URL_PATTERNS}. */
    INDEX,
    /** Produced for URLs matching {@link #DETAIL_PAGE_URL_PATTERNS} or pages with a high density ratio. */
    DETAIL,
    /** Produced for URLs matching {@link #SEARCH_PAGE_URL_PATTERN}. */
    SEARCH,
    /** Produced for URLs matching {@link #MEDIA_PAGE_URL_PATTERN}. */
    MEDIA,
    /** Never produced by the heuristics in this enum. */
    BBS,
    /** Never produced by the heuristics in this enum. */
    TIEBA,
    /** Never produced by the heuristics in this enum. */
    BLOG,
    /** Fallback when no heuristic applies. */
    UNKNOWN;

    // NOTE(review): the fields below are public and mutable (non-final); they are left
    // that way so that existing code assigning to them keeps compiling.

    /**
     * Patterns that mark a lower-cased URL as an index page; each must match the whole URL.
     *
     * <p>NOTE(review): the dots in the host name are unescaped (match any character) and
     * "chanel" is not a substring of "channel" - confirm both are intended.
     */
    public static Pattern[] INDEX_PAGE_URL_PATTERNS = {Pattern.compile(".+tieba.baidu.com/.+search.+"), Pattern.compile(".+(index|list|tags|chanel).+")};

    /** Pattern that marks a lower-cased URL as a search page; must match the whole URL. */
    public static Pattern SEARCH_PAGE_URL_PATTERN = Pattern.compile(".+(search|query|select).+");

    /**
     * Patterns that mark a lower-cased URL as a detail page; each must match the whole URL.
     *
     * <p>NOTE(review): the second date alternative ({@code /20[012]-[0-9]{0,1}-[01][0-9]/})
     * looks unusual for a date path segment - confirm it is intended.
     */
    public static Pattern[] DETAIL_PAGE_URL_PATTERNS = {Pattern.compile(".+tieba.baidu.com/p/(\\d+)"), Pattern.compile(".+(detail|item|article|book|good|product|thread|view|post|content|/20[012][0-9]/{0,1}[01][0-9]/|/20[012]-[0-9]{0,1}-[01][0-9]/|/\\d{2,}/\\d{5,}|\\d{7,}).+")};

    /** Pattern that marks a lower-cased URL as a media page; must match the whole URL. */
    public static Pattern MEDIA_PAGE_URL_PATTERN = Pattern.compile(".+(pic|picture|photo|avatar|photoshow|video).+");

    /** File suffixes listed as media resources; not referenced by the heuristics in this enum. */
    public static final String[] MEDIA_URL_SUFFIXES = {"js", "css", "jpg", "png", "jpeg", "gif"};

    /**
     * Tells whether this category is the given one.
     *
     * @param pageCategory the category to compare with, may be {@code null}
     * @return {@code true} if {@code pageCategory} is this constant
     */
    public boolean is(PageCategory pageCategory) {
        return pageCategory == this;
    }

    /** @return {@code true} if this is {@link #INDEX} */
    public boolean isIndex() {
        return this == INDEX;
    }

    /** @return {@code true} if this is {@link #DETAIL} */
    public boolean isDetail() {
        return this == DETAIL;
    }

    /** @return {@code true} if this is {@link #SEARCH} */
    public boolean isSearch() {
        return this == SEARCH;
    }

    /** @return {@code true} if this is {@link #MEDIA} */
    public boolean isMedia() {
        return this == MEDIA;
    }

    /** @return {@code true} if this is {@link #BBS} */
    public boolean isBBS() {
        return this == BBS;
    }

    /** @return {@code true} if this is {@link #TIEBA} */
    public boolean isTieBa() {
        return this == TIEBA;
    }

    /** @return {@code true} if this is {@link #BLOG} */
    public boolean isBlog() {
        return this == BLOG;
    }

    /** @return {@code true} if this is {@link #UNKNOWN} */
    public boolean isUnknown() {
        return this == UNKNOWN;
    }

    /**
     * Guesses the category from the URL first and from two page measurements second.
     *
     * <p>A URL-based {@link #DETAIL} verdict always wins. Otherwise, when {@code i} is at
     * least 100 the verdict comes solely from the ratio {@code i / i2} (the URL-based guess
     * is discarded); below that, more than 30 for {@code i2} forces {@link #INDEX}, and
     * anything else keeps the URL-based guess.
     *
     * @param str the page URL; {@code null} or empty yields {@link #UNKNOWN}
     * @param i   numerator of the density ratio - presumably the amount of text on the
     *            page; TODO confirm with callers
     * @param i2  denominator of the density ratio - presumably the number of links on the
     *            page; TODO confirm with callers
     * @return the guessed category, never {@code null}
     */
    public static PageCategory sniff(String str, int i, int i2) {
        // Null-safe, consistent with sniff(String).
        if (str == null || str.isEmpty()) {
            return UNKNOWN;
        }
        final PageCategory byUrl = sniff(str);
        if (byUrl.isDetail()) {
            return byUrl;
        }
        if (i >= 100) {
            return sniffByTextDensity(i, i2);
        }
        return i2 > 30 ? INDEX : byUrl;
    }

    /**
     * Classifies a page by the ratio {@code d / d2}.
     *
     * @param d  numerator of the ratio
     * @param d2 denominator of the ratio; values below 1 are treated as 1
     * @return {@link #INDEX} when {@code d2 > 60} and the ratio is below 20,
     *         {@link #DETAIL} when the ratio is above 30, otherwise {@link #UNKNOWN}
     */
    private static PageCategory sniffByTextDensity(double d, double d2) {
        // Clamp so the division below can never be by zero or by a negative number.
        final double divisor = Math.max(d2, 1.0d);
        final double ratio = d / divisor;
        if (divisor > 60.0d && ratio < 20.0d) {
            return INDEX;
        }
        if (ratio > 30.0d) {
            return DETAIL;
        }
        return UNKNOWN;
    }

    /**
     * Guesses the category from the URL alone.
     *
     * <p>The URL is lower-cased and checked in this order: trailing {@code '/'} or at most
     * three {@code '/'} characters give {@link #INDEX}; then the index, detail, search and
     * media patterns are tried, the first match deciding the result.
     *
     * @param str the page URL; {@code null} or empty yields {@link #UNKNOWN}
     * @return the guessed category, never {@code null}
     */
    public static PageCategory sniff(String str) {
        if (str == null || str.isEmpty()) {
            return UNKNOWN;
        }
        // Locale.ROOT keeps the result independent of the JVM default locale; with e.g. a
        // Turkish default, "INDEX" would otherwise become "ındex" and miss the patterns.
        final String url = str.toLowerCase(Locale.ROOT);
        if (url.endsWith("/") || countSlashes(url) <= 3) {
            return INDEX;
        }
        if (matchesAny(INDEX_PAGE_URL_PATTERNS, url)) {
            return INDEX;
        }
        if (matchesAny(DETAIL_PAGE_URL_PATTERNS, url)) {
            return DETAIL;
        }
        if (SEARCH_PAGE_URL_PATTERN.matcher(url).matches()) {
            return SEARCH;
        }
        if (MEDIA_PAGE_URL_PATTERN.matcher(url).matches()) {
            return MEDIA;
        }
        return UNKNOWN;
    }

    /** Returns {@code true} if at least one of the patterns matches the whole URL. */
    private static boolean matchesAny(Pattern[] patterns, String url) {
        return Stream.of(patterns).anyMatch(pattern -> pattern.matcher(url).matches());
    }

    /** Counts the {@code '/'} characters in the URL. */
    private static int countSlashes(String url) {
        int count = 0;
        for (int index = 0; index < url.length(); index++) {
            if (url.charAt(index) == '/') {
                count++;
            }
        }
        return count;
    }
}
