package ai.platon.pulsar.boilerpipe.utils;

import java.util.regex.Pattern;

/* loaded from: input_file:ai/platon/pulsar/boilerpipe/utils/UnicodeTokenizer.class */
/**
 * Splits text into tokens at regex word boundaries while keeping a fixed set
 * of punctuation characters attached to the neighbouring word.
 *
 * <p>The algorithm works by inserting a marker character (U+2063, INVISIBLE
 * SEPARATOR) at every word boundary, removing the markers that surround the
 * punctuation characters listed in {@link #PAT_NOT_WORD_BOUNDARY}, and finally
 * splitting on the remaining markers and spaces.
 */
public class UnicodeTokenizer {
    /** Marker inserted at every word boundary (U+2063, INVISIBLE SEPARATOR). */
    private static final String BOUNDARY_MARK = "\u2063";

    /**
     * Matches every word boundary. {@code UNICODE_CHARACTER_CLASS} is required
     * so that non-ASCII letters count as word characters on every JDK: since
     * JDK 19 a plain {@code \b} only recognises ASCII word characters, which
     * would split a word such as "caf&eacute;" in the middle.
     */
    private static final Pattern PAT_WORD_BOUNDARY = Pattern.compile("\\b", Pattern.UNICODE_CHARACTER_CLASS);

    /** Matches one "glue" punctuation character together with any markers directly around it. */
    private static final Pattern PAT_NOT_WORD_BOUNDARY = Pattern.compile("[\u2063]*([\\\"'\\.,\\!\\@\\-\\:\\;\\$\\?\\(\\)/])[\u2063]*");

    /** Matches a run of spaces and/or boundary markers; compiled once instead of on every call. */
    private static final Pattern PAT_SEPARATOR_RUN = Pattern.compile("[ \u2063]+");

    /** Matches a run of spaces; compiled once instead of on every call. */
    private static final Pattern PAT_SPACES = Pattern.compile("[ ]+");

    /**
     * Tokenizes the given text.
     *
     * <p>Only the space character and word boundaries separate tokens; the
     * punctuation characters {@code " ' . , ! @ - : ; $ ? ( ) /} do not start a
     * new token, so {@code "Hello, world!"} yields {@code ["Hello,", "world!"]}
     * and {@code "foo-bar"} stays a single token.
     *
     * @param charSequence the text to tokenize
     * @return the tokens in order of appearance; for empty input or input
     *         consisting only of spaces this is a one-element array holding
     *         the empty string (the result of splitting an empty string)
     * @throws NullPointerException if {@code charSequence} is {@code null}
     */
    public static String[] tokenize(CharSequence charSequence) {
        // 1. Put a marker at every word boundary.
        String marked = PAT_WORD_BOUNDARY.matcher(charSequence).replaceAll(BOUNDARY_MARK);
        // 2. Drop the markers around glue punctuation so it stays attached to its neighbours.
        String glued = PAT_NOT_WORD_BOUNDARY.matcher(marked).replaceAll("$1");
        // 3. Collapse the remaining markers and spaces into single spaces and strip the ends.
        String normalized = PAT_SEPARATOR_RUN.matcher(glued).replaceAll(" ").trim();
        // 4. Split on spaces.
        return PAT_SPACES.split(normalized);
    }
}
