package org.cleartk.token.tokenizer;

import java.util.regex.Pattern;

/* loaded from: input_file:org/cleartk/token/tokenizer/Subtokenizer.class */
/**
 * Tokenizer that breaks text into fine-grained "subtokens".
 *
 * <p>Every maximal run of ASCII letters, every maximal run of ASCII digits, and every single
 * non-word character becomes its own piece; whitespace is then used only as a separator and
 * never appears in the output. For example, {@code "abc123, x"} yields
 * {@code ["abc", "123", ",", "x"]}.
 *
 * <p>Underscores are matched by none of the alternatives in {@link #subtokensRegex} (they are
 * word characters but neither letters nor digits), so a run of underscores stays together while
 * still being separated from adjacent letters, digits and punctuation:
 * {@code "foo_bar"} yields {@code ["foo", "_", "bar"]}.
 */
public class Subtokenizer extends Tokenizer_ImplBase {
    /** Regex whose single capture group matches one subtoken: a letter run, a digit run, or one non-word character. */
    public static String subtokensRegex = "([a-zA-Z]+|[0-9]+|\\W)";

    /**
     * Compiled form of {@link #subtokensRegex}.
     *
     * <p>NOTE(review): these public static fields are deliberately left non-final so that any
     * external code assigning to them keeps compiling; confirm whether they can be made final.
     * {@code Pattern.MULTILINE} only affects {@code ^} and {@code $}, which these patterns do not
     * use; the flag is kept to match the original compilation flags exactly.
     */
    public static Pattern subtokensPattern = Pattern.compile(subtokensRegex, Pattern.MULTILINE);

    /** Regex matching a run of one or more whitespace characters. */
    public static String multipleWhitespaceRegex = "(\\s+)";

    /** Compiled form of {@link #multipleWhitespaceRegex}. */
    public static Pattern multipleWhitespacePattern = Pattern.compile(multipleWhitespaceRegex, Pattern.MULTILINE);

    /**
     * Splits {@code str} into subtoken strings.
     *
     * @param str the text to tokenize; must not be {@code null} (a {@code null} argument results
     *            in a {@link NullPointerException} from the regex matcher)
     * @return the subtokens in order of appearance; an empty array when nothing remains after
     *         whitespace normalization and trimming (for example empty or whitespace-only input)
     */
    @Override // org.cleartk.token.tokenizer.Tokenizer_ImplBase
    public String[] getTokenTexts(String str) {
        // Surround every subtoken with spaces so adjacent pieces such as "abc123" get separated.
        String padded = subtokensPattern.matcher(str).replaceAll(" $1 ");

        // Collapse the padding (and any original whitespace) to single spaces and drop the ends.
        String normalized = multipleWhitespacePattern.matcher(padded).replaceAll(" ").trim();

        // "".split(" ") returns a one-element array containing the empty string, which would be
        // reported as a bogus empty token; return no tokens instead.
        if (normalized.isEmpty()) {
            return new String[0];
        }
        return normalized.split(" ");
    }
}
