package de.mrapp.textmining.util.tokenizer;

import de.mrapp.util.Condition;
import java.util.Arrays;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jetbrains.annotations.NotNull;

/* loaded from: input_file:de/mrapp/textmining/util/tokenizer/RegexTokenizer.class */
/**
 * A tokenizer that splits a text at every match of a regular expression. The pieces of text
 * located between the matches (including the piece before the first match and the piece after
 * the last one) are reported as {@link Substring} tokens.
 */
public class RegexTokenizer extends AbstractTokenizer<Substring> {

    /** The compiled regular expression whose matches separate the tokens. */
    private final Pattern pattern;

    /**
     * Creates a tokenizer that splits texts at the matches of the given regular expression.
     *
     * @param str the regular expression, validated with {@code Condition.ensureNotNull} and
     *            {@code Condition.ensureNotEmpty} before it is compiled
     * @throws java.util.regex.PatternSyntaxException if the expression cannot be compiled
     */
    public RegexTokenizer(@NotNull String str) {
        Condition.ensureNotNull(str, "The regular expression may not be null");
        Condition.ensureNotEmpty(str, "The regular expression may not be empty");
        this.pattern = Pattern.compile(str);
    }

    /**
     * Creates a tokenizer that splits texts at the matches of an already compiled pattern.
     *
     * @param pattern the pattern to split by, validated with {@code Condition.ensureNotNull}
     */
    public RegexTokenizer(@NotNull Pattern pattern) {
        Condition.ensureNotNull(pattern, "The regular expression may not be null");
        this.pattern = pattern;
    }

    /**
     * Creates a tokenizer that splits texts at runs of one or more whitespace characters
     * (regular expression {@code \s+}).
     *
     * @return the new tokenizer
     */
    @NotNull
    public static RegexTokenizer splitByWhitespace() {
        return new RegexTokenizer("\\s+");
    }

    /**
     * Creates a tokenizer that splits texts at any of the given delimiters. Delegates to
     * {@link #splitByDelimiters(Iterable)} after wrapping the array in a list.
     *
     * @param strArr the delimiters, validated with {@code Condition.ensureNotNull}
     * @return the new tokenizer
     */
    @NotNull
    public static RegexTokenizer splitByDelimiters(@NotNull String... strArr) {
        Condition.ensureNotNull(strArr, "The delimiters may not be null");
        return splitByDelimiters(Arrays.asList(strArr));
    }

    /**
     * Creates a tokenizer that splits texts at any of the given delimiters. The delimiters are
     * joined with {@code |} into a single regular expression.
     *
     * <p>The delimiters are inserted into the expression as they are, i.e. without escaping, so
     * characters such as {@code .}, {@code |} or {@code (} keep their regular expression
     * meaning. Callers that want such a delimiter to be matched literally have to escape it,
     * e.g. with {@link Pattern#quote(String)}.
     * NOTE(review): confirm whether literal matching was the intended contract; quoting inside
     * this method would change the behavior for callers that pass regular expression fragments.
     *
     * @param iterable the delimiters; the iterable itself and each element are validated with
     *                 {@code Condition.ensureNotNull}, each element additionally with
     *                 {@code Condition.ensureNotEmpty}, and at least one element is required
     * @return the new tokenizer
     * @throws java.util.regex.PatternSyntaxException if the joined expression cannot be compiled
     */
    @NotNull
    public static RegexTokenizer splitByDelimiters(@NotNull Iterable<? extends CharSequence> iterable) {
        // Validate the argument itself first, consistent with the varargs overload, instead of
        // failing with a bare exception on the iterator() call below.
        Condition.ensureNotNull(iterable, "The delimiters may not be null");
        Condition.ensureTrue(iterable.iterator().hasNext(), "At least one delimiter must be given");
        StringBuilder alternation = new StringBuilder();
        for (CharSequence delimiter : iterable) {
            Condition.ensureNotNull(delimiter, "Delimiters may not be null");
            Condition.ensureNotEmpty(delimiter, "Delimiters may not be empty");
            // Separate the alternatives with '|', but do not put one in front of the first.
            if (alternation.length() > 0) {
                alternation.append("|");
            }
            alternation.append(delimiter);
        }
        return new RegexTokenizer(alternation.toString());
    }

    /**
     * Returns the pattern whose matches are used to split texts.
     *
     * @return the compiled pattern
     */
    @NotNull
    public final Pattern getPattern() {
        return this.pattern;
    }

    /**
     * Splits the given text at every match of the pattern and passes each piece to
     * {@code addToken}, together with the index at which the piece starts in the text.
     *
     * <p>A piece is passed on even if it is empty, which happens when the text starts or ends
     * with a match or when two matches are adjacent. Whether {@code addToken} keeps such empty
     * pieces is not visible here.
     *
     * @param str the text to tokenize
     * @param map the map that is handed to {@code addToken} for every piece
     */
    @Override // de.mrapp.textmining.util.tokenizer.AbstractTokenizer
    protected final void onTokenize(@NotNull String str, @NotNull Map<String, Substring> map) {
        Matcher matcher = this.pattern.matcher(str);
        // Index of the first character that follows the previous match (0 before any match).
        int tokenStart = 0;
        while (matcher.find()) {
            String token = str.substring(tokenStart, matcher.start());
            addToken(map, token, tokenStart, (text, position) -> new Substring(text, position));
            tokenStart = matcher.end();
        }
        // Whatever follows the last match (or the whole text, if nothing matched).
        String remainder = str.substring(tokenStart);
        addToken(map, remainder, tokenStart, (text, position) -> new Substring(text, position));
    }
}
