package eu.monnetproject.tokenizer;

import eu.monnetproject.lang.Script;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/* loaded from: input_file:eu/monnetproject/tokenizer/FairlyGoodTokenizer.class */
/**
 * Regex-based tokenizer that separates punctuation from words and then splits
 * the text on Unicode separator characters.
 *
 * <p>Processing steps performed by {@link #split(String)}:
 * <ol>
 *   <li>runs of tab/newline-style whitespace are normalised to a single space;</li>
 *   <li>punctuation is padded with spaces so that it becomes its own token;</li>
 *   <li>remaining control/format characters are removed;</li>
 *   <li>leading and trailing separators are trimmed;</li>
 *   <li>the text is split on runs of separators.</li>
 * </ol>
 */
public class FairlyGoodTokenizer implements Tokenizer {
    /**
     * Matches the punctuation that must become a separate token:
     * <ul>
     *   <li>a run of three or more periods;</li>
     *   <li>any character of category Po, Ps, Pe, Pi or Pf, or one of the
     *       dashes U+2013/U+2014/U+2015, except the apostrophe and the period;</li>
     *   <li>a period that is not preceded by a period or by a period plus a
     *       letter, and that is followed by a separator, a closing/final-quote
     *       punctuation mark, or the end of the input;</li>
     *   <li>an apostrophe that has no letter directly before it and no letter
     *       directly after it.</li>
     * </ul>
     */
    private static final Pattern PUNCTUATION = Pattern.compile("(\\.\\.\\.+|[\\p{Po}\\p{Ps}\\p{Pe}\\p{Pi}\\p{Pf}–—―&&[^'\\.]]|(?<!(\\.|\\.\\p{L}))\\.(?=[\\p{Z}\\p{Pf}\\p{Pe}]|\\Z)|(?<!\\p{L})'(?!\\p{L}))");

    /**
     * Whitespace matched by the regex class {@code \s} (space, tab, newline,
     * vertical tab, form feed, carriage return). All but the space belong to
     * category Cc, so without normalisation they would be deleted by
     * {@link #CONTROL_CHARS} and the neighbouring words would be glued together.
     */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Characters of Unicode category C (control, format, ...), which are dropped. */
    private static final Pattern CONTROL_CHARS = Pattern.compile("\\p{C}");

    /** Separators at the very start or very end of the text. */
    private static final Pattern EDGE_SEPARATORS = Pattern.compile("^\\p{Z}+|\\p{Z}+$");

    /** Token delimiter: one or more Unicode separator characters. */
    private static final Pattern SEPARATORS = Pattern.compile("\\p{Z}+");

    /**
     * Splits a string into raw token strings.
     *
     * @param str the text to tokenize; must not be null
     * @return the tokens in order of appearance; an empty array when the input
     *         contains nothing but separators, whitespace and control characters
     */
    public static String[] split(String str) {
        // Turn tabs/newlines into plain spaces first so they act as separators
        // (and satisfy the separator look-ahead of the sentence-final period rule).
        String normalised = WHITESPACE.matcher(str).replaceAll(" ");
        String padded = PUNCTUATION.matcher(normalised).replaceAll(" $1 ");
        // Remove control characters BEFORE trimming: doing both in a single pass
        // leaves a leading separator behind when a control/format character
        // (for example a byte-order mark) precedes it, which yields an empty
        // first token.
        String withoutControls = CONTROL_CHARS.matcher(padded).replaceAll("");
        String trimmed = EDGE_SEPARATORS.matcher(withoutControls).replaceAll("");
        if (trimmed.isEmpty()) {
            // Splitting an empty string would return one empty token.
            return new String[0];
        }
        return SEPARATORS.split(trimmed);
    }

    /**
     * Tokenizes the text and wraps every token string in a {@link Token}.
     *
     * @param str the text to tokenize; must not be null
     * @return a new mutable list of tokens, empty when {@link #split(String)}
     *         finds no tokens
     */
    @Override // eu.monnetproject.tokenizer.Tokenizer
    public List<Token> tokenize(String str) {
        String[] parts = split(str);
        List<Token> tokens = new ArrayList<Token>(parts.length);
        for (String part : parts) {
            tokens.add(new Token(part));
        }
        return tokens;
    }

    /**
     * Reports the script this tokenizer is declared for.
     *
     * @return always {@link Script#LATIN}
     */
    @Override // eu.monnetproject.tokenizer.Tokenizer
    public Script getScript() {
        return Script.LATIN;
    }
}
