package org.allenai.nlpstack.tokenize;

import java.util.regex.Pattern;
import org.allenai.nlpstack.core.Token;
import org.allenai.nlpstack.core.Tokenizer;
import org.allenai.nlpstack.core.Tokenizer$;
import scala.Predef$;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.immutable.List;
import scala.collection.immutable.List$;

/* compiled from: PennTokenizer.scala */
/* loaded from: input_file:org/allenai/nlpstack/tokenize/PennTokenizer$.class */
/**
 * Singleton holder for the tokenizer declared in PennTokenizer.scala.
 *
 * <p>The instance owns a list of (pattern, replacement) rules. {@link #tokenize(String)}
 * folds that list over the input, trims the result, splits it on runs of whitespace and
 * hands the pieces, together with the original input, to
 * {@code Tokenizer$.MODULE$.computeOffsets}.
 */
public final class PennTokenizer$ implements Tokenizer {
    /** The single instance; assigned from the private constructor. */
    public static final PennTokenizer$ MODULE$ = null;

    /** Rewrite rules as (pattern, replacement) pairs, built once in the constructor. */
    private final List<Tuple2<Pattern, String>> replacements;

    static {
        // Instantiating the class is what populates MODULE$ (see the constructor).
        new PennTokenizer$();
    }

    /**
     * Delegates to the {@code apply} implementation supplied by {@code Tokenizer}.
     *
     * @param str the text to process
     * @return whatever the {@code Tokenizer} implementation returns for {@code str}
     */
    public Seq<Token> apply(String str) {
        return Tokenizer.class.apply(this, str);
    }

    /**
     * @return the rule list held by this instance
     */
    public List<Tuple2<Pattern, String>> replacements() {
        return this.replacements;
    }

    /**
     * Rewrites {@code str} with the rule list, then splits the result into whitespace-separated
     * pieces and computes tokens for them against the original text.
     *
     * @param str the text to tokenize
     * @return the tokens produced by {@code computeOffsets} for the split pieces
     */
    public Seq<Token> tokenize(String str) {
        // NOTE(review): foldRight starts from the LAST list element, so the rules are presumably
        // applied in the reverse of their declaration order -- confirm against
        // PennTokenizer$$anonfun$2, which is not visible here.
        String rewritten = (String) replacements().foldRight(str, new PennTokenizer$$anonfun$2());
        String[] pieces = rewritten.trim().split("\\s+");
        // The untouched input is passed alongside the pieces.
        return Tokenizer$.MODULE$.computeOffsets(Predef$.MODULE$.refArrayOps(pieces), str);
    }

    private PennTokenizer$() {
        MODULE$ = this;
        Tokenizer.class.$init$(this);

        // Raw (regex source, replacement) pairs. They are mapped through PennTokenizer$$anonfun$1
        // below; given the field type, that step presumably compiles each regex -- TODO confirm.
        Tuple2[] rawRules = new Tuple2[]{
            new Tuple2("^\"", "`` "),
            new Tuple2("\\.\\.\\.", " ... "),
            new Tuple2("[,;:@#$%&]", " $0 "),
            // NOTE(review): this pattern has only two capture groups, yet the replacement
            // refers to $3 -- looks like a mistranslated sed expression; verify before relying on it.
            new Tuple2("([^.]\\)\\([.])([])}>\"']*)[ \t]*$", "$1 $2$3 "),
            new Tuple2("[?!]", " $0 "),
            new Tuple2("[](){}<>]", " $0 "),
            new Tuple2("--", " $0 "),
            new Tuple2("$|^", " "),
            new Tuple2("\"", " '' "),
            new Tuple2(" ([^'])' ", " '$1 "),
            new Tuple2("'([sSmMdD]) ", " '$1 "),
            new Tuple2("'(ll|re|ve|LL|RE|VE) ", " '$1 "),
            new Tuple2("(n't|N'T) ", " $1 ")
        };

        this.replacements = (List) List$.MODULE$
                .apply(Predef$.MODULE$.wrapRefArray(rawRules))
                .map(new PennTokenizer$$anonfun$1(), List$.MODULE$.canBuildFrom());
    }
}
