package com.google.appengine.api.search.dev;

import com.google.appengine.repackaged.com.google.common.collect.ImmutableSet;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.text.Normalizer;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

/* loaded from: input_file:com/google/appengine/api/search/dev/WordSeparatorAnalyzer.class */
/**
 * Lucene {@link Analyzer} that splits text on a fixed set of separator characters
 * ({@link #WORD_SEPARATORS}) and normalizes each remaining character by stripping
 * combining diacritical marks and lower-casing it.
 *
 * <p>Also exposes static helpers that run the analyzer over a plain string.
 */
class WordSeparatorAnalyzer extends Analyzer {
    /** Characters that terminate a token; every other character is part of a word. */
    static final ImmutableSet<Character> WORD_SEPARATORS = ImmutableSet.of(
            '!', '\"', '%', '(', ')', '*', ',', '.', '/', ':', '=', '>', '?', '@', '[', '\\', ']',
            '^', '`', '{', '|', '}', '~', '\t', '\n', '\f', '\r', ' ', '&', '#', '$', ';', '_');

    /** Compiled once: {@link #removeDiacriticals} is invoked for every character that is tokenized. */
    private static final Pattern DIACRITICAL_MARKS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");

    /**
     * Tokenizer that treats every character outside {@link #WORD_SEPARATORS} as a token
     * character and normalizes it (diacritics removed, lower-cased).
     *
     * <p>NOTE(review): decompiler output indicates this class was originally declared private.
     */
    public class WordSeparatorTokenizer extends LetterTokenizer {
        public WordSeparatorTokenizer(Reader reader) {
            super(reader);
        }

        /**
         * Returns the lower-cased base form of {@code c}.
         *
         * <p>If {@code c} is itself a combining diacritical mark (e.g. U+0301 in decomposed
         * input), stripping leaves an empty string; in that case {@code c} is kept rather
         * than failing with a {@link StringIndexOutOfBoundsException}.
         */
        @Override // org.apache.lucene.analysis.CharTokenizer
        protected char normalize(char c) {
            String stripped = WordSeparatorAnalyzer.removeDiacriticals(Character.toString(c));
            char base = stripped.isEmpty() ? c : stripped.charAt(0);
            return Character.toLowerCase(base);
        }

        /** Returns {@code true} unless {@code c} is one of the {@link #WORD_SEPARATORS}. */
        @Override // org.apache.lucene.analysis.LetterTokenizer, org.apache.lucene.analysis.CharTokenizer
        protected boolean isTokenChar(char c) {
            return !WordSeparatorAnalyzer.WORD_SEPARATORS.contains(Character.valueOf(c));
        }
    }

    /**
     * Builds the token stream for {@code reader}: a {@link WordSeparatorTokenizer} wrapped in a
     * {@link StandardFilter}. The field name {@code str} is not used.
     */
    @Override // org.apache.lucene.analysis.Analyzer
    public TokenStream tokenStream(String str, Reader reader) {
        return new StandardFilter(new WordSeparatorTokenizer(reader));
    }

    /**
     * Tokenizes {@code str} with this analyzer.
     *
     * @param str text to tokenize
     * @return the normalized tokens in order of appearance, or an empty list if reading the
     *     token stream fails with an {@link IOException}
     */
    public static List<String> tokenList(String str) {
        TokenStream tokenStream = new WordSeparatorAnalyzer().tokenStream("", new StringReader(str));
        TermAttribute termAttribute = (TermAttribute) tokenStream.addAttribute(TermAttribute.class);
        List<String> tokens = new ArrayList<String>();
        try {
            // incrementToken() declares IOException, so the whole loop must sit inside the try.
            while (tokenStream.incrementToken()) {
                tokens.add(termAttribute.term());
            }
        } catch (IOException e) {
            // Best-effort contract kept from the original: discard partial results on failure.
            return new ArrayList<String>();
        }
        return tokens;
    }

    /**
     * Tokenizes {@code str} and joins the tokens with single spaces.
     *
     * @param str text to normalize
     * @return the space-separated normalized tokens; empty if there are no tokens
     */
    public static String normalize(String str) {
        StringBuilder joined = new StringBuilder();
        String separator = "";
        for (String token : tokenList(str)) {
            joined.append(separator).append(token);
            separator = " ";
        }
        return joined.toString();
    }

    /**
     * Decomposes {@code str} to Unicode NFD and removes every character in the
     * Combining Diacritical Marks block. The result is left in decomposed (NFD) form.
     *
     * @param str text to strip
     * @return {@code str} in NFD with combining diacritical marks removed
     */
    public static String removeDiacriticals(String str) {
        String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
        return DIACRITICAL_MARKS.matcher(decomposed).replaceAll("");
    }
}
