package org.unlaxer.jaddress.tokenizer;

import java.lang.Character;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/* loaded from: input_file:org/unlaxer/jaddress/tokenizer/StringTypeTokenizer.class */
/**
 * Splits a string on runs of ASCII spaces or ideographic spaces (U+3000) and then
 * cuts every resulting piece into maximal runs of characters that share the same
 * classification.
 *
 * <p>{@link #tokenize(String)} classifies characters by {@link CharacterType};
 * {@link #tokenizeRaw(String)} classifies them by their {@link Character.UnicodeBlock}.
 * Both walk the input by Unicode code point, so a surrogate pair is never split
 * across two tokens.
 */
public class StringTypeTokenizer {
    /**
     * Unicode blocks whose characters all map to a single {@link CharacterType}.
     * Filled exactly once when the class is initialized, so it is never observed as
     * {@code null} and is no longer rebuilt each time an instance is constructed.
     */
    static final Map<Character.UnicodeBlock, CharacterType> convertMap =
            new HashMap<Character.UnicodeBlock, CharacterType>();

    static {
        convertMap.put(Character.UnicodeBlock.HIRAGANA, CharacterType.hiragana);
        convertMap.put(Character.UnicodeBlock.KATAKANA, CharacterType.katakana);
        convertMap.put(Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, CharacterType.kanji);
        convertMap.put(Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, CharacterType.symbol);
        convertMap.put(Character.UnicodeBlock.GENERAL_PUNCTUATION, CharacterType.symbol);
        convertMap.put(Character.UnicodeBlock.MATHEMATICAL_OPERATORS, CharacterType.symbol);
    }

    /** Creates a tokenizer. The instance itself carries no state. */
    public StringTypeTokenizer() {
    }

    /**
     * Classifies one code point.
     *
     * <p>Blocks listed in {@link #convertMap} win. Otherwise ASCII and fullwidth
     * digits become {@code digit}, ASCII and fullwidth Latin letters become
     * {@code alphabet}, and every remaining code point becomes {@code symbol}.
     *
     * @param codePoint the Unicode code point to classify
     * @return the classification; never {@code null}
     */
    private CharacterType getCharacterType(int codePoint) {
        // of(...) may return null for a code point outside every defined block;
        // HashMap tolerates a null key lookup and simply reports "absent".
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        CharacterType mapped = convertMap.get(block);
        if (mapped != null) {
            return mapped;
        }
        if (block == Character.UnicodeBlock.BASIC_LATIN) {
            if (codePoint >= '0' && codePoint <= '9') {
                return CharacterType.digit;
            }
            if ((codePoint >= 'a' && codePoint <= 'z') || (codePoint >= 'A' && codePoint <= 'Z')) {
                return CharacterType.alphabet;
            }
        } else if (block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
            // U+FF10..U+FF19 are the fullwidth digits.
            if (codePoint >= 0xFF10 && codePoint <= 0xFF19) {
                return CharacterType.digit;
            }
            // U+FF41..U+FF5A and U+FF21..U+FF3A are the fullwidth Latin letters.
            if ((codePoint >= 0xFF41 && codePoint <= 0xFF5A) || (codePoint >= 0xFF21 && codePoint <= 0xFF3A)) {
                return CharacterType.alphabet;
            }
        }
        // Everything else is a symbol. This now includes '_', which the previous
        // regex-based check (\W) excluded and therefore rejected with an exception.
        return CharacterType.symbol;
    }

    /**
     * Tokenizes {@code str} into runs of equally classified characters.
     *
     * @param str the text to split; must not be {@code null}
     * @return the tokens in input order, each paired with the {@link CharacterType}
     *         of its characters; whitespace separators produce no token
     */
    public List<TokenWithCharacterKind> tokenize(String str) {
        List<TokenWithCharacterKind> tokens = new ArrayList<TokenWithCharacterKind>();
        for (String piece : str.split(" +|\u3000+")) {
            StringBuilder run = new StringBuilder();
            CharacterType runType = null;
            int index = 0;
            while (index < piece.length()) {
                int codePoint = piece.codePointAt(index);
                CharacterType type = getCharacterType(codePoint);
                // A change of classification closes the run collected so far.
                if (type != runType && run.length() > 0) {
                    tokens.add(new TokenWithCharacterKind(run.toString(), runType));
                    run.setLength(0);
                }
                run.appendCodePoint(codePoint);
                runType = type;
                index += Character.charCount(codePoint);
            }
            if (run.length() > 0) {
                tokens.add(new TokenWithCharacterKind(run.toString(), runType));
            }
        }
        return tokens;
    }

    /**
     * Tokenizes {@code str} into runs of characters that belong to the same
     * {@link Character.UnicodeBlock}.
     *
     * @param str the text to split; must not be {@code null}
     * @return the tokens in input order, each paired with the Unicode block of its
     *         characters; whitespace separators produce no token
     * @deprecated {@link #tokenize(String)} is the non-deprecated alternative in this class.
     */
    @Deprecated
    public List<TokenWithCharacterKind> tokenizeRaw(String str) {
        List<TokenWithCharacterKind> tokens = new ArrayList<TokenWithCharacterKind>();
        for (String piece : str.split(" +|\u3000+")) {
            StringBuilder run = new StringBuilder();
            Character.UnicodeBlock runBlock = null;
            int index = 0;
            while (index < piece.length()) {
                // Look up the block of the whole code point so that a supplementary
                // character is not reported as two separate surrogate tokens.
                int codePoint = piece.codePointAt(index);
                Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
                if (block != runBlock && run.length() > 0) {
                    tokens.add(new TokenWithCharacterKind(run.toString(), runBlock));
                    run.setLength(0);
                }
                run.appendCodePoint(codePoint);
                runBlock = block;
                index += Character.charCount(codePoint);
            }
            if (run.length() > 0) {
                tokens.add(new TokenWithCharacterKind(run.toString(), runBlock));
            }
        }
        return tokens;
    }
}
