package org.unlaxer.jaddress.util.normalize;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UncheckedIOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Set;
import java.util.stream.Collectors;

import io.vavr.Lazy;

/**
 * Lazily loaded set of the entries found in the first column of the
 * {@code /jyoyo2010.tsv} classpath resource.
 *
 * <p>The class name means "jōyō kanji" (kanji for common use); judging by the
 * file name the resource is presumably the 2010 jōyō kanji table — confirm
 * against the resource itself.
 *
 * <p>The file is read as UTF-8, one record per line. Lines starting with
 * {@code #} are treated as comments and skipped; for every other line the
 * text before the first tab character is taken as the entry.
 */
public class 常用漢字{

	/** Absolute classpath location of the TSV file that backs the set. */
	private static final String RESOURCE_PATH = "/jyoyo2010.tsv";

	/**
	 * Holder for the loaded set. The resource is not read when the class is
	 * initialized, only when the value is first requested through the
	 * {@link Lazy}.
	 */
	static final Lazy<Set<String>> kanjiSet = Lazy.of(常用漢字::load);

	/**
	 * Returns the set of first-column entries of the TSV resource.
	 *
	 * <p>The returned object is the cached instance held by {@link #kanjiSet},
	 * not a copy, so callers should treat it as read-only.
	 *
	 * @return the entries read from the resource
	 * @throws IllegalStateException if the resource is not on the classpath
	 * @throws UncheckedIOException if reading or closing the resource fails
	 */
	public static  Set<String> get(){
		return kanjiSet.get();
	}

	/**
	 * Reads the TSV resource and collects the first column of every
	 * non-comment line.
	 */
	private static Set<String> load() {
		InputStream resourceAsStream = WordReplacer.class.getResourceAsStream(RESOURCE_PATH);
		if (resourceAsStream == null) {
			// getResourceAsStream signals "not found" with null; fail with a
			// message that names the resource instead of an anonymous NPE.
			throw new IllegalStateException("classpath resource not found: " + RESOURCE_PATH);
		}
		// Closing the reader also closes the underlying resource stream.
		try (BufferedReader reader = new BufferedReader(
				new InputStreamReader(resourceAsStream, StandardCharsets.UTF_8))) {
			return reader.lines()
					.filter(line -> false == line.startsWith("#"))
					.map(常用漢字::firstColumn)
					// Blank lines and lines with an empty first column carry no entry.
					.filter(entry -> false == entry.isEmpty())
					.collect(Collectors.toSet());
		} catch (IOException e) {
			throw new UncheckedIOException(e);
		}
	}

	/**
	 * Returns the text before the first tab of {@code line}, or the whole line
	 * when it contains no tab. Unlike {@code split("\t")[0]} this cannot fail
	 * on a line that consists only of tab characters.
	 */
	private static String firstColumn(String line) {
		int tab = line.indexOf('\t');
		return tab < 0 ? line : line.substring(0, tab);
	}
}