package nlp4j.wiki;

import com.google.gson.Gson;
import com.google.gson.JsonObject;
import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.Normalizer;
import java.util.zip.GZIPInputStream;
import org.apache.commons.io.FileUtils;

/* loaded from: input_file:nlp4j/wiki/CirrussearchReader.class */
/**
 * Command-line tool that scans a gzip-compressed, line-oriented JSON dump and
 * appends the NFKC-normalized {@code "text"} field of every document that
 * contains the keyword {@code IBM} to a plain-text output file.
 *
 * <p>Only every second line (2nd, 4th, ...) is parsed as a document; the
 * odd-numbered lines are skipped. NOTE(review): presumably the dump alternates
 * bulk-index metadata lines with document lines - confirm against the dump
 * format.
 */
public class CirrussearchReader {
    /** Dump file read when no first command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH =
            "C:/usr/local/data/wiki/20220501/jawiki-20220502-cirrussearch-content.json.gz";

    /** Output file appended to when no second command-line argument is supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "/usr/local/data/wiki/wiki_ibm_temp.txt";

    /** Substring a normalized text must contain to be written to the output. */
    private static final String KEYWORD = "IBM";

    /**
     * Runs the extraction. Any failure is reported with a stack trace on
     * standard error and ends the run; nothing is rethrown.
     *
     * @param strArr optional arguments: {@code [0]} path of the gzip-compressed
     *               dump, {@code [1]} path of the output text file. Missing
     *               arguments fall back to the built-in default paths.
     */
    public static void main(String[] strArr) {
        String inputPath = (strArr != null && strArr.length > 0) ? strArr[0] : DEFAULT_INPUT_PATH;
        String outputPath = (strArr != null && strArr.length > 1) ? strArr[1] : DEFAULT_OUTPUT_PATH;
        try {
            extractMatchingTexts(Paths.get(inputPath), new File(outputPath));
        } catch (Exception e) {
            // Top-level boundary of a command-line tool: report and stop.
            e.printStackTrace();
        }
    }

    /**
     * Streams the dump line by line and appends each matching normalized text,
     * followed by a newline, to {@code output}. Prints one {@code .} to
     * standard error per match as a progress indicator.
     *
     * @param input  gzip-compressed, UTF-8 encoded dump with one JSON value per line
     * @param output file that matching texts are appended to (UTF-8)
     * @throws java.io.IOException if the dump cannot be read or the output cannot be written
     */
    private static void extractMatchingTexts(Path input, File output) throws java.io.IOException {
        Gson gson = new Gson();
        // try-with-resources closes all four layers, innermost first, on every
        // exit path - including a failure while constructing a later layer.
        try (InputStream rawStream = Files.newInputStream(input);
                GZIPInputStream gzipStream = new GZIPInputStream(rawStream);
                InputStreamReader decoder = new InputStreamReader(gzipStream, StandardCharsets.UTF_8);
                BufferedReader reader = new BufferedReader(decoder)) {
            long lineNumber = 0; // long so the parity test cannot be affected by overflow
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (lineNumber % 2 == 1) {
                    continue; // odd-numbered lines are not parsed as documents
                }
                String text = normalizedText(gson, line);
                if (text != null && text.contains(KEYWORD)) {
                    System.err.print(".");
                    FileUtils.write(output, text + "\n", "UTF-8", true);
                }
            }
        }
    }

    /**
     * Parses one JSON line and returns its {@code "text"} member normalized to
     * Unicode form NFKC.
     *
     * @param gson     parser to use
     * @param jsonLine a single line expected to hold one JSON object
     * @return the normalized text, or {@code null} if the line is blank or the
     *         {@code "text"} member is absent or JSON null
     */
    private static String normalizedText(Gson gson, String jsonLine) {
        JsonObject document = gson.fromJson(jsonLine, JsonObject.class);
        // fromJson yields null for an empty line; treat it like a document without text.
        if (document == null || !document.has("text") || document.get("text").isJsonNull()) {
            return null;
        }
        return Normalizer.normalize(document.get("text").getAsString(), Normalizer.Form.NFKC);
    }
}
