package org.dstadler.commoncrawl.index;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.dstadler.commoncrawl.Utils;
import org.dstadler.commons.http.HttpClientWrapper;
import org.dstadler.commons.logging.jdk.LoggerFactory;

/* loaded from: input_file:org/dstadler/commoncrawl/index/DownloadFromCommonCrawl.class */
/**
 * Command-line entry point that reads the file referenced by
 * {@link DownloadURLIndex#COMMON_CRAWL_FILE} line by line, parses every line
 * into a {@link CDXItem} and asks {@link Utils#downloadFileFromCommonCrawl}
 * to fetch the document it describes.
 */
public class DownloadFromCommonCrawl {
    private static final Logger log = LoggerFactory.make();

    /** Size of the {@link BufferedReader} input buffer, in characters. */
    private static final int READ_BUFFER_SIZE = 1024 * 1024;

    /**
     * Processes every line of the index file and logs progress for each one.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if reading the index file, parsing a line or
     *                   downloading a document fails; both resources are
     *                   closed before the exception propagates
     */
    public static void main(String[] strArr) throws Exception {
        LoggerFactory.initLogging();
        Utils.ensureDownloadDir();

        // Resources are closed in reverse declaration order: the reader first,
        // then the HTTP client wrapper. A failure while closing is attached as
        // a suppressed exception instead of masking the primary failure.
        // NOTE(review): the third constructor argument is presumably a timeout
        // in milliseconds (10 minutes) - confirm against HttpClientWrapper.
        try (HttpClientWrapper httpClientWrapper = new HttpClientWrapper("", (String) null, 600000);
             BufferedReader bufferedReader = new BufferedReader(
                     new FileReader(DownloadURLIndex.COMMON_CRAWL_FILE), READ_BUFFER_SIZE)) {
            int lineCount = 0;
            int downloadedCount = 0;
            // Characters consumed so far, counting one terminator per line. This
            // only approximates the byte offset (multi-byte characters and CRLF
            // line endings are not accounted for), which is enough for a
            // progress indicator.
            long charsRead = 0;

            String line;
            while ((line = bufferedReader.readLine()) != null) {
                // Convert to double BEFORE dividing: an integral division would
                // truncate to 0 for every line except the very end of the file.
                double percentDone =
                        ((double) charsRead / DownloadURLIndex.COMMON_CRAWL_FILE.length()) * 100.0d;

                log.info("Downloading line " + (lineCount + 1) + ": "
                        + String.format("%.4f", percentDone)
                        + "%, having " + downloadedCount + " downloaded: "
                        + StringUtils.abbreviate(line, 50));

                CDXItem item = CDXItem.parse(line);
                // A non-null result is counted as a successful download.
                if (Utils.downloadFileFromCommonCrawl(
                        httpClientWrapper.getHttpClient(), item.url, item.getDocumentLocation(), true) != null) {
                    downloadedCount++;
                }

                charsRead += line.length() + 1;
                lineCount++;
            }

            log.info("End of file " + DownloadURLIndex.COMMON_CRAWL_FILE + " reached after " + lineCount + " items");
        }
    }
}
