package org.dstadler.commoncrawl.index;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.dstadler.commoncrawl.Utils;
import org.dstadler.commons.http.HttpClientWrapper;
import org.dstadler.commons.logging.jdk.LoggerFactory;

/* loaded from: input_file:org/dstadler/commoncrawl/index/DownloadFromCommonCrawl.class */
/**
 * Command-line entry point that walks through {@link DownloadURLIndex#COMMON_CRAWL_FILE}
 * line by line, parses every line as a {@link CDXItem} and hands the item's URL and
 * document location to {@link Utils#downloadFileFromCommonCrawl}.
 *
 * <p>Progress (line number, approximate percentage of the index file consumed, number of
 * successful downloads and number of skipped over-long file names) is logged per line.
 */
public class DownloadFromCommonCrawl {
    private static final Logger log = LoggerFactory.make();

    /** Third constructor argument of the HTTP client wrapper; presumably a timeout in milliseconds - TODO confirm. */
    private static final int HTTP_TIMEOUT = 600000;

    /** Size of the read buffer used for the index file, in characters. */
    private static final int READ_BUFFER_SIZE = 1048576;

    /** Number of characters of the current line that are echoed in the progress log. */
    private static final int LOGGED_LINE_WIDTH = 50;

    /**
     * Message fragment of the {@link IOException} that is tolerated and merely counted.
     * The text is German for "The file name is too long"; presumably it originates from a
     * localized operating-system error - verify against the environments this runs on.
     */
    private static final String FILE_NAME_TOO_LONG = "Der Dateiname ist zu lang";

    /**
     * Processes the whole index file and attempts one download per line.
     *
     * @param strArr command-line arguments; not used
     * @throws Exception if the index file cannot be read, a line cannot be parsed, or a
     *                   download fails with anything other than the tolerated
     *                   "file name too long" {@link IOException}
     */
    public static void main(String[] strArr) throws Exception {
        LoggerFactory.initLogging();
        Utils.ensureDownloadDir();

        // Determined once up front: the value is only used for the progress display and
        // there is no need to query it again for every single line.
        final long totalSize = DownloadURLIndex.COMMON_CRAWL_FILE.length();

        // try-with-resources closes the reader first and the HTTP client second, on the
        // normal end-of-file path as well as when an exception aborts the loop.
        try (HttpClientWrapper httpClientWrapper = new HttpClientWrapper("", (String) null, HTTP_TIMEOUT);
             BufferedReader reader = new BufferedReader(new FileReader(DownloadURLIndex.COMMON_CRAWL_FILE), READ_BUFFER_SIZE)) {
            int lineCount = 0;
            int downloadedCount = 0;
            int tooLongCount = 0;
            long consumed = 0;

            String line;
            while ((line = reader.readLine()) != null) {
                // Multiply by a double first so the division is carried out in floating
                // point; a plain long/long division would truncate to 0 for the whole run.
                double percent = totalSize > 0 ? (100.0d * consumed) / totalSize : 0.0d;

                log.info("Downloading line " + (lineCount + 1) + ": " + String.format("%.4f", percent)
                        + "%, having " + downloadedCount + " downloaded: "
                        + StringUtils.abbreviate(line, LOGGED_LINE_WIDTH)
                        + (tooLongCount > 0 ? ", " + tooLongCount + " file-names too long" : ""));

                CDXItem item = CDXItem.parse(line);
                try {
                    if (Utils.downloadFileFromCommonCrawl(httpClientWrapper.getHttpClient(), item.url,
                            item.getDocumentLocation(), true) != null) {
                        downloadedCount++;
                    }
                } catch (IOException e) {
                    // getMessage() may be null; in that case this is not the tolerated
                    // error and the original exception has to surface unchanged.
                    String message = e.getMessage();
                    if (message == null || !message.contains(FILE_NAME_TOO_LONG)) {
                        throw e;
                    }
                    tooLongCount++;
                }

                // Approximation of the consumed size: characters of the line plus one
                // for the line terminator that readLine() strips.
                consumed += line.length() + 1;
                lineCount++;
            }

            log.info("End of file " + DownloadURLIndex.COMMON_CRAWL_FILE + " reached after " + lineCount + " items");
        }
    }
}
