package org.archive.crawler.util;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.archive.crawler.frontier.RecoveryJournal;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/util/RecoveryLogMapper.class */
/**
 * Reads a crawler recovery journal and builds three in-memory indexes from it:
 * <ul>
 *   <li>seed URL -> set of URLs discovered (directly or transitively) from that seed,</li>
 *   <li>discovered URL -> the seed it was attributed to,</li>
 *   <li>the set of URLs the journal reports as successfully crawled.</li>
 * </ul>
 * Only journal lines whose first character matches the first character of
 * {@code RecoveryJournal.F_ADD} are inspected, and lines whose URL starts with
 * {@code "dns:"} are ignored.
 *
 * <p>The journal is read once, from the constructor. Note that the constructors call
 * the overridable {@link #load(String)} method.
 */
public class RecoveryLogMapper {
    /** First character of the journal line prefixes this mapper looks at. */
    private static final char LOG_LINE_START_CHAR = RecoveryJournal.F_ADD.charAt(0);
    private static final Logger logger = Logger.getLogger(RecoveryLogMapper.class.getName());

    /**
     * Optional sink for "seed not found" messages. When {@code null}, such a condition
     * aborts loading with a {@link SeedUrlNotFoundException} instead of being recorded.
     */
    private final PrintWriter seedNotFoundPrintWriter;
    /** Maps a discovered URL to the URL recorded as its seed. */
    private final Map<String, String> crawledUrlToSeedMap = new HashMap<String, String>();
    /** Maps a seed URL to every URL attributed to it. */
    private final Map<String, Set<String>> seedUrlToDiscoveredUrlsMap = new HashMap<String, Set<String>>();
    /** URLs that appeared on an {@code F_SUCCESS} line. */
    private final Set<String> successfullyCrawledUrls = new HashSet<String>();

    /**
     * Iterates over the URLs discovered from one seed, yielding only those that are
     * also in the successfully-crawled set. Removal is not supported.
     */
    public class SuccessfullyCrawledURLsIterator implements Iterator<String> {
        /** Look-ahead slot; {@code null} means "nothing buffered yet". */
        private String nextValue = null;
        private final Iterator<?> discoveredUrlsIterator;

        /**
         * @param str the seed URL whose discovered URLs should be iterated
         * @throws SeedUrlNotFoundException if {@code str} is not a known seed
         */
        public SuccessfullyCrawledURLsIterator(String str) throws SeedUrlNotFoundException {
            Set<?> discovered = (Set<?>) RecoveryLogMapper.this.getSeedUrlToDiscoveredUrlsMap().get(str);
            if (discovered == null) {
                throw new SeedUrlNotFoundException("Seed URL " + str + "  not found in seed list");
            }
            this.discoveredUrlsIterator = discovered.iterator();
        }

        /**
         * Advances the underlying iterator until a successfully crawled URL has been
         * buffered in {@code nextValue} or the discovered URLs are exhausted. Does
         * nothing if a value is already buffered.
         */
        private void populateNextValue() {
            while (this.nextValue == null && this.discoveredUrlsIterator.hasNext()) {
                String candidate = (String) this.discoveredUrlsIterator.next();
                boolean crawled = RecoveryLogMapper.this.getSuccessfullyCrawledUrls().contains(candidate);
                if (RecoveryLogMapper.getLogger().isLoggable(Level.FINE)) {
                    RecoveryLogMapper.getLogger().fine("populateNextValue: curDiscoveredUrl=" + candidate + ", succCrawled=" + crawled);
                }
                if (crawled) {
                    this.nextValue = candidate;
                }
            }
        }

        /** @return {@code true} if another successfully crawled URL is available */
        @Override
        public boolean hasNext() {
            populateNextValue();
            return this.nextValue != null;
        }

        /**
         * @return the next successfully crawled URL
         * @throws NoSuchElementException if no further URL is available
         */
        @Override
        public String next() {
            populateNextValue();
            if (this.nextValue == null) {
                // Honor the Iterator contract instead of handing callers a null.
                throw new NoSuchElementException("No more successfully crawled URLs for this seed");
            }
            String result = this.nextValue;
            this.nextValue = null;
            return result;
        }

        /** Always throws; this iterator is read-only. */
        @Override
        public void remove() {
            throw new UnsupportedOperationException("SuccessfullyCrawledURLsIterator.remove: not supported.");
        }
    }

    /**
     * Loads the given recovery journal. A discovered URL whose seed cannot be resolved
     * aborts loading with a {@link SeedUrlNotFoundException}.
     *
     * @param str path of the recovery journal file
     */
    public RecoveryLogMapper(String str) throws FileNotFoundException, IOException, SeedUrlNotFoundException {
        this.seedNotFoundPrintWriter = null;
        load(str);
    }

    /**
     * Loads the given recovery journal, writing a message to {@code str2} for each
     * discovered URL whose seed cannot be resolved instead of aborting.
     *
     * @param str  path of the recovery journal file
     * @param str2 path of the file that receives "seed not found" messages
     */
    public RecoveryLogMapper(String str, String str2) throws FileNotFoundException, IOException, SeedUrlNotFoundException {
        this.seedNotFoundPrintWriter = new PrintWriter(new FileOutputStream(str2));
        load(str);
    }

    /**
     * Reads the journal line by line and populates the indexes. The journal reader and
     * the optional "seed not found" writer are closed when this method returns,
     * whether normally or by exception.
     *
     * @param str path of the recovery journal file
     * @throws SeedUrlNotFoundException if a discovered URL has no resolvable seed and
     *         no "seed not found" writer was configured
     */
    protected void load(String str) throws FileNotFoundException, IOException, SeedUrlNotFoundException {
        LineNumberReader reader = null;
        boolean finished = false;
        try {
            reader = new LineNumberReader(RecoveryJournal.getBufferedReader(new File(str)));
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() == 0 || line.charAt(0) != LOG_LINE_START_CHAR) {
                    continue;
                }
                String[] words = line.split("\\s+");
                if (words.length < 2) {
                    // A prefix with no URL would otherwise fail with an index error below.
                    logger.warning("recover log " + str + " at line " + reader.getLineNumber()
                            + " has no URL; skipping: " + line);
                    continue;
                }
                String url = words[1];
                if (url.startsWith("dns:")) {
                    continue;
                }
                if (line.startsWith(RecoveryJournal.F_ADD)) {
                    recordAddedUrl(words, str, reader.getLineNumber());
                } else if (line.startsWith(RecoveryJournal.F_SUCCESS)) {
                    if (logger.isLoggable(Level.FINE)) {
                        logger.fine("F_SUCCESS for URL " + url);
                    }
                    this.successfullyCrawledUrls.add(url);
                }
            }
            finished = true;
        } finally {
            // Close the writer first: PrintWriter.close() does not throw, so a failing
            // reader close can never leave the writer open.
            if (this.seedNotFoundPrintWriter != null) {
                this.seedNotFoundPrintWriter.close();
            }
            if (reader != null) {
                if (finished) {
                    reader.close();
                } else {
                    // An exception is already propagating; do not replace it.
                    closeAfterFailure(reader);
                }
            }
        }
    }

    /**
     * Handles one {@code F_ADD} line. A two-word line registers its URL as a seed; a
     * longer line attributes its URL to the seed of the line's last word (the "via" URL).
     *
     * @param words       whitespace-separated tokens of the line (at least two)
     * @param journalPath journal file name, used only in the error message
     * @param lineNumber  current line number, used only in the error message
     */
    private void recordAddedUrl(String[] words, String journalPath, int lineNumber) throws SeedUrlNotFoundException {
        String url = words[1];
        if (words.length == 2) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("F_ADD with 2 words --> seed URL (" + url + ")");
            }
            if (this.seedUrlToDiscoveredUrlsMap.get(url) == null) {
                this.seedUrlToDiscoveredUrlsMap.put(url, new HashSet<String>());
            }
            return;
        }

        String viaUrl = words[words.length - 1];
        if (logger.isLoggable(Level.FINE)) {
            logger.fine("F_ADD with 3+ words --> new URL " + url + " via URL " + viaUrl);
        }
        String seedUrl = this.crawledUrlToSeedMap.get(viaUrl);
        if (seedUrl == null) {
            // The via URL was not itself discovered from anything, so treat it as the seed.
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("\tvia URL is a seed");
            }
            seedUrl = viaUrl;
        } else if (logger.isLoggable(Level.FINE)) {
            logger.fine("\tvia URL discovered via seed URL " + seedUrl);
        }
        this.crawledUrlToSeedMap.put(url, seedUrl);

        Set<String> discovered = this.seedUrlToDiscoveredUrlsMap.get(seedUrl);
        if (discovered != null) {
            discovered.add(url);
            return;
        }
        String message = "recover log " + journalPath + " at line " + lineNumber + " listed F+ URL (" + viaUrl + ") for which found no seed list.";
        if (this.seedNotFoundPrintWriter == null) {
            throw new SeedUrlNotFoundException(message);
        }
        this.seedNotFoundPrintWriter.println(message);
    }

    /** Closes the reader while another exception is propagating, logging any close failure. */
    private static void closeAfterFailure(LineNumberReader reader) {
        try {
            reader.close();
        } catch (IOException e) {
            logger.log(Level.FINE, "Ignoring failure to close recovery log reader", e);
        }
    }

    /**
     * @param str a URL
     * @return {@code str} itself if it is a known seed; otherwise the seed recorded for
     *         it, or {@code null} if none was recorded
     */
    public String getSeedForUrl(String str) {
        return this.seedUrlToDiscoveredUrlsMap.get(str) != null ? str : this.crawledUrlToSeedMap.get(str);
    }

    /** @return the internal seed URL -> discovered URLs map (not a copy) */
    public Map getSeedUrlToDiscoveredUrlsMap() {
        return this.seedUrlToDiscoveredUrlsMap;
    }

    /** @return the internal set of successfully crawled URLs (not a copy) */
    public Set getSuccessfullyCrawledUrls() {
        return this.successfullyCrawledUrls;
    }

    /** @return the logger used by this class */
    public static Logger getLogger() {
        return logger;
    }

    /**
     * @param str a seed URL
     * @return an iterator over the URLs discovered from {@code str} that were
     *         successfully crawled
     * @throws SeedUrlNotFoundException if {@code str} is not a known seed
     */
    public Iterator<String> getIteratorOfURLsSuccessfullyCrawledFromSeedUrl(String str) throws SeedUrlNotFoundException {
        return new SuccessfullyCrawledURLsIterator(str);
    }

    /** @return the known seed URLs, as a view of the internal map's key set */
    public Collection<String> getSeedCollection() {
        return this.seedUrlToDiscoveredUrlsMap.keySet();
    }

    /**
     * Command-line entry point: loads the journal named by the first argument and
     * prints, per seed, the URLs successfully crawled from it.
     *
     * @param strArr command-line arguments; the first is the recovery journal file name
     */
    public static void main(String[] strArr) {
        if (strArr.length < 1) {
            System.out.println("Usage: RecoveryLogMapper recoverLogFileName");
            Runtime.getRuntime().exit(-1);
        }
        try {
            RecoveryLogMapper mapper = new RecoveryLogMapper(strArr[0]);
            for (String seed : mapper.getSeedCollection()) {
                System.out.println("URLs successfully crawled from seed URL " + seed);
                Iterator<String> crawled = mapper.getIteratorOfURLsSuccessfullyCrawledFromSeedUrl(seed);
                while (crawled.hasNext()) {
                    System.out.println("    -> " + crawled.next());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
