package org.archive.crawler.processor;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.SortedMap;
import java.util.TreeMap;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.settings.SimpleType;
import org.archive.util.iterator.LineReadingIterator;
import org.archive.util.iterator.RegexpLineIterator;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/processor/LexicalCrawlMapper.class */
/**
 * Crawl mapper that assigns each URI to a named crawler node by lexically
 * comparing the URI's frontier class key against a sorted map of range-end
 * keys.
 *
 * <p>The map is read from the location named by the {@link #ATTR_MAP_SOURCE}
 * setting. Each entry line holds two whitespace-separated tokens: the key
 * marking the end of a range, and the crawler node that URIs in that range
 * are mapped to.
 */
public class LexicalCrawlMapper extends CrawlMapper {
    private static final long serialVersionUID = 1;

    /** Name of the setting that holds the map specification's location. */
    public static final String ATTR_MAP_SOURCE = "map-source";

    /** Default value of the {@link #ATTR_MAP_SOURCE} setting. */
    public static final String DEFAULT_MAP_SOURCE = "";

    /** Range-end key to crawler node name, kept in natural String order. */
    TreeMap<String, String> map;

    /**
     * Creates the mapper and registers its {@link #ATTR_MAP_SOURCE} setting.
     *
     * @param str name handed to the superclass constructor
     */
    public LexicalCrawlMapper(String str) {
        super(str, "LexicalCrawlMapper. Maps URIs to a named crawler by a lexical comparison of the URI's classKey to a supplied ranges map.");
        this.map = new TreeMap<>();
        addElementToDefinition(new SimpleType(ATTR_MAP_SOURCE, "Path (or HTTP URL) to map specification file. Each line should include 2 whitespace-separated tokens: the first a key indicating the end of a range, the second the crawler node to which URIs in the key range should be mapped.", DEFAULT_MAP_SOURCE));
    }

    /**
     * Looks up the crawler node for the given URI.
     *
     * <p>The entry with the smallest range-end key that is greater than or
     * equal to the URI's class key is used. When the class key sorts after
     * every range-end key, the lookup wraps around to the first entry.
     *
     * @param candidateURI URI to map
     * @return the crawler node name of the selected entry
     * @throws java.util.NoSuchElementException if no map entries are loaded
     */
    @Override
    protected String map(CandidateURI candidateURI) {
        String classKey = getController().getFrontier().getClassKey(candidateURI);
        SortedMap<String, String> candidates = this.map.tailMap(classKey);
        if (candidates.isEmpty()) {
            // Class key is beyond the last range end: wrap to the full map.
            candidates = this.map;
        }
        if (candidates.isEmpty()) {
            // Same exception type firstKey() would raise, but with an
            // explanation of what is actually wrong.
            throw new java.util.NoSuchElementException("Crawl map is empty; no entries were loaded from '" + ATTR_MAP_SOURCE + "'");
        }
        return candidates.get(candidates.firstKey());
    }

    /**
     * Runs the superclass initial tasks, then loads the map.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the map
     *         cannot be loaded
     */
    @Override
    public void initialTasks() {
        super.initialTasks();
        try {
            loadMap();
        } catch (IOException e) {
            throw new RuntimeException("Unable to load crawl map from setting '" + ATTR_MAP_SOURCE + "'", e);
        }
    }

    /**
     * Reads the map specification named by {@link #ATTR_MAP_SOURCE} and
     * replaces the contents of {@link #map} with it.
     *
     * <p>Entries are parsed into a temporary map first, so a failure while
     * reading leaves the previously loaded entries untouched. The reader is
     * always closed, including on failure.
     *
     * @throws IOException if the source is not configured, cannot be opened
     *         or read, or contains an entry line with fewer than two tokens
     */
    protected void loadMap() throws IOException {
        String source = (String) getUncheckedAttribute(null, ATTR_MAP_SOURCE);
        if (source == null) {
            throw new IOException("No value configured for setting '" + ATTR_MAP_SOURCE + "'");
        }
        TreeMap<String, String> loaded = new TreeMap<>();
        try (BufferedReader reader = new BufferedReader(openMapSource(source))) {
            RegexpLineIterator lines = new RegexpLineIterator(new LineReadingIterator(reader), RegexpLineIterator.COMMENT_LINE, RegexpLineIterator.TRIMMED_ENTRY_TRAILING_COMMENT, RegexpLineIterator.ENTRY);
            while (lines.hasNext()) {
                // NOTE(review): the iterator presumably already yields trimmed,
                // comment-free entries; trimming again is a cheap safeguard
                // against an empty leading token from split().
                String entry = lines.next().trim();
                if (entry.isEmpty()) {
                    continue;
                }
                String[] tokens = entry.split("\\s+");
                if (tokens.length < 2) {
                    throw new IOException("Malformed crawl map line in '" + source + "' (expected 2 whitespace-separated tokens): " + entry);
                }
                // Tokens beyond the second are ignored.
                loaded.put(tokens[0], tokens[1]);
            }
        }
        // Swap contents only after a complete, successful read; the same
        // TreeMap instance is kept so existing references stay valid.
        this.map.clear();
        this.map.putAll(loaded);
    }

    /**
     * Opens the map specification for reading, using the platform default
     * charset.
     *
     * <p>A source starting with {@code http://} or {@code https://} is opened
     * as a URL. Anything else is treated as a file path, and a relative path
     * is resolved against the directory returned by the controller's
     * {@code getDisk()}.
     *
     * @param source configured map source, never null
     * @return an open reader that the caller must close
     * @throws IOException if the URL or file cannot be opened
     */
    private InputStreamReader openMapSource(String source) throws IOException {
        if (source.startsWith("http://") || source.startsWith("https://")) {
            return new InputStreamReader(new URL(source).openConnection().getInputStream());
        }
        File file = new File(source);
        if (!file.isAbsolute()) {
            file = new File(getSettingsHandler().getOrder().getController().getDisk(), source);
        }
        return new FileReader(file);
    }
}
