package org.archive.crawler.url.canonicalize;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:site-search/heritrix/heritrix-1.12.1.jar:org/archive/crawler/url/canonicalize/StripExtraSlashes.class */
/**
 * Canonicalization rule that collapses runs of two or more consecutive
 * slashes appearing after the {@code http://} or {@code https://} scheme
 * prefix into a single slash.
 *
 * <p>Example: {@code http://www.archive.org//A//B/index.html} becomes
 * {@code http://www.archive.org/A/B/index.html}.
 *
 * <p>NOTE(review): the pattern does not distinguish the path from the rest of
 * the URL, so slash runs located in a query string or fragment are collapsed
 * as well, even though the description only mentions the path.
 */
public class StripExtraSlashes extends BaseRule {
    /** Human-readable description handed to the {@code BaseRule} constructor. */
    private static final String DESCRIPTION = "Strip any extra slashes, '/', found in the path. Use this rule to equate 'http://www.archive.org//A//B/index.html' and 'http://www.archive.org/A/B/index.html'.";

    /**
     * Group 1 is the scheme plus the shortest prefix preceding the first run
     * of two or more slashes; group 2 is everything after that run. The
     * scheme is matched case-sensitively and {@code .} does not match line
     * terminators, so input containing one never matches as a whole.
     */
    private static final Pattern REGEX = Pattern.compile("(^https?://.*?)//+(.*)");

    /**
     * Creates the rule.
     *
     * @param str forwarded unchanged to the {@code BaseRule} constructor
     *            (presumably the rule's name; confirm against BaseRule)
     */
    public StripExtraSlashes(String str) {
        super(str, DESCRIPTION);
    }

    /**
     * Repeatedly replaces the first run of extra slashes after the scheme
     * with a single slash until the URL no longer matches the pattern.
     *
     * @param str the URL to canonicalize; must not be {@code null}
     * @param obj not used by this rule
     * @return the URL with every extra-slash run collapsed, or {@code str}
     *         unchanged when it does not match the pattern at all
     */
    @Override // org.archive.crawler.url.CanonicalizationRule
    public String canonicalize(String str, Object obj) {
        String url = str;
        // Each pass removes exactly one slash run, so re-match the shortened
        // URL until the pattern stops matching the whole string.
        for (Matcher m = REGEX.matcher(url); m.matches(); m = REGEX.matcher(url)) {
            url = m.group(1) + "/" + m.group(2);
        }
        return url;
    }
}
