package lt.tokenmill.crawling.parser;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lt.tokenmill.crawling.data.HttpSource;
import lt.tokenmill.crawling.parser.data.MatchedDate;
import lt.tokenmill.crawling.parser.utils.HttpSourceTester;
import org.joda.time.DateTime;
import org.jsoup.nodes.Document;

/* loaded from: input_file:lt/tokenmill/crawling/parser/DateParser.class */
/**
 * Static helpers that locate publication-date candidates in an HTML document
 * and turn their textual value into a {@link DateTime}.
 *
 * <p>Candidates are represented by {@link MatchedDate}; parsing mutates the
 * candidate it is given (value, date and pattern) and returns that same instance.
 */
public class DateParser {

    /** Built-in date patterns, tried in this order before any source-specific pattern. */
    private static final List<String> FORMATS = Lists.newArrayList(
            "yyyy-MM-dd'T'HH:mm:ssZZZ",
            "yyyy-MM-dd'T'HH:mm:ssXXX",
            "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
            "yyyy-MM-dd'T'HH:mm:ss",
            "yyyy-MM-dd HH:mm:ss Z",
            "yyyy-MM-dd HH:mm:ss",
            "EEE MMM dd HH:mm:ss z yyyy",
            "EEE, dd MMM yyyy HH:mm:ss Z",
            "EEEE, dd MMMM yyyy h:mma z",
            "MMM dd, yyyy hh:mma z",
            "EEE MMM dd, yyyy H:mma z",
            "MMddyyyy",
            "dd MMMM yyyy",
            "dd MMM, yyyy HH:mm",
            "MMMM dd, yyyy, HH:mm z",
            "MMMM dd, yyyy",
            "EEE, MMMM dd, yyyy",
            "yyyy/MM/dd");

    /**
     * Formatters for {@link #FORMATS}. {@link SimpleDateFormat} is not thread-safe, so every
     * thread lazily gets its own set instead of sharing one static list.
     */
    private static final ThreadLocal<List<SimpleDateFormat>> FORMATTERS =
            ThreadLocal.withInitial(() -> formatsToFormatters(FORMATS));

    /** Meta tag names/properties (lower case) inspected by {@link #extractFromMeta(Document)}, in priority order. */
    private static final List<String> DATE_META_KEYS = Lists.newArrayList(
            "og:article:published_time",
            "article:published_time",
            "om.content_publish_dte",
            "sailthru.date",
            "revision_date",
            "parsely-pub-date",
            "pubdate",
            HttpSourceTester.DATE,
            "published_at",
            "publish_date");

    /** CSS selectors inspected by {@link #extractFromProperties(Document)}. */
    private static final List<String> ITEMPROP_SELECTORS = Lists.newArrayList(
            "[itemprop*=datePublished]",
            "[itemprop*=dateModified]");

    /**
     * Matches the abbreviation "ET" only when it is not part of a longer alphabetic token,
     * so that zones such as "CET" or "EET" are left untouched.
     */
    private static final Pattern EASTERN_TIME_ABBREVIATION = Pattern.compile("(?<![A-Za-z])ET(?![A-Za-z])");

    /**
     * Builds one formatter per pattern.
     *
     * @param list date patterns; {@code null} is treated as empty
     * @return a new list of formatters, in the same order as the patterns
     */
    private static List<SimpleDateFormat> formatsToFormatters(List<String> list) {
        if (list == null) {
            return new ArrayList<>();
        }
        return list.stream().map(DateParser::createFormatter).collect(Collectors.toList());
    }

    /**
     * Normalizes the candidate's raw value and tries to parse it, first with the built-in
     * patterns and then with the patterns configured on the source.
     *
     * <p>Normalization: the value is trimmed; each source regexp that matches the whole value
     * and captures a non-null group 1 replaces the value with that group; finally a standalone
     * "ET" is rewritten to "EST". The normalized value is stored back on the candidate.
     *
     * @param matchedDate candidate to parse; it is modified in place
     * @param httpSource  source configuration supplying extra regexps and date patterns
     * @return {@code matchedDate}, with date and pattern set when some pattern parsed the value
     */
    public static MatchedDate parse(MatchedDate matchedDate, HttpSource httpSource) {
        String value = Strings.nullToEmpty(matchedDate.getValue()).trim();
        List<String> regexps = httpSource.getDateRegexps();
        if (regexps != null) {
            for (String regexp : regexps) {
                Matcher matcher = Pattern.compile(regexp).matcher(value);
                // group(1) is null when the group is optional and did not participate in the match
                if (matcher.matches() && matcher.groupCount() > 0 && matcher.group(1) != null) {
                    value = matcher.group(1);
                }
            }
        }
        // NOTE(review): "ET" is mapped to standard time (EST) regardless of the date's season.
        matchedDate.setValue(EASTERN_TIME_ABBREVIATION.matcher(value).replaceAll("EST"));

        List<SimpleDateFormat> sourceFormatters = formatsToFormatters(httpSource.getDateFormats());
        return Stream.concat(FORMATTERS.get().stream(), sourceFormatters.stream())
                .map(formatter -> parse(matchedDate, formatter))
                .filter(candidate -> candidate.getDate() != null)
                .findFirst()
                .orElse(matchedDate);
    }

    /**
     * Creates a strict (non-lenient) formatter that uses GMT as its time zone.
     *
     * @param str date pattern in {@link SimpleDateFormat} syntax
     * @return a new formatter for the pattern
     */
    private static SimpleDateFormat createFormatter(String str) {
        // NOTE(review): uses the JVM default locale for month/day names - confirm this is intended.
        SimpleDateFormat formatter = new SimpleDateFormat(str);
        formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
        formatter.setLenient(false);
        return formatter;
    }

    /**
     * Attempts to parse the candidate's value with a single formatter.
     *
     * <p>On success the candidate's date and pattern are set. On any failure the candidate is
     * returned unchanged.
     *
     * @param matchedDate      candidate to parse; it is modified in place on success
     * @param simpleDateFormat formatter to try; must not be used by another thread concurrently
     * @return {@code matchedDate}
     */
    public static MatchedDate parse(MatchedDate matchedDate, SimpleDateFormat simpleDateFormat) {
        String value = matchedDate.getValue();
        if (value == null) {
            return matchedDate;
        }
        try {
            matchedDate.setDate(new DateTime(simpleDateFormat.parse(value)));
            matchedDate.setPattern(simpleDateFormat.toPattern());
        } catch (Exception ignored) {
            // Deliberate best effort: a value that does not fit this pattern is simply not a match.
        }
        return matchedDate;
    }

    /**
     * Collects date candidates from {@code <meta>} tags.
     *
     * <p>Each tag is keyed by its lower-cased {@code name} attribute, or by its lower-cased
     * {@code property} attribute when {@code name} is empty; a later tag with the same key
     * overrides an earlier one. Tags with an empty {@code content} are ignored.
     *
     * @param document parsed HTML document
     * @return candidates for the known date keys that are present, in key priority order
     */
    public static List<MatchedDate> extractFromMeta(Document document) {
        Map<String, String> contentByKey = Maps.newHashMap();
        document.select("meta").forEach(element -> {
            String name = element.attr("name");
            String property = element.attr("property");
            String key = !Strings.isNullOrEmpty(name) ? name : property;
            if (!Strings.isNullOrEmpty(key)) {
                // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
                contentByKey.put(key.toLowerCase(Locale.ROOT), element.attr("content"));
            }
        });
        return DATE_META_KEYS.stream()
                .filter(key -> !Strings.isNullOrEmpty(contentByKey.get(key)))
                .map(key -> new MatchedDate(contentByKey.get(key), "META:" + key))
                .collect(Collectors.toList());
    }

    /**
     * Collects date candidates from elements matched by the {@code itemprop} selectors.
     *
     * <p>For each matched element the first non-empty attribute among {@code datetime},
     * {@code content} and {@link HttpSourceTester#TITLE} is used as the value; elements with
     * none of them are skipped. The selector is recorded as the candidate's second constructor
     * argument.
     *
     * @param document parsed HTML document
     * @return candidates in selector order, then document order; possibly empty
     */
    public static List<MatchedDate> extractFromProperties(Document document) {
        List<MatchedDate> matches = new ArrayList<>();
        for (String selector : ITEMPROP_SELECTORS) {
            document.select(selector).forEach(element -> {
                String value = firstNonEmpty(
                        element.attr("datetime"),
                        element.attr("content"),
                        element.attr(HttpSourceTester.TITLE));
                if (value != null) {
                    matches.add(new MatchedDate(value, selector));
                }
            });
        }
        return matches;
    }

    /** Returns the first argument that is neither {@code null} nor empty, or {@code null} if there is none. */
    private static String firstNonEmpty(String... candidates) {
        for (String candidate : candidates) {
            if (!Strings.isNullOrEmpty(candidate)) {
                return candidate;
            }
        }
        return null;
    }
}
