package lt.tokenmill.crawling.parser.utils;

import com.github.jsonldjava.core.JsonLdOptions;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;
import com.google.common.base.Strings;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringEscapeUtils;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:lt/tokenmill/crawling/parser/utils/JsonLdParser.class */
/**
 * Extracts article data (headline, body, publication date) from the
 * {@code application/ld+json} script blocks of an HTML document.
 *
 * <p>Typical use: collect the raw script contents with
 * {@link #extractJsonLdParts(Document)} and pass them to {@link #parse(List)}.
 */
public class JsonLdParser {
    private static final Logger LOG = LoggerFactory.getLogger(JsonLdParser.class);
    private static final String SCHEMA_CLASS_ARTICLE = "http://schema.org/Article";
    private static final String SCHEMA_CLASS_NEWS_ARTICLE = "http://schema.org/NewsArticle";
    private static final String SCHEMA_ATTR_ARTICLE = "http://schema.org/article";
    private static final String SCHEMA_ATTR_ARTICLE_BODY = "http://schema.org/articleBody";
    private static final String SCHEMA_ATTR_HEADLINE = "http://schema.org/headline";
    private static final String SCHEMA_ATTR_PUBLISHED = "http://schema.org/datePublished";
    private static final String KEYWORD_TYPE = "@type";
    private static final String KEYWORD_VALUE = "@value";

    /* loaded from: input_file:lt/tokenmill/crawling/parser/utils/JsonLdParser$JsonLdArticle.class */
    /**
     * Mutable holder for the article fields read from a JSON-LD node.
     * Any field may be {@code null} when the node did not provide it.
     */
    public static class JsonLdArticle {
        private String headline;
        private String datePublished;
        private String articleBody;

        /** @return the article headline, or {@code null} if none was set */
        public String getHeadline() {
            return this.headline;
        }

        /** @param str the article headline */
        public void setHeadline(String str) {
            this.headline = str;
        }

        /** @return the publication date exactly as it appeared in the JSON-LD, or {@code null} */
        public String getDatePublished() {
            return this.datePublished;
        }

        /** @param str the publication date string */
        public void setDatePublished(String str) {
            this.datePublished = str;
        }

        /** @return the article body text, or {@code null} if none was set */
        public String getArticleBody() {
            return this.articleBody;
        }

        /** @param str the article body text */
        public void setArticleBody(String str) {
            this.articleBody = str;
        }
    }

    /**
     * Returns the first Article / NewsArticle found among the given JSON-LD documents.
     *
     * <p>Each part is processed independently: a part that cannot be parsed or
     * compacted is logged and skipped, so a malformed script does not prevent a
     * later, valid one from being used.
     *
     * @param list raw JSON-LD strings (for example from {@link #extractJsonLdParts(Document)});
     *             {@code null} and empty entries are ignored
     * @return the first matching article, or {@code null} when {@code list} is
     *         {@code null} or no part describes an article
     */
    public static JsonLdArticle parse(List<String> list) {
        if (list == null) {
            return null;
        }
        for (String part : list) {
            if (Strings.isNullOrEmpty(part)) {
                continue;
            }
            try {
                Map<String, Object> compacted = JsonLdProcessor.compact(
                        JsonUtils.fromString(normalizeSchemaOrgUrls(part)),
                        new HashMap<String, Object>(),
                        new JsonLdOptions());
                JsonLdArticle article = toArticle(compacted);
                if (article != null) {
                    return article;
                }
            } catch (Exception e) {
                // Best effort: report the broken part and keep looking at the remaining ones.
                LOG.warn("Failed to parse ld+json", e);
            }
        }
        return null;
    }

    /**
     * Collects the contents of every {@code <script type="application/ld+json">}
     * element in the document.
     *
     * @param document the parsed HTML document
     * @return the inner HTML of each matching script element, in selection order
     */
    public static List<String> extractJsonLdParts(Document document) {
        return document.select("script[type=\"application/ld+json\"]").stream()
                .map(script -> script.html())
                .collect(Collectors.toList());
    }

    /**
     * Rewrites the {@code www.schema.org} host to {@code schema.org} and adds the
     * trailing slash to a quoted bare {@code "http://schema.org"} value.
     */
    private static String normalizeSchemaOrgUrls(String json) {
        // Plain literal substitutions; no regular expression is needed.
        return json
                .replace("http://www.schema.org", "http://schema.org")
                .replace("\"http://schema.org\"", "\"http://schema.org/\"");
    }

    /**
     * Builds an article from a compacted JSON-LD node, or returns {@code null}
     * when the node is not an Article / NewsArticle.
     */
    private static JsonLdArticle toArticle(Map<?, ?> compacted) {
        Map<?, ?> node = compacted;
        Object type = node.get(KEYWORD_TYPE);
        // An untyped wrapper may carry the actual article under the "article" attribute.
        if (type == null && node.get(SCHEMA_ATTR_ARTICLE) instanceof Map) {
            node = (Map<?, ?>) node.get(SCHEMA_ATTR_ARTICLE);
            type = node.get(KEYWORD_TYPE);
        }
        if (!isArticleType(type)) {
            return null;
        }
        JsonLdArticle article = new JsonLdArticle();
        article.setHeadline(unescapedText(node.get(SCHEMA_ATTR_HEADLINE)));
        article.setArticleBody(unescapedText(node.get(SCHEMA_ATTR_ARTICLE_BODY)));
        article.setDatePublished(publishedDate(node.get(SCHEMA_ATTR_PUBLISHED)));
        return article;
    }

    /**
     * Tells whether a {@code @type} value names Article or NewsArticle. The value
     * may be a single type or a list of types; a list matches if any element does.
     */
    private static boolean isArticleType(Object type) {
        if (type instanceof List) {
            for (Object candidate : (List<?>) type) {
                if (isArticleClass(Objects.toString(candidate, null))) {
                    return true;
                }
            }
            return false;
        }
        return isArticleClass(Objects.toString(type, null));
    }

    /** Case-insensitive comparison against the two supported schema.org class IRIs. */
    private static boolean isArticleClass(String typeName) {
        return SCHEMA_CLASS_ARTICLE.equalsIgnoreCase(typeName)
                || SCHEMA_CLASS_NEWS_ARTICLE.equalsIgnoreCase(typeName);
    }

    /**
     * Converts a node value to text with HTML entities unescaped.
     * {@code null} stays {@code null} and an empty string stays empty.
     */
    private static String unescapedText(Object value) {
        String text = Objects.toString(value, null);
        return Strings.isNullOrEmpty(text) ? text : StringEscapeUtils.unescapeHtml4(text);
    }

    /**
     * Reads the publication date: a plain string is used as is, a map contributes
     * its {@code @value} entry, and anything else yields {@code null}.
     */
    private static String publishedDate(Object value) {
        if (value instanceof String) {
            return (String) value;
        }
        if (value instanceof Map) {
            return Objects.toString(((Map<?, ?>) value).get(KEYWORD_VALUE), null);
        }
        return null;
    }
}
