package eu.socialsensor.sfc.builder.solrQueryBuilder;

import edu.washington.cs.knowitall.morpha.MorphaStemmer;
import eu.socialsensor.framework.common.domain.Item;
import eu.socialsensor.framework.common.domain.Stopwords;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:eu/socialsensor/sfc/builder/solrQueryBuilder/KeywordsExtractor.class */
/**
 * Counts the words and hashtags that occur in the titles and descriptions of a list of
 * {@link Item}s and ranks them by frequency.
 *
 * <p>Typical usage: construct with the items, call {@link #processItemsText()} once, then read
 * the results through the getters. The class keeps mutable state and performs no
 * synchronization.
 */
public class KeywordsExtractor {
    /** Maximum number of keywords returned by {@link #getTopKeywords()}. */
    public static final int EXTRACTED_KEYWORDS_LIMIT = 500;

    /** One or more characters that are not letters, digits, '#' or an apostrophe. */
    private static final Pattern TOKEN_SEPARATOR = Pattern.compile("[^a-zA-Z0-9#']+");
    /** Web links starting with http://, https:// or www. (optionally preceded by '('). */
    private static final Pattern WEB_LINK =
            Pattern.compile("\\(?\\b(https?://|www[.])[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]");
    /** Literal "http" fragments that survive the web-link removal. */
    private static final Pattern HTTP_LEFTOVER = Pattern.compile("http");
    private static final Pattern EMAIL_ADDRESS = Pattern.compile("[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.]*");
    /** Slash-separated references such as "a/b". */
    private static final Pattern REFERENCE = Pattern.compile("[a-zA-Z0-9._%+-]+/[a-zA-Z0-9.]*");
    /** Account mentions: '@' followed by letters, digits or dots. */
    private static final Pattern ACCOUNT = Pattern.compile("@[a-zA-Z0-9.]*");

    private List<Item> items;
    private Stopwords stopwords = new Stopwords();
    /** Occurrence count per keyword (tokens without '#'). */
    private Map<String, Integer> popularKeywords = new HashMap<String, Integer>();
    /** Occurrence count per hashtag (tokens containing '#', stored without the '#'). */
    private Map<String, Integer> popularHashtags = new HashMap<String, Integer>();
    /** Maps a keyword that was merged away to the keyword that absorbed its count. */
    private Map<String, String> wordsToReplace = new HashMap<String, String>();
    /** Cleaned texts that have already been counted; used to skip duplicates. */
    private Set<String> textContent = new HashSet<String>();
    /** Keywords with their counts, in descending count order. */
    private Map<String, Double> rankedKeywords = new LinkedHashMap<String, Double>();
    /** Hashtags with their counts, in descending count order. */
    private Map<String, Double> rankedHashtags = new LinkedHashMap<String, Double>();
    double keywordsAVG = 0.0d;
    double hashtagsAVG = 0.0d;

    /**
     * @param list the items whose titles and descriptions will be analysed; a {@code null}
     *             list is treated as empty by {@link #processItemsText()}
     */
    public KeywordsExtractor(List<Item> list) {
        this.items = list;
    }

    /**
     * @return the live map from each merged-away keyword to the keyword that replaced it
     */
    public Map<String, String> getWordsToReplace() {
        return this.wordsToReplace;
    }

    /**
     * Cleans and tokenizes the title and description of every item, counts keywords and
     * hashtags, merges related keywords and finally ranks both groups by count.
     */
    public void processItemsText() {
        if (this.items != null) {
            for (Item item : this.items) {
                if (item == null) {
                    continue;
                }
                String title = item.getTitle();
                String description = item.getDescription();
                processText(title);
                // A description that merely repeats the title carries no new information.
                boolean repeatsTitle = title != null && title.equals(description);
                if (!repeatsTitle) {
                    processText(description);
                }
            }
        }
        processPopularHashtags();
        processPopularKeywords();
        sortElements();
    }

    /**
     * @return the live set of cleaned texts that have been counted so far
     */
    public Set<String> getTextContent() {
        return this.textContent;
    }

    /**
     * @return up to {@link #EXTRACTED_KEYWORDS_LIMIT} keywords whose count is strictly above
     *         the keyword average, highest count first
     */
    public List<String> getTopKeywords() {
        List<String> top = new ArrayList<String>();
        for (Map.Entry<String, Double> entry : this.rankedKeywords.entrySet()) {
            if (top.size() >= EXTRACTED_KEYWORDS_LIMIT) {
                break;
            }
            if (entry.getValue().doubleValue() > this.keywordsAVG) {
                top.add(entry.getKey());
            }
        }
        return top;
    }

    /**
     * @return a new map of the hashtags whose count is strictly above the hashtag average,
     *         highest count first
     */
    public Map<String, Double> getTopHashtags() {
        Map<String, Double> top = new LinkedHashMap<String, Double>();
        for (Map.Entry<String, Double> entry : this.rankedHashtags.entrySet()) {
            if (entry.getValue().doubleValue() > this.hashtagsAVG) {
                top.put(entry.getKey(), entry.getValue());
            }
        }
        return top;
    }

    /**
     * @return a live view of all ranked keywords, highest count first
     */
    public Set<String> getRankedKeywords() {
        return this.rankedKeywords.keySet();
    }

    /**
     * @return a live view of all ranked hashtags, highest count first
     */
    public Set<String> getRankedHashtags() {
        return this.rankedHashtags.keySet();
    }

    /** Cleans one text field and counts its words unless the same cleaned text was seen before. */
    private void processText(String text) {
        if (text == null || text.isEmpty()) {
            return;
        }
        String cleaned = eraseAccounts(eraseReferences(eraseEmailAddresses(eraseWebLinks(text.toLowerCase()))));
        if (this.textContent.add(cleaned)) {
            countWords(cleaned);
        }
    }

    /**
     * Splits the text into tokens and increments the keyword or hashtag counters. Stopwords
     * and tokens of a single character are ignored. A token containing '#' is split on '#'
     * and each part is counted as a hashtag.
     */
    private void countWords(String str) {
        for (String token : TOKEN_SEPARATOR.split(str)) {
            if (!isCountable(token)) {
                continue;
            }
            if (token.contains("#")) {
                for (String tag : token.split("#")) {
                    if (isCountable(tag) && tag.length() > 1) {
                        increment(this.popularHashtags, tag);
                    }
                }
            } else if (token.length() > 1) {
                increment(this.popularKeywords, token);
            }
        }
    }

    /** A token can be counted when it is non-empty and not a stopword. */
    private boolean isCountable(String token) {
        return !token.isEmpty() && !this.stopwords.is(token);
    }

    /** Adds one to the counter stored under {@code key}, starting at 1 for a new key. */
    private static void increment(Map<String, Integer> counters, String key) {
        Integer current = counters.get(key);
        counters.put(key, Integer.valueOf(current == null ? 1 : current.intValue() + 1));
    }

    /**
     * Removes web links (http://, https:// and www.) and then any remaining literal "http"
     * fragments from the text.
     */
    private String eraseWebLinks(String str) {
        List<String> fragments = new ArrayList<String>();
        collectMatches(WEB_LINK, str, fragments);
        String result = removeFragments(str, fragments);
        // Second pass: links are matched first, then stray "http" remnants are appended and
        // the whole list is removed again.
        collectMatches(HTTP_LEFTOVER, result, fragments);
        return removeFragments(result, fragments);
    }

    /** Removes everything that looks like an e-mail address. */
    private String eraseEmailAddresses(String str) {
        return eraseMatches(EMAIL_ADDRESS, str);
    }

    /** Removes slash-separated references such as "word/word". */
    private String eraseReferences(String str) {
        return eraseMatches(REFERENCE, str);
    }

    /** Removes account mentions that start with '@'. */
    private String eraseAccounts(String str) {
        return eraseMatches(ACCOUNT, str);
    }

    /** Removes every substring of {@code text} that matches {@code pattern}. */
    private static String eraseMatches(Pattern pattern, String text) {
        List<String> fragments = new ArrayList<String>();
        collectMatches(pattern, text, fragments);
        return removeFragments(text, fragments);
    }

    /** Appends every match of {@code pattern} in {@code text} to {@code matches}. */
    private static void collectMatches(Pattern pattern, String text, List<String> matches) {
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
    }

    /**
     * Deletes each fragment from the text, repeating until the fragment no longer occurs
     * (a removal can join the surrounding characters into a new occurrence).
     */
    private static String removeFragments(String text, List<String> fragments) {
        String result = text;
        for (String fragment : fragments) {
            if (fragment.isEmpty()) {
                // An empty fragment is "contained" in every string and would loop forever.
                continue;
            }
            while (result.contains(fragment)) {
                result = result.replace(fragment, "");
            }
        }
        return result;
    }

    /** Rebuilds both ranked maps and recomputes the keyword and hashtag averages. */
    private void sortElements() {
        this.keywordsAVG = rank(this.popularKeywords, this.rankedKeywords);
        this.hashtagsAVG = rank(this.popularHashtags, this.rankedHashtags);
    }

    /**
     * Replaces the content of {@code ranked} with all entries of {@code counts} in descending
     * count order and returns the average count.
     *
     * @return the value of {@code Calculator.computeAverage} for the counts, or 0.0 when
     *         there are no counts
     */
    private static double rank(Map<String, Integer> counts, Map<String, Double> ranked) {
        ranked.clear();
        if (counts.isEmpty()) {
            return 0.0d;
        }
        List<Map.Entry<String, Integer>> byCount =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(byCount, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });
        int size = byCount.size();
        // The average helper previously received the counts sorted ascending; keep that order.
        int[] ascending = new int[size];
        for (int i = 0; i < size; i++) {
            Map.Entry<String, Integer> entry = byCount.get(i);
            ranked.put(entry.getKey(), Double.valueOf(entry.getValue().doubleValue()));
            ascending[size - 1 - i] = entry.getValue().intValue();
        }
        return Calculator.computeAverage(ascending);
    }

    /** Prints the raw keyword and hashtag counters to standard output. */
    public void printKeywordsANDHashtags() {
        System.out.println("----Keywords----");
        for (Map.Entry<String, Integer> keyword : this.popularKeywords.entrySet()) {
            System.out.println(keyword.getKey() + ":" + keyword.getValue());
        }
        System.out.println("----Hashtags----");
        for (Map.Entry<String, Integer> hashtag : this.popularHashtags.entrySet()) {
            System.out.println(hashtag.getKey() + ":" + hashtag.getValue());
        }
    }

    /**
     * Merges related keywords. For every pair where one keyword is strictly longer than the
     * other: if the shorter one equals the stem of the longer one, the longer is folded into
     * the shorter; otherwise, if the longer contains the shorter and is at most two characters
     * longer, the less frequent of the two is folded into the more frequent one (the longer
     * wins ties). Folded keywords are recorded in {@code wordsToReplace} and removed from the
     * counters afterwards.
     */
    private void processPopularKeywords() {
        // Only values of existing keys are updated inside the loops, which is not a structural
        // modification, so iterating the key set at the same time is safe.
        for (String shorter : this.popularKeywords.keySet()) {
            for (String longer : this.popularKeywords.keySet()) {
                if (longer.length() <= shorter.length()) {
                    continue;
                }
                int shorterCount = this.popularKeywords.get(shorter).intValue();
                int longerCount = this.popularKeywords.get(longer).intValue();
                Integer merged = Integer.valueOf(shorterCount + longerCount);
                if (shorter.equals(MorphaStemmer.stem(longer))) {
                    this.wordsToReplace.put(longer, shorter);
                    this.popularKeywords.put(shorter, merged);
                } else if (longer.contains(shorter) && longer.length() - shorter.length() <= 2) {
                    if (shorterCount > longerCount) {
                        this.wordsToReplace.put(longer, shorter);
                        this.popularKeywords.put(shorter, merged);
                    } else {
                        this.wordsToReplace.put(shorter, longer);
                        this.popularKeywords.put(longer, merged);
                    }
                }
            }
        }
        for (String replaced : this.wordsToReplace.keySet()) {
            this.popularKeywords.remove(replaced);
        }
    }

    /** Adds to every hashtag the count of each keyword that the hashtag contains. */
    private void processPopularHashtags() {
        for (String hashtag : this.popularHashtags.keySet()) {
            for (String keyword : this.popularKeywords.keySet()) {
                if (hashtag.contains(keyword)) {
                    int boosted = this.popularHashtags.get(hashtag).intValue()
                            + this.popularKeywords.get(keyword).intValue();
                    this.popularHashtags.put(hashtag, Integer.valueOf(boosted));
                }
            }
        }
    }

    /** Prints the ranked keywords followed by the ranked hashtags to standard output. */
    public void printRankedKeywordsANDHashtags() {
        System.out.println("----Ranked Keywords----");
        for (Map.Entry<String, Double> keyword : this.rankedKeywords.entrySet()) {
            System.out.println(keyword.getKey() + " , " + keyword.getValue());
        }
        for (Map.Entry<String, Double> hashtag : this.rankedHashtags.entrySet()) {
            System.out.println(hashtag.getKey() + " , " + hashtag.getValue());
        }
    }

    /** Empty entry point; performs no work. */
    public static void main(String[] strArr) {
    }
}
