/*
 * Decompiled with CFR 0.152.
 */
package org.datacleaner.beans.transform;

import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import javax.inject.Named;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.ExternalDocumentation;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.MatchingAndStandardizationCategory;
import org.datacleaner.reference.Dictionary;

/**
 * Transformer that strips dictionary matches out of a string column.
 *
 * <p>The input value is split into whitespace-separated tokens; every token
 * that the configured {@link Dictionary} reports as contained is dropped, and
 * the remaining tokens are joined back together with single spaces. As a
 * consequence, any run of whitespace in the input is collapsed to one space
 * and leading/trailing whitespace is removed from the output.
 */
@Named(value="Remove dictionary matches")
@Description(value="Removes any part of a string that is matched against a dictionary. Use it to standardize or prepare values, for instance by removing adjectives that make comparison of similar terms difficult.")
@ExternalDocumentation(value={@ExternalDocumentation.DocumentationLink(title="Segmenting customers on messy data", url="https://www.youtube.com/watch?v=iy-j5s-uHz4", type=ExternalDocumentation.DocumentationType.VIDEO, version="4.0")})
@Categorized(value={MatchingAndStandardizationCategory.class})
public class RemoveDictionaryMatchesTransformer
implements Transformer {
    /** Name of the configured property holding the dictionary. */
    public static final String PROPERTY_DICTIONARY = "Dictionary";
    /** Name of the configured property holding the input column. */
    public static final String PROPERTY_COLUMN = "Column";

    /**
     * Splits values into whitespace-separated tokens, skipping empty ones.
     * Shared by all instances because it depends on no per-instance state.
     */
    private static final Splitter SPLITTER = Splitter.on(CharMatcher.WHITESPACE).omitEmptyStrings();

    /** Dictionary whose entries are removed from the input values. */
    @Configured(value=PROPERTY_DICTIONARY)
    Dictionary _dictionary;

    /** Column supplying the string values to clean. */
    @Configured(value=PROPERTY_COLUMN)
    InputColumn<String> _column;

    /**
     * Creates an unconfigured transformer; {@code _column} and
     * {@code _dictionary} must be assigned before it is used.
     */
    public RemoveDictionaryMatchesTransformer() {
    }

    /**
     * Creates a transformer for the given column and dictionary.
     *
     * @param column the column to read values from
     * @param dictionary the dictionary whose matches are removed
     */
    public RemoveDictionaryMatchesTransformer(InputColumn<String> column, Dictionary dictionary) {
        this._column = column;
        this._dictionary = dictionary;
    }

    /**
     * Describes the single {@code String} output column, named
     * {@code "<column name> (<dictionary name> removed)"}.
     *
     * @return the output column definition
     */
    public OutputColumns getOutputColumns() {
        String name = this._column.getName() + " (" + this._dictionary.getName() + " removed)";
        return new OutputColumns(String.class, new String[]{name});
    }

    /**
     * Reads the configured column from the row and removes dictionary matches
     * from its value.
     *
     * @param inputRow the row to read the value from
     * @return a one-element array holding the cleaned value
     */
    public Object[] transform(InputRow inputRow) {
        String value = inputRow.getValue(this._column);
        return new Object[]{this.transform(value)};
    }

    /**
     * Removes every whitespace-separated token that is contained in the
     * dictionary and re-joins the remaining tokens with single spaces.
     *
     * @param value the value to clean; may be {@code null}
     * @return {@code value} itself when it is {@code null} or empty, otherwise
     *         the remaining tokens joined by a single space (an empty string
     *         if no token remains)
     */
    public String transform(String value) {
        if (Strings.isNullOrEmpty(value)) {
            return value;
        }
        StringBuilder sb = new StringBuilder();
        // Typed as Iterable<String>: iterating a raw Iterable would yield Object.
        Iterable<String> tokens = SPLITTER.split(value);
        for (String token : tokens) {
            if (this._dictionary.containsValue(token)) {
                continue;
            }
            if (sb.length() != 0) {
                sb.append(' ');
            }
            sb.append(token);
        }
        return sb.toString();
    }
}

