/*
 * Decompiled with CFR 0.152.
 */
package org.datacleaner.beans.transform;

import java.util.StringTokenizer;
import javax.inject.Named;
import org.datacleaner.api.Alias;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.ExternalDocumentation;
import org.datacleaner.api.HasLabelAdvice;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.ImproveSuperCategory;
import org.datacleaner.reference.SynonymCatalog;

/**
 * Transformer that replaces a string value with the master term returned by a
 * configured {@link SynonymCatalog}.
 *
 * <p>Two modes are supported, selected by {@code lookUpEveryToken}:
 * <ul>
 * <li>whole-value mode (default): the complete input string is looked up;</li>
 * <li>token mode: the input is split on a fixed set of whitespace and
 * punctuation characters, every non-delimiter token is looked up on its own,
 * and the delimiters are copied to the output unchanged.</li>
 * </ul>
 */
@Named(value="Synonym lookup")
@Alias(value={"Synonym replacement"})
@Description(value="Replaces strings with their synonyms")
@ExternalDocumentation(value={@ExternalDocumentation.DocumentationLink(title="Segmenting customers on messy data", url="https://www.youtube.com/watch?v=iy-j5s-uHz4", type=ExternalDocumentation.DocumentationType.VIDEO, version="4.0"), @ExternalDocumentation.DocumentationLink(title="Understanding and using Synonyms", url="https://www.youtube.com/watch?v=_YiPaA8bFt4", type=ExternalDocumentation.DocumentationType.VIDEO, version="2.0")})
@Categorized(superCategory=ImproveSuperCategory.class)
public class SynonymLookupTransformer
implements Transformer,
HasLabelAdvice {
    /**
     * Characters that separate tokens in token mode. The backslash appears
     * twice in the literal; duplicates are harmless because the string is only
     * used as a set of characters.
     */
    private static final String TOKEN_DELIMITERS = " \t\n\r\f.,!?\"'+-_:;/\\\\()%@";

    @Configured
    InputColumn<String> column;
    @Configured
    SynonymCatalog synonymCatalog;
    @Configured
    @Description(value="Retain original value in case no synonym is found (otherwise null)")
    boolean retainOriginalValue = true;
    @Configured
    @Description(value="Tokenize and look up every token of the input, rather than looking up the complete input string?")
    boolean lookUpEveryToken = false;

    /** Creates an unconfigured instance; the fields keep their declared defaults. */
    public SynonymLookupTransformer() {
    }

    /**
     * Creates an instance with the given configuration. {@code lookUpEveryToken}
     * keeps its default of {@code false}.
     *
     * @param column the input column whose value is looked up
     * @param synonymCatalog the catalog queried for master terms
     * @param retainOriginalValue whether a value without a master term is
     *        returned unchanged instead of as {@code null}
     */
    public SynonymLookupTransformer(InputColumn<String> column, SynonymCatalog synonymCatalog, boolean retainOriginalValue) {
        this();
        this.column = column;
        this.synonymCatalog = synonymCatalog;
        this.retainOriginalValue = retainOriginalValue;
    }

    /**
     * Declares a single String output column named after the input column
     * with the suffix {@code " (synonyms replaced)"}.
     */
    public OutputColumns getOutputColumns() {
        return new OutputColumns(String.class, new String[]{this.column.getName() + " (synonyms replaced)"});
    }

    /**
     * @return {@code "Lookup: "} followed by the catalog name, or {@code null}
     *         when no catalog is configured
     */
    public String getSuggestedLabel() {
        if (this.synonymCatalog == null) {
            return null;
        }
        return "Lookup: " + this.synonymCatalog.getName();
    }

    /**
     * Looks up the value of the configured column in the synonym catalog.
     *
     * @param inputRow the row to read the column value from
     * @return a one-element array holding the replaced value; the element is
     *         {@code null} when the input value is {@code null}, or when, in
     *         whole-value mode, no master term exists and
     *         {@code retainOriginalValue} is {@code false}
     */
    public String[] transform(InputRow inputRow) {
        String originalValue = (String)inputRow.getValue(this.column);
        if (originalValue == null) {
            return new String[1];
        }
        if (!this.lookUpEveryToken) {
            return new String[]{this.lookup(originalValue)};
        }

        StringBuilder sb = new StringBuilder();
        // returnDelims=true: delimiters come back as one-character tokens so
        // the original spacing and punctuation can be reproduced.
        StringTokenizer tokenizer = new StringTokenizer(originalValue, TOKEN_DELIMITERS, true);
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            if (isDelimiter(token)) {
                sb.append(token);
                continue;
            }
            String replacedToken = this.lookup(token);
            // In token mode a token without a master term is always kept,
            // regardless of retainOriginalValue.
            sb.append(replacedToken == null ? token : replacedToken);
        }
        return new String[]{sb.toString()};
    }

    /**
     * Tells whether a token is one of the delimiter characters. Membership is
     * tested directly: passing the delimiter list to {@code String.matches}
     * would interpret it as a regular expression for the whole token, which a
     * single delimiter character never satisfies.
     */
    private static boolean isDelimiter(String token) {
        return token.length() == 1 && TOKEN_DELIMITERS.indexOf(token.charAt(0)) >= 0;
    }

    /**
     * Returns the catalog's master term for the given value. When the catalog
     * returns {@code null}, the value itself is returned if
     * {@code retainOriginalValue} is set, otherwise {@code null}.
     */
    private String lookup(String originalValue) {
        String replacedValue = this.synonymCatalog.getMasterTerm(originalValue);
        if (this.retainOriginalValue && replacedValue == null) {
            return originalValue;
        }
        return replacedValue;
    }
}

