/*
 * Decompiled with CFR 0.152.
 */
package org.datacleaner.beans.transform;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import javax.inject.Inject;
import javax.inject.Named;
import org.apache.metamodel.util.HasName;
import org.datacleaner.api.Categorized;
import org.datacleaner.api.Configured;
import org.datacleaner.api.Description;
import org.datacleaner.api.InputColumn;
import org.datacleaner.api.InputRow;
import org.datacleaner.api.NumberProperty;
import org.datacleaner.api.OutputColumns;
import org.datacleaner.api.OutputRowCollector;
import org.datacleaner.api.Provided;
import org.datacleaner.api.Transformer;
import org.datacleaner.components.categories.TextCategory;

/**
 * Transformer that splits the String value of a single input column into
 * tokens, using a configurable set of delimiter characters.
 *
 * <p>Depending on {@link #tokenTarget} the tokens are either returned as a
 * fixed number of output columns ({@link TokenTarget#COLUMNS}) or pushed as
 * separate rows, one per token, through the {@link #outputRowCollector}
 * ({@link TokenTarget#ROWS}).</p>
 */
@Named(value="Tokenizer")
@Description(value="Tokenizes a String value (splits into substrings).")
@Categorized(value={TextCategory.class})
public class TokenizerTransformer
implements Transformer {
    /**
     * Number of output columns produced in {@link TokenTarget#COLUMNS} mode;
     * tokens beyond this count are dropped in that mode.
     */
    @Inject
    @Configured(value="Number of tokens")
    @Description(value="Defines the max amount of tokens to expect")
    @NumberProperty(zero=false, negative=false)
    Integer numTokens = 2;

    /** The input column whose String value is tokenized. */
    @Inject
    @Configured
    InputColumn<String> column;

    /** Characters that separate tokens; defaults to space, tab, newline, carriage return and form feed. */
    @Inject
    @Configured
    @Description(value="Characters to tokenize by")
    char[] delimiters = new char[]{' ', '\t', '\n', '\r', '\f'};

    /** Whether tokens become columns of the current row or separate rows. */
    @Inject
    @Configured
    @Description(value="Add tokens as columns or as separate rows?")
    TokenTarget tokenTarget = TokenTarget.COLUMNS;

    /** Receives one single-value row per token when {@link #tokenTarget} is {@link TokenTarget#ROWS}. */
    @Inject
    @Provided
    OutputRowCollector outputRowCollector;

    /** Creates a transformer with default settings; {@link #column} must be set before use. */
    public TokenizerTransformer() {
    }

    /**
     * Creates a transformer for the given column and token count, leaving
     * delimiters and token target at their defaults.
     *
     * @param column the column to tokenize
     * @param numTokens the number of token columns to produce
     */
    public TokenizerTransformer(InputColumn<String> column, Integer numTokens) {
        this.column = column;
        this.numTokens = numTokens;
    }

    /**
     * Describes the output of this transformer.
     *
     * @return in COLUMNS mode, {@code numTokens} String columns named
     *         "&lt;column&gt; (token N)" with N starting at 1; otherwise a
     *         single String column named "&lt;column&gt; (token)"
     */
    public OutputColumns getOutputColumns() {
        final String baseName = this.column.getName();
        if (this.tokenTarget == TokenTarget.COLUMNS) {
            final String[] names = new String[this.numTokens.intValue()];
            for (int i = 0; i < names.length; ++i) {
                names[i] = baseName + " (token " + (i + 1) + ")";
            }
            return new OutputColumns(String.class, names);
        }
        return new OutputColumns(String.class, baseName + " (token)", new String[0]);
    }

    /**
     * Tokenizes the value of {@link #column} in the given row.
     *
     * @param inputRow the row to read the value from
     * @return in COLUMNS mode, an array of exactly {@code numTokens} entries
     *         holding the first tokens in order and {@code null} in unused
     *         trailing positions; in ROWS mode {@code null}, after every
     *         token has been handed to the {@link #outputRowCollector}
     */
    public String[] transform(InputRow inputRow) {
        final List<String> allTokens = this.getTokens(inputRow);
        if (this.tokenTarget == TokenTarget.COLUMNS) {
            final int columnCount = this.numTokens.intValue();
            // List.toArray(T[]) allocates a bigger array when the list is longer
            // than the one passed in, which would yield more values than the
            // declared output columns. Cap the tokens at the column count first.
            final int kept = Math.min(columnCount, allTokens.size());
            return allTokens.subList(0, kept).toArray(new String[columnCount]);
        }
        for (String token : allTokens) {
            this.outputRowCollector.putValues(new Object[]{token});
        }
        return null;
    }

    /**
     * Splits the row's value for {@link #column} on the configured delimiters.
     *
     * @param inputRow the row to read the value from
     * @return the tokens in order of appearance; empty when the value is {@code null}
     */
    private List<String> getTokens(InputRow inputRow) {
        final String value = (String)inputRow.getValue(this.column);
        final List<String> tokens = new ArrayList<String>();
        if (value == null) {
            return tokens;
        }
        final StringTokenizer tokenizer = new StringTokenizer(value, new String(this.delimiters));
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }
        return tokens;
    }

    /** Destination of the produced tokens. */
    public static enum TokenTarget implements HasName
    {
        /** Tokens are returned as columns of the transformed row. */
        COLUMNS,
        /** Each token is emitted as a separate row. */
        ROWS;

        /**
         * @return the display name: "Columns" for {@link #COLUMNS}, "Rows" otherwise
         */
        public String getName() {
            return this == COLUMNS ? "Columns" : "Rows";
        }
    }
}

