/*
 * Decompiled with CFR 0.152.
 */
package org.codelibs.elasticsearch.dynarank.minhash.analysis;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.codelibs.elasticsearch.dynarank.guava.common.hash.HashCode;
import org.codelibs.elasticsearch.dynarank.guava.common.hash.HashFunction;
import org.codelibs.elasticsearch.dynarank.guava.common.io.BaseEncoding;
import org.codelibs.elasticsearch.dynarank.minhash.util.FastBitSet;

public class MinHashTokenFilter
extends TokenFilter {
    private final CharTermAttribute termAttr = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAttr = (PositionIncrementAttribute)this.addAttribute(PositionIncrementAttribute.class);
    private final OffsetAttribute offsetAttr = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
    private HashFunction[] hashFunctions;
    private int hashBit;
    private long[] minHashValues;
    private String minHash;

    public MinHashTokenFilter(TokenStream input, HashFunction[] hashFunctions, int hashBit) {
        super(input);
        this.hashFunctions = hashFunctions;
        this.hashBit = hashBit;
        this.minHashValues = new long[hashFunctions.length];
    }

    public final boolean incrementToken() throws IOException {
        int funcSize = this.hashFunctions.length;
        while (this.input.incrementToken()) {
            String term = this.termAttr.toString();
            for (int i = 0; i < funcSize; ++i) {
                HashCode hashCode = this.hashFunctions[i].hashUnencodedChars(term);
                long value = hashCode.asLong();
                if (value >= this.minHashValues[i]) continue;
                this.minHashValues[i] = value;
            }
        }
        if (this.minHash != null) {
            return false;
        }
        this.minHash = BaseEncoding.base64().encode(MinHashTokenFilter.calcMinHash(this.minHashValues, this.hashBit));
        this.termAttr.setEmpty().append(this.minHash);
        this.posIncrAttr.setPositionIncrement(0);
        this.offsetAttr.setOffset(0, this.minHash.length());
        return true;
    }

    public void reset() throws IOException {
        super.reset();
        Arrays.fill(this.minHashValues, Long.MAX_VALUE);
        this.minHash = null;
    }

    protected static byte[] calcMinHash(long[] minHashValues, int hashBit) {
        boolean shift = true;
        int radix = 2;
        long mask = 1L;
        int pos = 0;
        int nbits = minHashValues.length * hashBit;
        FastBitSet bitSet = new FastBitSet(nbits);
        for (long i : minHashValues) {
            for (int j = 0; j < hashBit; ++j) {
                bitSet.set(pos, (int)(i & 1L) == 1);
                ++pos;
                i >>>= 1;
            }
        }
        return bitSet.toByteArray();
    }
}

