/*
 * Decompiled with CFR 0.152.
 */
package hivemall.knn.lsh;

import hivemall.model.FeatureValue;
import hivemall.utils.hadoop.WritableUtils;
import hivemall.utils.hashing.MurmurHash3;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Random;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.IntWritable;

/**
 * Hive UDF that turns a feature vector into a list of minhash signatures.
 *
 * <p>For each of {@code numHashes} seeded MurmurHash3 functions, every feature name is hashed
 * and the hash is divided by the feature weight. Each time a feature yields a new smallest
 * weighted value, its (unweighted) hash is recorded as a candidate. The {@code keyGroups}
 * smallest candidates are then folded into one non-negative {@code int} signature.
 *
 * <p>The seeds are generated from a fixed-seed {@link Random}, so the same
 * {@code numHashes} always produces the same seed sequence.
 *
 * <p>A {@code null} feature list yields a {@code null} result.
 */
@Description(name="minhashes", value="_FUNC_(array<> features [, int numHashes, int keyGroup [, boolean noWeight]]) - Returns minhash values")
@UDFType(deterministic=true, stateful=false)
public final class MinHashesUDF
extends UDF {
    /** Number of hash functions used by the overloads that do not take {@code numHashes}. */
    private static final int DEFAULT_NUM_HASHES = 5;
    /** Number of candidates folded into a signature by the overloads without {@code keyGroups}. */
    private static final int DEFAULT_KEY_GROUPS = 2;
    /** Fixed seed of the generator that produces the per-hash-function seeds. */
    private static final long SEED_GENERATOR_SEED = 31L;

    /** Seeds cached from the previous call; rebuilt when the requested count changes. */
    private int[] _seeds = null;

    /**
     * Returns {@code numHashes} hash seeds, reusing the cached array when its length matches.
     *
     * @param numHashes number of seeds required
     * @return the seed array, of length {@code numHashes}
     */
    private int[] prepareSeeds(int numHashes) {
        int[] seeds = this._seeds;
        if (seeds == null || seeds.length != numHashes) {
            seeds = new int[numHashes];
            // A fixed generator seed keeps the produced sequence reproducible across calls.
            Random rand = new Random(SEED_GENERATOR_SEED);
            for (int i = 0; i < numHashes; ++i) {
                seeds[i] = rand.nextInt();
            }
            this._seeds = seeds;
        }
        return seeds;
    }

    /**
     * Computes signatures for integer features using 5 hash functions and 2 key groups.
     *
     * @param features integer features; {@code null} elements are skipped
     * @return one signature per hash function, or {@code null} if {@code features} is {@code null}
     * @throws HiveException propagated from the signature computation
     */
    public List<IntWritable> evaluate(List<Integer> features) throws HiveException {
        return this.evaluate(features, DEFAULT_NUM_HASHES, DEFAULT_KEY_GROUPS);
    }

    /**
     * Computes signatures for integer features, each feature carrying a weight of 1.
     *
     * @param features integer features; {@code null} elements are skipped
     * @param numHashes number of hash functions, i.e. number of signatures returned
     * @param keyGroups maximum number of candidate hashes folded into each signature
     * @return one signature per hash function, or {@code null} if {@code features} is {@code null}
     * @throws HiveException propagated from the signature computation
     */
    public List<IntWritable> evaluate(List<Integer> features, int numHashes, int keyGroups) throws HiveException {
        if (features == null) {
            // A NULL column value produces a NULL result instead of a NullPointerException.
            return null;
        }
        int[] seeds = this.prepareSeeds(numHashes);
        List<FeatureValue> featureList = MinHashesUDF.parseFeatures(features);
        return MinHashesUDF.computeSignatures(featureList, numHashes, keyGroups, seeds);
    }

    /**
     * Computes signatures for string features using 5 hash functions and 2 key groups.
     *
     * @param features string features; {@code null} elements are skipped
     * @param noWeight when {@code true} each string is used verbatim with weight 1; otherwise
     *        each string is parsed by {@code FeatureValue.parse}
     * @return one signature per hash function, or {@code null} if {@code features} is {@code null}
     * @throws HiveException if a parsed feature weight is negative
     */
    public List<IntWritable> evaluate(List<String> features, boolean noWeight) throws HiveException {
        return this.evaluate(features, DEFAULT_NUM_HASHES, DEFAULT_KEY_GROUPS, noWeight);
    }

    /**
     * Computes signatures for string features.
     *
     * @param features string features; {@code null} elements are skipped
     * @param numHashes number of hash functions, i.e. number of signatures returned
     * @param keyGroups maximum number of candidate hashes folded into each signature
     * @param noWeight when {@code true} each string is used verbatim with weight 1; otherwise
     *        each string is parsed by {@code FeatureValue.parse}
     * @return one signature per hash function, or {@code null} if {@code features} is {@code null}
     * @throws HiveException if a parsed feature weight is negative
     */
    public List<IntWritable> evaluate(List<String> features, int numHashes, int keyGroups, boolean noWeight) throws HiveException {
        if (features == null) {
            // A NULL column value produces a NULL result instead of a NullPointerException.
            return null;
        }
        int[] seeds = this.prepareSeeds(numHashes);
        List<FeatureValue> featureList = MinHashesUDF.parseFeatures(features, noWeight);
        return MinHashesUDF.computeSignatures(featureList, numHashes, keyGroups, seeds);
    }

    /**
     * Wraps each non-null integer feature in a {@code FeatureValue} with weight 1.
     *
     * @param features integer features; must not be {@code null}
     * @return the wrapped features, in input order
     */
    private static List<FeatureValue> parseFeatures(List<Integer> features) {
        ArrayList<FeatureValue> ftvec = new ArrayList<FeatureValue>(features.size());
        for (Integer f : features) {
            if (f == null) {
                continue;
            }
            ftvec.add(new FeatureValue((Object)f, 1.0f));
        }
        return ftvec;
    }

    /**
     * Converts each non-null string feature into a {@code FeatureValue}.
     *
     * @param features string features; must not be {@code null}
     * @param noWeight when {@code true} the string is the feature itself with weight 1;
     *        otherwise the string is handed to {@code FeatureValue.parse}
     * @return the converted features, in input order
     */
    private static List<FeatureValue> parseFeatures(List<String> features, boolean noWeight) {
        ArrayList<FeatureValue> ftvec = new ArrayList<FeatureValue>(features.size());
        for (String f : features) {
            if (f == null) {
                continue;
            }
            FeatureValue fv = noWeight ? new FeatureValue((Object)f, 1.0f) : FeatureValue.parse(f);
            ftvec.add(fv);
        }
        return ftvec;
    }

    /**
     * Builds one signature per hash function.
     *
     * @param features features to hash
     * @param numHashes number of signatures to produce
     * @param keyGroups maximum number of candidate hashes folded into each signature
     * @param seeds hash seeds; must contain at least {@code numHashes} entries
     * @return a fixed-size list of {@code numHashes} signatures
     * @throws HiveException if a feature weight is negative
     */
    private static List<IntWritable> computeSignatures(List<FeatureValue> features, int numHashes, int keyGroups, int[] seeds) throws HiveException {
        IntWritable[] hashes = new IntWritable[numHashes];
        PriorityQueue<Integer> minhashes = new PriorityQueue<Integer>();
        for (int i = 0; i < numHashes; ++i) {
            float weightedMinHashValues = Float.MAX_VALUE;
            for (FeatureValue fv : features) {
                Object f = fv.getFeature();
                assert (f != null);
                String fs = f.toString();
                int hashIndex = MinHashesUDF.nonNegative(MurmurHash3.murmurhash3_x86_32(fs, seeds[i]));
                float w = fv.getValueAsFloat();
                float hashValue = MinHashesUDF.calcWeightedHashValue(hashIndex, w);
                if (hashValue < weightedMinHashValues) {
                    // Every new running minimum becomes a candidate; the queue later
                    // hands the candidates back in ascending hash order.
                    weightedMinHashValues = hashValue;
                    minhashes.offer(hashIndex);
                }
            }
            hashes[i] = WritableUtils.val(MinHashesUDF.getSignature(minhashes, keyGroups));
            // The queue is shared across hash functions, so drop any unused candidates.
            minhashes.clear();
        }
        return Arrays.asList(hashes);
    }

    /**
     * Returns the absolute value of {@code hash}, saturating at {@code Integer.MAX_VALUE}.
     *
     * <p>{@code Math.abs(Integer.MIN_VALUE)} is still negative, which would produce a negative
     * weighted hash value that always wins the minimum. Only that single input is treated
     * differently from {@code Math.abs}.
     *
     * @param hash raw hash value
     * @return a value in {@code [0, Integer.MAX_VALUE]}
     */
    private static int nonNegative(int hash) {
        return hash == Integer.MIN_VALUE ? Integer.MAX_VALUE : Math.abs(hash);
    }

    /**
     * Scales a hash by the inverse of the feature weight, so that larger weights give smaller
     * values.
     *
     * @param hashIndex hash of the feature
     * @param w feature weight
     * @return {@code hashIndex / w}, or {@code Float.MAX_VALUE} when {@code w} is zero
     * @throws HiveException if {@code w} is negative
     */
    private static float calcWeightedHashValue(int hashIndex, float w) throws HiveException {
        if (w < 0.0f) {
            throw new HiveException("Negative value is not accepted for a feature weight");
        }
        if (w == 0.0f) {
            // A zero weight can never become a new minimum.
            return Float.MAX_VALUE;
        }
        return (float)hashIndex / w;
    }

    /**
     * Folds the smallest candidates into a single non-negative signature.
     *
     * <p>Polls up to {@code keyGroups} elements from {@code candidates}, removing them.
     *
     * @param candidates candidate hashes; the polled elements are removed
     * @param keyGroups maximum number of candidates to combine
     * @return 0 when there are no candidates, otherwise the combined value with the sign bit
     *         cleared
     */
    private static int getSignature(PriorityQueue<Integer> candidates, int keyGroups) {
        int numCandidates = candidates.size();
        if (numCandidates == 0) {
            return 0;
        }
        int size = Math.min(numCandidates, keyGroups);
        int result = 1;
        for (int i = 0; i < size; ++i) {
            int nextmin = candidates.poll();
            // Polynomial mixing; int overflow is intentional and wraps.
            result = 31 * result + nextmin;
        }
        return result & Integer.MAX_VALUE;
    }
}

