/*
 * Decompiled with CFR 0.152.
 */
package hivemall.ftvec.hashing;

import hivemall.UDFWithOptions;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.hashing.MurmurHash3;
import hivemall.utils.lang.Primitives;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF {@code feature_hashing}: replaces the feature-name part of each feature string
 * with a MurmurHash3-based integer index in the range {@code [1, num_features]}.
 *
 * <p>The first argument is either a list of features or a single feature; the optional
 * second argument is an option string (see {@link #getOptions()}).
 */
@Description(name="feature_hashing", value="_FUNC_(array<string> features [, const string options]) - returns a hashed feature vector in array<string>")
@UDFType(deterministic=true, stateful=false)
public final class FeatureHashingUDF extends UDFWithOptions {
    /** Default size of the hashed feature space: 2^24 = 16777216. */
    private static final int DEFAULT_NUM_FEATURES = 1 << 24;
    /** Seed passed to MurmurHash3 for every feature name. */
    private static final int HASH_SEED = -1756908916;

    /** Inspector for the first argument when it is a list; {@code null} in scalar mode. */
    @Nullable
    private ListObjectInspector _listOI;
    /** Size of the hashed feature space; always positive. */
    private int _numFeatures = DEFAULT_NUM_FEATURES;
    /** Output buffer reused across {@link #evaluate} calls in list mode. */
    @Nullable
    private List<Text> _returnObj;

    @Override
    public String getDisplayString(String[] children) {
        return "feature_hashing(" + Arrays.toString(children) + ')';
    }

    /**
     * Declares the options accepted in the second UDF argument.
     *
     * @return options containing {@code -features} / {@code -num_features}
     */
    @Override
    protected Options getOptions() {
        Options opts = new Options();
        // Build the help text from the constant so it cannot drift from the real default.
        opts.addOption("features", "num_features", true,
            "The number of features [default: " + DEFAULT_NUM_FEATURES + " (2^24)]");
        return opts;
    }

    /**
     * Parses the option string and applies {@code num_features} when given.
     *
     * @param optionValue raw option string
     * @return the parsed command line
     * @throws UDFArgumentException if {@code num_features} is not a positive integer
     */
    @Override
    protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
        CommandLine cl = parseOptions(optionValue);
        int numFeatures = Primitives.parseInt(cl.getOptionValue("num_features"), _numFeatures);
        // Zero would make mhash() divide by zero and a negative value would yield
        // negative indices, so reject both up front instead of failing per row.
        if (numFeatures <= 0) {
            throw new UDFArgumentException(
                "num_features must be greater than 0: " + numFeatures);
        }
        _numFeatures = numFeatures;
        return cl;
    }

    /**
     * Validates the argument count, selects list or scalar mode from the type of the
     * first argument, and processes the optional option string.
     *
     * @param argOIs inspectors of the call arguments (1 or 2 expected)
     * @return a writable-string list inspector in list mode, otherwise a writable-string
     *         inspector
     * @throws UDFArgumentException if the argument count or an option is invalid
     */
    @Override
    public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs) throws UDFArgumentException {
        if (argOIs.length != 1 && argOIs.length != 2) {
            throw new UDFArgumentLengthException("The feature_hashing function takes 1 or 2 arguments: " + argOIs.length);
        }

        ObjectInspector argOI0 = argOIs[0];
        _listOI = HiveUtils.isListOI(argOI0) ? (ListObjectInspector) argOI0 : null;

        if (argOIs.length == 2) {
            String opts = HiveUtils.getConstString(argOIs[1]);
            processOptions(opts);
        }

        if (_listOI == null) {
            return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        }
        return ObjectInspectorFactory.getStandardListObjectInspector(
            (ObjectInspector) PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    /**
     * Hashes the first argument.
     *
     * @param arguments deferred call arguments; only the first one is read here
     * @return {@code null} if the first argument is {@code null}; a {@link Text} in scalar
     *         mode; in list mode a {@code List<Text>} that is cleared and refilled on each
     *         call
     * @throws HiveException if the deferred argument cannot be obtained
     */
    @Override
    public Object evaluate(@Nonnull GenericUDF.DeferredObject[] arguments) throws HiveException {
        Object arg0 = arguments[0].get();
        if (arg0 == null) {
            return null;
        }
        return (_listOI == null) ? evaluateScalar(arg0) : evaluateList(arg0);
    }

    /** Hashes a single feature, using its {@code toString()} form. */
    @Nonnull
    private Text evaluateScalar(@Nonnull Object arg0) {
        return new Text(featureHashing(arg0.toString(), _numFeatures));
    }

    /**
     * Hashes every non-null element of the list; null elements are skipped, so the
     * result may be shorter than the input.
     */
    @Nonnull
    private List<Text> evaluateList(@Nonnull Object arg0) {
        final ListObjectInspector listOI = _listOI;
        final int len = listOI.getListLength(arg0);

        List<Text> list = _returnObj;
        if (list == null) {
            list = new ArrayList<Text>(len);
            _returnObj = list;
        } else {
            list.clear();
        }

        final int numFeatures = _numFeatures;
        for (int i = 0; i < len; ++i) {
            Object obj = listOI.getListElement(arg0, i);
            if (obj == null) {
                continue;
            }
            list.add(new Text(featureHashing(obj.toString(), numFeatures)));
        }
        return list;
    }

    /**
     * Replaces the feature name inside {@code fv} by its hashed index.
     *
     * <ul>
     * <li>No {@code ':'}: the whole string is hashed, except the literal {@code "0"},
     *     which is returned unchanged.</li>
     * <li>Exactly one {@code ':'}: the text before the colon is hashed and the colon and
     *     everything after it are kept. If the text before the colon is {@code "0"} and
     *     the text after it parses to {@code 1.0}, the input is returned unchanged.</li>
     * <li>Two or more {@code ':'}: the text between the first and the last colon is
     *     hashed; the prefix up to the first colon and the suffix from the last colon
     *     are kept.</li>
     * </ul>
     *
     * @param fv feature string
     * @param numFeatures size of the hashed feature space; must be positive
     * @return the feature string with its name replaced by a hashed index
     * @throws NumberFormatException if {@code fv} is {@code "0:<value>"} and
     *         {@code <value>} is not a parsable double
     */
    @Nonnull
    static String featureHashing(@Nonnull String fv, int numFeatures) {
        final int headPos = fv.indexOf(':');
        if (headPos == -1) {
            if (fv.equals("0")) {
                return fv;
            }
            return String.valueOf(mhash(fv, numFeatures));
        }

        final int tailPos = fv.lastIndexOf(':');
        if (headPos == tailPos) {
            String f = fv.substring(0, headPos);
            if (f.equals("0")) {
                double d = Double.parseDouble(fv.substring(headPos + 1));
                if (d == 1.0) {
                    return fv;
                }
            }
            String tail = fv.substring(headPos); // includes the leading ':'
            return mhash(f, numFeatures) + tail;
        }

        String field = fv.substring(0, headPos + 1); // includes the trailing ':'
        String f = fv.substring(headPos + 1, tailPos);
        String v = fv.substring(tailPos); // includes the leading ':'
        return field + mhash(f, numFeatures) + v;
    }

    /**
     * Maps {@code word} to an index in {@code [1, numFeatures]}.
     *
     * @param word text to hash
     * @param numFeatures size of the hashed feature space; must be positive
     * @return the 1-based hashed index
     */
    static int mhash(@Nonnull String word, int numFeatures) {
        int r = MurmurHash3.murmurhash3_x86_32(word, 0, word.length(), HASH_SEED) % numFeatures;
        // Java's % keeps the sign of the dividend; shift negative remainders into range.
        if (r < 0) {
            r += numFeatures;
        }
        return r + 1;
    }
}

