/*
 * Decompiled with CFR 0.152.
 */
package hivemall.ftvec.trans;

import hivemall.UDFWithOptions;
import hivemall.utils.hadoop.HiveUtils;
import hivemall.utils.hashing.MurmurHash3;
import hivemall.utils.lang.Primitives;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.annotation.Nonnull;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Options;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.Text;

/**
 * Hive UDF {@code ffm_features} that turns categorical column values into
 * feature strings of the form {@code <field>:<index>:<value>}.
 *
 * <p>Call shape: {@code ffm_features(const array<string> featureNames, feature1, feature2, ..
 * [, const string options])}. The number of feature arguments must equal the number of
 * feature names. Null or empty feature values are skipped, so the returned array may be
 * shorter than the list of feature names.
 *
 * <p>With feature hashing enabled (the default) each entry is
 * {@code hash(featureName):hash(featureName#value) + numFields:1}; with
 * {@code -disable_feature_hashing} each entry is {@code featureName:featureName#value:1}.
 */
@Description(name="ffm_features", value="_FUNC_(const array<string> featureNames, feature1, feature2, .. [, const string options]) - Takes categroical variables and returns a feature vector array<string> in a libffm format <field>:<index>:<value>")
@UDFType(deterministic=true, stateful=false)
public final class FFMFeaturesUDF
extends UDFWithOptions {
    /** Constant feature names taken from the first UDF argument. */
    private String[] _featureNames;
    /** Object inspectors for the feature value arguments (argument i + 1 maps to index i). */
    private PrimitiveObjectInspector[] _inputOIs;
    /** Output buffer that is cleared and refilled on every {@link #evaluate} call. */
    private List<Text> _result;
    /** Whether field and index are emitted as hashed integers; cleared by -disable_feature_hashing. */
    private boolean _mhash = true;
    /** Bound passed to the hash function for feature indices; default is 1 << 21. */
    private int _numFeatures = 1 << 21;
    /** Bound passed to the hash function for fields; also added to every hashed feature index. */
    private int _numFields = 1024;

    /**
     * Declares the command line style options accepted in the trailing options argument.
     *
     * @return the supported options
     */
    @Override
    protected Options getOptions() {
        Options opts = new Options();
        opts.addOption("no_hash", "disable_feature_hashing", false, "Wheather to disable feature hashing [default: false]");
        opts.addOption("hash", "feature_hashing", true, "The number of bits for feature hashing in range [18,31] [default:21]");
        opts.addOption("fields", "num_fields", true, "The number of fields [default:1024]");
        return opts;
    }

    /**
     * Parses the option string and applies it to this instance.
     *
     * @param optionValue the raw option string given as the last UDF argument
     * @return the parsed command line
     * @throws UDFArgumentException if {@code -feature_hashing} is outside [18,31] or
     *         {@code -num_fields} is not greater than 1
     */
    @Override
    protected CommandLine processOptions(@Nonnull String optionValue) throws UDFArgumentException {
        CommandLine cl = this.parseOptions(optionValue);

        int hashbits = Primitives.parseInt(cl.getOptionValue("feature_hashing"), 21);
        if (hashbits < 18 || hashbits > 31) {
            throw new UDFArgumentException("-feature_hashing MUST be in range [18,31]: " + hashbits);
        }
        // NOTE(review): for hashbits == 31 this shift overflows int and yields Integer.MIN_VALUE;
        // confirm that MurmurHash3.murmurhash3 tolerates a negative bound before relying on 31.
        int numFeatures = 1 << hashbits;

        int numFields = Primitives.parseInt(cl.getOptionValue("num_fields"), 1024);
        if (numFields <= 1) {
            throw new UDFArgumentException("-num_fields MUST be greater than 1: " + numFields);
        }

        // Honor the declared -no_hash/-disable_feature_hashing flag; it used to be parsed
        // but never applied, which left the non-hashing output mode unreachable.
        this._mhash = !cl.hasOption("disable_feature_hashing");
        this._numFeatures = numFeatures;
        this._numFields = numFields;
        return cl;
    }

    /**
     * Validates the argument list, captures the feature names and input inspectors, and
     * processes the optional trailing options string.
     *
     * @param argOIs inspectors for (featureNames, feature1, .., featureN [, options])
     * @return a list-of-writable-string object inspector describing the result
     * @throws UDFArgumentException if the arguments do not match the expected shape, a
     *         feature name is null or contains a colon, or an option value is invalid
     */
    public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs) throws UDFArgumentException {
        final int numArgOIs = argOIs.length;
        if (numArgOIs < 2) {
            throw new UDFArgumentLengthException("the number of arguments must be greater that or equals to 2: " + numArgOIs);
        }

        this._featureNames = HiveUtils.getConstStringArray(argOIs[0]);
        if (this._featureNames == null) {
            throw new UDFArgumentException("#featureNames should not be null");
        }
        final int numFeatureNames = this._featureNames.length;
        if (numFeatureNames < 1) {
            throw new UDFArgumentException("#featureNames must be greater than or equals to 1: " + numFeatureNames);
        }
        for (String featureName : this._featureNames) {
            if (featureName == null) {
                // Fail with a descriptive error instead of a bare NullPointerException.
                throw new UDFArgumentException("featureName should not be null");
            }
            // ':' is the separator of the emitted <field>:<index>:<value> triple.
            if (featureName.indexOf(':') != -1) {
                throw new UDFArgumentException("featureName should not include colon: " + featureName);
            }
        }

        final int lastArgIndex = numArgOIs - 1;
        final int numFeatures;
        if (lastArgIndex > numFeatureNames) {
            // More arguments than names: exactly one extra constant string (options) is allowed.
            if (lastArgIndex != numFeatureNames + 1 || !HiveUtils.isConstString(argOIs[lastArgIndex])) {
                throw new UDFArgumentException("Unexpected arguments for _FUNC_(const array<string> featureNames, feature1, feature2, .. [, const string options])");
            }
            String optionValue = HiveUtils.getConstString(argOIs[lastArgIndex]);
            this.processOptions(optionValue);
            numFeatures = numArgOIs - 2;
        } else {
            numFeatures = lastArgIndex;
        }
        if (numFeatureNames != numFeatures) {
            throw new UDFArgumentLengthException("#featureNames '" + numFeatureNames + "' != #features '" + numFeatures + "'");
        }

        this._inputOIs = new PrimitiveObjectInspector[numFeatures];
        for (int i = 0; i < numFeatures; ++i) {
            // Feature arguments start at index 1; index 0 holds the feature names.
            this._inputOIs[i] = HiveUtils.asPrimitiveObjectInspector(argOIs[i + 1]);
        }
        this._result = new ArrayList<Text>(numFeatures);
        return ObjectInspectorFactory.getStandardListObjectInspector((ObjectInspector)PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    /**
     * Builds the feature strings for one row.
     *
     * @param arguments the deferred UDF arguments; index 0 is the feature name array and
     *        indices 1..N are the feature values
     * @return the internal result list, which is reused and overwritten by the next call
     * @throws HiveException if a feature value contains a colon
     */
    public List<Text> evaluate(@Nonnull GenericUDF.DeferredObject[] arguments) throws HiveException {
        this._result.clear();

        final StringBuilder builder = new StringBuilder(128);
        final int size = this._featureNames.length;
        for (int i = 0; i < size; ++i) {
            Object argument = arguments[i + 1].get();
            if (argument == null) {
                continue;
            }
            String s = PrimitiveObjectInspectorUtils.getString(argument, this._inputOIs[i]);
            if (s == null || s.isEmpty()) {
                // Missing values produce no feature.
                continue;
            }
            if (s.indexOf(':') != -1) {
                throw new HiveException("feature index SHOULD NOT include colon: " + s);
            }

            String featureName = this._featureNames[i];
            String feature = featureName + '#' + s;
            if (this._mhash) {
                int field = MurmurHash3.murmurhash3(featureName, this._numFields);
                // Hashed indices are shifted by _numFields, matching the original encoding.
                int index = MurmurHash3.murmurhash3(feature, this._numFeatures) + this._numFields;
                builder.append(field).append(':').append(index).append(":1");
            } else {
                builder.append(featureName).append(':').append(feature).append(":1");
            }
            this._result.add(new Text(builder.toString()));
            builder.setLength(0);
        }
        return this._result;
    }

    /**
     * Renders this call for display purposes.
     *
     * @param children string forms of the argument expressions
     * @return {@code ffm_features(} followed by the bracketed argument list and {@code )}
     */
    public String getDisplayString(String[] children) {
        return "ffm_features(" + Arrays.toString(children) + ")";
    }
}

