/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.examples.ml;

import java.util.Arrays;
import java.util.List;
import org.apache.spark.ml.feature.Bucketizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.Metadata;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * Example program that applies a {@link Bucketizer} to numeric columns of a small
 * in-memory data set: first to a single input column, then to two input columns
 * in one pass using a separate splits array per column.
 */
public class JavaBucketizerExample {
    /**
     * Creates (or reuses) a Spark session, runs the single-column and multi-column
     * examples in that order, and stops the session afterwards.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder().appName("JavaBucketizerExample").getOrCreate();
        try {
            bucketizeSingleColumn(spark);
            bucketizeMultipleColumns(spark);
        } finally {
            // Stop the session even when one of the examples throws, so it is not left running.
            spark.stop();
        }
    }

    /** Bucketizes the single column "features" and prints the bucket count and resulting rows. */
    private static void bucketizeSingleColumn(SparkSession spark) {
        double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};

        List<Row> data = Arrays.asList(
            RowFactory.create(-999.9),
            RowFactory.create(-0.5),
            RowFactory.create(-0.3),
            RowFactory.create(0.0),
            RowFactory.create(0.2),
            RowFactory.create(999.9));
        StructType schema = new StructType(new StructField[]{
            nonNullableDoubleField("features")
        });
        Dataset<Row> dataFrame = spark.createDataFrame(data, schema);

        Bucketizer bucketizer = new Bucketizer()
            .setInputCol("features")
            .setOutputCol("bucketedFeatures")
            .setSplits(splits);
        Dataset<Row> bucketedData = bucketizer.transform(dataFrame);

        // The reported bucket count is one less than the number of split points.
        System.out.println("Bucketizer output with " + (bucketizer.getSplits().length - 1) + " buckets");
        bucketedData.show();
    }

    /**
     * Bucketizes the columns "features1" and "features2" together, each with its own
     * splits, and prints the per-column bucket counts and resulting rows.
     */
    private static void bucketizeMultipleColumns(SparkSession spark) {
        // One splits array per input column, in the same order as the input columns.
        double[][] splitsArray = {
            {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY},
            {Double.NEGATIVE_INFINITY, -0.3, 0.0, 0.3, Double.POSITIVE_INFINITY}
        };

        List<Row> data = Arrays.asList(
            RowFactory.create(-999.9, -999.9),
            RowFactory.create(-0.5, -0.2),
            RowFactory.create(-0.3, -0.1),
            RowFactory.create(0.0, 0.0),
            RowFactory.create(0.2, 0.4),
            RowFactory.create(999.9, 999.9));
        StructType schema = new StructType(new StructField[]{
            nonNullableDoubleField("features1"),
            nonNullableDoubleField("features2")
        });
        Dataset<Row> dataFrame = spark.createDataFrame(data, schema);

        Bucketizer bucketizer = new Bucketizer()
            .setInputCols(new String[]{"features1", "features2"})
            .setOutputCols(new String[]{"bucketedFeatures1", "bucketedFeatures2"})
            .setSplitsArray(splitsArray);
        Dataset<Row> bucketedData = bucketizer.transform(dataFrame);

        double[][] usedSplits = bucketizer.getSplitsArray();
        System.out.println("Bucketizer output with [" + (usedSplits[0].length - 1) + ", " + (usedSplits[1].length - 1) + "] buckets for each input column");
        bucketedData.show();
    }

    /** Builds a non-nullable {@code DoubleType} schema field with empty metadata. */
    private static StructField nonNullableDoubleField(String name) {
        return new StructField(name, DataTypes.DoubleType, false, Metadata.empty());
    }
}

