/*
 * Decompiled with CFR 0.152.
 */
package org.apache.spark.ml.feature;

import java.io.IOException;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.feature.CountVectorizer$;
import org.apache.spark.ml.feature.CountVectorizerModel;
import org.apache.spark.ml.feature.CountVectorizerParams;
import org.apache.spark.ml.feature.CountVectorizerParams$class;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.DoubleParam;
import org.apache.spark.ml.param.IntParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.shared.HasInputCol$class;
import org.apache.spark.ml.param.shared.HasOutputCol$class;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.DefaultParamsWritable$class;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable$class;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.rdd.RDD;
import org.apache.spark.rdd.RDD$;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.util.collection.OpenHashMap;
import scala.Array$;
import scala.Function0;
import scala.Function1;
import scala.Function2;
import scala.MatchError;
import scala.None$;
import scala.Predef$;
import scala.Serializable;
import scala.Some;
import scala.Tuple2;
import scala.collection.Iterable;
import scala.collection.Iterable$;
import scala.collection.Seq;
import scala.math.Ordering;
import scala.package$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

@ScalaSignature(bytes="\u0006\u0001\u0005Ue\u0001B\u0001\u0003\u00015\u0011qbQ8v]R4Vm\u0019;pe&TXM\u001d\u0006\u0003\u0007\u0011\tqAZ3biV\u0014XM\u0003\u0002\u0006\r\u0005\u0011Q\u000e\u001c\u0006\u0003\u000f!\tQa\u001d9be.T!!\u0003\u0006\u0002\r\u0005\u0004\u0018m\u00195f\u0015\u0005Y\u0011aA8sO\u000e\u00011\u0003\u0002\u0001\u000f-e\u00012a\u0004\t\u0013\u001b\u0005!\u0011BA\t\u0005\u0005%)5\u000f^5nCR|'\u000f\u0005\u0002\u0014)5\t!!\u0003\u0002\u0016\u0005\t!2i\\;oiZ+7\r^8sSj,'/T8eK2\u0004\"aE\f\n\u0005a\u0011!!F\"pk:$h+Z2u_JL'0\u001a:QCJ\fWn\u001d\t\u00035ui\u0011a\u0007\u0006\u00039\u0011\tA!\u001e;jY&\u0011ad\u0007\u0002\u0016\t\u00164\u0017-\u001e7u!\u0006\u0014\u0018-\\:Xe&$\u0018M\u00197f\u0011!\u0001\u0003A!b\u0001\n\u0003\n\u0013aA;jIV\t!\u0005\u0005\u0002$S9\u0011AeJ\u0007\u0002K)\ta%A\u0003tG\u0006d\u0017-\u0003\u0002)K\u00051\u0001K]3eK\u001aL!AK\u0016\u0003\rM#(/\u001b8h\u0015\tAS\u0005K\u0002 [M\u0002\"AL\u0019\u000e\u0003=R!\u0001\r\u0004\u0002\u0015\u0005tgn\u001c;bi&|g.\u0003\u00023_\t)1+\u001b8dK\u0006\nA'A\u00032]Ur\u0003\u0007\u0003\u00057\u0001\t\u0005\t\u0015!\u0003#\u0003\u0011)\u0018\u000e\u001a\u0011)\u0007Uj3\u0007C\u0003:\u0001\u0011\u0005!(\u0001\u0004=S:LGO\u0010\u000b\u0003wq\u0002\"a\u0005\u0001\t\u000b\u0001B\u0004\u0019\u0001\u0012)\u0007qj3\u0007K\u00029[MBQ!\u000f\u0001\u0005\u0002\u0001#\u0012a\u000f\u0015\u0004\u007f5\u001a\u0004\"B\"\u0001\t\u0003!\u0015aC:fi&s\u0007/\u001e;D_2$\"!\u0012$\u000e\u0003\u0001AQa\u0012\"A\u0002\t\nQA^1mk\u0016D3AQ\u00174\u0011\u0015Q\u0005\u0001\"\u0001L\u00031\u0019X\r^(viB,HoQ8m)\t)E\nC\u0003H\u0013\u0002\u0007!\u0005K\u0002J[MBQa\u0014\u0001\u0005\u0002A\u000bAb]3u->\u001c\u0017MY*ju\u0016$\"!R)\t\u000b\u001ds\u0005\u0019\u0001*\u0011\u0005\u0011\u001a\u0016B\u0001+&\u0005\rIe\u000e\u001e\u0015\u0004\u001d6\u001a\u0004\"B,\u0001\t\u0003A\u0016\u0001C:fi6Kg\u000e\u0012$\u0015\u0005\u0015K\u0006\"B$W\u0001\u0004Q\u0006C\u0001\u0013\\\u0013\taVE\u0001\u0004E_V\u0014G.\u001a\u0015\u0004-6\u001a\u00
04\"B0\u0001\t\u0003\u0001\u0017\u0001C:fi6\u000b\u0007\u0010\u0012$\u0015\u0005\u0015\u000b\u0007\"B$_\u0001\u0004Q\u0006f\u00010.G\u0006\nA-A\u00033]Qr\u0003\u0007C\u0003g\u0001\u0011\u0005q-\u0001\u0005tKRl\u0015N\u001c+G)\t)\u0005\u000eC\u0003HK\u0002\u0007!\fK\u0002f[MBQa\u001b\u0001\u0005\u00021\f\u0011b]3u\u0005&t\u0017M]=\u0015\u0005\u0015k\u0007\"B$k\u0001\u0004q\u0007C\u0001\u0013p\u0013\t\u0001XEA\u0004C_>dW-\u00198)\u0007)l#/I\u0001t\u0003\u0015\u0011d\u0006\r\u00181\u0011\u0015)\b\u0001\"\u0011w\u0003\r1\u0017\u000e\u001e\u000b\u0003%]DQ\u0001\u001f;A\u0002e\fq\u0001Z1uCN,G\u000fM\u0002{\u0003\u000b\u0001Ba\u001f@\u0002\u00025\tAP\u0003\u0002~\r\u0005\u00191/\u001d7\n\u0005}d(a\u0002#bi\u0006\u001cX\r\u001e\t\u0005\u0003\u0007\t)\u0001\u0004\u0001\u0005\u0017\u0005\u001dq/!A\u0001\u0002\u000b\u0005\u0011\u0011\u0002\u0002\u0004?\u0012\n\u0014\u0003BA\u0006\u0003#\u00012\u0001JA\u0007\u0013\r\ty!\n\u0002\b\u001d>$\b.\u001b8h!\r!\u00131C\u0005\u0004\u0003+)#aA!os\"\u001aA/\f:\t\u000f\u0005m\u0001\u0001\"\u0011\u0002\u001e\u0005yAO]1og\u001a|'/\\*dQ\u0016l\u0017\r\u0006\u0003\u0002 
\u0005-\u0002\u0003BA\u0011\u0003Oi!!a\t\u000b\u0007\u0005\u0015B0A\u0003usB,7/\u0003\u0003\u0002*\u0005\r\"AC*ueV\u001cG\u000fV=qK\"A\u0011QFA\r\u0001\u0004\ty\"\u0001\u0004tG\",W.\u0019\u0015\u0005\u00033i3\u0007C\u0004\u00024\u0001!\t%!\u000e\u0002\t\r|\u0007/\u001f\u000b\u0004w\u0005]\u0002\u0002CA\u001d\u0003c\u0001\r!a\u000f\u0002\u000b\u0015DHO]1\u0011\t\u0005u\u00121I\u0007\u0003\u0003\u007fQ1!!\u0011\u0005\u0003\u0015\u0001\u0018M]1n\u0013\u0011\t)%a\u0010\u0003\u0011A\u000b'/Y7NCBDC!!\r.g!\u001a\u0001!L\u001a\b\u000f\u00055#\u0001#\u0001\u0002P\u0005y1i\\;oiZ+7\r^8sSj,'\u000fE\u0002\u0014\u0003#2a!\u0001\u0002\t\u0002\u0005M3\u0003CA)\u0003+\nY&!\u0019\u0011\u0007\u0011\n9&C\u0002\u0002Z\u0015\u0012a!\u00118z%\u00164\u0007\u0003\u0002\u000e\u0002^mJ1!a\u0018\u001c\u0005U!UMZ1vYR\u0004\u0016M]1ngJ+\u0017\rZ1cY\u0016\u00042\u0001JA2\u0013\r\t)'\n\u0002\r'\u0016\u0014\u0018.\u00197ju\u0006\u0014G.\u001a\u0005\bs\u0005EC\u0011AA5)\t\ty\u0005\u0003\u0005\u0002n\u0005EC\u0011IA8\u0003\u0011aw.\u00193\u0015\u0007m\n\t\bC\u0004\u0002t\u0005-\u0004\u0019\u0001\u0012\u0002\tA\fG\u000f\u001b\u0015\u0006\u0003Wj\u0013qO\u0011\u0003\u0003s\nQ!\r\u00187]AB!\"! \u0002R\u0005\u0005I\u0011BA@\u0003-\u0011X-\u00193SKN|GN^3\u0015\u0005\u0005\u0005\u0005\u0003BAB\u0003\u001bk!!!\"\u000b\t\u0005\u001d\u0015\u0011R\u0001\u0005Y\u0006twM\u0003\u0002\u0002\f\u0006!!.\u0019<b\u0013\u0011\ty)!\"\u0003\r=\u0013'.Z2uQ\u0015\t\t&LA<Q\u0015\tY%LA<\u0001")
public class CountVectorizer
extends Estimator<CountVectorizerModel>
implements CountVectorizerParams,
DefaultParamsWritable {
    // NOTE(review): every param field below is declared final yet is assigned by a
    // `..._setter_$..._$eq` method further down in this class. That looks like a decompiler
    // rendering of Scala trait vals rather than hand-written Java — confirm before
    // treating this file as compilable source.

    /** Identifier of this estimator; assigned in the {@code String} constructor and returned by {@code uid()}. */
    private final String uid;
    /** Param read by {@code fit} as the upper bound on the number of vocabulary terms kept. */
    private final IntParam vocabSize;
    /** Param read by {@code fit} as the lower document-frequency bound (scaled by row count when below 1.0). */
    private final DoubleParam minDF;
    /** Param read by {@code fit} as the upper document-frequency bound (scaled by row count when below 1.0). */
    private final DoubleParam maxDF;
    /** Param exposed through {@code minTF()}; not read anywhere in this class. */
    private final DoubleParam minTF;
    /** Param exposed through {@code binary()}; not read anywhere in this class. */
    private final BooleanParam binary;
    /** Param exposed through {@code outputCol()}; holds a string value (presumably the output column name). */
    private final Param<String> outputCol;
    /** Param exposed through {@code inputCol()}; {@code fit} selects the dataset column named by its value. */
    private final Param<String> inputCol;

    /**
     * Static forwarder to the companion singleton's {@code read()}.
     *
     * @return the {@code MLReader} supplied by {@code CountVectorizer$.MODULE$}
     */
    public static MLReader<CountVectorizer> read() {
        return CountVectorizer$.MODULE$.read();
    }

    /**
     * Static forwarder to the companion singleton's {@code load(String)}.
     *
     * @param string passed unchanged to the companion; presumably the path of a
     *               previously saved instance — confirm against the companion object
     * @return the {@code CountVectorizer} returned by the companion
     */
    public static CountVectorizer load(String string) {
        return CountVectorizer$.MODULE$.load(string);
    }

    /**
     * Forwards to the {@code DefaultParamsWritable} trait implementation.
     *
     * @return the {@code MLWriter} produced by {@code DefaultParamsWritable$class.write} for this instance
     */
    @Override
    public MLWriter write() {
        return DefaultParamsWritable$class.write(this);
    }

    /**
     * Forwards to the {@code MLWritable} trait implementation of {@code save}.
     *
     * @param path passed unchanged to {@code MLWritable$class.save}
     * @throws IOException declared on this signature; presumably raised by the trait
     *                     implementation when writing fails — confirm
     */
    @Override
    public void save(String path) throws IOException {
        MLWritable$class.save(this, path);
    }

    /**
     * Accessor for the {@code vocabSize} param object (not its value).
     * {@code fit} uses the param's value to cap how many terms end up in the vocabulary.
     *
     * @return the {@code IntParam} stored in the {@code vocabSize} field
     */
    @Override
    public IntParam vocabSize() {
        return this.vocabSize;
    }

    /**
     * Accessor for the {@code minDF} param object (not its value).
     * In {@code fit}, a value {@code >= 1.0} is used directly as the minimum number of rows a
     * term must appear in; a value {@code < 1.0} is first multiplied by the input row count.
     *
     * @return the {@code DoubleParam} stored in the {@code minDF} field
     */
    @Override
    public DoubleParam minDF() {
        return this.minDF;
    }

    /**
     * Accessor for the {@code maxDF} param object (not its value).
     * In {@code fit}, a value {@code >= 1.0} is used directly as the maximum number of rows a
     * term may appear in; a value {@code < 1.0} is first multiplied by the input row count.
     *
     * @return the {@code DoubleParam} stored in the {@code maxDF} field
     */
    @Override
    public DoubleParam maxDF() {
        return this.maxDF;
    }

    /**
     * Accessor for the {@code minTF} param object (not its value).
     * This class never reads the value itself; presumably the fitted model consumes it — confirm.
     *
     * @return the {@code DoubleParam} stored in the {@code minTF} field
     */
    @Override
    public DoubleParam minTF() {
        return this.minTF;
    }

    /**
     * Accessor for the {@code binary} param object (not its value).
     * This class never reads the value itself; presumably the fitted model consumes it — confirm.
     *
     * @return the {@code BooleanParam} stored in the {@code binary} field
     */
    @Override
    public BooleanParam binary() {
        return this.binary;
    }

    /**
     * Stores the given param object in the {@code vocabSize} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code CountVectorizerParams$class.$init$} during construction — confirm.
     *
     * @param x$1 the {@code IntParam} to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$vocabSize_$eq(IntParam x$1) {
        this.vocabSize = x$1;
    }

    /**
     * Stores the given param object in the {@code minDF} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code CountVectorizerParams$class.$init$} during construction — confirm.
     *
     * @param x$1 the {@code DoubleParam} to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minDF_$eq(DoubleParam x$1) {
        this.minDF = x$1;
    }

    /**
     * Stores the given param object in the {@code maxDF} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code CountVectorizerParams$class.$init$} during construction — confirm.
     *
     * @param x$1 the {@code DoubleParam} to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$maxDF_$eq(DoubleParam x$1) {
        this.maxDF = x$1;
    }

    /**
     * Stores the given param object in the {@code minTF} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code CountVectorizerParams$class.$init$} during construction — confirm.
     *
     * @param x$1 the {@code DoubleParam} to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$minTF_$eq(DoubleParam x$1) {
        this.minTF = x$1;
    }

    /**
     * Stores the given param object in the {@code binary} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code CountVectorizerParams$class.$init$} during construction — confirm.
     *
     * @param x$1 the {@code BooleanParam} to store
     */
    @Override
    public void org$apache$spark$ml$feature$CountVectorizerParams$_setter_$binary_$eq(BooleanParam x$1) {
        this.binary = x$1;
    }

    /**
     * Forwards to the {@code CountVectorizerParams} trait implementation.
     *
     * @return the {@code int} produced by {@code CountVectorizerParams$class.getVocabSize};
     *         presumably the current value of the {@code vocabSize} param — confirm
     */
    @Override
    public int getVocabSize() {
        return CountVectorizerParams$class.getVocabSize(this);
    }

    /**
     * Forwards to the {@code CountVectorizerParams} trait implementation.
     *
     * @return the {@code double} produced by {@code CountVectorizerParams$class.getMinDF};
     *         presumably the current value of the {@code minDF} param — confirm
     */
    @Override
    public double getMinDF() {
        return CountVectorizerParams$class.getMinDF(this);
    }

    /**
     * Forwards to the {@code CountVectorizerParams} trait implementation.
     *
     * @return the {@code double} produced by {@code CountVectorizerParams$class.getMaxDF};
     *         presumably the current value of the {@code maxDF} param — confirm
     */
    @Override
    public double getMaxDF() {
        return CountVectorizerParams$class.getMaxDF(this);
    }

    /**
     * Forwards schema validation/transformation to the {@code CountVectorizerParams} trait
     * implementation. {@code transformSchema} in this class calls this method.
     *
     * @param schema the schema handed to {@code CountVectorizerParams$class.validateAndTransformSchema}
     * @return the {@code StructType} returned by the trait implementation
     */
    @Override
    public StructType validateAndTransformSchema(StructType schema) {
        return CountVectorizerParams$class.validateAndTransformSchema(this, schema);
    }

    /**
     * Forwards to the {@code CountVectorizerParams} trait implementation.
     *
     * @return the {@code double} produced by {@code CountVectorizerParams$class.getMinTF};
     *         presumably the current value of the {@code minTF} param — confirm
     */
    @Override
    public double getMinTF() {
        return CountVectorizerParams$class.getMinTF(this);
    }

    /**
     * Forwards to the {@code CountVectorizerParams} trait implementation.
     *
     * @return the {@code boolean} produced by {@code CountVectorizerParams$class.getBinary};
     *         presumably the current value of the {@code binary} param — confirm
     */
    @Override
    public boolean getBinary() {
        return CountVectorizerParams$class.getBinary(this);
    }

    /**
     * Accessor for the {@code outputCol} param object (not its value).
     *
     * @return the {@code Param<String>} stored in the {@code outputCol} field
     */
    @Override
    public final Param<String> outputCol() {
        return this.outputCol;
    }

    /**
     * Stores the given param object in the {@code outputCol} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code HasOutputCol$class.$init$} during construction — confirm.
     *
     * @param x$1 the param to store (raw {@code Param} in this decompiled signature)
     */
    @Override
    public final void org$apache$spark$ml$param$shared$HasOutputCol$_setter_$outputCol_$eq(Param x$1) {
        this.outputCol = x$1;
    }

    /**
     * Forwards to the {@code HasOutputCol} trait implementation.
     *
     * @return the string produced by {@code HasOutputCol$class.getOutputCol};
     *         presumably the current value of the {@code outputCol} param — confirm
     */
    @Override
    public final String getOutputCol() {
        return HasOutputCol$class.getOutputCol(this);
    }

    /**
     * Accessor for the {@code inputCol} param object (not its value).
     * {@code fit} selects the dataset column named by this param's value.
     *
     * @return the {@code Param<String>} stored in the {@code inputCol} field
     */
    @Override
    public final Param<String> inputCol() {
        return this.inputCol;
    }

    /**
     * Stores the given param object in the {@code inputCol} field.
     * The mangled name suggests a Scala-compiler-generated trait setter, presumably invoked
     * from {@code HasInputCol$class.$init$} during construction — confirm.
     *
     * @param x$1 the param to store (raw {@code Param} in this decompiled signature)
     */
    @Override
    public final void org$apache$spark$ml$param$shared$HasInputCol$_setter_$inputCol_$eq(Param x$1) {
        this.inputCol = x$1;
    }

    /**
     * Forwards to the {@code HasInputCol} trait implementation.
     *
     * @return the string produced by {@code HasInputCol$class.getInputCol};
     *         presumably the current value of the {@code inputCol} param — confirm
     */
    @Override
    public final String getInputCol() {
        return HasInputCol$class.getInputCol(this);
    }

    /**
     * Returns the identifier given to (or generated by) the constructor.
     * {@code fit} passes the same identifier to the {@code CountVectorizerModel} it creates.
     *
     * @return the value of the {@code uid} field
     */
    @Override
    public String uid() {
        return this.uid;
    }

    /**
     * Sets the {@code inputCol} param through the inherited {@code set(param, value)}.
     *
     * @param value the string to store; {@code fit} later selects the dataset column with this name
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setInputCol(String value) {
        return (CountVectorizer)this.set(this.inputCol(), value);
    }

    /**
     * Sets the {@code outputCol} param through the inherited {@code set(param, value)}.
     *
     * @param value the string to store in the {@code outputCol} param
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setOutputCol(String value) {
        return (CountVectorizer)this.set(this.outputCol(), value);
    }

    /**
     * Sets the {@code vocabSize} param through the inherited {@code set(param, value)}.
     * The primitive is boxed first because {@code set} takes an object value.
     *
     * @param value upper bound on the number of vocabulary terms {@code fit} will keep
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setVocabSize(int value) {
        return (CountVectorizer)this.set(this.vocabSize(), BoxesRunTime.boxToInteger((int)value));
    }

    /**
     * Sets the {@code minDF} param through the inherited {@code set(param, value)}.
     * The primitive is boxed first because {@code set} takes an object value.
     *
     * @param value lower document-frequency bound; {@code fit} treats values {@code >= 1.0} as a
     *              row count and values {@code < 1.0} as a fraction of the input row count
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setMinDF(double value) {
        return (CountVectorizer)this.set(this.minDF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the {@code maxDF} param through the inherited {@code set(param, value)}.
     * The primitive is boxed first because {@code set} takes an object value.
     *
     * @param value upper document-frequency bound; {@code fit} treats values {@code >= 1.0} as a
     *              row count and values {@code < 1.0} as a fraction of the input row count
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setMaxDF(double value) {
        return (CountVectorizer)this.set(this.maxDF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the {@code minTF} param through the inherited {@code set(param, value)}.
     * The primitive is boxed first because {@code set} takes an object value.
     *
     * @param value the value to store in the {@code minTF} param (not read by this class)
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setMinTF(double value) {
        return (CountVectorizer)this.set(this.minTF(), BoxesRunTime.boxToDouble((double)value));
    }

    /**
     * Sets the {@code binary} param through the inherited {@code set(param, value)}.
     * The primitive is boxed first because {@code set} takes an object value.
     *
     * @param value the value to store in the {@code binary} param (not read by this class)
     * @return the result of {@code set}, cast to {@code CountVectorizer}
     *         (presumably this same instance, for chaining — confirm)
     */
    public CountVectorizer setBinary(boolean value) {
        return (CountVectorizer)this.set(this.binary(), BoxesRunTime.boxToBoolean((boolean)value));
    }

    /**
     * Learns a vocabulary from the token sequences in the input column and returns a model built
     * from it.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>validate the dataset schema with {@code transformSchema(schema, true)};</li>
     *   <li>read the input column as an RDD of token sequences;</li>
     *   <li>resolve the document-frequency bounds: a {@code minDF}/{@code maxDF} value
     *       {@code >= 1.0} is used as-is, a value {@code < 1.0} is multiplied by the number of
     *       input rows (which is only counted when at least one bound is below 1.0);</li>
     *   <li>for every term, sum its occurrences over all rows and count the rows containing it;</li>
     *   <li>if {@code minDF} or {@code maxDF} was explicitly set, keep only the terms whose row
     *       count lies within {@code [minDf, maxDf]};</li>
     *   <li>take the {@code min(number of terms, vocabSize)} terms with the largest total
     *       occurrence count as the vocabulary.</li>
     * </ol>
     *
     * <p>The per-term count RDD is cached for the two actions that read it ({@code count} and
     * {@code top}) and is unpersisted in a {@code finally} block, so a call to this method does
     * not leave cached blocks behind, even when one of those actions fails.
     *
     * @param dataset dataset containing the column named by the {@code inputCol} param; each
     *                row's value is read as a {@code Seq<String>}
     * @return a {@code CountVectorizerModel} created with this estimator's uid and the learned
     *         vocabulary, with this estimator as parent and this estimator's param values copied in
     * @throws IllegalArgumentException when a {@code require} check fails: the resolved
     *         {@code maxDf} is smaller than {@code minDf}, or the resulting vocabulary is empty
     */
    @Override
    public CountVectorizerModel fit(Dataset<?> dataset) {
        this.transformSchema(dataset.schema(), true);
        int vocSize = BoxesRunTime.unboxToInt((Object)this.$(this.vocabSize()));
        // One element per row: the token sequence stored in the input column.
        RDD input = dataset.select(this.$(this.inputCol()), (Seq)Predef$.MODULE$.wrapRefArray((Object[])new String[0])).rdd().map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Seq<String> apply(Row x$1) {
                return (Seq)x$1.getAs(0);
            }
        }, ClassTag$.MODULE$.apply(Seq.class));
        // The row count is only needed to turn a fractional (< 1.0) bound into an absolute one.
        boolean countingRequired = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) < 1.0 || BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) < 1.0;
        // NOTE(review): the decompiler typed this local as None$ although it may hold a Some;
        // the source type is presumably Option<Long> — confirm before compiling this file.
        None$ maybeInputSize = countingRequired ? new Some((Object)BoxesRunTime.boxToLong((long)input.cache().count())) : None$.MODULE$;
        // maybeInputSize.get() is only evaluated on the "< 1.0" branch, which implies countingRequired.
        double minDf = BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) >= 1.0 ? BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) : BoxesRunTime.unboxToDouble((Object)this.$(this.minDF())) * (double)BoxesRunTime.unboxToLong((Object)maybeInputSize.get());
        double maxDf = BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) >= 1.0 ? BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) : BoxesRunTime.unboxToDouble((Object)this.$(this.maxDF())) * (double)BoxesRunTime.unboxToLong((Object)maybeInputSize.get());
        Predef$.MODULE$.require(maxDf >= minDf, (Function0)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply() {
                return "maxDF must be >= minDF.";
            }
        });
        // Per row: emit (term, (occurrences in this row, 1)); the reduce then sums both
        // components, yielding (term, (total occurrences, number of rows containing the term)).
        RDD allWordCounts = RDD$.MODULE$.rddToPairRDDFunctions(input.flatMap((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Iterable<Tuple2<String, Tuple2<Object, Object>>> apply(Seq<String> x0$1) {
                Seq<String> seq = x0$1;
                OpenHashMap.mcJ.sp wc = new OpenHashMap.mcJ.sp(ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.Long());
                seq.foreach((Function1)new Serializable(this, (OpenHashMap)wc){
                    public static final long serialVersionUID = 0L;
                    private final OpenHashMap wc$1;

                    public final long apply(String w2) {
                        // First sighting of a term stores 1; later sightings add 1.
                        return this.wc$1.changeValue$mcJ$sp((Object)w2, (Function0)new Serializable(this){
                            public static final long serialVersionUID = 0L;

                            public final long apply() {
                                return this.apply$mcJ$sp();
                            }

                            public long apply$mcJ$sp() {
                                return 1L;
                            }
                        }, (Function1)new Serializable(this){
                            public static final long serialVersionUID = 0L;

                            public final long apply(long x$2) {
                                return this.apply$mcJJ$sp(x$2);
                            }

                            public long apply$mcJJ$sp(long x$2) {
                                return x$2 + 1L;
                            }
                        });
                    }
                    {
                        this.wc$1 = wc$1;
                    }
                });
                Iterable iterable = (Iterable)wc.map((Function1)new Serializable(this){
                    public static final long serialVersionUID = 0L;

                    public final Tuple2<String, Tuple2<Object, Object>> apply(Tuple2<String, Object> x0$2) {
                        Tuple2<String, Object> tuple2 = x0$2;
                        if (tuple2 != null) {
                            String word = (String)tuple2._1();
                            long count = tuple2._2$mcJ$sp();
                            Tuple2 tuple22 = new Tuple2((Object)word, (Object)new Tuple2.mcJI.sp(count, 1));
                            return tuple22;
                        }
                        throw new MatchError(tuple2);
                    }
                }, Iterable$.MODULE$.canBuildFrom());
                return iterable;
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class)), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(Tuple2.class), (Ordering)Ordering.String$.MODULE$).reduceByKey((Function2)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Tuple2<Object, Object> apply(Tuple2<Object, Object> x0$3, Tuple2<Object, Object> x1$1) {
                Tuple2 tuple2 = new Tuple2(x0$3, x1$1);
                if (tuple2 != null) {
                    Tuple2 tuple22 = (Tuple2)tuple2._1();
                    Tuple2 tuple23 = (Tuple2)tuple2._2();
                    if (tuple22 != null) {
                        long wc1 = tuple22._1$mcJ$sp();
                        int df1 = tuple22._2$mcI$sp();
                        if (tuple23 != null) {
                            long wc2 = tuple23._1$mcJ$sp();
                            int df2 = tuple23._2$mcI$sp();
                            Tuple2.mcJI.sp sp2 = new Tuple2.mcJI.sp(wc1 + wc2, df1 + df2);
                            return sp2;
                        }
                    }
                }
                throw new MatchError((Object)tuple2);
            }
        });
        // The document-frequency filter is skipped entirely unless a bound was explicitly set.
        boolean filteringRequired = this.isSet(this.minDF()) || this.isSet(this.maxDF());
        RDD maybeFilteredWordCounts = filteringRequired ? allWordCounts.filter((Function1)new Serializable(this, minDf, maxDf){
            public static final long serialVersionUID = 0L;
            private final double minDf$1;
            private final double maxDf$1;

            public final boolean apply(Tuple2<String, Tuple2<Object, Object>> x0$4) {
                Tuple2 tuple2;
                Tuple2<String, Tuple2<Object, Object>> tuple22 = x0$4;
                if (tuple22 != null && (tuple2 = (Tuple2)tuple22._2()) != null) {
                    int df = tuple2._2$mcI$sp();
                    boolean bl = (double)df >= this.minDf$1 && (double)df <= this.maxDf$1;
                    return bl;
                }
                throw new MatchError(tuple22);
            }
            {
                this.minDf$1 = minDf$1;
                this.maxDf$1 = maxDf$1;
            }
        }) : allWordCounts;
        // Drop the document frequency; only (term, total occurrences) is needed for ranking.
        // Cached because both count() and top() below read it.
        RDD wordCounts = maybeFilteredWordCounts.map((Function1)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final Tuple2<String, Object> apply(Tuple2<String, Tuple2<Object, Object>> x0$5) {
                Tuple2<String, Tuple2<Object, Object>> tuple2 = x0$5;
                if (tuple2 != null) {
                    String word = (String)tuple2._1();
                    Tuple2 tuple22 = (Tuple2)tuple2._2();
                    if (tuple22 != null) {
                        long count = tuple22._1$mcJ$sp();
                        Tuple2 tuple23 = new Tuple2((Object)word, (Object)BoxesRunTime.boxToLong((long)count));
                        return tuple23;
                    }
                }
                throw new MatchError(tuple2);
            }
        }, ClassTag$.MODULE$.apply(Tuple2.class)).cache();
        // Release the input cache taken above for the row count (it was only cached in that case).
        if (countingRequired) {
            input.unpersist(input.unpersist$default$1());
        }
        String[] vocab;
        try {
            long fullVocabSize = wordCounts.count();
            // Rank terms by total occurrence count and keep at most vocSize of them.
            vocab = (String[])Predef$.MODULE$.refArrayOps((Object[])wordCounts.top((int)scala.math.package$.MODULE$.min(fullVocabSize, (long)vocSize), package$.MODULE$.Ordering().by((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final long apply(Tuple2<String, Object> x$3) {
                    return x$3._2$mcJ$sp();
                }
            }, (Ordering)Ordering.Long$.MODULE$))).map((Function1)new Serializable(this){
                public static final long serialVersionUID = 0L;

                public final String apply(Tuple2<String, Object> x$4) {
                    return (String)x$4._1();
                }
            }, Array$.MODULE$.canBuildFrom(ClassTag$.MODULE$.apply(String.class)));
        } finally {
            // wordCounts is not used past this point; without this the cached blocks would
            // stay behind after every fit() call.
            wordCounts.unpersist(wordCounts.unpersist$default$1());
        }
        Predef$.MODULE$.require(vocab.length > 0, (Function0)new Serializable(this){
            public static final long serialVersionUID = 0L;

            public final String apply() {
                return "The vocabulary size should be > 0. Lower minDF as necessary.";
            }
        });
        return this.copyValues(new CountVectorizerModel(this.uid(), vocab).setParent(this), this.copyValues$default$2());
    }

    /**
     * Derives the output schema by delegating to {@code validateAndTransformSchema}.
     *
     * @param schema the input schema
     * @return the schema returned by {@code validateAndTransformSchema(schema)}
     */
    @Override
    public StructType transformSchema(StructType schema) {
        return this.validateAndTransformSchema(schema);
    }

    /**
     * Copies this estimator through the inherited {@code defaultCopy}.
     *
     * @param extra param map handed to {@code defaultCopy}; presumably overrides applied to
     *              the copy — confirm against {@code defaultCopy}
     * @return the object returned by {@code defaultCopy(extra)}, cast to {@code CountVectorizer}
     */
    @Override
    public CountVectorizer copy(ParamMap extra) {
        return (CountVectorizer)this.defaultCopy(extra);
    }

    /**
     * Creates an estimator with the given identifier and runs the initializers of the mixed-in
     * traits.
     *
     * @param uid identifier stored in the {@code uid} field and returned by {@code uid()}
     */
    public CountVectorizer(String uid) {
        this.uid = uid;
        // Trait initializers, kept in their original order. Presumably these populate the param
        // fields through the `_setter_` methods of this class — confirm in the `$class` helpers.
        HasInputCol$class.$init$(this);
        HasOutputCol$class.$init$(this);
        CountVectorizerParams$class.$init$(this);
        MLWritable$class.$init$(this);
        DefaultParamsWritable$class.$init$(this);
    }

    /**
     * Creates an estimator whose identifier is obtained from
     * {@code Identifiable$.MODULE$.randomUID("cntVec")}, then delegates to the
     * {@code String} constructor.
     */
    public CountVectorizer() {
        this(Identifiable$.MODULE$.randomUID("cntVec"));
    }
}

