package org.apache.spark.mllib.feature;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.SharedSparkSession;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Vector;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:org/apache/spark/mllib/feature/JavaTfIdfSuite.class */
/**
 * Exercises the MLlib {@link HashingTF} and {@link IDF} transformers through their Java API.
 *
 * <p>Both tests run the same three-sentence corpus through term-frequency hashing followed by
 * an IDF fit/transform, then check the weight of the term {@code "this"}. That term occurs in
 * every document, and each test asserts that its TF-IDF weight is zero in every output vector.
 */
public class JavaTfIdfSuite extends SharedSparkSession {
    /** Number of partitions the sample corpus is split into when parallelized. */
    private static final int NUM_PARTITIONS = 2;

    /** Absolute tolerance used when comparing TF-IDF weights against zero. */
    private static final double TOLERANCE = 1.0E-15d;

    /** Checks the weight of a term found in every document using a default-constructed IDF. */
    @Test
    public void tfIdf() {
        assertTermInEveryDocumentHasZeroWeight(new IDF());
    }

    /**
     * Same check as {@link #tfIdf()}, but with an IDF constructed with the argument {@code 2}
     * (the minimum document frequency, going by the test name and the MLlib API docs).
     * {@code "this"} occurs in three documents, so the expected weight is still zero.
     */
    @Test
    public void tfIdfMinimumDocumentFrequency() {
        assertTermInEveryDocumentHasZeroWeight(new IDF(2));
    }

    /**
     * Builds the sample corpus, computes TF-IDF vectors with the supplied estimator, and asserts
     * that the entry for the term {@code "this"} is zero (within {@link #TOLERANCE}) in each one.
     *
     * @param idf the IDF estimator to fit on the corpus' term-frequency vectors
     */
    private void assertTermInEveryDocumentHasZeroWeight(IDF idf) {
        HashingTF hashingTF = new HashingTF();

        // Each document is a whitespace-tokenized sentence; all three contain "this".
        List<List<String>> corpus = Arrays.asList(
                Arrays.asList("this is a sentence".split(" ")),
                Arrays.asList("this is another sentence".split(" ")),
                Arrays.asList("this is still a sentence".split(" ")));
        JavaRDD<List<String>> documents = this.jsc.parallelize(corpus, NUM_PARTITIONS);

        JavaRDD<Vector> termFrequencies = hashingTF.transform(documents);
        // Result intentionally discarded: collecting runs the term-frequency stage on its own
        // before the IDF model is fitted on the same RDD.
        termFrequencies.collect();

        List<Vector> tfIdfVectors = idf.fit(termFrequencies).transform(termFrequencies).collect();

        // Look the term up through the same hasher so the index matches the vectors' layout.
        int indexOfThis = hashingTF.indexOf("this");
        for (Vector tfIdf : tfIdfVectors) {
            Assertions.assertEquals(0.0d, tfIdf.apply(indexOfThis), TOLERANCE);
        }
    }
}
