Packages

o

com.nvidia.spark.rapids

SamplingUtils

object SamplingUtils

Linear Supertypes
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. SamplingUtils
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  5. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  6. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  7. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  8. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  9. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  10. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  11. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  12. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  13. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  14. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  15. def randomResample(input: Iterator[ColumnarBatch], fraction: Double, sorter: GpuSorter, converter: (Iterator[ColumnarBatch]) ⇒ Iterator[InternalRow], seed: Long = Random.nextLong()): Array[InternalRow]

    Random sampling without replacement.

    Random sampling without replacement.

    input

    iterator to feed batches for sampling.

    fraction

    the fraction of rows to randomly select

    sorter

    used to add rows needed for sorting on the CPU later. The sorter should be set up for the schema of the input data, and the output sampled rows will have any needed rows added to them, as the sorter requires.

    converter

    used to convert a batch of data to rows. This should have been set up to convert to rows based on the expected output of the sorter.

    seed

    the seed to the random number generator

    returns

    the sampled rows

  16. def reservoirSampleAndCount(input: Iterator[ColumnarBatch], k: Int, sorter: GpuSorter, converter: (Iterator[ColumnarBatch]) ⇒ Iterator[InternalRow], seed: Long = Random.nextLong()): (Array[InternalRow], Long)

    Reservoir sampling implementation that also returns the input size.

    Reservoir sampling implementation that also returns the input size.

    input

    iterator to feed batches for sampling.

    k

    the number of rows to randomly select.

    sorter

    used to add rows needed for sorting on the CPU later. The sorter should be set up for the schema of the input data, and the output sampled rows will have any needed rows added to them, as the sorter requires.

    converter

    used to convert a batch of data to rows. This should have been set up to convert to rows based on the expected output of the sorter.

    seed

    the seed to the random number generator

    returns

    (samples, input size)

  17. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  18. def toString(): String
    Definition Classes
    AnyRef → Any
  19. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  20. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  21. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from AnyRef

Inherited from Any

Ungrouped