case class GpuOutOfCoreSortIterator(iter: Iterator[ColumnarBatch], sorter: GpuSorter, targetSize: Long, opTime: GpuMetric, sortTime: GpuMetric, outputBatches: GpuMetric, outputRows: GpuMetric) extends Iterator[ColumnarBatch] with AutoCloseable with Product with Serializable
Sorts incoming batches of data, spilling if needed.
The algorithm for this is a modified version of an external merge sort with multiple passes for
large data.
https://en.wikipedia.org/wiki/External_sorting#External_merge_sort
The main difference is that we cannot stream the data when doing a merge sort. So, we instead
divide the data into batches that are small enough that we can do a merge sort on N batches
and still fit the output within the target batch size. When merging batches instead of
individual rows, we cannot assume that all of the resulting data is globally sorted. Hopefully,
most of it is globally sorted, but we have to use the first row from the next pending batch to
determine the cutoff point between globally sorted data and data that still needs to be merged
with other batches. The globally sorted portion is put into a sorted queue while the rest of
the merged data is split and put back into a pending queue. The process repeats until we have
enough data to output.
- Alphabetic
- By Inheritance
- GpuOutOfCoreSortIterator
- Serializable
- Serializable
- Product
- Equals
- AutoCloseable
- Iterator
- TraversableOnce
- GenTraversableOnce
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
Type Members
-
class
GroupedIterator[B >: A] extends AbstractIterator[Seq[B]] with Iterator[Seq[B]]
- Definition Classes
- Iterator
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
def
++[B >: ColumnarBatch](that: ⇒ GenTraversableOnce[B]): Iterator[B]
- Definition Classes
- Iterator
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
addString(b: StringBuilder): StringBuilder
- Definition Classes
- TraversableOnce
-
def
addString(b: StringBuilder, sep: String): StringBuilder
- Definition Classes
- TraversableOnce
-
def
addString(b: StringBuilder, start: String, sep: String, end: String): StringBuilder
- Definition Classes
- TraversableOnce
-
def
aggregate[B](z: ⇒ B)(seqop: (B, ColumnarBatch) ⇒ B, combop: (B, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
val
alreadySortedIter: Iterator[SpillableColumnarBatch]
An iterator over data that has already been sorted; the batches still contain the projected columns, which need to be removed before the data is returned.
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
buffered: BufferedIterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
close(): Unit
- Definition Classes
- GpuOutOfCoreSortIterator → AutoCloseable
-
def
collect[B](pf: PartialFunction[ColumnarBatch, B]): Iterator[B]
- Definition Classes
- Iterator
- Annotations
- @migration
- Migration
(Changed in version 2.8.0)
collect has changed. The previous behavior can be reproduced with toSeq.
-
def
collectFirst[B](pf: PartialFunction[ColumnarBatch, B]): Option[B]
- Definition Classes
- TraversableOnce
-
def
contains(elem: Any): Boolean
- Definition Classes
- Iterator
-
def
copyToArray[B >: ColumnarBatch](xs: Array[B], start: Int, len: Int): Unit
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
copyToArray[B >: ColumnarBatch](xs: Array[B]): Unit
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
copyToArray[B >: ColumnarBatch](xs: Array[B], start: Int): Unit
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
copyToBuffer[B >: ColumnarBatch](dest: Buffer[B]): Unit
- Definition Classes
- TraversableOnce
-
def
corresponds[B](that: GenTraversableOnce[B])(p: (ColumnarBatch, B) ⇒ Boolean): Boolean
- Definition Classes
- Iterator
-
def
count(p: (ColumnarBatch) ⇒ Boolean): Int
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
drop(n: Int): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
dropWhile(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
duplicate: (Iterator[ColumnarBatch], Iterator[ColumnarBatch])
- Definition Classes
- Iterator
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
exists(p: (ColumnarBatch) ⇒ Boolean): Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
filter(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
filterNot(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
find(p: (ColumnarBatch) ⇒ Boolean): Option[ColumnarBatch]
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
flatMap[B](f: (ColumnarBatch) ⇒ GenTraversableOnce[B]): Iterator[B]
- Definition Classes
- Iterator
-
def
fold[A1 >: ColumnarBatch](z: A1)(op: (A1, A1) ⇒ A1): A1
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
foldLeft[B](z: B)(op: (B, ColumnarBatch) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
foldRight[B](z: B)(op: (ColumnarBatch, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
forall(p: (ColumnarBatch) ⇒ Boolean): Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
foreach[U](f: (ColumnarBatch) ⇒ U): Unit
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
grouped[B >: ColumnarBatch](size: Int): GroupedIterator[B]
- Definition Classes
- Iterator
-
def
hasDefiniteSize: Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
hasNext: Boolean
- Definition Classes
- GpuOutOfCoreSortIterator → Iterator
-
def
indexOf[B >: ColumnarBatch](elem: B, from: Int): Int
- Definition Classes
- Iterator
-
def
indexOf[B >: ColumnarBatch](elem: B): Int
- Definition Classes
- Iterator
-
def
indexWhere(p: (ColumnarBatch) ⇒ Boolean, from: Int): Int
- Definition Classes
- Iterator
-
def
indexWhere(p: (ColumnarBatch) ⇒ Boolean): Int
- Definition Classes
- Iterator
-
def
isEmpty: Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraversableAgain: Boolean
- Definition Classes
- Iterator → GenTraversableOnce
- val iter: Iterator[ColumnarBatch]
-
def
length: Int
- Definition Classes
- Iterator
-
def
map[B](f: (ColumnarBatch) ⇒ B): Iterator[B]
- Definition Classes
- Iterator
-
def
max[B >: ColumnarBatch](implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
maxBy[B](f: (ColumnarBatch) ⇒ B)(implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
min[B >: ColumnarBatch](implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
minBy[B](f: (ColumnarBatch) ⇒ B)(implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
mkString: String
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
mkString(sep: String): String
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
mkString(start: String, sep: String, end: String): String
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
next(): ColumnarBatch
- Definition Classes
- GpuOutOfCoreSortIterator → Iterator
-
def
nonEmpty: Boolean
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
onConcatOutput(): Unit
- Attributes
- protected
-
def
onFirstPassSplit(): Unit
Callbacks designed for unit tests only. Do not do any heavy work inside them.
- Attributes
- protected
-
def
onMergeSortSplit(): Unit
- Attributes
- protected
- val opTime: GpuMetric
- val outputBatches: GpuMetric
- val outputRows: GpuMetric
-
def
padTo[A1 >: ColumnarBatch](len: Int, elem: A1): Iterator[A1]
- Definition Classes
- Iterator
-
def
partition(p: (ColumnarBatch) ⇒ Boolean): (Iterator[ColumnarBatch], Iterator[ColumnarBatch])
- Definition Classes
- Iterator
-
def
patch[B >: ColumnarBatch](from: Int, patchElems: Iterator[B], replaced: Int): Iterator[B]
- Definition Classes
- Iterator
-
def
product[B >: ColumnarBatch](implicit num: Numeric[B]): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduce[A1 >: ColumnarBatch](op: (A1, A1) ⇒ A1): A1
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceLeft[B >: ColumnarBatch](op: (B, ColumnarBatch) ⇒ B): B
- Definition Classes
- TraversableOnce
-
def
reduceLeftOption[B >: ColumnarBatch](op: (B, ColumnarBatch) ⇒ B): Option[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceOption[A1 >: ColumnarBatch](op: (A1, A1) ⇒ A1): Option[A1]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceRight[B >: ColumnarBatch](op: (ColumnarBatch, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceRightOption[B >: ColumnarBatch](op: (ColumnarBatch, B) ⇒ B): Option[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reversed: List[ColumnarBatch]
- Attributes
- protected[this]
- Definition Classes
- TraversableOnce
-
def
sameElements(that: Iterator[_]): Boolean
- Definition Classes
- Iterator
-
def
scanLeft[B](z: B)(op: (B, ColumnarBatch) ⇒ B): Iterator[B]
- Definition Classes
- Iterator
-
def
scanRight[B](z: B)(op: (ColumnarBatch, B) ⇒ B): Iterator[B]
- Definition Classes
- Iterator
-
def
seq: Iterator[ColumnarBatch]
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
size: Int
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
sizeHintIfCheap: Int
- Attributes
- protected[collection]
- Definition Classes
- GenTraversableOnce
-
def
slice(from: Int, until: Int): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
sliceIterator(from: Int, until: Int): Iterator[ColumnarBatch]
- Attributes
- protected
- Definition Classes
- Iterator
-
def
sliding[B >: ColumnarBatch](size: Int, step: Int): GroupedIterator[B]
- Definition Classes
- Iterator
- val sortTime: GpuMetric
- val sorter: GpuSorter
-
def
span(p: (ColumnarBatch) ⇒ Boolean): (Iterator[ColumnarBatch], Iterator[ColumnarBatch])
- Definition Classes
- Iterator
-
def
sum[B >: ColumnarBatch](implicit num: Numeric[B]): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
take(n: Int): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
takeWhile(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
- val targetSize: Long
-
def
to[Col[_]](implicit cbf: CanBuildFrom[Nothing, ColumnarBatch, Col[ColumnarBatch]]): Col[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toArray[B >: ColumnarBatch](implicit arg0: ClassTag[B]): Array[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toBuffer[B >: ColumnarBatch]: Buffer[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toIndexedSeq: IndexedSeq[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toIterable: Iterable[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toIterator: Iterator[ColumnarBatch]
- Definition Classes
- Iterator → GenTraversableOnce
-
def
toList: List[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toMap[T, U](implicit ev: <:<[ColumnarBatch, (T, U)]): Map[T, U]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toSeq: Seq[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toSet[B >: ColumnarBatch]: Set[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toStream: Stream[ColumnarBatch]
- Definition Classes
- Iterator → GenTraversableOnce
-
def
toString(): String
- Definition Classes
- Iterator → AnyRef → Any
-
def
toTraversable: Traversable[ColumnarBatch]
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
toVector: Vector[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
withFilter(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
zip[B](that: Iterator[B]): Iterator[(ColumnarBatch, B)]
- Definition Classes
- Iterator
-
def
zipAll[B, A1 >: ColumnarBatch, B1 >: B](that: Iterator[B], thisElem: A1, thatElem: B1): Iterator[(A1, B1)]
- Definition Classes
- Iterator
-
def
zipWithIndex: Iterator[(ColumnarBatch, Int)]
- Definition Classes
- Iterator
Deprecated Value Members
-
def
/:[B](z: B)(op: (B, ColumnarBatch) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
- Annotations
- @deprecated
- Deprecated
(Since version 2.12.10) Use foldLeft instead of /:
-
def
:\[B](z: B)(op: (ColumnarBatch, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
- Annotations
- @deprecated
- Deprecated
(Since version 2.12.10) Use foldRight instead of :\