case class SparkSubFeed(dataFrame: Option[DataFrame], dataObjectId: DataObjectId, partitionValues: Seq[PartitionValues], isDAGStart: Boolean = false, isSkipped: Boolean = false, isDummy: Boolean = false, filter: Option[String] = None) extends SubFeed with Product with Serializable

A SparkSubFeed is used to transport DataFrames between Actions.

dataFrame

Spark DataFrame to be processed. DataFrame should not be saved to state (@transient).

dataObjectId

id of the DataObject this SubFeed corresponds to

partitionValues

Values of Partitions transported by this SubFeed

isDAGStart

true if this subfeed is a start node of the dag

isSkipped

true if this subfeed is the result of a skipped action

isDummy

true if this subfeed only contains a dummy DataFrame. Dummy DataFrames can be used for validating the lineage in init phase, but not for the exec phase.

filter

a Spark SQL filter expression. This is used by SparkIncrementalMode.

Annotations
@Scaladoc()
Linear Supertypes
Serializable, Serializable, Product, Equals, SubFeed, SmartDataLakeLogger, DAGResult, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. SparkSubFeed
  2. Serializable
  3. Serializable
  4. Product
  5. Equals
  6. SubFeed
  7. SmartDataLakeLogger
  8. DAGResult
  9. AnyRef
  10. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new SparkSubFeed(dataFrame: Option[DataFrame], dataObjectId: DataObjectId, partitionValues: Seq[PartitionValues], isDAGStart: Boolean = false, isSkipped: Boolean = false, isDummy: Boolean = false, filter: Option[String] = None)

    dataFrame

    Spark DataFrame to be processed. DataFrame should not be saved to state (@transient).

    dataObjectId

    id of the DataObject this SubFeed corresponds to

    partitionValues

    Values of Partitions transported by this SubFeed

    isDAGStart

    true if this subfeed is a start node of the dag

    isSkipped

    true if this subfeed is the result of a skipped action

    isDummy

    true if this subfeed only contains a dummy DataFrame. Dummy DataFrames can be used for validating the lineage in init phase, but not for the exec phase.

    filter

    a Spark SQL filter expression. This is used by SparkIncrementalMode.

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def applyExecutionModeResultForInput(result: ExecutionModeResult, mainInputId: DataObjectId)(implicit context: ActionPipelineContext): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  5. def applyExecutionModeResultForOutput(result: ExecutionModeResult)(implicit context: ActionPipelineContext): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  6. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  7. def breakLineage(implicit context: ActionPipelineContext): SparkSubFeed

    Break lineage.

    Break lineage. This means to discard an existing DataFrame or List of FileRefs, so that it is requested again from the DataObject. On one side this is usable to break long DataFrame Lineages over multiple Actions and instead reread the data from an intermediate table. On the other side it is needed if partition values or filter condition are changed.

    Definition Classes
    SparkSubFeed → SubFeed
  8. def clearDAGStart(): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  9. def clearFilter(breakLineageOnChange: Boolean = true)(implicit context: ActionPipelineContext): SparkSubFeed
  10. def clearPartitionValues(breakLineageOnChange: Boolean = true)(implicit context: ActionPipelineContext): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  11. def clearSkipped(): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  12. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native() @HotSpotIntrinsicCandidate()
  13. val dataFrame: Option[DataFrame]
  14. val dataObjectId: DataObjectId
    Definition Classes
    SparkSubFeed → SubFeed
  15. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  16. val filter: Option[String]
  17. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  18. def getFilterCol: Option[Column]
  19. def hasReusableDataFrame: Boolean
  20. val isDAGStart: Boolean
    Definition Classes
    SparkSubFeed → SubFeed
  21. val isDummy: Boolean
  22. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  23. val isSkipped: Boolean
    Definition Classes
    SparkSubFeed → SubFeed
  24. def isStreaming: Option[Boolean]
  25. lazy val logger: Logger
    Attributes
    protected
    Definition Classes
    SmartDataLakeLogger
    Annotations
    @transient()
  26. def movePartitionColumnsLast(partitions: Seq[String]): SparkSubFeed
  27. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  28. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  29. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  30. val partitionValues: Seq[PartitionValues]
    Definition Classes
    SparkSubFeed → SubFeed
  31. def persist: SparkSubFeed
  32. def resultId: String
    Definition Classes
    SubFeed → DAGResult
  33. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  34. def toOutput(dataObjectId: DataObjectId): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  35. def union(other: SubFeed)(implicit context: ActionPipelineContext): SubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  36. def unionPartitionValues(otherPartitionValues: Seq[PartitionValues]): Seq[PartitionValues]
    Definition Classes
    SubFeed
  37. def updatePartitionValues(partitions: Seq[String], breakLineageOnChange: Boolean = true, newPartitionValues: Option[Seq[PartitionValues]] = None)(implicit context: ActionPipelineContext): SparkSubFeed
    Definition Classes
    SparkSubFeed → SubFeed
  38. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  39. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  40. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] ) @Deprecated
    Deprecated

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from SubFeed

Inherited from SmartDataLakeLogger

Inherited from DAGResult

Inherited from AnyRef

Inherited from Any

Ungrouped