case class SparkSubFeed(dataFrame: Option[DataFrame], dataObjectId: DataObjectId, partitionValues: Seq[PartitionValues], isDAGStart: Boolean = false, isSkipped: Boolean = false, isDummy: Boolean = false, filter: Option[String] = None) extends SubFeed with Product with Serializable
A SparkSubFeed is used to transport DataFrames between Actions.
- dataFrame
Spark DataFrame to be processed. DataFrame should not be saved to state (@transient).
- dataObjectId
id of the DataObject this SubFeed corresponds to
- partitionValues
Values of Partitions transported by this SubFeed
- isDAGStart
true if this subfeed is a start node of the dag
- isSkipped
true if this subfeed is the result of a skipped action
- isDummy
true if this subfeed only contains a dummy DataFrame. Dummy DataFrames can be used for validating the lineage in init phase, but not for the exec phase.
- filter
A Spark SQL filter expression. This is used by SparkIncrementalMode.
- Annotations
- @Scaladoc()
- Alphabetic
- By Inheritance
- SparkSubFeed
- Serializable
- Serializable
- Product
- Equals
- SubFeed
- SmartDataLakeLogger
- DAGResult
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
SparkSubFeed(dataFrame: Option[DataFrame], dataObjectId: DataObjectId, partitionValues: Seq[PartitionValues], isDAGStart: Boolean = false, isSkipped: Boolean = false, isDummy: Boolean = false, filter: Option[String] = None)
- dataFrame
Spark DataFrame to be processed. DataFrame should not be saved to state (@transient).
- dataObjectId
id of the DataObject this SubFeed corresponds to
- partitionValues
Values of Partitions transported by this SubFeed
- isDAGStart
true if this subfeed is a start node of the dag
- isSkipped
true if this subfeed is the result of a skipped action
- isDummy
true if this subfeed only contains a dummy DataFrame. Dummy DataFrames can be used for validating the lineage in init phase, but not for the exec phase.
- filter
A Spark SQL filter expression. This is used by SparkIncrementalMode.
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
applyExecutionModeResultForInput(result: ExecutionModeResult, mainInputId: DataObjectId)(implicit context: ActionPipelineContext): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
def
applyExecutionModeResultForOutput(result: ExecutionModeResult)(implicit context: ActionPipelineContext): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
breakLineage(implicit context: ActionPipelineContext): SparkSubFeed
Break lineage.
Break lineage. This means discarding an existing DataFrame or list of FileRefs, so that it is requested again from the DataObject. On the one hand this is useful to break long DataFrame lineages spanning multiple Actions and instead reread the data from an intermediate table. On the other hand it is needed if partition values or the filter condition are changed.
- Definition Classes
- SparkSubFeed → SubFeed
-
def
clearDAGStart(): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
- def clearFilter(breakLineageOnChange: Boolean = true)(implicit context: ActionPipelineContext): SparkSubFeed
-
def
clearPartitionValues(breakLineageOnChange: Boolean = true)(implicit context: ActionPipelineContext): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
def
clearSkipped(): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native() @HotSpotIntrinsicCandidate()
- val dataFrame: Option[DataFrame]
-
val
dataObjectId: DataObjectId
- Definition Classes
- SparkSubFeed → SubFeed
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val filter: Option[String]
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
- def getFilterCol: Option[Column]
- def hasReusableDataFrame: Boolean
-
val
isDAGStart: Boolean
- Definition Classes
- SparkSubFeed → SubFeed
- val isDummy: Boolean
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
val
isSkipped: Boolean
- Definition Classes
- SparkSubFeed → SubFeed
- def isStreaming: Option[Boolean]
-
lazy val
logger: Logger
- Attributes
- protected
- Definition Classes
- SmartDataLakeLogger
- Annotations
- @transient()
- def movePartitionColumnsLast(partitions: Seq[String]): SparkSubFeed
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
val
partitionValues: Seq[PartitionValues]
- Definition Classes
- SparkSubFeed → SubFeed
- def persist: SparkSubFeed
-
def
resultId: String
- Definition Classes
- SubFeed → DAGResult
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toOutput(dataObjectId: DataObjectId): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
def
union(other: SubFeed)(implicit context: ActionPipelineContext): SubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
def
unionPartitionValues(otherPartitionValues: Seq[PartitionValues]): Seq[PartitionValues]
- Definition Classes
- SubFeed
-
def
updatePartitionValues(partitions: Seq[String], breakLineageOnChange: Boolean = true, newPartitionValues: Option[Seq[PartitionValues]] = None)(implicit context: ActionPipelineContext): SparkSubFeed
- Definition Classes
- SparkSubFeed → SubFeed
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
Deprecated Value Members
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] ) @Deprecated
- Deprecated