case class CustomFileDataObject(id: DataObjectId, creator: CustomFileCreatorConfig, metadata: Option[DataObjectMetadata] = None)(implicit instanceRegistry: InstanceRegistry) extends DataObject with FileRefDataObject with CanCreateInputStream with Product with Serializable
Inheritance (linearized): CustomFileDataObject → Serializable → Serializable → Product → Equals → CanCreateInputStream → FileRefDataObject → FileDataObject → CanHandlePartitions → DataObject → AtlasExportable → SmartDataLakeLogger → ParsableFromConfig → SdlConfigObject → AnyRef → Any
Instance Constructors
- new CustomFileDataObject(id: DataObjectId, creator: CustomFileCreatorConfig, metadata: Option[DataObjectMetadata] = None)(implicit instanceRegistry: InstanceRegistry)
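A minimal construction sketch, assuming SDL's usual package layout (the import path for CustomFileCreatorConfig in particular is an assumption; adjust it to your SDL version). The creator config wraps your custom file creator implementation and is left as a parameter here:

```scala
import io.smartdatalake.config.InstanceRegistry
import io.smartdatalake.config.SdlConfigObject.DataObjectId
// import path for CustomFileCreatorConfig is an assumption; adjust to your SDL version
import io.smartdatalake.workflow.action.customlogic.CustomFileCreatorConfig
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// Build the DataObject; the implicit InstanceRegistry is passed through
def buildCustomFileDataObject(creatorConfig: CustomFileCreatorConfig)
                             (implicit registry: InstanceRegistry): CustomFileDataObject =
  CustomFileDataObject(DataObjectId("my-custom-file"), creatorConfig)
```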
Value Members
- final def !=(arg0: Any): Boolean
  - Definition Classes: AnyRef → Any
- final def ##(): Int
  - Definition Classes: AnyRef → Any
- final def ==(arg0: Any): Boolean
  - Definition Classes: AnyRef → Any
- final def asInstanceOf[T0]: T0
  - Definition Classes: Any
- def atlasName: String
  - Definition Classes: DataObject → AtlasExportable
- def atlasQualifiedName(prefix: String): String
  - Definition Classes: AtlasExportable
- def clone(): AnyRef
  - Attributes: protected[lang]
  - Definition Classes: AnyRef
  - Annotations: @throws( ... ) @native() @HotSpotIntrinsicCandidate()
- def createInputStream(path: String)(implicit context: ActionPipelineContext): InputStream
  - Definition Classes: CustomFileDataObject → CanCreateInputStream
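A minimal consumption sketch for the returned stream; the helper name and the use of scala.io.Source are illustrative, and import paths assume SDL's usual package layout:

```scala
import java.io.InputStream
import scala.io.Source
import io.smartdatalake.workflow.ActionPipelineContext
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// Read one file of the DataObject as a string, closing the stream afterwards
def readFileAsString(dataObject: CustomFileDataObject, path: String)
                    (implicit context: ActionPipelineContext): String = {
  val in: InputStream = dataObject.createInputStream(path)
  try Source.fromInputStream(in).mkString
  finally in.close()
}
```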
- val creator: CustomFileCreatorConfig
- def deleteAll(implicit context: ActionPipelineContext): Unit
  Delete all data. This is used to implement SaveMode.Overwrite.
  - Definition Classes: FileRefDataObject
- def deleteFileRefs(fileRefs: Seq[FileRef])(implicit context: ActionPipelineContext): Unit
  Delete given files. This is used to clean up files after they are processed.
  - Definition Classes: FileRefDataObject
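A sketch of the process-then-cleanup pattern this method supports, under the same package-layout assumptions as above:

```scala
import io.smartdatalake.workflow.ActionPipelineContext
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// List all files, process them, then delete the processed files
def processAndCleanup(dataObject: CustomFileDataObject)
                     (implicit context: ActionPipelineContext): Unit = {
  val fileRefs = dataObject.getFileRefs(Seq()) // empty Seq lists all files under the root path
  fileRefs.foreach(ref => println(s"processed $ref")) // stand-in for real processing
  dataObject.deleteFileRefs(fileRefs)
}
```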
- final def eq(arg0: AnyRef): Boolean
  - Definition Classes: AnyRef
- def expectedPartitionsCondition: Option[String]
  Definition of partitions that are expected to exist. This is used to validate that partitions being read exist, so that missing partitions do not silently return no data. Define a Spark SQL expression that is evaluated against a PartitionValues instance and returns true or false. Example: "elements['yourColName'] > 2017"
  - returns: true if the partition is expected to exist.
  - Definition Classes: CustomFileDataObject → CanHandlePartitions
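A small sketch of surfacing the configured condition; the helper is illustrative only:

```scala
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// Report the configured expectation, e.g. Some("elements['yourColName'] > 2017")
def describeExpectedPartitions(dataObject: CustomFileDataObject): String =
  dataObject.expectedPartitionsCondition
    .map(cond => s"partitions expected where: $cond")
    .getOrElse("no condition configured: all listed partitions are expected")
```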
- def extractPartitionValuesFromPath(filePath: String)(implicit context: ActionPipelineContext): PartitionValues
  Extract partition values from a given file path.
  - Attributes: protected
  - Definition Classes: FileRefDataObject
- def factory: FromConfigFactory[DataObject]
  Returns the factory that can parse this type (that is, type CO). Typically, implementations of this method should return the companion object of the implementing class. The companion object in turn should implement FromConfigFactory.
  - returns: the factory (object) for this class.
  - Definition Classes: CustomFileDataObject → ParsableFromConfig
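A sketch of the typical pattern just described, with a hypothetical companion object acting as the factory. The fromConfig signature and the extract helper are assumptions that vary between SDL versions:

```scala
import com.typesafe.config.Config
import io.smartdatalake.config.{FromConfigFactory, InstanceRegistry}
import io.smartdatalake.workflow.dataobject.{CustomFileDataObject, DataObject}

// Hypothetical companion-style factory implementing FromConfigFactory
object MyCustomFileDataObjectFactory extends FromConfigFactory[DataObject] {
  // fromConfig signature is an assumption; check your SDL version
  override def fromConfig(config: Config)(implicit instanceRegistry: InstanceRegistry): CustomFileDataObject =
    extract[CustomFileDataObject](config) // extract is assumed to be provided by FromConfigFactory
}
```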
- val fileName: String
  Definition of fileName. Default is an asterisk to match everything. This is concatenated with the partition layout to search for files.
  - Definition Classes: FileRefDataObject
- final def getClass(): Class[_]
  - Definition Classes: AnyRef → Any
  - Annotations: @native() @HotSpotIntrinsicCandidate()
- def getConnection[T <: Connection](connectionId: ConnectionId)(implicit registry: InstanceRegistry, ct: ClassTag[T], tt: scala.reflect.api.JavaUniverse.TypeTag[T]): T
  Handles the class cast exception that can occur when getting objects from the instance registry.
  - Attributes: protected
  - Definition Classes: DataObject
- def getConnectionReg[T <: Connection](connectionId: ConnectionId, registry: InstanceRegistry)(implicit ct: ClassTag[T], tt: scala.reflect.api.JavaUniverse.TypeTag[T]): T
  - Attributes: protected
  - Definition Classes: DataObject
- def getFileRefs(partitionValues: Seq[PartitionValues])(implicit context: ActionPipelineContext): Seq[FileRef]
  List files for given partition values.
  - partitionValues: List of partition values to be filtered. If empty, all files in the root path of the DataObject are listed.
  - returns: List of FileRefs
  - Definition Classes: CustomFileDataObject → FileRefDataObject
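A listing sketch; the partition column name "dt" and the FileRef.fullPath accessor are assumptions:

```scala
import io.smartdatalake.util.hdfs.PartitionValues
import io.smartdatalake.workflow.ActionPipelineContext
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// List files for one selected partition and print their paths
def listPartitionFiles(dataObject: CustomFileDataObject)
                      (implicit context: ActionPipelineContext): Unit = {
  val selected = Seq(PartitionValues(Map("dt" -> "20240101"))) // "dt" is an assumed partition column
  dataObject.getFileRefs(selected).foreach(ref => println(ref.fullPath)) // fullPath: assumed accessor
}
```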
- def getPartitionString(partitionValues: PartitionValues)(implicit context: ActionPipelineContext): Option[String]
  Get partition values formatted by the partition layout.
  - Definition Classes: FileRefDataObject
- def getPath(implicit context: ActionPipelineContext): String
  Method for subclasses to override the base path for this DataObject. This is for instance needed if pathPrefix is defined in a connection.
  - Definition Classes: FileRefDataObject
- def getSearchPaths(partitionValues: Seq[PartitionValues])(implicit context: ActionPipelineContext): Seq[(PartitionValues, String)]
  Prepare the paths to be searched.
  - Attributes: protected
  - Definition Classes: FileRefDataObject
- def housekeepingMode: Option[HousekeepingMode]
  Configure a housekeeping mode to e.g. clean up, archive and compact partitions. Default is None.
  - Definition Classes: DataObject
- val id: DataObjectId
  A unique identifier for this instance.
  - Definition Classes: CustomFileDataObject → DataObject → SdlConfigObject
- implicit val instanceRegistry: InstanceRegistry
- final def isInstanceOf[T0]: Boolean
  - Definition Classes: Any
- def listPartitions(implicit context: ActionPipelineContext): Seq[PartitionValues]
  List partition values.
  - Definition Classes: CustomFileDataObject → CanHandlePartitions
- lazy val logger: Logger
  - Attributes: protected
  - Definition Classes: SmartDataLakeLogger
  - Annotations: @transient()
- val metadata: Option[DataObjectMetadata]
  Additional metadata for the DataObject.
  - Definition Classes: CustomFileDataObject → DataObject
- final def ne(arg0: AnyRef): Boolean
  - Definition Classes: AnyRef
- final def notify(): Unit
  - Definition Classes: AnyRef
  - Annotations: @native() @HotSpotIntrinsicCandidate()
- final def notifyAll(): Unit
  - Definition Classes: AnyRef
  - Annotations: @native() @HotSpotIntrinsicCandidate()
- def partitionLayout(): Option[String]
  Definition of the partition layout. Use %<partitionColName>% as a placeholder and * for globs in the layout. Note: if the partition layout contains globs, it is not possible to write files to this DataObject. Note: if this is a directory, you must add a final backslash to the partition layout.
  - Definition Classes: CustomFileDataObject → FileRefDataObject
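A sketch relating the layout to getPartitionString; the layout "year=%year%/month=%month%/" and the expected rendering are assumptions for illustration:

```scala
import io.smartdatalake.util.hdfs.PartitionValues
import io.smartdatalake.workflow.ActionPipelineContext
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// With an assumed layout "year=%year%/month=%month%/", this should yield
// Some("year=2024/month=01/")
def partitionSubPath(dataObject: CustomFileDataObject)
                    (implicit context: ActionPipelineContext): Option[String] =
  dataObject.getPartitionString(PartitionValues(Map("year" -> "2024", "month" -> "01")))
```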
- def partitions: Seq[String]
  Definition of partition columns.
  - Definition Classes: CustomFileDataObject → CanHandlePartitions
- def path: String
  The root path of the files that are handled by this DataObject.
  - Definition Classes: CustomFileDataObject → FileDataObject
- def prepare(implicit context: ActionPipelineContext): Unit
  Prepare and test the DataObject's prerequisites. This runs during the "prepare" operation of the DAG.
  - Definition Classes: FileDataObject → DataObject
- def relativizePath(filePath: String)(implicit context: ActionPipelineContext): String
  Make a given path relative to this DataObject's base path.
  - Definition Classes: CustomFileDataObject → FileDataObject
- def saveMode: SDLSaveMode
  Overwrite or Append new data. When writing partitioned data, this applies only to the partitions concerned.
  - Definition Classes: CustomFileDataObject → FileRefDataObject
- val separator: Char
  Default separator for paths.
  - Attributes: protected
  - Definition Classes: FileDataObject
- final def synchronized[T0](arg0: ⇒ T0): T0
  - Definition Classes: AnyRef
- def toStringShort: String
  - Definition Classes: DataObject
- def translateFileRefs(fileRefs: Seq[FileRef])(implicit context: ActionPipelineContext): Seq[FileRefMapping]
  Given FileRefs of another DataObject, translate the paths to the root path of this DataObject.
  - Definition Classes: FileRefDataObject
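A copy-planning sketch using translateFileRefs; the FileRefMapping import path is an assumption:

```scala
import io.smartdatalake.workflow.ActionPipelineContext
import io.smartdatalake.workflow.dataobject.{CustomFileDataObject, FileRefMapping}

// Map every file of a source DataObject onto the target DataObject's root path
def planCopy(source: CustomFileDataObject, target: CustomFileDataObject)
            (implicit context: ActionPipelineContext): Seq[FileRefMapping] =
  target.translateFileRefs(source.getFileRefs(Seq()))
```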
- def validateSchemaHasPartitionCols(df: DataFrame, role: String): Unit
  Validate that the schema of a given Spark DataFrame df contains the specified partition columns.
  - df: The data frame to validate.
  - role: Role used in the exception message. Set to read or write.
  - Definition Classes: CanHandlePartitions
  - Exceptions thrown: SchemaViolationException if the partition columns are not included.
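A pre-write validation sketch using only the documented signature:

```scala
import org.apache.spark.sql.DataFrame
import io.smartdatalake.workflow.dataobject.CustomFileDataObject

// Throws SchemaViolationException if df lacks the configured partition columns
def checkBeforeWrite(dataObject: CustomFileDataObject, df: DataFrame): Unit =
  dataObject.validateSchemaHasPartitionCols(df, role = "write")
```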
- def validateSchemaHasPrimaryKeyCols(df: DataFrame, primaryKeyCols: Seq[String], role: String): Unit
  Validate that the schema of a given Spark DataFrame df contains the specified primary key columns.
  - df: The data frame to validate.
  - role: Role used in the exception message. Set to read or write.
  - Definition Classes: CanHandlePartitions
  - Exceptions thrown: SchemaViolationException if the primary key columns are not included.
- final def wait(arg0: Long, arg1: Int): Unit
  - Definition Classes: AnyRef
  - Annotations: @throws( ... )
- final def wait(arg0: Long): Unit
  - Definition Classes: AnyRef
  - Annotations: @throws( ... ) @native()
- final def wait(): Unit
  - Definition Classes: AnyRef
  - Annotations: @throws( ... )
Deprecated Value Members
- def finalize(): Unit
  - Attributes: protected[lang]
  - Definition Classes: AnyRef
  - Annotations: @throws( classOf[java.lang.Throwable] ) @Deprecated
  - Deprecated