implicit class StorageActionImplicits extends AnyRef
- Alphabetic
- By Inheritance
- StorageActionImplicits
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new StorageActionImplicits(sparkDataFlow: SparkDataFlow)
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
getAuditTable(storageBasePath: String, labelPrefix: Option[String] = Some("audittable"), includeHot: Boolean = true)(tableNames: String*): SparkDataFlow
Opens a storage layer table and adds the AuditTable object to the flow with a given label.
Opens a storage layer table and adds the AuditTable object to the flow with a given label. This can then be used with the writeToStorage action. Fails if the table does not exist in the storage layer.
- storageBasePath
the base path of the storage layer
- labelPrefix
optionally prefix the output label for the AuditTable. If set, the label of the AuditTable will be
s"${labelPrefix}_$table"
- includeHot
whether or not to include hot partitions in the read
- tableNames
the tables we want to open in the storage layer
- returns
a new SparkDataFlow with the get action added
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getOrCreateAuditTable(storageBasePath: String, metadataRetrieval: Option[(String) ⇒ AuditTableInfo] = None, labelPrefix: Option[String] = Some("audittable"), includeHot: Boolean = true, updateTableMetadata: ⇒ Boolean = ...)(tableNames: String*): SparkDataFlow
Opens or creates a storage layer table and adds the AuditTable object to the flow with a given label.
Opens or creates a storage layer table and adds the AuditTable object to the flow with a given label. This can then be used with the writeToStorage action. Creates a table if it does not already exist in the storage layer and the optional
metadataRetrieval function is given. Fails if the table does not exist in the storage layer and the optional metadataRetrieval function is not given.
- storageBasePath
the base path of the storage layer
- metadataRetrieval
an optional function that generates table metadata from a table name. This function is used during table creation if a table does not exist in the storage layer or to update the metadata if updateTableMetadata is set to true
- labelPrefix
optionally prefix the output label for the AuditTable. If set, the label of the AuditTable will be
s"${labelPrefix}_$table"
- includeHot
whether or not to include hot partitions in the read
- updateTableMetadata
whether or not to update the table metadata. Uses spark.waimak.storage.updateMetadata by default (which defaults to false)
- tableNames
the tables we want to open in the storage layer
- returns
a new SparkDataFlow with the get action added
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
loadFromStorage(storageBasePath: String, from: Option[Timestamp] = None, to: Option[Timestamp] = None, includeHot: Boolean = true, outputPrefix: Option[String] = None)(tables: String*): SparkDataFlow
Load everything between two timestamps for the given tables
Load everything between two timestamps for the given tables
NB; this will not give you a snapshot of the tables at a given time, it will give you the entire history of events which have occurred between the provided dates for each table. To get a snapshot, use snapshotFromStorage
- storageBasePath
the base path of the storage layer
- from
Optionally, the lower bound last updated timestamp (if undefined, it will read from the beginning of time)
- to
Optionally, the upper bound last updated timestamp (if undefined, it will read up until the most recent events)
- tables
the tables to load
- returns
a new SparkDataFlow with the read actions added
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
snapshotFromStorage(storageBasePath: String, snapshotTimestamp: Timestamp, includeHot: Boolean = true, outputPrefix: Option[String] = None)(tables: String*): SparkDataFlow
Get a snapshot of tables in the storage layer for a given timestamp
Get a snapshot of tables in the storage layer for a given timestamp
- storageBasePath
the base path of the storage layer
- snapshotTimestamp
the snapshot timestamp
- includeHot
whether or not to include hot partitions in the read
- outputPrefix
optionally prefix the output label for the Dataset. If set, the label of the snapshot Dataset will be
s"${outputPrefix}_$table"
- tables
the tables we want to snapshot
- returns
a new SparkDataFlow with the snapshot actions added
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
writeToStorage(labelName: String, lastUpdatedCol: String, appendDateTime: ZonedDateTime, doCompaction: CompactionDecision = (_, _, _) => false, auditTableLabelPrefix: String = "audittable"): SparkDataFlow
Writes a Dataset to the storage layer.
Writes a Dataset to the storage layer. The table must have been already opened on the flow by using either the getOrCreateAuditTable or getAuditTable actions.
- labelName
the label whose Dataset we wish to write
- lastUpdatedCol
the last updated column in the Dataset
- appendDateTime
timestamp of the append, zoned to a timezone
- doCompaction
a lambda used to decide whether a compaction should happen after an append. Takes list of table regions, the count of records added in this batch and the compaction zoned date time. Default is not to trigger a compaction.
- auditTableLabelPrefix
the prefix of the audit table entity on the flow. The AuditTable will be found with
s"${auditTableLabelPrefix}_$labelName"
- returns
a new SparkDataFlow with the write action added