case class Variants(rc: RuntimeETLContext, participantId: Column = col("participant_id"), affectedStatus: Column = col("affected_status"), filterSnv: Option[Column] = Some(col("has_alt")), snvDatasetId: String, splits: Seq[OccurrenceSplit], extraAggregations: Seq[Column] = Nil, checkpoint: Boolean = false, spliceAi: Boolean = true, destinationDataSetId: String = "enriched_variants") extends SimpleSingleETL with Product with Serializable
This ETL creates an aggregated table on occurrences of SNV variants. Occurrences are aggregated by calculating the frequencies specified in parameter frequencies. The table is enriched with information from other datasets such as genes, dbsnp, clinvar, 1000 genomes, topmed_bravo, gnomad_genomes_v2, gnomad_exomes_v2, gnomad_genomes_v3.
- rc
the etl context
- participantId
column used to distinguish participants in order to calculate total number of participants (pn) and total allele number (an)
- affectedStatus
column used to calculate frequencies for affected / unaffected participants
- snvDatasetId
the id of the dataset containing the SNV variants
- extraAggregations
extra aggregations to be computed when grouping occurrences by locus. Will be added to the root of the data
- spliceAi
bool indicating whether or not to join variants with SpliceAI. Defaults to true.
- Alphabetic
- By Inheritance
- Variants
- Serializable
- Serializable
- Product
- Equals
- SingleETL
- ETL
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
Variants(rc: RuntimeETLContext, participantId: Column = col("participant_id"), affectedStatus: Column = col("affected_status"), filterSnv: Option[Column] = Some(col("has_alt")), snvDatasetId: String, splits: Seq[OccurrenceSplit], extraAggregations: Seq[Column] = Nil, checkpoint: Boolean = false, spliceAi: Boolean = true, destinationDataSetId: String = "enriched_variants")
- rc
the etl context
- participantId
column used to distinguish participants in order to calculate total number of participants (pn) and total allele number (an)
- affectedStatus
column used to calculate frequencies for affected / unaffected participants
- snvDatasetId
the id of the dataset containing the SNV variants
- extraAggregations
extra aggregations to be computed when grouping occurrences by locus. Will be added to the root of the data
- spliceAi
bool indicating whether or not to join variants with SpliceAI. Defaults to true.
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val affectedStatus: Column
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- val checkpoint: Boolean
-
val
clinvar: DatasetConf
- Attributes
- protected
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native() @HotSpotIntrinsicCandidate()
-
implicit
val
conf: Configuration
- Definition Classes
- ETL
-
val
cosmic: DatasetConf
- Attributes
- protected
-
val
dbsnp: DatasetConf
- Attributes
- protected
-
val
defaultCurrentValue: LocalDateTime
- Definition Classes
- ETL
-
def
defaultRepartition: (DataFrame) ⇒ DataFrame
- Definition Classes
- ETL
-
def
defaultSampling: PartialFunction[String, (DataFrame) ⇒ DataFrame]
- Definition Classes
- ETL
- val destinationDataSetId: String
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val extraAggregations: Seq[Column]
-
def
extract(lastRunValue: LocalDateTime = minValue, currentRunValue: LocalDateTime = LocalDateTime.now()): Map[String, DataFrame]
Reads data from a file system and produces a Map[DatasetConf, DataFrame].
Reads data from a file system and produces a Map[DatasetConf, DataFrame]. This method should avoid transformations and joins but can implement filters in order to make the ETL more efficient.
- returns
all the data needed to pass to the transform method and produce the desired output.
- val filterSnv: Option[Column]
-
val
genes: DatasetConf
- Attributes
- protected
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
def
getLastRunValue(ds: DatasetConf): LocalDateTime
If possible, fetch the last run value from the dataset passed in argument.
-
val
gnomad_exomes_v2: DatasetConf
- Attributes
- protected
-
val
gnomad_genomes_v2: DatasetConf
- Attributes
- protected
-
val
gnomad_genomes_v3: DatasetConf
- Attributes
- protected
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
load(data: Map[String, DataFrame], lastRunValue: LocalDateTime, currentRunValue: LocalDateTime): Map[String, DataFrame]
Loads the output data into a persistent storage.
-
def
loadDataset(df: DataFrame, ds: DatasetConf): DataFrame
- Definition Classes
- ETL
-
def
loadSingle(data: DataFrame, lastRunValue: LocalDateTime = minValue, currentRunValue: LocalDateTime = defaultCurrentValue): DataFrame
- Definition Classes
- SingleETL
-
val
log: Logger
- Definition Classes
- ETL
- val mainDestination: DatasetConf
-
val
minValue: LocalDateTime
- Definition Classes
- ETL
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
- val participantId: Column
-
def
publish(): Unit
OPTIONAL - Contains all actions needed to be done in order to make the data available to users like creating a view with the data.
OPTIONAL - Contains all actions needed to be done in order to make the data available to users like creating a view with the data.
- Definition Classes
- ETL
- val rc: RuntimeETLContext
-
def
replaceWhere: Option[String]
replaceWhere is used for OverWriteStaticPartition load.
replaceWhere is used for OverWriteStaticPartition load. It avoids computing the dataframe to infer which partitions to replace. Most of the time, these partitions can be inferred statically. Always prefer that to dynamically overwriting partitions.
- Definition Classes
- ETL
-
def
reset(): Unit
Reset the ETL by removing the destination dataset.
Reset the ETL by removing the destination dataset.
- Definition Classes
- ETL
-
def
run(lastRunValue: Option[LocalDateTime] = None, currentRunValue: Option[LocalDateTime] = None): Map[String, DataFrame]
Entry point of the etl - execute this method in order to run the whole ETL
Entry point of the etl - execute this method in order to run the whole ETL
- Definition Classes
- ETL
-
def
sampling: PartialFunction[String, (DataFrame) ⇒ DataFrame]
Logic used when the ETL is run as a RunStep.sample step.
Logic used when the ETL is run as a RunStep.sample step.
- Definition Classes
- ETL
- val snvDatasetId: String
-
implicit
val
spark: SparkSession
- Definition Classes
- ETL
- val spliceAi: Boolean
-
val
spliceai_indel: DatasetConf
- Attributes
- protected
-
val
spliceai_snv: DatasetConf
- Attributes
- protected
- val splits: Seq[OccurrenceSplit]
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
val
thousand_genomes: DatasetConf
- Attributes
- protected
-
def
toMain(df: ⇒ DataFrame): Map[String, DataFrame]
- Definition Classes
- ETL
-
val
topmed_bravo: DatasetConf
- Attributes
- protected
-
final
def
transform(data: Map[String, DataFrame], lastRunValue: LocalDateTime = minValue, currentRunValue: LocalDateTime = defaultCurrentValue): Map[String, DataFrame]
Takes a Map[DatasetConf, DataFrame] as input and applies a set of transformations to it to produce the ETL output.
-
def
transformSingle(data: Map[String, DataFrame], lastRunValue: LocalDateTime = minValue, currentRunValue: LocalDateTime = LocalDateTime.now()): DataFrame
Takes a DataFrame as input and applies a set of transformations to it to produce the ETL output.
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
Deprecated Value Members
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] ) @Deprecated
- Deprecated