object VariantNormalizer extends GlowLogging
- Alphabetic
- By Inheritance
- VariantNormalizer
- GlowLogging
- LazyLogging
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- val changedFieldName: String
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val errorMessageFieldName: String
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isSNP(refAllele: String, altAlleles: Array[String]): Boolean
- def isSymbolic(altAlleles: Array[String]): Boolean
-
lazy val
logger: Logger
- Attributes
- protected
- Definition Classes
- LazyLogging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val normalizationResultFieldName: String
- val normalizationResultStructType: StructType
- val normalizationStatusFieldName: String
- val normalizationStatusStructField: StructField
-
def
normalizeVariant(contigName: String, start: Long, end: Long, refAllele: String, altAlleles: Array[String], refGenomeIndexedFasta: IndexedFastaSequenceFile): InternalRow
Contains the main normalization logic.
Contains the main normalization logic. Given contigName, start, end, refAllele, and altAlleles of a variant as well as the indexed fasta file of the reference genome, creates an InternalRow of the normalization result.
The algorithm follows logic similar to that of bcftools norm or vt normalize:
It starts from the rightmost base of all alleles and scans one base at a time, incrementing trimSize and nTrimmedBasesBeforeNextPadding as long as the bases of all alleles at that position are the same. If the beginning of any of the alleles is reached, all alleles are padded on the left by PAD_WINDOW_SIZE bases read from the reference genome, and nTrimmedBasesBeforeNextPadding is reset. The process continues until a position is reached where the alleles do not all have the same base, or the beginning of the contig is reached. Next, trimming from the left starts, and all bases common to all alleles on the left are trimmed. Start and end are adjusted accordingly during the process.
- contigName
: Contig name of the alleles
- start
: 0-based start of the REF allele in an open-left closed-right interval system
- end
: 0-based end of the REF allele in an open-left closed-right interval system
- refAllele
: String containing the reference allele
- altAlleles
: String array of alternate alleles
- refGenomeIndexedFasta
: an IndexedFastaSequenceFile of the reference genome.
- returns
normalization result as an InternalRow
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @throws( ... )