o

io.projectglow.vcf

VCFSchemaInferrer

object VCFSchemaInferrer

Infers the schema of a VCF file from its headers.

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. VCFSchemaInferrer
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. val VCF_HEADER_COUNT_KEY: String
  5. val VCF_HEADER_DESCRIPTION_KEY: String
  6. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  7. def clone(): AnyRef
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @native() @throws( ... )
  8. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  9. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  10. def finalize(): Unit
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  11. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  12. def getInfoFieldStruct(headerLine: VCFInfoHeaderLine): StructField
  13. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  14. def headerLinesFromSchema(schema: StructType): Seq[VCFHeaderLine]

    Returns the VCF header lines that correspond to a variant schema.

    Returns the VCF header lines that correspond to a variant schema. Each flattened info field (those fields whose names start with "INFO_") will be converted to an info header line, and fields from the "genotype" struct will be converted to format header lines.

    If the count type is available in the schema metadata (which is always the case if the original schema was generated by inferSchema), that will be the returned count type. If not, we provide a best guess count type according to the following schema possibilities:

      - If it's a boolean field, return count = 0, as is the convention for flags.
      - If it's a non-array field, return count = 1.
      - If it's an array field, return count = UNBOUNDED.

    schema

    The schema of the variant DataFrame

    returns

    VCF header lines that can be inferred from the input schema

  15. def inferGenotypeSchema(includeSampleIds: Boolean, formatHeaders: Seq[VCFFormatHeaderLine]): StructType
  16. def inferSchema(includeSampleIds: Boolean, flattenInfoFields: Boolean, header: VCFHeader): StructType
  17. def inferSchema(includeSampleIds: Boolean, flattenInfoFields: Boolean, infoHeaders: Seq[VCFInfoHeaderLine], formatHeaders: Seq[VCFFormatHeaderLine]): StructType

    includeSampleIds

    If true, a sampleId column will be added to the genotype fields

    flattenInfoFields

    If true, each INFO field will be promoted to a column. If false, the INFO fields will instead be stored in a string -> string map.

    returns

    A StructType describing the schema

  18. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  19. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  20. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  21. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  22. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  23. def toString(): String
    Definition Classes
    AnyRef → Any
  24. def typesForHeader(line: VCFCompoundHeaderLine): Seq[DataType]
  25. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  26. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  27. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @throws( ... )

Inherited from AnyRef

Inherited from Any

Ungrouped