Package cz.o2.proxima.beam.core
Class BeamDataOperator
- java.lang.Object
-
- cz.o2.proxima.beam.core.BeamDataOperator
-
- All Implemented Interfaces:
DataOperator, java.lang.AutoCloseable
public class BeamDataOperator extends java.lang.Object implements DataOperator
A DataOperator for Apache Beam transformations.
-
-
Nested Class Summary
Nested Classes
Modifier and Type - Class - Description
private static class - BeamDataOperator.BatchSnapshotDescriptor
private static class - BeamDataOperator.BatchUpdatesDescriptor
private static interface - BeamDataOperator.PCollectionDescriptor
private static interface - BeamDataOperator.PCollectionFactoryFromDescriptor<T extends BeamDataOperator.PCollectionDescriptor>
private static class - BeamDataOperator.StreamDescriptor
-
Field Summary
Fields
Modifier and Type - Field - Description
private java.util.Map<AttributeFamilyDescriptor,DataAccessor> - accessorMap
private java.util.Map<BeamDataOperator.PCollectionDescriptor,org.apache.beam.sdk.values.PCollection<StreamElement>> - createdStreamsMap
private DirectDataOperator - direct
private DataAccessorLoader<BeamDataOperator,DataAccessor,DataAccessorFactory> - loader
private Repository - repo
private java.util.Set<org.apache.beam.sdk.Pipeline> - typesRegistered
-
Constructor Summary
Constructors Constructor Description BeamDataOperator(Repository repo)
-
Method Summary
All Methods / Instance Methods / Concrete Methods
Modifier and Type - Method - Description
private DataAccessor - accessorFor(AttributeFamilyDescriptor family)
void - close()
private DataAccessor - createAccessorFor(AttributeFamilyDescriptor family)
private java.util.function.Supplier<java.lang.IllegalArgumentException> - failNotFound(AttributeDescriptor<?>[] attrs, java.lang.String accessorType)
private org.apache.beam.sdk.transforms.PTransform<org.apache.beam.sdk.values.PCollection<StreamElement>,org.apache.beam.sdk.values.PCollection<StreamElement>> - filterAttrs(AttributeDescriptor<?>[] attrs)
private java.util.stream.Stream<DataAccessor> - findSuitableAccessors(java.util.function.Predicate<AttributeFamilyDescriptor> predicate, java.lang.String accessorType, AttributeDescriptor<?>[] attrs)
private java.util.stream.Stream<Pair<AttributeDescriptor<?>,java.util.Optional<AttributeFamilyDescriptor>>> - findSuitableFamilies(java.util.function.Predicate<AttributeFamilyDescriptor> predicate, AttributeDescriptor<?>[] attrs)
DataAccessor - getAccessorFor(AttributeFamilyDescriptor family) - Get DataAccessor for given AttributeFamilyDescriptor.
org.apache.beam.sdk.values.PCollection<StreamElement> - getBatchSnapshot(org.apache.beam.sdk.Pipeline pipeline, long fromStamp, long untilStamp, AttributeDescriptor<?>... attrs) - Create PCollection from snapshot of given attributes.
org.apache.beam.sdk.values.PCollection<StreamElement> - getBatchSnapshot(org.apache.beam.sdk.Pipeline pipeline, AttributeDescriptor<?>... attrs) - Create PCollection from snapshot of given attributes.
org.apache.beam.sdk.values.PCollection<StreamElement> - getBatchUpdates(org.apache.beam.sdk.Pipeline pipeline, long startStamp, long endStamp, boolean asStream, AttributeDescriptor<?>... attrs) - Create PCollection from updates to given attributes with given time range.
org.apache.beam.sdk.values.PCollection<StreamElement> - getBatchUpdates(org.apache.beam.sdk.Pipeline pipeline, long startStamp, long endStamp, AttributeDescriptor<?>... attrs) - Create PCollection from updates to given attributes with given time range.
org.apache.beam.sdk.values.PCollection<StreamElement> - getBatchUpdates(org.apache.beam.sdk.Pipeline pipeline, AttributeDescriptor<?>... attrs) - Create PCollection from updates to given attributes.
DirectDataOperator - getDirect()
private <T extends BeamDataOperator.PCollectionDescriptor> org.apache.beam.sdk.values.PCollection<StreamElement> - getOrCreatePCollection(T desc, boolean cacheable, BeamDataOperator.PCollectionFactoryFromDescriptor<T> factory)
Repository - getRepository()
(package private) org.apache.beam.sdk.values.PCollection<StreamElement> - getStream(java.lang.String name, org.apache.beam.sdk.Pipeline pipeline, Position position, boolean stopAtCurrent, boolean useEventTime, long limit, AttributeDescriptor<?>... attrs) - Create PCollection in given Pipeline from commit log for given attributes limiting number of elements read.
org.apache.beam.sdk.values.PCollection<StreamElement> - getStream(java.lang.String name, org.apache.beam.sdk.Pipeline pipeline, Position position, boolean stopAtCurrent, boolean useEventTime, AttributeDescriptor<?>... attrs) - Create PCollection in given Pipeline from commit log for given attributes.
org.apache.beam.sdk.values.PCollection<StreamElement> - getStream(org.apache.beam.sdk.Pipeline pipeline, Position position, boolean stopAtCurrent, boolean useEventTime, AttributeDescriptor<?>... attrs) - Create PCollection in given Pipeline from commit log for given attributes.
boolean - hasDirect()
private void - registerTypesFor(org.apache.beam.sdk.Pipeline pipeline)
void - reload()
private boolean - typesRegisteredFor(org.apache.beam.sdk.Pipeline pipeline)
-
-
-
Field Detail
-
repo
private final Repository repo
-
direct
@Nullable private final DirectDataOperator direct
-
loader
private final DataAccessorLoader<BeamDataOperator,DataAccessor,DataAccessorFactory> loader
-
accessorMap
private final java.util.Map<AttributeFamilyDescriptor,DataAccessor> accessorMap
-
createdStreamsMap
private final java.util.Map<BeamDataOperator.PCollectionDescriptor,org.apache.beam.sdk.values.PCollection<StreamElement>> createdStreamsMap
-
typesRegistered
private final java.util.Set<org.apache.beam.sdk.Pipeline> typesRegistered
-
-
Constructor Detail
-
BeamDataOperator
BeamDataOperator(Repository repo)
-
-
Method Detail
-
close
public void close()
- Specified by:
close in interface java.lang.AutoCloseable
- Specified by:
close in interface DataOperator
-
reload
public void reload()
- Specified by:
reload in interface DataOperator
-
getStream
@SafeVarargs public final org.apache.beam.sdk.values.PCollection<StreamElement> getStream(org.apache.beam.sdk.Pipeline pipeline, Position position, boolean stopAtCurrent, boolean useEventTime, AttributeDescriptor<?>... attrs)
Create PCollection in given Pipeline from commit log for given attributes.
- Parameters:
pipeline - the Pipeline to create PCollection in.
position - position in commit log to read from
stopAtCurrent - true to stop at recent data
useEventTime - true to use event time
attrs - the attributes to create PCollection for
- Returns:
- the PCollection
-
getStream
@SafeVarargs public final org.apache.beam.sdk.values.PCollection<StreamElement> getStream(@Nullable java.lang.String name, org.apache.beam.sdk.Pipeline pipeline, Position position, boolean stopAtCurrent, boolean useEventTime, AttributeDescriptor<?>... attrs)
Create PCollection in given Pipeline from commit log for given attributes.
- Parameters:
name - name of the consumer
pipeline - the Pipeline to create PCollection in.
position - position in commit log to read from
stopAtCurrent - true to stop at recent data
useEventTime - true to use event time
attrs - the attributes to create PCollection for
- Returns:
- the PCollection
-
getStream
@SafeVarargs final org.apache.beam.sdk.values.PCollection<StreamElement> getStream(@Nullable java.lang.String name, org.apache.beam.sdk.Pipeline pipeline, Position position, boolean stopAtCurrent, boolean useEventTime, long limit, AttributeDescriptor<?>... attrs)
Create PCollection in given Pipeline from commit log for given attributes limiting number of elements read.
- Parameters:
name - name of the consumer
pipeline - the Pipeline to create PCollection in.
position - position in commit log to read from
stopAtCurrent - true to stop at recent data
useEventTime - true to use event time
limit - number of elements to read from the source
attrs - the attributes to create PCollection for
- Returns:
- the PCollection
-
getBatchUpdates
@SafeVarargs public final org.apache.beam.sdk.values.PCollection<StreamElement> getBatchUpdates(org.apache.beam.sdk.Pipeline pipeline, AttributeDescriptor<?>... attrs)
Create PCollection from updates to given attributes.
- Parameters:
pipeline - Pipeline to create the PCollection in
attrs - attributes to read updates for
- Returns:
- the PCollection
-
getBatchUpdates
@SafeVarargs public final org.apache.beam.sdk.values.PCollection<StreamElement> getBatchUpdates(org.apache.beam.sdk.Pipeline pipeline, long startStamp, long endStamp, AttributeDescriptor<?>... attrs)
Create PCollection from updates to given attributes with given time range.
- Parameters:
pipeline - Pipeline to create the PCollection in
startStamp - timestamp (inclusive) of first update taken into account
endStamp - timestamp (exclusive) of last update taken into account
attrs - attributes to read updates for
- Returns:
- the PCollection
-
getBatchUpdates
@SafeVarargs public final org.apache.beam.sdk.values.PCollection<StreamElement> getBatchUpdates(org.apache.beam.sdk.Pipeline pipeline, long startStamp, long endStamp, boolean asStream, AttributeDescriptor<?>... attrs)
Create PCollection from updates to given attributes with given time range.
- Parameters:
pipeline - Pipeline to create the PCollection in
startStamp - timestamp (inclusive) of first update taken into account
endStamp - timestamp (exclusive) of last update taken into account
asStream - create PCollection that is suitable for streaming processing (i.e. can update watermarks before end of input)
attrs - attributes to read updates for
- Returns:
- the PCollection
-
getBatchSnapshot
public final org.apache.beam.sdk.values.PCollection<StreamElement> getBatchSnapshot(org.apache.beam.sdk.Pipeline pipeline, AttributeDescriptor<?>... attrs)
Create PCollection from snapshot of given attributes. The snapshot is either read from available storage or created by reduction of updates.
- Parameters:
pipeline - Pipeline to create the PCollection in
attrs - attributes to read snapshot for
- Returns:
- the PCollection
-
getBatchSnapshot
public final org.apache.beam.sdk.values.PCollection<StreamElement> getBatchSnapshot(org.apache.beam.sdk.Pipeline pipeline, long fromStamp, long untilStamp, AttributeDescriptor<?>... attrs)
Create PCollection from snapshot of given attributes. The snapshot is either read from available storage or created by reduction of updates.
- Parameters:
pipeline - Pipeline to create the PCollection in
fromStamp - ignore updates older than this stamp
untilStamp - read only updates older than this timestamp (i.e. if this method was called at the given timestamp)
attrs - attributes to read snapshot for
- Returns:
- the PCollection
-
getAccessorFor
public DataAccessor getAccessorFor(AttributeFamilyDescriptor family)
- Parameters:
family - descriptor of family to retrieve accessor for
- Returns:
DataAccessor for given family
-
findSuitableAccessors
private java.util.stream.Stream<DataAccessor> findSuitableAccessors(java.util.function.Predicate<AttributeFamilyDescriptor> predicate, java.lang.String accessorType, AttributeDescriptor<?>[] attrs)
-
findSuitableFamilies
private java.util.stream.Stream<Pair<AttributeDescriptor<?>,java.util.Optional<AttributeFamilyDescriptor>>> findSuitableFamilies(java.util.function.Predicate<AttributeFamilyDescriptor> predicate, AttributeDescriptor<?>[] attrs)
-
accessorFor
private DataAccessor accessorFor(AttributeFamilyDescriptor family)
-
createAccessorFor
private DataAccessor createAccessorFor(AttributeFamilyDescriptor family)
-
getRepository
public Repository getRepository()
- Specified by:
getRepository in interface DataOperator
-
getDirect
public DirectDataOperator getDirect()
-
hasDirect
public boolean hasDirect()
-
filterAttrs
private org.apache.beam.sdk.transforms.PTransform<org.apache.beam.sdk.values.PCollection<StreamElement>,org.apache.beam.sdk.values.PCollection<StreamElement>> filterAttrs(AttributeDescriptor<?>[] attrs)
-
failNotFound
private java.util.function.Supplier<java.lang.IllegalArgumentException> failNotFound(AttributeDescriptor<?>[] attrs, java.lang.String accessorType)
-
getOrCreatePCollection
private <T extends BeamDataOperator.PCollectionDescriptor> org.apache.beam.sdk.values.PCollection<StreamElement> getOrCreatePCollection(T desc, boolean cacheable, BeamDataOperator.PCollectionFactoryFromDescriptor<T> factory)
-
registerTypesFor
private void registerTypesFor(org.apache.beam.sdk.Pipeline pipeline)
-
typesRegisteredFor
private boolean typesRegisteredFor(org.apache.beam.sdk.Pipeline pipeline)
-
-