背景
算子的列表状态是平时比较常见的一种状态,本文通过官方的例子来看一下怎么使用算子列表状态
算子列表状态
算子列表状态支持应用的并行度扩缩容,如下所示:
使用方法参见官方示例,我加了几个注解:
public class BufferingSinkimplements SinkFunction<Tuple2<String, Integer>>,CheckpointedFunction {//要实现CheckpointedFunction接口private final int threshold;//算子操作状态对象--算子级别的private transient ListState<Tuple2<String, Integer>> checkpointedState;//本地变量,保存这个算子任务的本地变量--任务级别的 private List<Tuple2<String, Integer>> bufferedElements;public BufferingSink(int threshold) {this.threshold = threshold;this.bufferedElements = new ArrayList<>();}//invoke方法中一般都是操作本地变量bufferedElements,不会直接操作算子列表状态@Overridepublic void invoke(Tuple2<String, Integer> value, Context contex) throws Exception {bufferedElements.add(value);if (bufferedElements.size() >= threshold) {for (Tuple2<String, Integer> element: bufferedElements) {// send it to the sink}bufferedElements.clear();}}@Overridepublic void snapshotState(FunctionSnapshotContext context) throws Exception {checkpointedState.clear();for (Tuple2<String, Integer> element : bufferedElements) {// 把本地变量的值设置到算子列表状态中,算子列表状态会自动会被持久化checkpointedState.add(element);}}@Overridepublic void initializeState(FunctionInitializationContext context) throws Exception {ListStateDescriptor<Tuple2<String, Integer>> descriptor =new ListStateDescriptor<>("buffered-elements",TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));// 定义算子列表状态checkpointedState = context.getOperatorStateStore().getListState(descriptor);if (context.isRestored()) {// 算子列表状态的值设置到本地变量中for (Tuple2<String, Integer> element : checkpointedState.get()) {bufferedElements.add(element);}}}
}