自定义分区类,实现 Partitioner 接口
package com.xiaohu.transfrom;import org.apache.flink.api.common.functions.Partitioner;public class MyPartitioner implements Partitioner<String> {@Overridepublic int partition(String key, int numPartitions) {return Integer.parseInt(key)%numPartitions;}
}
使用自定义分区类
package com.xiaohu.transfrom;import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;public class CustomPartitionDemo {public static void main(String[] args) throws Exception {Configuration conf = new Configuration();StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);env.setParallelism(2);//设置流处理环境还是批处理环境 DataSet API已经过时了,现在都是一套代码,进行设置
// env.setRuntimeMode(RuntimeExecutionMode.BATCH); //批处理
// env.setRuntimeMode(RuntimeExecutionMode.STREAMING); //流处理,默认就是流处理//一般情况下,不会在代码中指定,不够灵活,一般都是在提交的时候,使用命令进行指定 flink run -Dexecution.runtime-mode=BATCH【STREAMING】 ...DataStreamSource<String> socketDS = env.socketTextStream("master", 7777);// 第二个参数是数据,从数据中选择要分区的依据传给第一个自定义参数对象进行分区,觉得该条数据到哪一个分区中socketDS.partitionCustom(new MyPartitioner(), new KeySelector<String, String>() {@Overridepublic String getKey(String value) throws Exception {return value;}}).print();env.execute();}
}