- 主页 > 生活百科
package cn.mr.dedup;
import JAVA.io.IOException;
import org.Apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.MApper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input line is emitted unchanged as the output key, paired with a
 * {@link NullWritable} value, so that the line content itself becomes the key
 * handed to the reduce stage.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input records <key,line>: <0,2018-3-3 c> <11,2018-3-4 d>

    /**
     * Emits the incoming line as the key with a null value.
     *
     * @param key     input key of the record; not used
     * @param value   the line of text to emit as the output key
     * @param context job context the output pair is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Write the record directly. The former shared static Text field was only
        // re-pointed at value (never used as a buffer) and was mutable state shared
        // by every mapper instance in the JVM.
        context.write(value, NullWritable.get());
    }

    // Example output records: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
#人类已无法阻止她的沙雕发明#为脱单发明丰胸器、壁咚男友 人类已无法阻止她的沙雕发明…
-
-
-
-
-
奇葩搞笑菌▲于是就花钱雇了一个在下棋的老头冒充家长,笑话:学生害怕叫家长
-
-
图书馆闭馆日为84岁老农一人开放|温暖!图书馆闭馆日为84岁老农一人开放 空巢老人更需要关爱
-
『rookie』TheShy被全场暴打,但却抢下大龙立功!Rookie赛后终于露出笑容
-
徐克翻拍《射雕英雄传》,肖战饰演郭靖,黄蓉扮演者颜值是真的高
-
古代为什么有公元前、公元后之分? 公元元年是哪个朝代
-
穆雷|ATP2020网球中国赛季取消 穆雷暖心发文关心中国
-
-
-
-
高晓松|说话不过脑子,高晓松终于为自己的嚣张,付出了惨痛的代价
-
墨染年华|妻子发毒誓等一辈子,5年后丈夫出狱,妻子已嫁与他人,丈夫入狱
-
苹果|消息称苹果将推新OLED版iPad:10.86寸、三星供应屏
-
疫苗接种|疫苗对德尔塔还有用吗?张伯礼回应 对新毒株仍有保护作用
-