package cn.mr.dedup;

Hadoop数据去重

import .io.IOException;

import org..hadoop.io.LongWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mer;

public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

private static Text field = new Text();

// <0,2018-3-3 c><11,2018-3-4 d>

@Override

protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

field = value;

context.write(field, NullWritable.get());

}

// <2018-3-3 c,null> <2018-3-4 d,null>

}

package cn.mr.dedup;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {

// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>

@Override

protected void reduce(Text key, Iterable<NullWritable> values, Context context)

throws IOException, InterruptedException {

context.write(key, NullWritable.get());

}

}

package cn.mr.dedup;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class DedupRunner {

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

Configuration conf = new Configuration();

Job job = Job.getInstance(conf);

job.setJarByClass(DedupRunner.class);

job.setMapperClass(DedupMapper.class);

job.setReducerClass(DedupReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(NullWritable.class);

FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));

// 指定处理完成之后的结果所保存的位置

FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));

job.waitForCompletion(true);

}

}

胜象大百科