ADRIVER:
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Adriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        if (args.length != 2) {
            System.out.printf("Usage: %s [generic options] <input dir> <output dir>\n",
                    getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.out);
            return -1;
        }

        JobConf conf = new JobConf(getConf(), Adriver.class);
        conf.setJobName(this.getClass().getName());

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        conf.setMapperClass(AMapper.class);
        conf.setReducerClass(AReducer.class);

        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        JobClient.runJob(conf);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Adriver(), args);
        System.exit(exitCode);
    }
}
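(Side note, not from the original post: the driver above uses the old org.apache.hadoop.mapred API. On Hadoop 2.x and later the same job can be wired up with the newer org.apache.hadoop.mapreduce API. The sketch below is my own illustration; AMapper and AReducer would first have to be ported to extend the new Mapper/Reducer base classes before it compiles.)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class AdriverNewApi {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "AdriverNewApi");
        job.setJarByClass(AdriverNewApi.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.setMapperClass(AMapper.class);    // assumes a new-API port of AMapper
        job.setReducerClass(AReducer.class);  // assumes a new-API port of AReducer
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}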
AMAPPER:
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
public class AMapper extends MapReduceBase implements
        Mapper<LongWritable, Text, Text, Text> {

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        // Each input line looks like: area1,yes,yes,11,32,120
        String record = value.toString();
        String[] parts = record.split(",");
        // Key: the area (field 0). Value: fields 1, 2 and 5 (ac, al, sp).
        output.collect(new Text(parts[0]),
                new Text(parts[1] + "," + parts[2] + "," + parts[5]));
    }
}
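(Optional hardening, my addition: map() above assumes every line splits into at least six fields, so a short line throws ArrayIndexOutOfBoundsException and fails the task. A guarded variant, a drop-in replacement for the method above, could skip and count malformed lines instead; the counter group/name here are arbitrary labels.)

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        String[] parts = value.toString().split(",");
        if (parts.length < 6) {
            // Skip malformed lines; the counter shows up in the job's output.
            reporter.incrCounter("AMapper", "malformed records", 1);
            return;
        }
        output.collect(new Text(parts[0]),
                new Text(parts[1] + "," + parts[2] + "," + parts[5]));
    }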
AREDUCER:
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
public class AReducer extends MapReduceBase implements
        Reducer<Text, Text, Text, Text> {

    @Override
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter)
            throws IOException {
        String ac1 = "yes";
        String al1 = "yes";
        int sp;
        float sum = 0;    // records with ac = "yes"
        float alsum = 0;  // of those, records with al = "yes"
        float spsum = 0;  // of those, records with sp > 100
        float alavg;
        float spavg;

        while (values.hasNext()) {
            String record = values.next().toString();
            String[] parts = record.split(",");
            String ac = parts[0];             // e.g. "yes"
            String al = parts[1];             // e.g. "yes"
            sp = Integer.parseInt(parts[2]);  // e.g. 120
            if (ac.equals(ac1)) {
                sum = sum + 1;
                if (al.equals(al1)) {
                    alsum = alsum + 1;
                }
                if (sp > 100) {
                    spsum = spsum + 1;
                }
            }
        }

        // Avoid 0/0 (NaN) when a key has no ac = "yes" records at all.
        if (sum == 0) {
            alavg = 0;
            spavg = 0;
        } else {
            alavg = alsum / sum;
            spavg = spsum / sum;
        }

        String res = String.valueOf(alavg) + "," + String.valueOf(spavg);
        output.collect(key, new Text(res));
    }
}
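(Quick sanity check, not part of the original post: the reducer's arithmetic can be replayed in plain Java, no cluster needed. The hypothetical class below feeds in the area1 values AMapper emits for the first half of the sample input below; the second half is an exact repeat of the first, which leaves the ratios unchanged, so this should print the same numbers as the area1 line of the job output.)

public class AReducerCheck {
    public static void main(String[] args) {
        // area1 values (ac,al,sp) from the first half of the sample input.
        String[] values = {
            "yes,yes,120", "yes,yes,120", "yes,yes,27", "no,yes,120",
            "no,yes,80", "yes,yes,60", "no,yes,100", "yes,no,110",
            "no,yes,115", "yes,no,99", "no,yes,99", "yes,no,99", "no,yes,99"
        };
        float sum = 0, alsum = 0, spsum = 0;
        for (String record : values) {
            String[] parts = record.split(",");
            if (parts[0].equals("yes")) {           // ac == "yes": 7 records
                sum++;
                if (parts[1].equals("yes")) {       // al == "yes": 4 of them
                    alsum++;
                }
                if (Integer.parseInt(parts[2]) > 100) {  // sp > 100: 3 of them
                    spsum++;
                }
            }
        }
        System.out.println(alsum / sum + "," + spsum / sum);
        // prints: 0.5714286,0.42857143 -- the area1 line of part-00000
    }
}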
input:
area1,yes,yes,11,32,120
area1,yes,yes,11,32,120
area2,no,yes,15,34,89
area3,yes,yes,16,36,58
area1,yes,yes,11,38,27
area1,no,yes,12,40,120
area2,yes,yes,6,16,120
area3,yes,no,18,44,100
area1,no,yes,21,2,80
area1,yes,yes,22,18,60
area2,yes,yes,22,23,110
area3,no,yes,22,23,120
area1,no,yes,18,38,100
area1,yes,no,21,40,110
area2,yes,no,22,16,77
area3,no,yes,22,38,88
area1,no,yes,22,40,115
area1,yes,no,16,16,99
area2,yes,no,11,38,88
area3,yes,yes,12,40,115
area1,no,yes,6,16,99
area1,yes,no,16,16,99
area2,yes,no,11,38,88
area3,yes,yes,12,40,115
area1,no,yes,6,16,99
area1,yes,yes,11,32,120
area1,yes,yes,11,32,120
area2,no,yes,15,34,89
area3,yes,yes,16,36,58
area1,yes,yes,11,38,27
area1,no,yes,12,40,120
area2,yes,yes,6,16,120
area3,yes,no,18,44,100
area1,no,yes,21,2,80
area1,yes,yes,22,18,60
area2,yes,yes,22,23,110
area3,no,yes,22,23,120
area1,no,yes,18,38,100
area1,yes,no,21,40,110
area2,yes,no,22,16,77
area3,no,yes,22,38,88
area1,no,yes,22,40,115
area1,yes,no,16,16,99
area2,yes,no,11,38,88
area3,yes,yes,12,40,115
area1,no,yes,6,16,99
area1,yes,no,16,16,99
area2,yes,no,11,38,88
area3,yes,yes,12,40,115
area1,no,yes,6,16,99
----------------------------------------------------------------------
output:
[training@localhost ~]$ hadoop fs -cat web/newacop34/part-00000
area1 0.5714286,0.42857143
area2 0.4,0.4
area3 0.75,0.5
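Checking the last line by hand: the sample input contains 12 area3 rows, 8 of which have ac = yes; of those 8, 6 have al = yes and 4 have sp > 100, so the reducer emits 6/8 = 0.75 and 4/8 = 0.5, matching part-00000 above.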
--------------------------------------------------------------