Hadoop learning pot: MOVIE DATA ANALYTICS BASED ON RATING

FIRST, CREATE THE MOVIE DATA IN A TEXT FILE:

$cat > moviedata.txt

aa dangal 5
bb laggan 4
aa laggan 4
cc dangal 5
bb dangal 4
pq khidi 3
cc khidi 5
cc dangall 5
za khidi 5
cd laggan 5

Load this data into HDFS:

$hadoop fs -put moviedata.txt /movie/moviedata1.txt

Write the Partitioner and Reducer code as shown below (the Mapper and driver classes are not listed here):

Partitioner::

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapred.Partitioner;

/**
 * Partitioner that routes each map output record to a reduce partition chosen
 * from the record's rating value, so that records with the same rating end up
 * in the same output partition.
 *
 * <p>A rating that already lies in {@code [0, setNumReducers)} is used directly
 * as the partition index. Any other rating (negative, or greater than or equal
 * to the number of reducers) is wrapped into that range instead of being
 * returned unchanged, because an index outside the range is not a valid
 * partition.
 */
public class Mpartitioner implements Partitioner<Text,IntWritable>
{
    /**
     * No-op: this partitioner reads nothing from the job configuration.
     *
     * @param arg0 the job configuration (unused)
     */
    @Override
    public void configure(JobConf arg0) {
        // Nothing to configure.
    }

    /**
     * Selects the partition for a record from its rating.
     *
     * @param key            the map output key (not used for partitioning)
     * @param values         the map output value holding the rating
     * @param setNumReducers the number of partitions
     * @return the rating itself when it is a valid partition index, otherwise
     *         the rating wrapped into {@code [0, setNumReducers)}; {@code 0}
     *         when {@code setNumReducers} is not positive
     */
    @Override
    public int getPartition(Text key,IntWritable values,int setNumReducers)
    {
        // Guard against division by zero below.
        if (setNumReducers <= 0) {
            return 0;
        }

        // Java's % keeps the sign of the dividend, so shift a negative
        // remainder up by one modulus to obtain a non-negative index.
        int remainder = values.get() % setNumReducers;
        return remainder < 0 ? remainder + setNumReducers : remainder;
    }
}

-------------------------------------------------------------

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

//public class Mreducer1 extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

public class Mreducer1 extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {

@Override

public void reduce(Text key, Iterator<IntWritable> values,

OutputCollector<Text, IntWritable> output, Reporter reporter)

throws IOException {

//int p=5;

int c1=0;

int c2=0;

int c3=0;

int c4=0;

int c5=0;

int c6=0;

while (values.hasNext()) {

int v=values.next().get();

if(v==0)

{

c1++;

output.collect(key, new IntWritable(c1));

}

if(v==1)

{

c2++;

output.collect(key, new IntWritable(c2));

}

if(v==2)

{

c3++;

output.collect(key, new IntWritable(c3));

}

if(v==3)

{

c4++;

output.collect(key, new IntWritable(c4));

}

if(v==4)

{

c5++;

output.collect(key, new IntWritable(c5));

}

if(v==5)

{

c6++;

output.collect(key, new IntWritable(c6));

}

//output.collect(key, new IntWritable(c));

}

Build a jar file from these classes, copy it to the local file system, and run the job:

$hadoop jar movie.jar moviedriver /movie/moviedata1.txt movie/op

The MapReduce job then generates the final output, as shown below.

your data:

aa dangal 5

bb laggan 4
aa laggan 4
cc dangal 5
bb dangal 4
pq khidi 3
cc khidi 5
cc dangall 5
za khidi 5
cd laggan 5

The op output directory contains:

part-00000
part-00001
part-00002
part-00003
part-00004
part-00005

If you open each partition file in the op directory, you can find the rating-wise information for each movie.

Hadoop learning pot

Search This Blog

Thursday, 16 February 2017

MOVIE DATA ANALYTICS BASED ON RATING

No comments:

Post a Comment

Hadoop Analytics

NLP BASICS

Search This Blog