Search This Blog

Thursday 16 February 2017

MOVIE DATA ANALYTICS BASED ON RATING

First, create the movie data in a text file (each line is: user, movie, rating):



$cat > moviedata.txt

aa dangal 5
bb laggan 4
aa laggan 4
cc dangal 5
bb dangal 4
pq khidi 3
cc khidi 5
cc dangall 5
za khidi 5
cd laggan 5



Load this data into HDFS:


$hadoop fs -put  moviedata.txt    /movie/moviedata1.txt






Write the Mapper, Partitioner and Reducer code; the Partitioner and Reducer are shown below:


Partitioner::




import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;


public class Mpartitioner implements Partitioner<Text,IntWritable>
{


            @Override
            public void configure(JobConf arg0) {
                        // TODO Auto-generated method stub
                       
            }

            @Override
            public int  getPartition(Text key,IntWritable values,int setNumReducers)
            {
                                                                       
                                    int p=values.get();
                                    if(p==0)
                                    {
                                                return 0;
                                    }
                                    else if(p==1)
                                    {
                                                return 1;
                                    }
                                    else if(p==2)
                                    {
                                                return 2;
                                    }
                                    else if(p==3)
                                    {
                                                return 3;
                                    }
                                    else if(p==4)
                                    {
                                                return 4;
                                    }
                                    else if(p==5)
                                    {
                                                return 5;
                                    }
                                    return p;
                                   
            }}

                       


-------------------------------------------------------------

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;


            //public class Mreducer1 extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
           

            public class Mreducer1 extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
                        @Override
                        public void reduce(Text key, Iterator<IntWritable> values,
                              OutputCollector<Text, IntWritable> output, Reporter reporter)
                              throws IOException {
            //int p=5;
            int c1=0;
            int c2=0;
            int c3=0;
            int c4=0;
            int c5=0;
            int c6=0;
            while (values.hasNext()) {
           
                        int v=values.next().get();
                        if(v==0)
                        {
                  c1++;
                       
                        output.collect(key, new IntWritable(c1));
                        }
                       
                        if(v==1)
                        {
                  c2++;
                       
                        output.collect(key, new IntWritable(c2));
                        }
                       
                        if(v==2)
                        {
                  c3++;
                       
                        output.collect(key, new IntWritable(c3));
                        }
                        if(v==3)
                        {
                  c4++;
                       
                        output.collect(key, new IntWritable(c4));
                        }
                        if(v==4)
                        {
                  c5++;
                       
                        output.collect(key, new IntWritable(c5));
                        }
                        if(v==5)
                        {
                  c6++;
                       
                        output.collect(key, new IntWritable(c6));
                        }
                       
            }
            //output.collect(key, new IntWritable(c));
            }
            }



Build a jar file from the compiled classes, copy it to the local file system, and run the job:



$hadoop jar movie.jar moviedriver /movie/moviedata1.txt  movie/op




After the MapReduce program finishes, it generates the final output as shown below.




your data:   
aa dangal 5
bb laggan 4
aa laggan 4
cc dangal 5
bb dangal 4
pq khidi 3
cc khidi 5
cc dangall 5
za khidi 5
cd laggan 5

The op directory contains:


part-00000
part-00001
part-00002
part-00003
part-00004
part-00005


If you open each partition file in the op directory, you can find the rating-wise information for each movie.

No comments:

Post a Comment

Hadoop Analytics

NewolympicData

  Alison Bartosik 21 United States 2004 08-29-04 Synchronized Swimming 0 0 2 2 Anastasiya Davydova 21 Russia 2004 0...