Sunday, 19 November 2017

Search for a Specific Keyword from a File: MapReduce Program in Hadoop

WordSearch.java



import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.conf.Configuration;


public class WordSearch
{
 public static void main(String[] args) throws Exception
 {
  Path input = new Path(args[0]);
  Path output = new Path(args[1]);
  Configuration conf = new Configuration();
  // Pass the search keyword (args[2]) to every mapper through the job configuration.
  conf.set("keyword", args[2]);
  Job job = Job.getInstance(conf, "Search");
  job.setJarByClass(WordSearch.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(WordSearchMapper.class);
  job.setReducerClass(WordSearchReducer.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  // A single reducer merges all matches into one output file.
  job.setNumReduceTasks(1);

  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);

  System.exit(job.waitForCompletion(true) ? 0 : 1);
 }
}
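Before running the job, the three classes have to be compiled against the Hadoop libraries and packed into search.jar (the jar name used in the run command under Output below). A minimal sketch, assuming the three .java files sit in the current directory and the hadoop command is on the PATH:

mkdir -p classes
javac -classpath "$(hadoop classpath)" -d classes WordSearch.java WordSearchMapper.java WordSearchReducer.java
jar -cvf search.jar -C classes .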




WordSearchMapper.java




import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class WordSearchMapper extends Mapper<LongWritable, Text, Text, Text>
{
 private String keyword;
 private int pos = 0; // counts lines seen by this mapper, used as a line number

 @Override
 public void setup(Context context) throws IOException, InterruptedException
 {
  // Read the keyword that the driver stored in the job configuration.
  Configuration conf = context.getConfiguration();
  keyword = conf.get("keyword");
 }

 @Override
 public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException
 {
  // Recover the name of the file this input split belongs to.
  InputSplit i = context.getInputSplit();
  FileSplit f = (FileSplit) i;
  String fileName = f.getPath().getName();
  pos++;
  if (value.toString().contains(keyword))
  {
   // Text.find returns the byte offset of the keyword within the line.
   int wordPos = value.find(keyword);
   context.write(value, new Text(fileName + "," + pos + "," + wordPos));
  }
 }
}
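One detail worth noting in the mapper: Text stores its contents as UTF-8 bytes, and value.find(keyword) returns the byte offset of the first occurrence of the substring (or -1 if it is absent); that offset becomes the third field of the mapper output. A small standalone sketch, assuming only hadoop-common on the classpath (FindDemo is just an illustrative name):

import org.apache.hadoop.io.Text;

public class FindDemo
{
 public static void main(String[] args)
 {
  Text line = new Text("NAME : dev patel");
  System.out.println(line.find("NAME")); // 0  -> match at the start of the line
  System.out.println(line.find("dev"));  // 7  -> byte offset, not a word index
  System.out.println(line.find("age"));  // -1 -> keyword not present
 }
}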



WordSearchReducer.java




import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class WordSearchReducer extends Reducer<Text, Text, Text, Text>
{
 @Override
 public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException
 {
  // Emit every (matching line, "file,line,offset") pair unchanged.
  for (Text value : values)
  {
   context.write(key, value);
  }
 }
}
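Incidentally, since this reducer only forwards each pair, it could be dropped entirely: the base org.apache.hadoop.mapreduce.Reducer performs an identity reduce, writing every value through unchanged. The corresponding driver change, as a sketch:

// The base Reducer class is already an identity reducer, so this line
// in the driver is optional and could simply be removed:
// job.setReducerClass(WordSearchReducer.class);
job.setNumReduceTasks(1); // still wanted, so all matches land in one output file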




Output:

Run the job with three arguments: the input file, the output directory (which must not already exist in HDFS), and the keyword to search for.


hadoop jar search.jar WordSearch /SampleDir/student.txt /NewOpt NAME

hadoop fs -ls /NewOpt
17/08/26 15:26:28 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Found 2 items
-rw-r--r--   1 administrator supergroup          0 2017-08-26 15:25 /NewOpt/_SUCCESS
-rw-r--r--   1 administrator supergroup         56 2017-08-26 15:25 /NewOpt/part-r-00000

administrator@ravi:/usr/local/hadoop/bin$ hadoop fs -cat /NewOpt/part-r-00000
17/08/26 15:26:42 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable

NAME   student.txt,7,0
NAME : dev patel             student.txt,1,0

