Anagrams

Posted by : Sushanth Thursday, 24 December 2015

Input:

'tween deck

'tween-decks

-'s

-a

-ability

-able

-ably

-ac

-acal

-acea

-aceae

-acean

-aceous

-acious

-acitate

-acity

-acy

-ad

-ade

-adelphia

-adelphous

-ado

-ae

-aemia

-age

-agogue

-al

-ales

-algia

Program:

import org.apache.hadoop.mapred.MapReduceBase;

import java.io.IOException;

import java.util.Arrays;

import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapred.Mapper;

import org.apache.hadoop.mapred.OutputCollector;

import org.apache.hadoop.mapred.Reporter;

import org.apache.hadoop.mapred.Reducer;

import java.util.StringTokenizer;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.mapred.FileInputFormat;

import org.apache.hadoop.mapred.FileOutputFormat;

import org.apache.hadoop.mapred.JobClient;

import org.apache.hadoop.mapred.JobConf;

import org.apache.hadoop.mapred.TextInputFormat;

import org.apache.hadoop.mapred.TextOutputFormat;

public class Anagramclass {

/**

* The Anagram mapper class gets a word as a line from the HDFS input and

* sorts the letters in the word and writes its back to the output collector

* as Key : sorted word (letters in the word sorted) Value: the word itself

* as the value. When the reducer runs then we can group anagrams together

* based on the sorted key.

public static class AnagramMapper extends MapReduceBase implements

Mapper<LongWritable, Text, Text, Text> {

private Text sortedText = new Text();

private Text orginalText = new Text();

@Override

public void map(LongWritable key, Text value,

OutputCollector<Text, Text> outputCollector, Reporter reporter)

throws IOException {

String word = value.toString();

char[] wordChars = word.toCharArray();

Arrays.sort(wordChars);

String sortedWord = new String(wordChars);

sortedText.set(sortedWord);

orginalText.set(word);

outputCollector.collect(sortedText, orginalText);

}

/**

* The Anagram reducer class groups the values of the sorted keys that came

* in and checks to see if the values iterator contains more than one word.

* if the values contain more than one word we have spotted a anagram.

public static class AnagramReducer extends MapReduceBase implements

Reducer<Text, Text, Text, Text> {

private Text outputKey = new Text();

private Text outputValue = new Text();

@Override

public void reduce(Text anagramKey, Iterator<Text> anagramValues,

OutputCollector<Text, Text> results, Reporter reporter)

throws IOException {

String output = "";

while (anagramValues.hasNext()) {

Text anagam = anagramValues.next();

output = output + anagam.toString() + "~";

}

StringTokenizer outputTokenizer = new StringTokenizer(output, "~");

if (outputTokenizer.countTokens() >= 2) {

output = output.replace("~", ",");

outputKey.set(anagramKey.toString());

outputValue.set(output);

results.collect(outputKey, outputValue);

}

public static void main(String[] args) throws Exception {

JobConf conf = new JobConf(Anagramclass.class);

conf.setJobName("anagramcount");

conf.setOutputKeyClass(Text.class);

conf.setOutputValueClass(Text.class);

conf.setMapperClass(AnagramMapper.class);

// conf.setCombinerClass(AnagramReducer.class);

conf.setReducerClass(AnagramReducer.class);

conf.setInputFormat(TextInputFormat.class);

conf.setOutputFormat(TextOutputFormat.class);

FileInputFormat.setInputPaths(conf, new Path(args[0]));

FileOutputFormat.setOutputPath(conf, new Path(args[1]));

JobClient.runJob(conf);

}

Output:

Subscribe to Posts | Subscribe to Comments

Technical Articles

Software Programming articles

Anagrams

Leave a Reply