aboutsummaryrefslogtreecommitdiffstats
path: root/P2
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--P2/Makefile30
-rw-r--r--P2/README.md1
-rw-r--r--P2/Template/Project_2.pdfbin0 -> 155444 bytes
-rw-r--r--P2/include/mapper.h48
-rw-r--r--P2/include/mapreduce.h8
-rw-r--r--P2/include/reducer.h27
-rw-r--r--P2/include/utils.h43
-rw-r--r--P2/lib/utils.c92
-rw-r--r--P2/solutionexe/Makefile57
-rw-r--r--P2/solutionexe/mapperbin0 -> 26444 bytes
-rw-r--r--P2/solutionexe/mapreducebin0 -> 24940 bytes
-rw-r--r--P2/solutionexe/reducerbin0 -> 25676 bytes
-rw-r--r--P2/src/mapper.c221
-rw-r--r--P2/src/mapreduce.c83
-rw-r--r--P2/src/reducer.c140
-rw-r--r--P2/test/T1/F1.txt2
16 files changed, 752 insertions, 0 deletions
diff --git a/P2/Makefile b/P2/Makefile
new file mode 100644
index 0000000..7ab380c
--- /dev/null
+++ b/P2/Makefile
@@ -0,0 +1,30 @@
+CC=gcc
+CFLAGS=-g
+
+SRCDIR=src
+INCLDIR=include
+LIBDIR=lib
+
+# Default goal. The driver execs ./mapper and ./reducer at run time, so both
+# are prerequisites: a plain `make` builds all three programs.
+# Headers are listed so that editing one triggers a rebuild.
+mapreduce: $(SRCDIR)/mapreduce.c $(INCLDIR)/mapreduce.h $(INCLDIR)/utils.h $(LIBDIR)/utils.o mapper reducer
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/mapreduce.c -o $@
+
+mapper: $(SRCDIR)/mapper.c $(INCLDIR)/mapper.h $(INCLDIR)/utils.h $(LIBDIR)/utils.o
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/mapper.c -o $@
+
+reducer: $(SRCDIR)/reducer.c $(INCLDIR)/reducer.h $(INCLDIR)/utils.h $(LIBDIR)/utils.o
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/reducer.c -o $@
+
+$(LIBDIR)/utils.o: $(LIBDIR)/utils.c $(INCLDIR)/utils.h
+	$(CC) $(CFLAGS) -I$(INCLDIR) -c $(LIBDIR)/utils.c -o $@
+
+# Only targets that exist in this file and do not name real files.
+.PHONY: clean t1
+
+#500KB
+# Depends on mapreduce so the test never runs against missing or stale binaries.
+t1: mapreduce
+	./mapreduce 5 2 test/T1/F1.txt
+
+# $(RM) is `rm -f`, so clean succeeds even when nothing has been built yet.
+clean:
+	$(RM) $(LIBDIR)/utils.o mapreduce mapper reducer
+	$(RM) -r output \ No newline at end of file
diff --git a/P2/README.md b/P2/README.md
new file mode 100644
index 0000000..0266e65
--- /dev/null
+++ b/P2/README.md
@@ -0,0 +1 @@
+Add your project details as mentioned in Section Deliverables. \ No newline at end of file
diff --git a/P2/Template/Project_2.pdf b/P2/Template/Project_2.pdf
new file mode 100644
index 0000000..d5e99fa
--- /dev/null
+++ b/P2/Template/Project_2.pdf
Binary files differ
diff --git a/P2/include/mapper.h b/P2/include/mapper.h
new file mode 100644
index 0000000..c67eb08
--- /dev/null
+++ b/P2/include/mapper.h
@@ -0,0 +1,48 @@
+#ifndef MAPPER_H
+#define MAPPER_H
+
+#include "utils.h"
+
+// ###### DO NOT REMOVE ######
+// Sizes of the fixed key / value character buffers in the structs below.
+#define MAXKEYSZ 100
+#define MAXVALUESZ 100
+
+// ###### DO NOT REMOVE ######
+// Per-mapper globals, assigned in mapper.c's main().
+// NOTE(review): these are definitions rather than extern declarations, so this
+// header can only be included from one translation unit - confirm intended.
+char *mapOutDir;
+int mapperID;
+
+
+// You are free to change the intermediate data structure as it suits you
+// If you do so, ensure the provided utility functions are also changed
+// Singly linked list of values recorded for one key: 1 1 1...
+typedef struct valueList {
+ // MAXVALUESZ can be reduced to a small value as you are only storing "1"
+ char value[MAXVALUESZ];
+ struct valueList *next;
+}valueList;
+
+// Singly linked list of keys, each owning its value list: word 1 1 1...
+typedef struct intermediateDS{
+ char key[MAXKEYSZ];
+ valueList *value;
+ struct intermediateDS *next;
+}intermediateDS;
+
+// Head of the intermediate key list; starts out empty.
+intermediateDS *interDS = NULL;
+
+// ###### DO NOT REMOVE ######
+// Value-list helpers: node creation, append, and release of a whole list.
+valueList *createNewValueListNode(char *value);
+valueList *insertNewValueToList(valueList *root, char *count);
+void freeValueList(valueList *root);
+
+// ###### DO NOT REMOVE ######
+// Key-list helpers: node creation, insert-or-append by key, and release.
+intermediateDS *createNewInterDSNode(char *word, char *count);
+intermediateDS *insertPairToInterDS(intermediateDS *root, char *word, char *count);
+void freeInterDS(intermediateDS *root);
+
+// ###### DO NOT REMOVE ######
+// Mapper pipeline: map() splits a chunk into words, emit() records each
+// <word, value> pair, writeIntermediateDS() writes one file per word.
+void emit(char *key, char *value);
+void map(char *chunkData);
+void writeIntermediateDS();
+
+#endif \ No newline at end of file
diff --git a/P2/include/mapreduce.h b/P2/include/mapreduce.h
new file mode 100644
index 0000000..97fab69
--- /dev/null
+++ b/P2/include/mapreduce.h
@@ -0,0 +1,8 @@
+#ifndef MAPREDUCE_H
+#define MAPREDUCE_H
+
+#include "utils.h" //sendChunkData and shuffle
+
+// Forks nProcesses children; each child stores its 1-based index as a decimal
+// string in argv[1] and then execs argv[0].
+void execute(char **argv, int nProcesses);
+
+#endif \ No newline at end of file
diff --git a/P2/include/reducer.h b/P2/include/reducer.h
new file mode 100644
index 0000000..44afe2a
--- /dev/null
+++ b/P2/include/reducer.h
@@ -0,0 +1,27 @@
+#ifndef REDUCER_H
+#define REDUCER_H
+
+#include "utils.h"
+
+// Size of the key buffers used by the reducer. Kept equal to the mapper's
+// MAXKEYSZ (100): the reducer parses keys that the mapper wrote, so a smaller
+// buffer here would overflow on keys the mapper accepts.
+#define MAXKEYSZ 100
+
+// ###### DO NOT REMOVE ######
+// Singly linked list of <word, total count> pairs.
+typedef struct finalKeyValueDS {
+    char key[MAXKEYSZ];
+    int value;
+    struct finalKeyValueDS *next;
+} finalKeyValueDS;
+
+// Head of the final result list; starts out empty.
+// NOTE(review): this is a definition, so the header can only be included from
+// one translation unit - confirm that is intended.
+finalKeyValueDS *finalDS = NULL;
+
+// ###### DO NOT REMOVE ######
+// List helpers: node creation, insert-or-accumulate by key, and release.
+finalKeyValueDS *createFinalKeyValueNode(char *value, int count);
+finalKeyValueDS *insertNewKeyValue(finalKeyValueDS *root, char *word, int count);
+void freeFinalDS(finalKeyValueDS *root);
+
+// ###### DO NOT REMOVE ######
+// reduce() folds one word file into finalDS; writeFinalDS() writes finalDS to
+// output/ReduceOut/Reduce_<reducerID>.txt.
+void writeFinalDS(int reducerID);
+void reduce(char *key);
+
+#endif
+
diff --git a/P2/include/utils.h b/P2/include/utils.h
new file mode 100644
index 0000000..2c91cdf
--- /dev/null
+++ b/P2/include/utils.h
@@ -0,0 +1,43 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/msg.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <ctype.h>
+
+// chunkSize: number of input bytes handed to a mapper at a time.
+// MSGSIZE: capacity of msgText below.
+#define chunkSize 1024
+#define MSGSIZE 1100
+
+// Message record: a long type tag followed by a text payload.
+// NOTE(review): presumably used with the System V message queue calls from
+// <sys/msg.h>; no such call is visible in this change - confirm.
+struct msgBuffer {
+ long msgType;
+ char msgText[MSGSIZE];
+};
+
+// mapper side
+// validChar: non-zero for ASCII letters and digits.
+// getWord: returns the next word of chunk starting at *i (heap-allocated,
+// caller frees) and advances *i; NULL when no word is left.
+int validChar(char c);
+char *getWord(char *chunk, int *i);
+char *getChunkData(int mapperID);
+void sendChunkData(char *inputFile, int nMappers);
+
+
+// reducer side
+// hashFunction: maps a key to a bucket in [0, reducers).
+int hashFunction(char* key, int reducers);
+int getInterData(char *key, int reducerID);
+void shuffle(int nMappers, int nReducers);
+
+// directory
+// createOutputDir: creates output/, output/MapOut and output/ReduceOut.
+// createMapDir: creates output/MapOut/Map_<mapperID> and returns its path.
+// removeOutputDir: runs `rm -rf output` in a child process.
+// bookeepingCode: removes and then recreates the output tree.
+void createOutputDir();
+char *createMapDir(int mapperID);
+void removeOutputDir();
+void bookeepingCode();
+
+#endif \ No newline at end of file
diff --git a/P2/lib/utils.c b/P2/lib/utils.c
new file mode 100644
index 0000000..79b309e
--- /dev/null
+++ b/P2/lib/utils.c
@@ -0,0 +1,92 @@
+#include "utils.h"
+
+// Fetch the next chunk of input for the given mapper.
+// TODO: not implemented yet. Until it is, report "no more data" by returning
+// NULL, which is the value the mapper's read loop treats as end of input.
+// Falling off the end of a non-void function, as the empty stub did, leaves
+// the caller reading an indeterminate pointer.
+char *getChunkData(int mapperID) {
+    (void)mapperID;
+    return NULL;
+}
+
+// Intended to send chunks of size 1024 from inputFile to the nMappers mappers
+// in round-robin fashion.
+// TODO: the body is empty in this template, so nothing is sent yet.
+void sendChunkData(char *inputFile, int nMappers) {
+}
+
+// Assigns a word.txt file to a reducer: hashes the key string with the sdbm
+// recurrence and folds the result into the range [0, reducers).
+// Reference: http://www.cse.yorku.ca/~oz/hash.html
+int hashFunction(char* key, int reducers){
+    unsigned long digest = 0;
+    const char *cursor;
+
+    for (cursor = key; *cursor != '\0'; cursor++) {
+        int ch = *cursor;
+        // (h << 6) + (h << 16) - h is h * 65599 in unsigned arithmetic
+        digest = digest * 65599UL + ch;
+    }
+
+    return (digest % reducers);
+}
+
+// Receive the next word-file path assigned to the given reducer into key.
+// TODO: not implemented yet. Until it is, return 0, which ends the reducer's
+// `while(getInterData(...))` loop. The empty stub returned an indeterminate
+// value, so that loop could call reduce() on an uninitialized key.
+int getInterData(char *key, int reducerID) {
+    (void)key;
+    (void)reducerID;
+    return 0;
+}
+
+// TODO: the body is empty in this template.
+// NOTE(review): per the comment at the call site in mapreduce.c, this is meant
+// to send the mappers' word.txt files to reducers based on a hash function.
+void shuffle(int nMappers, int nReducers) {
+}
+
+// Check whether the character may be part of a word.
+// Returns 1 for ASCII letters (either case) and decimal digits, 0 otherwise.
+int validChar(char c){
+    // tolower() requires a value representable as unsigned char (or EOF);
+    // passing a negative plain char is undefined behaviour.
+    int lower = tolower((unsigned char)c);
+
+    // The original wrote tolower(c <= 'z'), which lower-cased the result of
+    // the comparison instead of comparing the lower-cased character.
+    return (lower >= 'a' && lower <= 'z') ||
+           (c >= '0' && c <= '9');
+}
+
+// Extract the next word from chunk, starting at offset *i.
+//
+// A word is a maximal run of characters accepted by validChar(); everything
+// else is a separator. On success returns a newly allocated NUL-terminated
+// copy of the word, which the caller must free(), and advances *i past the
+// word and the single separator that ended it. Returns NULL, with *i left at
+// the end of the chunk, when no word remains.
+//
+// The buffer is sized to the word itself, so nothing is leaked on the NULL
+// path and a word filling the whole chunk is still properly terminated.
+char *getWord(char *chunk, int *i){
+    if (chunk == NULL || i == NULL || *i < 0)
+        return NULL;
+
+    // measure the chunk once instead of on every loop iteration
+    size_t len = strlen(chunk);
+    size_t pos = (size_t)(*i);
+
+    // skip leading separators
+    while (pos < len && !validChar(chunk[pos]))
+        pos++;
+    if (pos >= len) {
+        *i = (int)pos;
+        return NULL;
+    }
+
+    // find the end of the word
+    size_t start = pos;
+    while (pos < len && validChar(chunk[pos]))
+        pos++;
+    size_t wordLen = pos - start;
+
+    char *buffer = (char *)malloc(wordLen + 1);
+    if (buffer == NULL) {
+        printf("ERROR: Cannot allocate memory for word\n");
+        exit(1);
+    }
+    memcpy(buffer, chunk + start, wordLen);
+    buffer[wordLen] = '\0';
+
+    // consume the separator that ended the word, if there was one
+    if (pos < len)
+        pos++;
+    *i = (int)pos;
+    return buffer;
+}
+
+// Create the output directory tree: output/, output/MapOut, output/ReduceOut.
+// A directory that already exists is not an error. Any other failure is
+// reported on stderr and the remaining directories are still attempted.
+void createOutputDir(){
+    const char *dirs[] = {"output", "output/MapOut", "output/ReduceOut"};
+    size_t k;
+
+    for (k = 0; k < sizeof(dirs) / sizeof(dirs[0]); k++) {
+        // 0777 equals ACCESSPERMS, which is a non-standard BSD/glibc macro
+        if (mkdir(dirs[k], 0777) < 0 && errno != EEXIST)
+            perror(dirs[k]);
+    }
+}
+
+// Create output/MapOut/Map_<mapperID> and return its path.
+// The returned string is heap-allocated and the caller must free() it.
+// Returns NULL if memory cannot be allocated. A mkdir failure other than
+// "already exists" is reported on stderr, but the path is still returned.
+char *createMapDir(int mapperID){
+    const size_t dirNameSize = 100;
+    char *dirName = (char *)calloc(dirNameSize, sizeof(char));
+    if (dirName == NULL) {
+        perror("createMapDir");
+        return NULL;
+    }
+
+    // bounded formatting: never writes past the 100-byte buffer
+    snprintf(dirName, dirNameSize, "output/MapOut/Map_%d", mapperID);
+
+    // 0777 equals ACCESSPERMS, which is a non-standard BSD/glibc macro
+    if (mkdir(dirName, 0777) < 0 && errno != EEXIST)
+        perror(dirName);
+    return dirName;
+}
+
+// Delete the "output" directory tree by running `rm -rf output` in a child
+// process, and wait for that child to finish before returning.
+void removeOutputDir(){
+    pid_t pid = fork();
+    if (pid < 0) {
+        // without this check a failed fork fell through to wait()
+        printf("ERROR: forking child process failed\n");
+        return;
+    }
+
+    if (pid == 0) {
+        char *argv[] = {"rm", "-rf", "output", NULL};
+        execvp(*argv, argv);
+        // execvp only returns on failure
+        printf("ERROR: exec failed\n");
+        exit(1);
+    }
+
+    // wait for this specific child; wait(NULL) could reap an unrelated one
+    waitpid(pid, NULL, 0);
+}
+
+// Reset the output tree: remove any existing "output" directory, pause for
+// one second, then recreate output/, output/MapOut and output/ReduceOut.
+void bookeepingCode(){
+ removeOutputDir();
+ sleep(1);
+ createOutputDir();
+} \ No newline at end of file
diff --git a/P2/solutionexe/Makefile b/P2/solutionexe/Makefile
new file mode 100644
index 0000000..231a15e
--- /dev/null
+++ b/P2/solutionexe/Makefile
@@ -0,0 +1,57 @@
+CC=gcc
+CFLAGS=-g
+
+SRCDIR=src
+INCLDIR=include
+LIBDIR=lib
+
+mapreduce: $(SRCDIR)/mapreduce.c $(LIBDIR)/utils.o mapper reducer
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/mapreduce.c -o $@
+
+mapper: $(SRCDIR)/mapper.c $(LIBDIR)/utils.o
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/mapper.c -o $@
+
+reducer: $(SRCDIR)/reducer.c $(LIBDIR)/utils.o
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/reducer.c -o $@
+
+$(LIBDIR)/utils.o: $(LIBDIR)/utils.c
+	$(CC) $(CFLAGS) -I$(INCLDIR) -c $(LIBDIR)/utils.c -o $@
+
+# Every command-style target in this file, and nothing that does not exist.
+.PHONY: clean t1 t2 t3 t4 t5
+
+# The test targets deliberately have no prerequisites: this directory ships
+# prebuilt mapper/mapreduce/reducer executables and the commit adds no sources
+# next to them, so depending on `mapreduce` would make the tests try to rebuild.
+
+#500KB
+t1:
+	./mapreduce 5 2 test/T1/F1.txt
+
+#44KB
+t2:
+	./mapreduce 5 2 test/T2/F2.txt
+
+#0KB
+t3:
+	./mapreduce 5 2 test/T3/F3.txt
+
+# m >= r
+
+# m=1 r=1 (min case)
+t4:
+	./mapreduce 1 1 test/T1/F1.txt
+
+# m=32 r=26 (max case)
+t5:
+	./mapreduce 32 26 test/T1/F1.txt
+
+
+# WARNING: this removes the mapreduce/mapper/reducer executables too.
+# $(RM) is `rm -f`, so clean succeeds even when some files are already gone.
+clean:
+	$(RM) $(LIBDIR)/utils.o mapreduce mapper reducer
+	$(RM) -r output \ No newline at end of file
diff --git a/P2/solutionexe/mapper b/P2/solutionexe/mapper
new file mode 100644
index 0000000..15abd74
--- /dev/null
+++ b/P2/solutionexe/mapper
Binary files differ
diff --git a/P2/solutionexe/mapreduce b/P2/solutionexe/mapreduce
new file mode 100644
index 0000000..28be9a0
--- /dev/null
+++ b/P2/solutionexe/mapreduce
Binary files differ
diff --git a/P2/solutionexe/reducer b/P2/solutionexe/reducer
new file mode 100644
index 0000000..6d6ccd8
--- /dev/null
+++ b/P2/solutionexe/reducer
Binary files differ
diff --git a/P2/src/mapper.c b/P2/src/mapper.c
new file mode 100644
index 0000000..469b32e
--- /dev/null
+++ b/P2/src/mapper.c
@@ -0,0 +1,221 @@
+#include "mapper.h"
+
+// Allocate one node of the value list that accompanies a word <1,1,1,1....>
+// and store a copy of value in it. Values longer than MAXVALUESZ - 1
+// characters are truncated so the fixed buffer cannot overflow.
+// Exits the process if memory cannot be allocated.
+valueList *createNewValueListNode(char *value){
+    valueList *newNode = (valueList *)malloc(sizeof(valueList));
+    if (newNode == NULL) {
+        printf("ERROR: Cannot allocate memory for value list node\n");
+        exit(1);
+    }
+    // bounded, always NUL-terminated copy (strcpy had no limit)
+    snprintf(newNode->value, MAXVALUESZ, "%s", value);
+    newNode->next = NULL;
+    return newNode;
+}
+
+// Append count as a new node at the tail of the value list and return the
+// head of the list (the new node itself when the list was empty).
+valueList *insertNewValueToList(valueList *root, char *count){
+    valueList **tailLink = &root;
+
+    // advance to the NULL link that terminates the list
+    while (*tailLink != NULL)
+        tailLink = &(*tailLink)->next;
+
+    *tailLink = createNewValueListNode(count);
+    return root;
+}
+
+// Release every node of a value list; a NULL list is a no-op.
+void freeValueList(valueList *root) {
+    valueList *current = root;
+
+    while (current != NULL) {
+        // read the successor before the node is released
+        valueList *following = current->next;
+        free(current);
+        current = following;
+    }
+}
+
+// Create a <word, value list> node whose value list holds the single entry
+// count. The key buffer holds at most MAXKEYSZ - 1 characters, so longer
+// words are truncated instead of overflowing it.
+// Exits the process if memory cannot be allocated.
+intermediateDS *createNewInterDSNode(char *word, char *count){
+    intermediateDS *newNode = (intermediateDS *)malloc(sizeof(intermediateDS));
+    if (newNode == NULL) {
+        printf("ERROR: Cannot allocate memory for intermediate DS node\n");
+        exit(1);
+    }
+    // bounded, always NUL-terminated copy (strcpy had no limit)
+    snprintf(newNode->key, MAXKEYSZ, "%s", word);
+    newNode->value = insertNewValueToList(NULL, count);
+    newNode->next = NULL;
+    return newNode;
+}
+
+// Insert <word, value> into the intermediate DS: append count to the value
+// list of the node that already holds word, or add a new node at the tail.
+// Returns the head of the list.
+intermediateDS *insertPairToInterDS(intermediateDS *root, char *word, char *count){
+    intermediateDS **link = &root;
+
+    while (*link != NULL) {
+        // Only the first MAXKEYSZ - 1 characters fit in a stored key, so only
+        // those are compared; an over-long word still matches its own node.
+        if (strncmp((*link)->key, word, MAXKEYSZ - 1) == 0) {
+            (*link)->value = insertNewValueToList((*link)->value, count);
+            return root;
+        }
+        link = &(*link)->next;
+    }
+
+    // not found: hang a new node on the terminating link
+    *link = createNewInterDSNode(word, count);
+    return root;
+}
+
+// Release the whole intermediate DS: each node and the value list it owns.
+// A NULL list is a no-op.
+void freeInterDS(intermediateDS *root) {
+    intermediateDS *current = root;
+
+    while (current != NULL) {
+        // read the successor before the node is released
+        intermediateDS *following = current->next;
+        freeValueList(current->value);
+        free(current);
+        current = following;
+    }
+}
+
+// Record one <key, value> pair in the global intermediate DS.
+void emit(char *key, char *value) {
+    // the insert returns the (possibly new) head, which must be stored back
+    intermediateDS *updatedHead = insertPairToInterDS(interDS, key, value);
+    interDS = updatedHead;
+}
+
+// Map step: split chunkData into words and emit <word, "1"> for each one.
+void map(char *chunkData){
+    int cursor = 0;
+
+    for (;;) {
+        char *word = getWord(chunkData, &cursor);
+        if (word == NULL)
+            return;
+
+        emit(word, "1");
+        // getWord hands over ownership of the word buffer
+        free(word);
+    }
+}
+
+// Build the path "<mapOutDir>/<word>.txt" for a word's intermediate file.
+// Returns a heap-allocated string that the caller must free().
+// The buffer is sized from the actual lengths, so neither a long word nor a
+// long directory name can overflow it.
+// Exits the process if memory cannot be allocated.
+char *generateWordFileName(char *word){
+    const char *suffix = ".txt";
+    // directory + '/' + word + suffix + terminating NUL
+    size_t needed = strlen(mapOutDir) + 1 + strlen(word) + strlen(suffix) + 1;
+
+    char *wordFileName = (char *)malloc(needed);
+    if (wordFileName == NULL) {
+        printf("ERROR: Cannot allocate memory for file name\n");
+        exit(1);
+    }
+    snprintf(wordFileName, needed, "%s/%s%s", mapOutDir, word, suffix);
+
+    return wordFileName;
+}
+
+// Write exactly len bytes of data to fd, continuing after short writes.
+// Prints an error naming fileName and exits with status 1 on failure.
+static void writeAllOrExit(int fd, const char *data, size_t len, const char *fileName) {
+    while (len > 0) {
+        ssize_t written = write(fd, data, len);
+        if (written <= 0) {
+            printf("ERROR: Cannot write to file %s\n", fileName);
+            exit(1);
+        }
+        data += written;
+        len -= (size_t)written;
+    }
+}
+
+// Write intermediate data to separate word.txt files.
+// Each file holds exactly one line: <word 1 1 1 1 1 ...>
+//
+// The file is truncated on open so a shorter line never leaves stale bytes
+// behind, and the generated file name is released once the file is written.
+// Any open/write failure prints an error and exits with a non-zero status.
+void writeIntermediateDS() {
+    intermediateDS *travNode;
+
+    for (travNode = interDS; travNode != NULL; travNode = travNode->next) {
+        char *wordFileName = generateWordFileName(travNode->key);
+        int fd = open(wordFileName, O_CREAT | O_WRONLY | O_TRUNC, 0777);
+        if (fd < 0) {
+            printf("ERROR: Cannot open the file %s\n", wordFileName);
+            exit(1);
+        }
+
+        // the word, followed by a space
+        writeAllOrExit(fd, travNode->key, strlen(travNode->key), wordFileName);
+        writeAllOrExit(fd, " ", 1, wordFileName);
+
+        // the values, separated by single spaces with none after the last
+        valueList *tNode;
+        for (tNode = travNode->value; tNode != NULL; tNode = tNode->next) {
+            writeAllOrExit(fd, tNode->value, strlen(tNode->value), wordFileName);
+            if (tNode->next != NULL)
+                writeAllOrExit(fd, " ", 1, wordFileName);
+        }
+
+        // terminate the line
+        writeAllOrExit(fd, "\n", 1, wordFileName);
+
+        close(fd);
+        free(wordFileName);
+    }
+}
+
+// Mapper entry point. Usage: ./mapper mapperID
+// Creates this mapper's output directory, maps every chunk returned by
+// getChunkData() until it returns NULL, then writes one file per word.
+int main(int argc, char *argv[]) {
+
+    if (argc < 2) {
+        printf("Less number of arguments.\n");
+        printf("./mapper mapperID\n");
+        // a usage error is a failure, so report it through the exit status
+        exit(1);
+    }
+
+    // initializing global variables
+    mapperID = strtol(argv[1], NULL, 10);
+    interDS = NULL;
+
+    //create folder specifically for this mapper in output/MapOut
+    mapOutDir = createMapDir(mapperID);
+    if (mapOutDir == NULL) {
+        printf("ERROR: Cannot create the mapper output directory\n");
+        exit(1);
+    }
+
+    // Map each chunk in place. The original first strcpy'd it into a fixed
+    // chunkSize + 1 stack buffer, which overflows if a chunk is ever longer.
+    char *retChunk;
+    while ((retChunk = getChunkData(mapperID)) != NULL) {
+        map(retChunk);
+        free(retChunk);
+    }
+
+    //student code
+    writeIntermediateDS();
+    freeInterDS(interDS);
+    interDS = NULL;
+
+    return 0;
+} \ No newline at end of file
diff --git a/P2/src/mapreduce.c b/P2/src/mapreduce.c
new file mode 100644
index 0000000..c44adb6
--- /dev/null
+++ b/P2/src/mapreduce.c
@@ -0,0 +1,83 @@
+#include "mapreduce.h"
+
+// Spawn nProcesses children with fork + execvp.
+// Child number i (1-based) stores its index as a decimal string in argv[1]
+// and then execs argv[0]. The parent returns without waiting for them.
+// Exits with status 1 if a fork fails, or in the child if the exec fails.
+void execute(char **argv, int nProcesses){
+    int i;
+
+    for (i = 0; i < nProcesses; i++){
+        pid_t pid = fork();
+        if (pid < 0) {
+            printf("ERROR: forking child process failed\n");
+            exit(1);
+        }
+        if (pid == 0) {
+            // A stack buffer large enough for any int replaces the unchecked
+            // 5-byte heap allocation. It stays valid until execvp replaces the
+            // process image, and nothing is leaked if the exec fails.
+            char processID[16];
+            snprintf(processID, sizeof(processID), "%d", i + 1);
+            argv[1] = processID;
+
+            execvp(*argv, argv);
+            // execvp only returns on failure
+            printf("ERROR: exec failed\n");
+            exit(1);
+        }
+    }
+}
+
+// Driver entry point. Usage: ./mapreduce #mappers #reducers inputFile
+// Resets the output tree, then runs the map phase (chunk sender + mappers)
+// followed by the reduce phase (shuffle + reducers), waiting for every child
+// of a phase to finish before starting the next one.
+int main(int argc, char *argv[]) {
+
+    if(argc < 4) {
+        printf("Less number of arguments.\n");
+        printf("./mapreduce #mappers #reducers inputFile\n");
+        exit(1);
+    }
+
+    int nMappers = strtol(argv[1], NULL, 10);
+    int nReducers = strtol(argv[2], NULL, 10);
+
+    if(nMappers < nReducers){
+        printf("ERROR: Number of mappers should be greater than or equal to number of reducers...\n");
+        exit(1);
+    }
+
+    // <= 0 also rejects negative counts and non-numeric input (strtol yields 0);
+    // the original == 0 test let e.g. "-1 -2" through
+    if(nMappers <= 0 || nReducers <= 0){
+        printf("ERROR: Mapper and Reducer count should be greater than zero...\n");
+        exit(1);
+    }
+
+    char *inputFile = argv[3];
+
+    bookeepingCode();
+
+    int status;
+    pid_t pid = fork();
+    if(pid < 0){
+        printf("ERROR: forking child process failed\n");
+        exit(1);
+    }
+    if(pid == 0){
+        //send chunks of data to the mappers in RR fashion
+        sendChunkData(inputFile, nMappers);
+        exit(0);
+    }
+    sleep(1);
+
+    // spawn mappers
+    char *mapperArgv[] = {"./mapper", NULL, NULL};
+    execute(mapperArgv, nMappers);
+
+    // wait for all children to complete execution
+    while (wait(&status) > 0);
+
+    // shuffle sends the word.txt files generated by mapper
+    // to reducer based on a hash function
+    pid = fork();
+    if(pid < 0){
+        printf("ERROR: forking child process failed\n");
+        exit(1);
+    }
+    if(pid == 0){
+        shuffle(nMappers, nReducers);
+        exit(0);
+    }
+    sleep(1);
+
+    // spawn reducers
+    char *reducerArgv[] = {"./reducer", NULL, NULL};
+    execute(reducerArgv, nReducers);
+
+    // wait for all children to complete execution
+    while (wait(&status) > 0);
+
+    return 0;
+} \ No newline at end of file
diff --git a/P2/src/reducer.c b/P2/src/reducer.c
new file mode 100644
index 0000000..d62c2c2
--- /dev/null
+++ b/P2/src/reducer.c
@@ -0,0 +1,140 @@
+#include "reducer.h"
+
+// Create a <word, count> node. The key buffer holds at most MAXKEYSZ - 1
+// characters, so longer words are truncated instead of overflowing it.
+// Exits the process if memory cannot be allocated.
+finalKeyValueDS *createFinalKeyValueNode(char *word, int count){
+    finalKeyValueDS *newNode = (finalKeyValueDS *)malloc(sizeof(finalKeyValueDS));
+    if (newNode == NULL) {
+        printf("ERROR: Cannot allocate memory for final DS node\n");
+        exit(1);
+    }
+    // bounded, always NUL-terminated copy (strcpy had no limit)
+    snprintf(newNode->key, MAXKEYSZ, "%s", word);
+    newNode->value = count;
+    newNode->next = NULL;
+    return newNode;
+}
+
+// Add count to the node that already holds word, or append a new
+// <word, count> node at the tail. Returns the head of the list.
+finalKeyValueDS *insertNewKeyValue(finalKeyValueDS *root, char *word, int count){
+    finalKeyValueDS **link = &root;
+
+    while (*link != NULL) {
+        // Only the first MAXKEYSZ - 1 characters fit in a stored key, so only
+        // those are compared; an over-long word still matches its own node.
+        if (strncmp((*link)->key, word, MAXKEYSZ - 1) == 0) {
+            (*link)->value += count;
+            return root;
+        }
+        link = &(*link)->next;
+    }
+
+    // not found: hang a new node on the terminating link
+    *link = createFinalKeyValueNode(word, count);
+    return root;
+}
+
+// Release every node of the final <word, count> list; NULL is a no-op.
+void freeFinalDS(finalKeyValueDS *root) {
+    finalKeyValueDS *current = root;
+
+    while (current != NULL) {
+        // read the successor before the node is released
+        finalKeyValueDS *following = current->next;
+        free(current);
+        current = following;
+    }
+}
+
+// Fold one token of a word file into finalDS.
+// The first token of a file is the word itself and is recorded with count 0;
+// every later token is parsed as a decimal count and added to that word.
+static void recordToken(char *token, char *word, int *foundKey) {
+    int count = 0;
+
+    if (!*foundKey) {
+        *foundKey = 1;
+        snprintf(word, MAXKEYSZ, "%s", token);
+    } else {
+        // convert string count to integer count
+        count = strtol(token, NULL, 10);
+    }
+    finalDS = insertNewKeyValue(finalDS, word, count);
+}
+
+// Reduce function: key is the path of a word file whose single line has the
+// form "word 1 1 1 ...". The word and the sum of its counts are accumulated
+// into finalDS. Exits with a non-zero status if the file cannot be opened.
+void reduce(char *key) {
+
+    char *wordFileName = key;
+    int fd = open(wordFileName, O_RDONLY);
+    if (fd < 0){
+        printf("ERROR: Cannot open the file %s\n", wordFileName);
+        exit(1);
+    }
+
+    char buffer[MAXKEYSZ];
+    char word[MAXKEYSZ];
+    memset(word, '\0', sizeof(char) * MAXKEYSZ);
+    int j = 0;
+    int foundKey = 0;
+    char c;
+
+    while (read(fd, &c, 1) == 1) {
+        if (c == ' ' || c == '\n' || c == 0x0) {
+            // skip blank lines and repeated separators
+            if (j > 0) {
+                buffer[j] = '\0';
+                recordToken(buffer, word, &foundKey);
+                j = 0;
+            }
+            continue;
+        }
+        // Characters beyond the buffer capacity are dropped; the original
+        // kept incrementing j and wrote past the end of buffer.
+        if (j < MAXKEYSZ - 1)
+            buffer[j++] = c;
+    }
+
+    // A file that does not end with a separator still has a final token;
+    // the original silently lost it.
+    if (j > 0) {
+        buffer[j] = '\0';
+        recordToken(buffer, word, &foundKey);
+    }
+    close(fd);
+}
+
+// Write finalDS to output/ReduceOut/Reduce_<reducerID>.txt, one
+// "<word> <count>" line per node. The file is truncated on open so an older,
+// longer result can never leave stale lines behind.
+// Exits with a non-zero status if the file cannot be opened or written.
+void writeFinalDS(int reducerID){
+    char reduceFileName[MAXKEYSZ];
+    snprintf(reduceFileName, sizeof(reduceFileName),
+             "output/ReduceOut/Reduce_%d.txt", reducerID);
+
+    int fdReduce = open(reduceFileName, O_CREAT | O_WRONLY | O_TRUNC, 0777);
+    if (fdReduce < 0){
+        printf("ERROR: Cannot open the file %s\n", reduceFileName);
+        exit(1);
+    }
+
+    finalKeyValueDS *tempNode;
+    for (tempNode = finalDS; tempNode != NULL; tempNode = tempNode->next) {
+        // key (< MAXKEYSZ) + space + int + newline always fits in 2 * MAXKEYSZ
+        char writeKeyValue[2 * MAXKEYSZ];
+        int len = snprintf(writeKeyValue, sizeof(writeKeyValue), "%s %d\n",
+                           tempNode->key, tempNode->value);
+
+        // a short write is reported as an error as well
+        if (len < 0 || write(fdReduce, writeKeyValue, (size_t)len) != len) {
+            printf("ERROR: Cannot write to file %s\n", reduceFileName);
+            close(fdReduce);
+            exit(1);
+        }
+    }
+    close(fdReduce);
+}
+
+// Reducer entry point. Usage: ./reducer reducerID
+// Reduces every word file handed out by getInterData() until it returns 0,
+// then writes the accumulated counts to this reducer's output file.
+int main(int argc, char *argv[]) {
+
+    if(argc < 2){
+        printf("Less number of arguments.\n");
+        printf("./reducer reducerID\n");
+        // Without this exit the code below passed a NULL argv[1] to strtol.
+        exit(1);
+    }
+
+    // initialize
+    int reducerID = strtol(argv[1], NULL, 10);
+    finalDS = NULL;
+
+    // master will continuously send the word.txt files alloted to the reducer
+    char key[MAXKEYSZ];
+    memset(key, '\0', sizeof(key));
+    while(getInterData(key, reducerID))
+        reduce(key);
+
+    writeFinalDS(reducerID);
+
+    freeFinalDS(finalDS);
+    finalDS = NULL;
+    return 0;
+} \ No newline at end of file
diff --git a/P2/test/T1/F1.txt b/P2/test/T1/F1.txt
new file mode 100644
index 0000000..8ab70be
--- /dev/null
+++ b/P2/test/T1/F1.txt
@@ -0,0 +1,2 @@
+
+To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. 
To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today To give you an estimation of the difference in the original and what we have today. 
To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have today. \ No newline at end of file