aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--P1/Project1.pdfbin0 -> 468585 bytes
-rw-r--r--P1/Template/Makefile27
-rw-r--r--P1/Template/README.md1
-rw-r--r--P1/Template/include/mapper.h46
-rw-r--r--P1/Template/include/mapreduce.h6
-rw-r--r--P1/Template/include/reducer.h24
-rw-r--r--P1/Template/include/utils.h52
-rw-r--r--P1/Template/solutionexe/mapperbin0 -> 38848 bytes
-rw-r--r--P1/Template/solutionexe/mapreducebin0 -> 32616 bytes
-rw-r--r--P1/Template/solutionexe/reducerbin0 -> 33848 bytes
-rw-r--r--P1/Template/solutionexe/test/T1/F1.txt3
-rw-r--r--P1/Template/src/mapper.c134
-rw-r--r--P1/Template/src/mapreduce.c54
-rw-r--r--P1/Template/src/reducer.c79
-rw-r--r--P1/Template/test/T1/T1.txt3
15 files changed, 429 insertions, 0 deletions
diff --git a/P1/Project1.pdf b/P1/Project1.pdf
new file mode 100644
index 0000000..9f48794
--- /dev/null
+++ b/P1/Project1.pdf
Binary files differ
diff --git a/P1/Template/Makefile b/P1/Template/Makefile
new file mode 100644
index 0000000..4134c5f
--- /dev/null
+++ b/P1/Template/Makefile
@@ -0,0 +1,27 @@
+CC=gcc
+CFLAGS=-g
+
+SRCDIR=src
+INCLDIR=include
+LIBDIR=lib
+
+# Master executable. Listing mapreduce.c as a prerequisite makes an edit to
+# the source trigger a rebuild; mapper and reducer are listed so that a
+# plain `make` builds all three executables.
+mapreduce: $(SRCDIR)/mapreduce.c $(LIBDIR)/utils.o mapper reducer
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/mapreduce.c -o mapreduce
+
+mapper: $(SRCDIR)/mapper.c $(LIBDIR)/utils.o
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/mapper.c -o mapper
+
+reducer: $(SRCDIR)/reducer.c $(LIBDIR)/utils.o
+	$(CC) $(CFLAGS) -I$(INCLDIR) $(LIBDIR)/utils.o $(SRCDIR)/reducer.c -o reducer
+
+.PHONY: run clean t1
+
+#500KB
+# The input shipped with the template is test/T1/T1.txt.
+t1:
+# make -i clean
+# make
+	./mapreduce 5 2 test/T1/T1.txt
+
+# -f keeps clean from failing when an executable has not been built yet.
+clean:
+	rm -f mapreduce mapper reducer
+	rm -rf output
\ No newline at end of file
diff --git a/P1/Template/README.md b/P1/Template/README.md
new file mode 100644
index 0000000..0266e65
--- /dev/null
+++ b/P1/Template/README.md
@@ -0,0 +1 @@
+Add your project details as mentioned in Section Deliverables. \ No newline at end of file
diff --git a/P1/Template/include/mapper.h b/P1/Template/include/mapper.h
new file mode 100644
index 0000000..2e3693c
--- /dev/null
+++ b/P1/Template/include/mapper.h
@@ -0,0 +1,46 @@
+#ifndef MAPPER_H
+#define MAPPER_H
+
+#include "utils.h"
+
+// ###### DO NOT REMOVE ######
+#define MAXKEYSZ 100
+#define MAXVALUESZ 100
+
+// ###### DO NOT REMOVE ######
+char *mapOutDir;
+int mapperID;
+
+
+// You are free to change the intermediate data structure as it suits you
+// If you do so, ensure the provided utility functions are also changed
+// 1 1 1...
+// Singly linked list of the values collected for one key. Each node stores
+// one value as a NUL-terminated string in a fixed buffer of MAXVALUESZ bytes;
+// `next` is NULL on the last node.
+typedef struct valueList {
+ // MAXVALUESZ can be reduced to a small value as you are only storing "1"
+ char value[MAXVALUESZ];
+ struct valueList *next;
+}valueList;
+
+// word 1 1 1...
+// Singly linked list with one node per distinct key. `key` holds the word in
+// a fixed buffer of MAXKEYSZ bytes, `value` points to the head of that word's
+// valueList, and `next` is NULL on the last node.
+typedef struct intermediateDS{
+ char key[MAXKEYSZ];
+ valueList *value;
+ struct intermediateDS *next;
+}intermediateDS;
+
+// ###### DO NOT REMOVE ######
+valueList *createNewValueListNode(char *value);
+valueList *insertNewValueToList(valueList *root, char *count);
+void freeValueList(valueList *root);
+
+// ###### DO NOT REMOVE ######
+intermediateDS *createNewInterDSNode(char *word, char *count);
+intermediateDS *insertPairToInterDS(intermediateDS *root, char *word, char *count);
+void freeInterDS(intermediateDS *root);
+
+// ###### DO NOT REMOVE ######
+void emit(char *key, char *value);
+void map(char *chunkData);
+void writeIntermediateDS();
+
+#endif \ No newline at end of file
diff --git a/P1/Template/include/mapreduce.h b/P1/Template/include/mapreduce.h
new file mode 100644
index 0000000..9b5950b
--- /dev/null
+++ b/P1/Template/include/mapreduce.h
@@ -0,0 +1,6 @@
+#ifndef MAPREDUCE_H
+#define MAPREDUCE_H
+
+#include "utils.h" //sendChunkData and shuffle
+
+#endif \ No newline at end of file
diff --git a/P1/Template/include/reducer.h b/P1/Template/include/reducer.h
new file mode 100644
index 0000000..79ea452
--- /dev/null
+++ b/P1/Template/include/reducer.h
@@ -0,0 +1,24 @@
+#ifndef REDUCER_H
+#define REDUCER_H
+
+#include "utils.h"
+
+// Size in bytes of the key buffer used by the reducer.
+// NOTE(review): mapper.h defines MAXKEYSZ as 100 while this header uses 50.
+// Confirm that no key reaching the reducer can be longer than 49 characters,
+// or align the two values.
+#define MAXKEYSZ 50
+
+// ###### DO NOT REMOVE ######
+// Singly linked list with one node per distinct key: `key` is the word,
+// `value` is its integer count, and `next` is NULL on the last node.
+typedef struct finalKeyValueDS {
+ char key[MAXKEYSZ];
+ int value;
+ struct finalKeyValueDS *next;
+} finalKeyValueDS;
+
+// ###### DO NOT REMOVE ######
+finalKeyValueDS *createFinalKeyValueNode(char *value, int count);
+finalKeyValueDS *insertNewKeyValue(finalKeyValueDS *root, char *word, int count);
+void freeFinalDS(finalKeyValueDS *root);
+
+// ###### DO NOT REMOVE ######
+void writeFinalDS(int reducerID);
+void reduce(char *key);
+
+#endif \ No newline at end of file
diff --git a/P1/Template/include/utils.h b/P1/Template/include/utils.h
new file mode 100644
index 0000000..cfe56f2
--- /dev/null
+++ b/P1/Template/include/utils.h
@@ -0,0 +1,52 @@
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/msg.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <ctype.h>
+
+#define chunkSize 1024
+#define MSGSIZE 1100
+#define ENDTYPE 1000
+#define ACKTYPE 1100
+
+// Message record: a long type tag followed by a text payload of MSGSIZE bytes.
+// NOTE(review): the layout matches what msgsnd/msgrcv from <sys/msg.h> expect,
+// and ENDTYPE / ACKTYPE look like reserved msgType values — confirm against
+// the utils implementation, which is not shown here.
+struct msgBuffer {
+ long msgType;
+ char msgText[MSGSIZE];
+};
+
+// mapper side
+int validChar(char c);
+// getWord usage - retrieves words from the chunk passed until it is fully traversed
+// given a chunk of data chunkData, the call to getWord should look as below:
+// int i = 0;
+// char *buffer;
+// while ((buffer = getWord(chunkData, &i)) != NULL){
+// your code
+// }
+char *getWord(char *chunk, int *i);
+char *getChunkData(int mapperID);
+void sendChunkData(char *inputFile, int nMappers);
+
+
+// reducer side
+int hashFunction(char* key, int reducers);
+int getInterData(char *key, int reducerID);
+void shuffle(int nMappers, int nReducers);
+
+// directory
+void createOutputDir();
+char *createMapDir(int mapperID);
+void removeOutputDir();
+void bookeepingCode();
+
+#endif \ No newline at end of file
diff --git a/P1/Template/solutionexe/mapper b/P1/Template/solutionexe/mapper
new file mode 100644
index 0000000..63e089d
--- /dev/null
+++ b/P1/Template/solutionexe/mapper
Binary files differ
diff --git a/P1/Template/solutionexe/mapreduce b/P1/Template/solutionexe/mapreduce
new file mode 100644
index 0000000..6857ba1
--- /dev/null
+++ b/P1/Template/solutionexe/mapreduce
Binary files differ
diff --git a/P1/Template/solutionexe/reducer b/P1/Template/solutionexe/reducer
new file mode 100644
index 0000000..58e0ec4
--- /dev/null
+++ b/P1/Template/solutionexe/reducer
Binary files differ
diff --git a/P1/Template/solutionexe/test/T1/F1.txt b/P1/Template/solutionexe/test/T1/F1.txt
new file mode 100644
index 0000000..74353e8
--- /dev/null
+++ b/P1/Template/solutionexe/test/T1/F1.txt
@@ -0,0 +1,3 @@
+
+To give you an estimation of the difference in the original and
+what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. 
To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. 
To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have today. \ No newline at end of file
diff --git a/P1/Template/src/mapper.c b/P1/Template/src/mapper.c
new file mode 100644
index 0000000..66ac2ef
--- /dev/null
+++ b/P1/Template/src/mapper.c
@@ -0,0 +1,134 @@
+#include "mapper.h"
+
+// Allocate one node of the combined value list for a word <1,1,1,1....>.
+// `value` is copied into the node and truncated to MAXVALUESZ - 1 characters
+// so that it always fits the fixed buffer.
+// Returns the new node with next == NULL, or NULL if the allocation fails.
+// The caller owns the node (release it with freeValueList).
+valueList *createNewValueListNode(char *value){
+    valueList *newNode = malloc(sizeof *newNode);
+    if(newNode == NULL)
+        return NULL;
+    // bounded copy: strcpy would write past newNode->value on an over-long input
+    snprintf(newNode -> value, sizeof newNode -> value, "%s", value);
+    newNode -> next = NULL;
+    return newNode;
+}
+
+// Append `count` as a new node at the tail of the value list.
+// Returns the head of the list: the freshly created node when `root` is
+// NULL, otherwise `root` itself.
+valueList *insertNewValueToList(valueList *root, char *count){
+    // walk the chain of `next` links until the empty slot at the tail
+    valueList **link = &root;
+    while(*link != NULL)
+        link = &(*link) -> next;
+    *link = createNewValueListNode(count);
+    return root;
+}
+
+// Free every node of a value list. A NULL `root` is a no-op.
+// Each node, including the last one (and the only one in a single-element
+// list), is released; the pointer passed in must not be used afterwards.
+void freeValueList(valueList *root) {
+    while (root != NULL){
+        // read the successor before the current node is released
+        valueList *nextNode = root -> next;
+        free(root);
+        root = nextNode;
+    }
+}
+
+// Create a <word, value list> node whose value list holds the single entry
+// `count`. `word` is copied into the node and truncated to MAXKEYSZ - 1
+// characters so that it always fits the fixed key buffer.
+// Returns the new node with next == NULL, or NULL if the allocation fails.
+// The caller owns the node (release it with freeInterDS).
+intermediateDS *createNewInterDSNode(char *word, char *count){
+    intermediateDS *newNode = malloc(sizeof *newNode);
+    if(newNode == NULL)
+        return NULL;
+    // bounded copy: strcpy would write past newNode->key on an over-long word
+    snprintf(newNode -> key, sizeof newNode -> key, "%s", word);
+    // inserting into an empty list yields a one-element value list
+    newNode -> value = insertNewValueToList(NULL, count);
+    newNode -> next = NULL;
+    return newNode;
+}
+
+// Record <word, count> in the intermediate DS.
+// If a node with key `word` already exists, `count` is appended to its value
+// list; otherwise a new node is appended at the tail.
+// Returns the head of the list: the new node when `root` is NULL, otherwise
+// `root` itself.
+intermediateDS *insertPairToInterDS(intermediateDS *root, char *word, char *count){
+    intermediateDS **link = &root;
+    while(*link != NULL) {
+        intermediateDS *entry = *link;
+        if(strcmp(entry -> key, word) == 0){
+            // known word: extend its value list and stop
+            entry -> value = insertNewValueToList(entry -> value, count);
+            return root;
+        }
+        link = &entry -> next;
+    }
+    // word not present: hook a new node onto the tail (or make it the head)
+    *link = createNewInterDSNode(word, count);
+    return root;
+}
+
+// free the DS after usage. Call this once you are done with the writing of DS into file
+// Releases every node together with its value list, including the last node
+// (and the only node of a single-element list). A NULL `root` is a no-op.
+void freeInterDS(intermediateDS *root) {
+    while (root != NULL){
+        // read the successor before the current node is released
+        intermediateDS *nextNode = root -> next;
+        freeValueList(root -> value);
+        free(root);
+        root = nextNode;
+    }
+}
+
+// emit the <key, value> into intermediate DS
+// Template stub: the body is empty in this template, so calling it has no effect.
+void emit(char *key, char *value) {
+
+}
+
+// map function
+// Called by main once for every chunk obtained from getChunkData; chunkData
+// is a NUL-terminated copy of that chunk.
+// Template stub: the body is empty in this template.
+void map(char *chunkData){
+
+ // you can use getWord to retrieve words from the
+ // chunkData one by one. Example usage in utils.h
+}
+
+// write intermediate data to separate word.txt files
+// Each file will have only one line : word 1 1 1 1 1 ...
+// Called by main once, after the chunk loop has finished.
+// Template stub: the body is empty in this template, so nothing is written yet.
+void writeIntermediateDS() {
+
+}
+
+// Mapper entry point. Usage: ./mapper mapperID
+// Creates the output folder for this mapper, passes every chunk returned by
+// getChunkData to map() until getChunkData returns NULL, then calls
+// writeIntermediateDS(). Returns 0 on success; exits with a failure status
+// when the mapperID argument is missing.
+int main(int argc, char *argv[]) {
+
+    if (argc < 2) {
+        printf("Less number of arguments.\n");
+        printf("./mapper mapperID\n");
+        // a usage error is a failure, so report it through the exit status
+        exit(EXIT_FAILURE);
+    }
+    // ###### DO NOT REMOVE ######
+    mapperID = strtol(argv[1], NULL, 10);
+
+    // ###### DO NOT REMOVE ######
+    // create folder specifically for this mapper in output/MapOut
+    // mapOutDir has the path to the folder where the outputs of
+    // this mapper should be stored
+    mapOutDir = createMapDir(mapperID);
+
+    // ###### DO NOT REMOVE ######
+    while(1) {
+        // create an array of chunkSize=1024B and initialize all
+        // elements with '\0'
+        char chunkData[chunkSize + 1]; // +1 for '\0'
+        memset(chunkData, '\0', chunkSize + 1);
+
+        char *retChunk = getChunkData(mapperID);
+        if(retChunk == NULL) {
+            break;
+        }
+
+        // bounded copy: never write past chunkData, even if the returned
+        // string is longer than chunkSize
+        snprintf(chunkData, sizeof chunkData, "%s", retChunk);
+        free(retChunk);
+
+        map(chunkData);
+    }
+
+    // ###### DO NOT REMOVE ######
+    writeIntermediateDS();
+
+    return 0;
+}
\ No newline at end of file
diff --git a/P1/Template/src/mapreduce.c b/P1/Template/src/mapreduce.c
new file mode 100644
index 0000000..5b63f3f
--- /dev/null
+++ b/P1/Template/src/mapreduce.c
@@ -0,0 +1,54 @@
+#include "mapreduce.h"
+
+// Master entry point. Usage: ./mapreduce #mappers #reducers inputFile
+// Runs the bookkeeping code, forks one child that calls sendChunkData and,
+// later, one child that calls shuffle. Spawning the mapper and reducer
+// processes and waiting for them is still marked "To do" below.
+// Returns 0 on success; exits with a failure status on a usage error, on
+// non-positive mapper/reducer counts, or when fork() fails.
+int main(int argc, char *argv[]) {
+
+    if(argc < 4) {
+        printf("Less number of arguments.\n");
+        printf("./mapreduce #mappers #reducers inputFile\n");
+        // a usage error is a failure, so report it through the exit status
+        exit(EXIT_FAILURE);
+    }
+
+    // ###### DO NOT REMOVE ######
+    int nMappers = strtol(argv[1], NULL, 10);
+    int nReducers = strtol(argv[2], NULL, 10);
+    char *inputFile = argv[3];
+
+    // strtol yields 0 for non-numeric text; reject that and negative counts
+    // before any process is created
+    if(nMappers <= 0 || nReducers <= 0) {
+        fprintf(stderr, "#mappers and #reducers must be positive integers.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    // ###### DO NOT REMOVE ######
+    bookeepingCode();
+
+    // ###### DO NOT REMOVE ######
+    pid_t pid = fork();
+    if(pid < 0){
+        // no child was created, so nobody would send the chunks
+        perror("fork");
+        exit(EXIT_FAILURE);
+    }
+    if(pid == 0){
+        //send chunks of data to the mappers in RR fashion
+        sendChunkData(inputFile, nMappers);
+        exit(0);
+    }
+    sleep(1);
+
+
+    // To do
+    // spawn mappers processes and run 'mapper' executable using exec
+
+    // To do
+    // wait for all children to complete execution
+
+
+    // ###### DO NOT REMOVE ######
+    // shuffle sends the word.txt files generated by mapper
+    // to reducer based on a hash function
+    pid = fork();
+    if(pid < 0){
+        // no child was created, so nobody would run the shuffle
+        perror("fork");
+        exit(EXIT_FAILURE);
+    }
+    if(pid == 0){
+        shuffle(nMappers, nReducers);
+        exit(0);
+    }
+    sleep(1);
+
+
+    // To do
+    // spawn reducer processes and run 'reducer' executable using exec
+
+    // To do
+    // wait for all children to complete execution
+
+    return 0;
+}
\ No newline at end of file
diff --git a/P1/Template/src/reducer.c b/P1/Template/src/reducer.c
new file mode 100644
index 0000000..bdf093b
--- /dev/null
+++ b/P1/Template/src/reducer.c
@@ -0,0 +1,79 @@
+#include "reducer.h"
+
+// Create a <word, count> node for the final key/value list.
+// `word` is copied into the node and truncated to MAXKEYSZ - 1 characters
+// so that it always fits the fixed key buffer.
+// Returns the new node with next == NULL, or NULL if the allocation fails.
+// The caller owns the node (release it with freeFinalDS).
+finalKeyValueDS *createFinalKeyValueNode(char *word, int count){
+    finalKeyValueDS *newNode = malloc(sizeof *newNode);
+    if(newNode == NULL)
+        return NULL;
+    // bounded copy: strcpy would write past newNode->key on an over-long word
+    snprintf(newNode -> key, sizeof newNode -> key, "%s", word);
+    newNode -> value = count;
+    newNode -> next = NULL;
+    return newNode;
+}
+
+// Insert or update a key/value pair.
+// If a node with key `word` already exists, `count` is added to its value;
+// otherwise a new node is appended at the tail.
+// Returns the head of the list: the new node when `root` is NULL, otherwise
+// `root` itself.
+finalKeyValueDS *insertNewKeyValue(finalKeyValueDS *root, char *word, int count){
+    finalKeyValueDS **link = &root;
+    while(*link != NULL){
+        finalKeyValueDS *entry = *link;
+        if(strcmp(entry -> key, word) == 0){
+            // known word: accumulate and stop
+            entry -> value += count;
+            return root;
+        }
+        link = &entry -> next;
+    }
+    // word not present: hook a new node onto the tail (or make it the head)
+    *link = createFinalKeyValueNode(word, count);
+    return root;
+}
+
+// free the DS after usage. Call this once you are done with the writing of DS into file
+// Releases every node, including the last one (and the only node of a
+// single-element list). A NULL `root` is a no-op.
+void freeFinalDS(finalKeyValueDS *root) {
+    while (root != NULL){
+        // read the successor before the current node is released
+        finalKeyValueDS *nextNode = root -> next;
+        free(root);
+        root = nextNode;
+    }
+}
+
+// reduce function
+// Called by main once for every key that getInterData delivers.
+// Template stub: the body is empty in this template, so the key is ignored.
+void reduce(char *key) {
+
+}
+
+// write the contents of the final intermediate structure
+// to output/ReduceOut/Reduce_reducerID.txt
+// Called by main once, after the getInterData loop has finished.
+// Template stub: the body is empty in this template, so nothing is written yet.
+void writeFinalDS(int reducerID){
+
+}
+
+// Reducer entry point. Usage: ./reducer reducerID
+// Calls reduce() for every key delivered by getInterData until it returns 0,
+// then calls writeFinalDS(). Returns 0 on success; exits with a failure
+// status when the reducerID argument is missing.
+int main(int argc, char *argv[]) {
+
+    if(argc < 2){
+        printf("Less number of arguments.\n");
+        printf("./reducer reducerID");
+        // stop here: without this exit, argv[1] is NULL and strtol below
+        // would dereference it
+        exit(EXIT_FAILURE);
+    }
+
+    // ###### DO NOT REMOVE ######
+    // initialize
+    int reducerID = strtol(argv[1], NULL, 10);
+
+    // ###### DO NOT REMOVE ######
+    // master will continuously send the word.txt files
+    // allotted to the reducer
+    char key[MAXKEYSZ];
+    while(getInterData(key, reducerID))
+        reduce(key);
+
+    // You may write this logic. You can somehow store the
+    // <key, value> count and write to Reduce_reducerID.txt file
+    // So you may delete this function and add your logic
+    writeFinalDS(reducerID);
+
+    return 0;
+}
\ No newline at end of file
diff --git a/P1/Template/test/T1/T1.txt b/P1/Template/test/T1/T1.txt
new file mode 100644
index 0000000..74353e8
--- /dev/null
+++ b/P1/Template/test/T1/T1.txt
@@ -0,0 +1,3 @@
+
+To give you an estimation of the difference in the original and
+what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. 
To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. 
To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have today. To give you an estimation of the difference in the original and what we have todayTo give you an estimation of the difference in the original and what we have today. \ No newline at end of file