aboutsummaryrefslogtreecommitdiffstats
path: root/P2/src/mapper.c
diff options
context:
space:
mode:
authorRossTheRoss <mstrapp@protonmail.com>2020-10-22 07:50:12 -0500
committerRossTheRoss <mstrapp@protonmail.com>2020-10-22 07:50:12 -0500
commit6ff784158f61a082fe6d4332c743a066ef007674 (patch)
tree9148a10ce43358d697d8a8f80bd7cdef7b91e06d /P2/src/mapper.c
parentget rid of & (diff)
downloadcsci4061-6ff784158f61a082fe6d4332c743a066ef007674.tar
csci4061-6ff784158f61a082fe6d4332c743a066ef007674.tar.gz
csci4061-6ff784158f61a082fe6d4332c743a066ef007674.tar.bz2
csci4061-6ff784158f61a082fe6d4332c743a066ef007674.tar.lz
csci4061-6ff784158f61a082fe6d4332c743a066ef007674.tar.xz
csci4061-6ff784158f61a082fe6d4332c743a066ef007674.tar.zst
csci4061-6ff784158f61a082fe6d4332c743a066ef007674.zip
Add Project 2 Files
Diffstat (limited to 'P2/src/mapper.c')
-rw-r--r--P2/src/mapper.c221
1 files changed, 221 insertions, 0 deletions
diff --git a/P2/src/mapper.c b/P2/src/mapper.c
new file mode 100644
index 0000000..469b32e
--- /dev/null
+++ b/P2/src/mapper.c
@@ -0,0 +1,221 @@
+#include "mapper.h"
+
+// combined value list corresponding to a word <1,1,1,1....>
+valueList *createNewValueListNode(char *value){
+ valueList *newNode = (valueList *)malloc (sizeof(valueList));
+ strcpy(newNode -> value, value);
+ newNode -> next = NULL;
+ return newNode;
+}
+
+// insert new count to value list
+valueList *insertNewValueToList(valueList *root, char *count){
+ valueList *tempNode = root;
+ if(root == NULL)
+ return createNewValueListNode(count);
+ while(tempNode -> next != NULL)
+ tempNode = tempNode -> next;
+ tempNode -> next = createNewValueListNode(count);
+ return root;
+}
+
+// free value list
+void freeValueList(valueList *root) {
+ if(root == NULL) return;
+
+ valueList *tempNode = NULL;
+ while (root != NULL){
+ tempNode = root;
+ root = root -> next;
+ free(tempNode);
+ }
+}
+
+// create <word, value list>
+intermediateDS *createNewInterDSNode(char *word, char *count){
+ intermediateDS *newNode = (intermediateDS *)malloc (sizeof(intermediateDS));
+ strcpy(newNode -> key, word);
+ newNode -> value = NULL;
+ newNode -> value = insertNewValueToList(newNode -> value, count);
+ newNode -> next = NULL;
+ return newNode;
+}
+
+// insert <word, value> to intermediate DS
+intermediateDS *insertPairToInterDS(intermediateDS *root, char *word, char *count){
+ intermediateDS *tempNode = root;
+ if(root == NULL)
+ return createNewInterDSNode(word, count);
+ while(tempNode -> next != NULL) {
+ if(strcmp(tempNode -> key, word) == 0){
+ tempNode -> value = insertNewValueToList(tempNode -> value, count);
+ return root;
+ }
+ tempNode = tempNode -> next;
+
+ }
+ if(strcmp(tempNode -> key, word) == 0){
+ tempNode -> value = insertNewValueToList(tempNode -> value, count);
+ } else {
+ tempNode -> next = createNewInterDSNode(word, count);
+ }
+ return root;
+}
+
+// free intermediate DS
+void freeInterDS(intermediateDS *root) {
+ if(root == NULL) return;
+
+ intermediateDS *tempNode = NULL;
+ while (root != NULL){
+ tempNode = root;
+ root = root -> next;
+ freeValueList(tempNode -> value);
+ free(tempNode);
+ }
+}
+
+// emit the <key, value> into intermediate DS
+void emit(char *key, char *value) {
+ // printf("To be emitted: %s, %s\n", (char *)key, value);
+ interDS = insertPairToInterDS(interDS, key, value);
+}
+
+// map function
+void map(char *chunkData){
+ char *cData = chunkData;
+ char *buffer;
+ int i = 0;
+
+ while((buffer = getWord(cData, &i)) != NULL){
+ emit(buffer, "1");
+ free(buffer);
+ buffer = NULL;
+ }
+}
+
+// generate the file name for word.txt
+char *generateWordFileName(char *word){
+ char fileName[MAXKEYSZ];
+ memset(fileName, '\0', MAXKEYSZ);
+ strcpy(fileName, word);
+ strcat(fileName, ".txt");
+ fileName[strlen(fileName)] = '\0';
+
+ char *wordFileName = (char*)malloc(sizeof(char) * (2 * MAXKEYSZ));
+ memset(wordFileName, '\0', (2 * MAXVALUESZ));
+ strcpy(wordFileName, mapOutDir);
+ strcat(wordFileName, "/");
+ strcat(wordFileName, fileName);
+ wordFileName[strlen(wordFileName)] = '\0';
+
+ return wordFileName;
+
+}
+
+// write intermediate data to separate word.txt files
+// Each file will have only one line <word 1 1 1 1 1 ...>
+void writeIntermediateDS() {
+ intermediateDS *travNode = interDS;
+
+ while(travNode != NULL) {
+ // create file word.txt
+ // content : word 1 1 1 1 1 1.....
+ char *wordFileName = generateWordFileName(travNode -> key);
+ int fd = open(wordFileName, O_CREAT | O_WRONLY, 0777);
+ if (fd < 0){
+ printf("ERROR: Cannot open the file %s\n", wordFileName);
+ exit(0);
+ }
+
+ // word write
+ int ret = write(fd, travNode -> key, strlen(travNode -> key));
+ if(ret < 0){
+ printf("ERROR: Cannot write to file %s\n", wordFileName);
+ exit(0);
+ }
+
+ // append space
+ ret = write(fd, " ", 1);
+ if(ret < 0){
+ printf("ERROR: Cannot write to file %s\n", wordFileName);
+ exit(0);
+ }
+
+ // append value list
+ valueList *tNode = travNode -> value;
+ while(tNode -> next != NULL){
+ ret = write(fd, tNode -> value, strlen(tNode -> value)); // space after the word
+ if(ret < 0){
+ printf("ERROR: Cannot write to file %s\n", wordFileName);
+ exit(0);
+ }
+
+ // append space
+ ret = write(fd, " ", 1);
+ if(ret < 0){
+ printf("ERROR: Cannot write to file %s\n", wordFileName);
+ exit(0);
+ }
+ tNode = tNode -> next;
+ }
+
+ // last value
+ ret = write(fd, tNode -> value, strlen(tNode -> value)); // space after the word
+ if(ret < 0){
+ printf("ERROR: Cannot write to file %s\n", wordFileName);
+ exit(0);
+ }
+
+ // append newline
+ ret = write(fd, "\n", 1);
+ if(ret < 0){
+ printf("ERROR: Cannot write to file %s\n", wordFileName);
+ exit(0);
+ }
+ //close file
+ close(fd);
+
+ // go to next word in intermediate DS
+ travNode = travNode -> next;
+ }
+}
+
+int main(int argc, char *argv[]) {
+
+ if (argc < 2) {
+ printf("Less number of arguments.\n");
+ printf("./mapper mapperID\n");
+ exit(0);
+ }
+
+ // initializing global variables
+ mapperID = strtol(argv[1], NULL, 10);
+ interDS = NULL;
+
+ //create folder specifically for this mapper in output/MapOut
+ mapOutDir = createMapDir(mapperID);
+
+
+ int count = 0;
+ while(1) {
+ char chunkData[chunkSize + 1];
+ memset(chunkData, '\0', chunkSize + 1);
+
+ char *retChunk = getChunkData(mapperID);
+ if(retChunk == NULL) {
+ break;
+ }
+ count++;
+ strcpy(chunkData, retChunk);
+ free(retChunk);
+
+ map(chunkData);
+ }
+
+ //student code
+ writeIntermediateDS();
+ freeInterDS(interDS);
+
+ return 0;
+} \ No newline at end of file