280 lines
6.6 KiB
C
280 lines
6.6 KiB
C
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
#include <math.h>
|
||
|
|
||
|
//________________________SETTINGS__________________________

// Size in bytes of the buffer in which a single word is assembled.
#define WORDLEN 100
// Multiplier used by histogram auto scaling: bars are sized from
// (count / highest count) * AUTO_HIST_SCALE.
#define AUTO_HIST_SCALE 120

// Initial capacity of WordList, in entries. wordCount() doubles this value
// each time the list fills up, which is why it is a variable and not a macro.
int MAX_LIST_LEN = 25;

// Number of distinct words currently stored in WordList.
int nWords = 0;

// Character passed to plotHistogram() as the unit of a histogram bar.
char symbolHistogram = '|';

// Scale passed to plotHistogram():
//   customScale >  0 : used directly as a multiplier on each word's count
//   customScale <= 0 : auto scaling based on the most frequent word
double customScale = 0.0;

//______________________________________________________________________

// Input stream: opened in main() and read by nextWordInFile().
FILE *fp;
|
||
|
|
||
|
|
||
|
//________________________________STRUCT______________________________________
|
||
|
//TODO: Define structure called "WordCount"
|
||
|
//Also, define a pointer called "WordList" to this structure here (Array of structures)
|
||
|
//Then, malloc this defined structure in main
|
||
|
|
||
|
/* One distinct word together with the number of times it has been seen. */
typedef struct WordCount {
    char *word;   /* NUL-terminated word text (lower-cased by wordCount()) */
    int count;    /* occurrences counted so far; starts at 1 */
} WordCount;

/*
 * The word table: an array of pointers to individually allocated WordCount
 * entries. The first nWords slots are in use.
 */
WordCount **WordList;
|
||
|
|
||
|
//______________________________________________________________________
|
||
|
|
||
|
char buffer[WORDLEN];
|
||
|
int pos = 0;
|
||
|
|
||
|
//_________NEXTWORD: Given to students: Returns nextword in file_______
|
||
|
|
||
|
static char * nextWordInFile() {
|
||
|
char c;
|
||
|
while((c=fgetc(fp)) != EOF) {
|
||
|
if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) {
|
||
|
buffer[pos++] = c;
|
||
|
buffer[pos] = '\0';
|
||
|
}
|
||
|
else {
|
||
|
if(pos > 0) {
|
||
|
pos = 0;
|
||
|
return strdup(buffer);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
//________________________Helper Functions______________________________
|
||
|
|
||
|
/*
 * Converts the ASCII upper-case letters of word to lower case, in place.
 * All other characters are left untouched. Returns word itself.
 */
char * toLower(char * word) {
    for (char *p = word; *p != '\0'; p++) {
        if ('A' <= *p && *p <= 'Z') {
            *p = (char) (*p - 'A' + 'a');
        }
    }
    return word;
}
|
||
|
|
||
|
|
||
|
int getWordIndex(char * word) {
|
||
|
//Helper to get i-th word from array of structures
|
||
|
//Could additionally be used to check existence of a word in the array
|
||
|
for(int i = 0; i < nWords; i++) {
|
||
|
if(!strcmp(WordList[i]->word, word)) {
|
||
|
return i;
|
||
|
}
|
||
|
}
|
||
|
return -1;
|
||
|
|
||
|
}
|
||
|
|
||
|
/* Prints the command-line usage summary to standard output. */
void help() {
    fputs("SYNTAX: ./WordCount <filename> [ --sort-lex | --sort-num ] --histogram?\n", stdout);
}
|
||
|
|
||
|
void printStruct() {
|
||
|
//Helper to print your structure
|
||
|
//Traverse and print members with their count
|
||
|
/*
|
||
|
word1 word_count
|
||
|
.
|
||
|
.
|
||
|
.
|
||
|
wordN word_count
|
||
|
*/
|
||
|
for (int i = 0; i < nWords; i++) {
|
||
|
printf("%s %d\n", WordList[i]->word, WordList[i]->count);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
//_________________________________________________________________________
|
||
|
|
||
|
//_________________________Task 1: WordCount_______________________________
|
||
|
|
||
|
/*
|
||
|
*Update <wordList> with every new word encountered
|
||
|
*With repeated instance of a word, only update <count>
|
||
|
*New entries: Realloc if greater than macro<MAX_LIST_LEN>
|
||
|
*/
|
||
|
|
||
|
void wordCount() {
|
||
|
//TODO: Your Implementation here
|
||
|
char* word = nextWordInFile();
|
||
|
while (word != NULL) {
|
||
|
toLower(word);
|
||
|
int i = getWordIndex(word);
|
||
|
if (i != -1)
|
||
|
WordList[i]->count++;
|
||
|
else {
|
||
|
if (nWords >= MAX_LIST_LEN) {
|
||
|
MAX_LIST_LEN *= 2;
|
||
|
WordList = (WordCount **) realloc(WordList, sizeof(WordCount) * MAX_LIST_LEN);
|
||
|
}
|
||
|
|
||
|
WordCount* newCount = malloc(sizeof(WordCount));
|
||
|
newCount->word = strdup(word);
|
||
|
free(word);
|
||
|
word = NULL;
|
||
|
newCount->count = 1;
|
||
|
WordList[nWords] = newCount;
|
||
|
nWords++;
|
||
|
}
|
||
|
word = nextWordInFile();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//________________________________________________________________
|
||
|
//_________________________Task 2: Flags (Optional)_______________________________
|
||
|
//Task 2 a) : Sorting Alphabetically (Ascending)
|
||
|
|
||
|
void sortLex() {
|
||
|
//TODO: Your Implementation here
|
||
|
int sortFlag;
|
||
|
|
||
|
for (int i = 1; i < nWords; i++) {
|
||
|
sortFlag = 0;
|
||
|
for (int j = 0; j < nWords - 1; j++) {
|
||
|
if (strcmp(WordList[j]->word, WordList[j + 1]->word) > 0) {
|
||
|
int tempCount = WordList[j]->count;
|
||
|
char* tempStr = WordList[j]->word;
|
||
|
WordList[j]->count = WordList[j + 1]->count;
|
||
|
WordList[j]->word = WordList[j + 1]->word;
|
||
|
WordList[j + 1]->count = tempCount;
|
||
|
WordList[j + 1]->word = tempStr;
|
||
|
|
||
|
sortFlag = 1;
|
||
|
}
|
||
|
}
|
||
|
if (sortFlag == 0)
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//Task 2 b) : Sorting by frequencies (Descending)
|
||
|
|
||
|
void sortNum() {
|
||
|
//TODO: Your Implementation here
|
||
|
int sortFlag;
|
||
|
|
||
|
for (int i = 1; i < nWords; i++) {
|
||
|
sortFlag = 0;
|
||
|
for (int j = 0; j < nWords - 1; j++) {
|
||
|
if (WordList[j]->count < WordList[j + 1]->count) {
|
||
|
int tempCount = WordList[j]->count;
|
||
|
char* tempStr = WordList[j]->word;
|
||
|
WordList[j]->count = WordList[j + 1]->count;
|
||
|
WordList[j]->word = WordList[j + 1]->word;
|
||
|
WordList[j + 1]->count = tempCount;
|
||
|
WordList[j + 1]->word = tempStr;
|
||
|
|
||
|
sortFlag = 1;
|
||
|
}
|
||
|
}
|
||
|
if (sortFlag == 0)
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//________________________________________________________________
|
||
|
|
||
|
//___________________Extra Credit: Histogram______________________
|
||
|
|
||
|
void plotHistogram(double scale, char symbol) {
|
||
|
int maxLen = 20;
|
||
|
int max = 0;
|
||
|
for (int i = 0; i < nWords; i++) {
|
||
|
if (WordList[i]->count > max) {
|
||
|
max = WordList[i]->count;
|
||
|
}
|
||
|
//if (strlen(WordList[i]->word) > maxLen) {
|
||
|
// maxLen = strlen(WordList[i]->word);
|
||
|
//}
|
||
|
}
|
||
|
|
||
|
if (scale <= 0) {
|
||
|
scale = (1 / ((double) max)) * AUTO_HIST_SCALE;
|
||
|
printf("HISTOGRAM WITH AUTO SCALING OF (CURR / MAX) * AUTO_HIST_SCALE: %d\n", AUTO_HIST_SCALE);
|
||
|
printf("USING HIGHEST FREQUENCY AS: %d\n", max);
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < nWords; i++) {
|
||
|
printf("%s", WordList[i]->word);
|
||
|
int len = strlen(WordList[i]->word);
|
||
|
int size = ceil((int) (((double) WordList[i]->count) * scale)) + 1; //(WordList[i]->count / max)
|
||
|
for (int k = 0; k < maxLen - len; k++) {
|
||
|
printf(" ");
|
||
|
}
|
||
|
for (int j = 0; j < size; j++) {
|
||
|
printf("|");
|
||
|
}
|
||
|
printf("\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//_____________________________________________________________________
|
||
|
|
||
|
int main(int argc, char **argv) {
|
||
|
//WordCount array
|
||
|
//Malloc your structure array here
|
||
|
|
||
|
// Defined above
|
||
|
WordList = (WordCount **) malloc(sizeof(WordCount) * MAX_LIST_LEN);
|
||
|
|
||
|
//Reads file with <filename> given by ARGV[1]
|
||
|
//nextWordInFile returns words from file pointed by FILE * fp
|
||
|
if (argc == 2 && strcmp(argv[1], "--help") == 0)
|
||
|
help();
|
||
|
else if(argc >= 2) {
|
||
|
fp = fopen(argv[1], "r");
|
||
|
if(!fp) return 0;
|
||
|
|
||
|
//Call your WordCount method and sorting methods HERE as needed by your flags
|
||
|
wordCount();
|
||
|
|
||
|
if (argc >= 3 && strcmp(argv[2], "--sort-num") == 0)
|
||
|
sortNum();
|
||
|
else if (argc >= 3 && strcmp(argv[2], "--sort-lex") == 0)
|
||
|
sortLex();
|
||
|
|
||
|
if ((argc >= 3 && strcmp(argv[2], "--histogram") == 0) || (argc >= 4 && strcmp(argv[3], "--histogram") == 0))
|
||
|
plotHistogram(customScale, symbolHistogram);
|
||
|
else
|
||
|
printStruct();
|
||
|
|
||
|
//TIP: Remember to change argc condition with additional flags. Current implementation only works for 2 arguments
|
||
|
}
|
||
|
else {
|
||
|
help();
|
||
|
}
|
||
|
|
||
|
|
||
|
//TODO: Use argc and argv to check for flags
|
||
|
|
||
|
return 0;
|
||
|
}
|