#include #include #include #include /*.......................................................*/ #define BUFFER_SIZE 10000000 #define READ_SIZE BUFFER_SIZE char buff[BUFFER_SIZE]; int bcount=0; /*.......................................................*/ typedef struct _wordcell { char *word; int count; struct _wordcell *left; struct _wordcell *right; } wordcell_t; int cell_count = 0; wordcell_t cells[100000]; /*.......................................................*/ #define LIST_SIZE 25 int topcount = LIST_SIZE; void (*add)(char*); /*....................................................... HASHTABLE .......................................................*/ #define HASH_BITS 18 #define HASH_SIZE (1<>28) ^ *(str++); return ((h ^ (h>>10) ^ (h>>20)) & HASH_MASK); } void addhash(char *str) { wordcell_t **lpp = &hashtable[hash(str)]; wordcell_t *pc = &cells[cell_count]; while (*lpp != NULL) { if (!strcmp((*lpp)->word,str)) { ((*lpp)->count) += 1; return ; } lpp = &(*lpp)->right; } cell_count++; pc->word = str; pc->count = 1; pc->right = NULL; *lpp = pc; return; } /*....................................................... TREE .......................................................*/ wordcell_t *root; void addtree(char *str) { int c; wordcell_t **lpp = &root; wordcell_t *pc = &cells[cell_count]; while (*lpp != NULL) { c = strcmp((*lpp)->word,str); if (c == 0) { ((*lpp)->count) += 1; return ; } else if (c > 0) { lpp = &(*lpp)->right; } else if (c < 0) { lpp = &(*lpp)->left; } } cell_count++; pc->word = str; pc->count = 1; pc->left = NULL;pc->right = NULL; *lpp = pc; } /*....................................................... SORTING .......................................................*/ void swap(wordcell_t* data, int i, int j) { wordcell_t t = data[i]; data[i] = data[j]; data[j] = t; } void sort(wordcell_t data[], int n) { /* Standard quicksort */ int i , last = 0; if (n<=1) return; swap(data, 0, rand()%n); for (i=0; i data[0].count) swap(data, ++last, i); swap(data, 0, last); sort(data, last); sort(data+last+1, n - last-1); } /*....................................................... STREAM HANDLING .......................................................*/ void load(FILE *stream) { char *b = buff; while (!feof(stream)) { bcount += fread(b,1,READ_SIZE,stream); b += bcount; } } void parse() { int i,wl = 0; char *p = buff; for(i=0; i 'z' ) { buff[i] = '\0'; if (wl > 0) { add(p); wl = 0;} p = &buff[i] + 1; } else { wl++; } } } void print() { int i; for(i=0;i1) args(argc, argv); /* process command line arguments*/ load(stdin); /* duh */ parse(); /* maybe put it in load?*/ sort(cells,cell_count-1); /* use quicksort */ print(); /* that the way to do it*/ }
edit