/* ZSORT.H begins here */ #ifndef H__ZSORT #define H__ZSORT typedef int (*ZSORTCOMPARE)(void *, void *, void *); typedef struct ZSORT_handle { int dummy; } *ZSORT; #ifdef __cplusplus extern "C" { #endif extern ZSORT ZsortOpen(unsigned, ZSORTCOMPARE, unsigned, void *); extern int ZsortSubmit(ZSORT, const void *p); extern int ZsortRetrieve(ZSORT, void *p); extern void ZsortClose(ZSORT, int); #ifdef __cplusplus } #endif #define ZSORTEND 1 #define ZSORTABORT 2 #define ZSORTMEMFAIL 3 #define ZSORTFILEERR 4 /* These values must be in the range of type "int" and outside the range */ /* of numbers oridnarily returned by the comparison function. */ #define ZCOMPABORT 0x80000000 #define ZCOMPDUPP 0x80000001 #define ZCOMPDUPQ 0x80000002 #endif /* ZSORT.C begins here */ #include #include #include #include "zsort.h" #ifndef __cplusplus typedef int bool; #define true 1 #define false 0 #endif struct item { struct item *next; bool end_of_block; char item[1]; }; struct zsort { unsigned size; ZSORTCOMPARE compare; bool retrieving; bool files_used; unsigned memory; void *pointer; struct memory_struct { struct item *first, *last; unsigned count; } queue[4]; struct item *empty; int destination_file; int retrieval_queue_or_file; struct file_struct { FILE *fp; unsigned long count; /* used to count items in temporary files */ } file[4]; }; ZSORT ZsortOpen(unsigned size, ZSORTCOMPARE compare, unsigned memory, void *pointer) { struct zsort *p = (struct zsort *) malloc(sizeof(struct zsort)); if (p != NULL) { int i; p->size = size; p->compare = compare; p->retrieving = false; p->files_used = false; p->memory = memory < 4 ? 4 : memory; p->pointer = pointer; p->empty = NULL; for (i = 0; i < 4; i++) { p->queue[i].first = NULL; p->queue[i].count = 0; p->file[i].fp = NULL; } } return (ZSORT) p; } #define hp ((struct zsort *) h) /*--------------------------------------------------------------------------- This function frees a linked list of items, clearing the item if the erase argument is nonzero. ---------------------------------------------------------------------------*/ static void free_list(ZSORT h, struct item *q, int erase) { while (q != NULL) { struct item *next = q->next; if (erase) memset(q->item, 0, hp->size); free(q); q = next; } } void ZsortClose(ZSORT h, int erase) { int i; free_list(h, hp->empty, erase); for (i = 0; i < 4; i++) { free_list(h, hp->queue[i].first, erase); if (hp->file[i].fp != NULL) { if (erase) { long length; fseek(hp->file[i].fp, 0, SEEK_END); length = ftell(hp->file[i].fp); fseek(hp->file[i].fp, 0, SEEK_SET); while (length > 0) { fputc(0, hp->file[i].fp); length--; } } fclose(hp->file[i].fp); } } free(hp); } /*--------------------------------------------------------------------------- This function gets a new item from the empty list or by allocating a new item structure. It returns a pointer to the item, or NULL if there is a memory allocation failure. ---------------------------------------------------------------------------*/ static struct item *new_item(ZSORT h) { struct item *q; if (hp->empty != NULL) { q = hp->empty; hp->empty = q->next; } else q = (struct item *) malloc(sizeof(struct item) + hp->size); return q; } /*--------------------------------------------------------------------------- This function creates a temporary file, or rewinds a temporary file if it already exists. It returns false if there is a file open error. ---------------------------------------------------------------------------*/ static bool open_file(ZSORT h, int n) { if (hp->file[n].fp == NULL) hp->file[n].fp = tmpfile(); else rewind(hp->file[n].fp); hp->file[n].count = 0; return hp->file[n].fp != NULL; } /*--------------------------------------------------------------------------- This function sorts the items in queues 0 and 1 and returns the queue number containing the result. If the comparison function returns ZCOMPABORT, this function returns a negative number. ---------------------------------------------------------------------------*/ static int sort_queues(ZSORT h) { int source = 0; while (hp->queue[source+1].count != 0) { int destination = source^2; hp->queue[destination].count = hp->queue[destination+1].count = 0; hp->queue[destination].first = hp->queue[destination+1].first = NULL; while (true) { bool end_of_block[2]; end_of_block[0] = hp->queue[source].count == 0; end_of_block[1] = hp->queue[source+1].count == 0; if (end_of_block[0] && end_of_block[1]) break; while (!end_of_block[0] || !end_of_block[1]) { int move; if (end_of_block[0]) move = 1; else if (end_of_block[1]) move = 0; else { int n = (*hp->compare)(hp->queue[source].first->item, hp->queue[source+1].first->item, hp->pointer); #ifdef ZCOMPABORT if (n == ZCOMPABORT) return -1; #endif #ifdef ZCOMPDUPP if (n == ZCOMPDUPP || n == ZCOMPDUPQ) { struct item *next = hp->queue[source+1].first->next; int discard; move = n == ZCOMPDUPP ? 0 : 1; discard = move^1; if (hp->queue[source+discard].first->end_of_block) end_of_block[discard] = true; hp->queue[source+discard].first->next = hp->empty; hp->empty = hp->queue[source+discard].first; hp->queue[source+discard].first = next; hp->queue[source+discard].count--; } else #endif move = n < 0 ? 0 : 1; } { struct item *q = hp->queue[source+move].first; if (q->end_of_block) end_of_block[move] = true; hp->queue[source+move].first = q->next; if (hp->queue[destination].first == NULL) hp->queue[destination].first = q; else hp->queue[destination].last->next = q; hp->queue[destination].last = q; hp->queue[source+move].count--; hp->queue[destination].count++; q->next = NULL; q->end_of_block = false; } } hp->queue[destination].last->end_of_block = true; destination ^= 1; } source = destination & 2; } return source; } /*--------------------------------------------------------------------------- This function writes a block from the specified queue to the destination file. It returns false if there is a file error. ---------------------------------------------------------------------------*/ static bool write_block(ZSORT h, int n) { FILE *fp = hp->file[hp->destination_file].fp; while (true) { struct item *q = hp->queue[n].first; if (q == NULL) break; if (fputc(0, fp) != 0 || fwrite(q->item, hp->size, 1, fp) != 1) { return false; } hp->queue[n].first = q->next; q->next = hp->empty; hp->empty = q; } hp->queue[n].count = 0; hp->file[hp->destination_file].count++; return fputc(1, fp) == 1; } int ZsortSubmit(ZSORT h, const void *p) { if (hp->retrieving) { int i; /* put existing items into the empty list */ for (i = 0; i < 4; i++) { if (hp->queue[i].first != NULL) { hp->queue[i].last->next = hp->empty; hp->empty = hp->queue[i].first; hp->queue[i].first = NULL; } } hp->queue[0].count = hp->queue[1].count = 0; hp->destination_file = -1; hp->retrieving = false; hp->files_used = false; } if (hp->queue[0].count + hp->queue[1].count >= hp->memory) { int n; if (!hp->files_used) { if (!open_file(h, 0) || !open_file(h, 1)) return ZSORTFILEERR; hp->files_used = true; hp->destination_file = 0; } n = sort_queues(h); if (n < 0) return ZSORTABORT; if (!write_block(h, n)) return ZSORTFILEERR; hp->destination_file ^= 1; } { struct item *q; struct memory_struct *z; q = new_item(h); if (q == NULL) return ZSORTMEMFAIL; q->end_of_block = true; memcpy(q->item, p, hp->size); /* put new item into shorter queue */ z = hp->queue[0].count <= hp->queue[1].count ? &hp->queue[0] : &hp->queue[1]; q->next = z->first; z->first = q; z->count++; } return 0; } /*--------------------------------------------------------------------------- This function reads an item from the specified file. If an end of block is encountered, it fills in only the end_of_block member. ---------------------------------------------------------------------------*/ static void read_item(ZSORT h, int n) { FILE *fp = hp->file[n].fp; if (fgetc(fp) != 0) hp->queue[n].first->end_of_block = true; else { hp->queue[n].first->end_of_block = false; fread(hp->queue[n].first->item, hp->size, 1, fp); } } int ZsortRetrieve(ZSORT h, void *p) { struct item *next; if (!hp->retrieving) { int n = sort_queues(h); if (n < 0) return ZSORTABORT; if (hp->files_used) { int i; int source; if (!write_block(h, n)) return ZSORTFILEERR; /* put one buffer into each queue for use with corresponding file */ for (i = 0; i < 4; i++) { hp->queue[i].first = hp->empty; hp->empty = hp->empty->next; hp->queue[i].first->next = NULL; } rewind(hp->file[0].fp); rewind(hp->file[1].fp); if (!open_file(h, 2) || !open_file(h, 3)) return ZSORTFILEERR; source = 0; while (hp->file[source].count > 1) { int destination = source^2; hp->file[destination].count = hp->file[destination+1].count = 0; while (hp->file[source].count > 0) { read_item(h, source); if (hp->file[source+1].count > 0) read_item(h, source+1); else hp->queue[source+1].first->end_of_block = true; while (!hp->queue[source].first->end_of_block || !hp->queue[source+1].first->end_of_block) { int move; if (hp->queue[source].first->end_of_block) move = 1; else if (hp->queue[source+1].first->end_of_block) move = 0; else { int n = (*hp->compare)(hp->queue[source].first->item, hp->queue[source+1].first->item, hp->pointer); #ifdef ZCOMPABORT if (n == ZCOMPABORT) return ZSORTABORT; #endif #ifdef ZCOMPDUPP if (n == ZCOMPDUPP || n == ZCOMPDUPQ) { move = n == ZCOMPDUPP ? 0 : 1; read_item(h, source+1-move); } else #endif move = n < 0 ? 0 : 1; } if (fputc(0, hp->file[destination].fp) != 0 || fwrite(hp->queue[source+move].first->item, hp->size, 1, hp->file[destination].fp) != 1) { return ZSORTFILEERR; } read_item(h, source+move); } hp->file[destination].count++; if (fputc(1, hp->file[destination].fp) != 1) { return ZSORTFILEERR; } hp->file[source].count--; hp->file[source+1].count--; destination ^= 1; } source = destination & 2; rewind(hp->file[0].fp); rewind(hp->file[1].fp); rewind(hp->file[2].fp); rewind(hp->file[3].fp); } hp->retrieval_queue_or_file = source; read_item(h, source); read_item(h, source+1); } else hp->retrieval_queue_or_file = n; hp->retrieving = true; } if (hp->files_used) { int move; int source = hp->retrieval_queue_or_file; if (hp->queue[source].first->end_of_block) { if (hp->queue[source+1].first->end_of_block) return ZSORTEND; move = 1; } else if (hp->queue[source+1].first->end_of_block) move = 0; else { int n = (*hp->compare)(hp->queue[source].first->item, hp->queue[source+1].first->item, hp->pointer); #ifdef ZCOMPABORT if (n == ZCOMPABORT) return ZSORTABORT; #endif #ifdef ZCOMPDUPP if (n == ZCOMPDUPP || n == ZCOMPDUPQ) { move = n == ZCOMPDUPP ? 0 : 1; read_item(h, source+1-move); } else #endif move = n < 0 ? 0 : 1; } memcpy(p, hp->queue[source+move].first->item, hp->size); read_item(h, source+move); return 0; } else { struct memory_struct *z = &hp->queue[hp->retrieval_queue_or_file]; if (z->first == NULL) return ZSORTEND; memcpy(p, z->first->item, hp->size); next = z->first->next; z->first->next = hp->empty; hp->empty = z->first; z->first = next; } return 0; } /* Test program begins here */ /*--------------------------------------------------------------------------- The test program reads all words from standard input, where a word is a series of letters, apostrophes and hyphens, beginning with a letter, preceded by a non-letter and followed by a character that is not a letter, apostrophe or hyphen. Each word is converted to all uppercase and is truncted to 15 characters if it is more than 15 characters long. Each word is accompanied by a count, which is initialized to 1. The words are sorted into alphabetical order, removing duplicates. When duplicates are removed, the count of the one removed is added to the count of the one retained. This produces an alphabetical list of words and the number of times each one appeared in the standard input. Finally, the list is again sorted into order of descending count. Words with equal counts are kept in alphabetical order. The result is a list of word counts, with the most frequently used words at the top of the list. ---------------------------------------------------------------------------*/ #include #define WORD_SIZE 15 #define MEMORY 10 struct item_tag { long count; char word[WORD_SIZE+1]; }; int compare_words(struct item_tag *p, struct item_tag *q, void *pointer) { int n = strcmp(p->word, q->word); if (n == 0) { p->count += q->count; return ZCOMPDUPP; } return n; } int compare_counts(struct item_tag *p, struct item_tag *q, void *pointer) { long n = q->count - p->count; return n == 0 ? strcmp(p->word, q->word) : n; } void main(void) { ZSORT h1, h2; int c; struct item_tag item; h1 = ZsortOpen(sizeof(item), (ZSORTCOMPARE) compare_words, MEMORY, NULL); c = getchar(); while (1) { int i = 0; while (c != EOF && !isalpha(c)) c = getchar(); if (c == EOF) break; while (isalpha(c) || c == '\'' || c == '-') { if (i < WORD_SIZE) item.word[i++] = toupper(c); c = getchar(); } item.word[i] = 0; item.count = 1; ZsortSubmit(h1, &item); } h2 = ZsortOpen(sizeof(item), (ZSORTCOMPARE) compare_counts, MEMORY, NULL); while (ZsortRetrieve(h1, &item) == 0) ZsortSubmit(h2, &item); ZsortClose(h1, 1); while (ZsortRetrieve(h2, &item) == 0) printf("%10d %s\n", item.count, item.word); ZsortClose(h2, 1); }