#include #include #include #include #include #include #include typedef struct WordHash { uint32_t ordinals; uint64_t distributions; } WordHash; typedef struct WordList { char* word; uint32_t next; } WordList; typedef struct AnagramTree { WordHash hash; uint32_t lower; uint32_t higher; uint32_t firstWord; uint32_t lastWord; } AnagramTree; typedef struct Anagrams { AnagramTree* tree; uint32_t treeSize; uint32_t treeIncreaseRate; uint32_t nextFreeTree; WordList* wordList; uint32_t wordListSize; uint32_t wordListIncreaseRate; uint32_t nextFreeWord; } Anagrams; void condenseHash(WordHash* hash, uint8_t distrib[]) { uint64_t distributions = 0; uint32_t ordinals = hash->ordinals; for (uint8_t *d = &distrib[0]; ordinals; d++, ordinals >>= 1) { if (ordinals & 1) { distributions = (distributions << 4) | *d; } } hash->distributions = distributions; } long readWordFile(const char* fileName, char** words) { *words = NULL; FILE* wordFile = fopen(fileName, "r"); if (!wordFile) { printf("Error opening file \"%s\"\n", fileName); return 0; } fseek(wordFile, 0, SEEK_END); long fileSize = ftell(wordFile); if (fileSize < 1) { printf("Error reading file \"%s\"\n", fileName); fclose(wordFile); return 0; } rewind(wordFile); char *newWords = malloc(fileSize + 1); if (!newWords) { printf("Error allocating memory\n"); return 0; } // zoopathy: 308c081, 1121111 // zootypic: 308c104, 1121111 size_t size = fread(newWords, 1, fileSize, wordFile); newWords[fileSize] = '\0'; // Make sure last word is terminated if ((long)size != fileSize || ferror(wordFile)) { free(newWords); newWords = NULL; printf("Error reading file \"%s\"\n", fileName); fileSize = 0; } fclose(wordFile); *words = newWords; return fileSize + 1; } uint32_t insertAnagram(Anagrams* anagrams, WordHash* hash, uint32_t word) { uint32_t position = anagrams->nextFreeTree; if (position == anagrams->treeSize) { do { uint32_t newSize = anagrams->treeSize + anagrams->treeIncreaseRate; AnagramTree* newTree = realloc(anagrams->tree, newSize * sizeof *newTree); if (newTree) { anagrams->tree = newTree; anagrams->treeSize = newSize; break; } anagrams->treeIncreaseRate >>= 1; } while (anagrams->treeIncreaseRate); if (anagrams->treeIncreaseRate == 0) { printf("Error allocating more memory\n"); exit(EXIT_FAILURE); } } #if defined(DEBUG) printf("DEBUG: inserting %s (%X, %llX)\n", anagrams->wordList[word].word, hash->ordinals, hash->distributions); #endif AnagramTree* newTree = &anagrams->tree[position]; newTree->hash = *hash; newTree->lower = 0; newTree->higher = 0; newTree->firstWord = word; newTree->lastWord = word; anagrams->nextFreeTree++; return position; } void addAnagram(Anagrams* anagrams, WordHash* hash, char* word) { uint32_t wordIndex = anagrams->nextFreeWord; if (wordIndex == anagrams->wordListSize) { do { uint32_t newSize = anagrams->wordListSize + anagrams->wordListIncreaseRate; WordList* newList = realloc(anagrams->wordList, newSize * sizeof *newList); if (newList) { anagrams->wordList = newList; anagrams->wordListSize = newSize; break; } anagrams->wordListIncreaseRate >>= 1; } while (anagrams->wordListIncreaseRate); if (anagrams->wordListIncreaseRate == 0) { printf("Error allocating more memory\n"); exit(EXIT_FAILURE); } } WordList* wordEntry = &anagrams->wordList[wordIndex]; wordEntry->word = word; wordEntry->next = 0; anagrams->nextFreeWord++; if (anagrams->nextFreeTree == 1) { insertAnagram(anagrams, hash, wordIndex); return; } AnagramTree* tree = anagrams->tree; uint32_t position = 1; while (hash->ordinals != tree[position].hash.ordinals || hash->distributions != tree[position].hash.distributions) { if (hash->ordinals < tree[position].hash.ordinals) { uint32_t newPosition = tree[position].lower; if (newPosition == 0) { uint32_t node = insertAnagram(anagrams, hash, wordIndex); tree[position].lower = node; return; } position = newPosition; } else if (hash->ordinals > tree[position].hash.ordinals) { uint32_t newPosition = tree[position].higher; if (newPosition == 0) { uint32_t node = insertAnagram(anagrams, hash, wordIndex); tree[position].higher = node; return; } position = newPosition; } else { if (hash->distributions < tree[position].hash.distributions) { uint32_t newPosition = tree[position].lower; if (newPosition == 0) { uint32_t node = insertAnagram(anagrams, hash, wordIndex); tree[position].lower = node; return; } position = newPosition; } else { uint32_t newPosition = tree[position].higher; if (newPosition == 0) { uint32_t node = insertAnagram(anagrams, hash, wordIndex); tree[position].higher = node; return; } position = newPosition; } } } uint32_t lastWord = tree[position].lastWord; #if defined(DEBUG) printf("DEBUG: adding %s after %s\n", anagrams->wordList[wordIndex].word, anagrams->wordList[lastWord].word); #endif anagrams->wordList[lastWord].next = wordIndex; tree[position].lastWord = wordIndex; } void buildAnagramList(Anagrams* anagrams, char* dictionary, long dictionarySize) { for (int32_t position = 0; position < dictionarySize; position++) { char letter = dictionary[position]; if (letter < 'a' || letter > 'z') { while (dictionary[position] != '\n') { position++; } continue; } int32_t currentWord = position; WordHash hash; hash.ordinals = 0; hash.distributions = 0; uint8_t distribution[26]; memset(&distribution[0], 0, sizeof distribution); for (;;letter = dictionary[++position]) { if (letter >= 'a' && letter <= 'z') { int ordinal = letter - 'a'; hash.ordinals |= 1 << ordinal; distribution[ordinal]++; continue; } if (letter == '\n') { dictionary[position] = '\0'; condenseHash(&hash, &distribution[0]); addAnagram(anagrams, &hash, &dictionary[currentWord]); } else { while (letter != '\n') { position++; letter = dictionary[position]; } } break; } } } void displayAnagrams(Anagrams* anagrams) { WordList* words = anagrams->wordList; for (uint32_t index = 1; index < anagrams->nextFreeTree; index++) { AnagramTree* tree = &anagrams->tree[index]; if (tree->firstWord != tree->lastWord) { uint32_t index = tree->firstWord; WordList* word = &words[index]; fputs(word->word, stdout); fputs(": ", stdout); word = &words[word->next]; fputs(word->word, stdout); while (word->next) { word = &words[word->next]; fputs(", ", stdout); fputs(word->word, stdout); } fputs("\n", stdout); } } } int main(int argc, char *argv[]) { const char* file; if (argc >= 2) { file = argv[1]; } else { file = "/usr/share/dict/british-english-insane"; } char* dictionary = NULL; long fileSize = readWordFile(file, &dictionary); if (!fileSize) { exit(EXIT_FAILURE); } AnagramTree* initialTree = malloc(399360 * sizeof *initialTree); if (!initialTree) { printf("Error allocating memory\n"); exit(EXIT_FAILURE); } WordList* initialWordList = malloc(430080 * sizeof *initialWordList); if (!initialWordList) { printf("Error allocating memory\n"); exit(EXIT_FAILURE); } Anagrams anagrams = { .tree = initialTree, .treeSize = 399360, .treeIncreaseRate = 65536, .nextFreeTree = 1, .wordList = initialWordList, .wordListSize = 430080, .wordListIncreaseRate = 65536, .nextFreeWord = 1 }; buildAnagramList(&anagrams, dictionary, fileSize); displayAnagrams(&anagrams); free(anagrams.tree); free(anagrams.wordList); free(dictionary); exit(EXIT_SUCCESS); }