// // insane-british-anagram.cpp - Find words that have valid anagrams // Words sourced from Debian's british-english-insane dictionary // // heater - 2019-07-25 // #include #include #include #include #include #include std::string getFileContents(const char *filename) { std::ifstream in(filename, std::ios::in | std::ios::binary); if (in) { std::string contents; in.seekg(0, std::ios::end); contents.resize(in.tellg()); in.seekg(0, std::ios::beg); in.read(&contents[0], contents.size()); in.close(); return(contents); } throw(std::runtime_error("Shit happened!")); } // One prime number for each lower case letter of the alphabet int primes[26] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101}; uint64_t primeHash (char* word) { uint64_t hash = 1; for (size_t i = 0; i < strlen(word); i++) { int index = word[i] - 97; hash = hash * primes[index]; } return hash; } int main (int argc, char* argv[]) { // Map container for sets of anagrams // An anagram set is simply a vector of pointers to words in the dictionary // Keys for the anagram sets in the map are the ordered characters of the words. // std::unordered_map >anagramMap; // std::unordered_map>::iterator it; std::unordered_map >anagramMap; std::unordered_map>::iterator it; // An ordered index of anagram set keys std::vector index; auto dictionary = getFileContents("/usr/share/dict/british-english-insane"); char* dictionaryPtr = (char*)dictionary.c_str(); char* wordPtr = dictionaryPtr; char* charPtr = wordPtr; bool reject = false; while (1) { if (islower(*charPtr)) { // We are scanning a valid word charPtr++; } else if ((*charPtr) == '\n') { // We have hit the end of a word, use the word if it's valid *charPtr = 0; if (!reject) { // Do we have a word with this key (potential anagram)? uint64_t key = primeHash(wordPtr); it = anagramMap.find(key); if (it == anagramMap.end()) { // No: Add it to the map as start of new anagram set. anagramMap[key].push_back(wordPtr); // And add the new anagram set to index index.push_back(key); } else { // Yes: Append it to the existing anagram set. it->second.push_back(wordPtr); } } charPtr++; wordPtr = charPtr; reject = false; } else if ((*charPtr) != 0) { // Invalid character reject = true; charPtr++; } else { // End of dictionary break; } } // Iterate over the collected anagram sets in order of the index. for(auto const& key: index) { it = anagramMap.find(key); if (it->second.size() > 1) { int count = 0; for(const auto& value: it->second) { if (count == 1) { fputs(": ", stdout); } else if (count > 1) { fputs(", ", stdout); } fputs(value, stdout); count++; } fputs("\n", stdout); } } return (0); }