// // insane-british-anagram-gmp.cpp - Find words that have valid anagrams // Words sourced from Debian's british-english-insane dictionary // // heater - 2019-07-28 // #include #include #include #include #include #include #include #include #include // GMP does not have a hash function so we create one for it. struct GMPHash { std::size_t operator()(const mpz_class& k) const { const std::string s = k.get_str (10); return std::hash()(s); } }; std::string getFileContents(const char *filename) { std::ifstream in(filename, std::ios::in | std::ios::binary); if (in) { std::string contents; in.seekg(0, std::ios::end); contents.resize(in.tellg()); in.seekg(0, std::ios::beg); in.read(&contents[0], contents.size()); in.close(); return(contents); } throw(std::runtime_error("Shit happened!")); } mpz_class primeHash (const char* const word) { // One prime number for each lower case letter of the alphabet std::vector primes { 7, 61, 29, 41, 2, 67, 53, 47, 3, 101, 73, 23, 43, 11, 13, 37, 97, 17, 5, 19, 31, 71, 79, 83, 59, 89 }; mpz_class hash(1); const size_t len = strlen(word); for (size_t i = 0; i < len; i++) { int index = word[i] - 97; hash = hash * primes[index]; } return hash; } int main (int argc, char* argv[]) { // Map container for sets of anagrams // An anagram set is simply a vector of pointers to words in the dictionary // Keys for the anagram sets in the map are the ordered characters of the words. std::unordered_map, GMPHash> anagramMap; // An ordered index of anagram set keys std::vector index; auto dictionary = getFileContents("/usr/share/dict/british-english-insane"); char* dictionaryPtr = (char*)dictionary.c_str(); char* wordPtr = dictionaryPtr; char* charPtr = wordPtr; bool reject = false; while (1) { if (islower(*charPtr)) { // We are scanning a valid word charPtr++; } else if ((*charPtr) == '\n') { // We have hit the end of a word, use the word if it's valid *charPtr = 0; if (!reject) { // Do we have a word with this key (potential anagram)? mpz_class key = primeHash(wordPtr); auto it = anagramMap.find(key); if (it == anagramMap.end()) { // No: Add it to the map as start of new anagram set. anagramMap[key].push_back(wordPtr); // And add the new anagram set to index index.push_back(key); } else { // Yes: Append it to the existing anagram set. it->second.push_back(wordPtr); } } charPtr++; wordPtr = charPtr; reject = false; } else if ((*charPtr) != 0) { // Invalid character reject = true; charPtr++; } else { // End of dictionary break; } } // Iterate over the collected anagram sets in order of the index. for(auto const& key: index) { auto it = anagramMap.find(key); if (it->second.size() > 1) { int count = 0; for(const auto& value: it->second) { if (count == 1) { fputs(": ", stdout); } else if (count > 1) { fputs(", ", stdout); } fputs(value, stdout); count++; } fputs("\n", stdout); } } return (0); }