/* hanagram.c -- Find anagrams Written July 31, 2019 by Eric Olson This program demonstrates using letter counts and a heap to find anagrams in the insane British dictionary. */ #include #include #include #include #include #include #include #include #include #include static char *words,*cursor; static char *skipword(){ for(;*cursor;cursor++){ if(*cursor=='\n') return ++cursor; } return 0; } static char *getword(){ do { char *r=cursor; while(islower(*cursor)) cursor++; if(*cursor=='\n'){ *cursor++=0; return r; } } while(skipword()); return 0; } typedef struct { char v[27]; } keytype; typedef struct anatype { char *word; struct anatype *next; keytype key; } anatype; static keytype strkey(char *p){ keytype r; memset(&r,'0',26*sizeof(char)); r.v[26]=0; for(;*p;p++) r.v[*p-'a']++; return r; } static int anacmp(anatype *ap,anatype *bp){ int r=strcmp(ap->key.v,bp->key.v); if(r) return r; return strcmp(ap->word,bp->word); } static anatype **hp; int hpind=0; static void anaput(anatype *ap){ int r=++hpind,s; for(hp[r]=ap;(s=r/2);r=s){ if(anacmp(hp[r],hp[s])>=0) return; anatype *t=hp[s]; hp[s]=hp[r]; hp[r]=t; } } static anatype *anaget(void){ if(!hpind) return 0; anatype *ap=hp[1]; hp[1]=hp[hpind--]; for(int s=1;;){ int r0=2*s; if(r0>hpind) return ap; int r1=r0+1; if(r1<=hpind && anacmp(hp[r0],hp[r1])>=0){ if(anacmp(hp[r1],hp[s])>=0) return ap; anatype *t=hp[r1]; hp[r1]=hp[s]; hp[s]=t; s=r1; } else { if(anacmp(hp[r0],hp[s])>=0) return ap; anatype *t=hp[r0]; hp[r0]=hp[s]; hp[s]=t; s=r0; } } } static anatype *an; int anmax=0; int main(){ int fd=open("/usr/share/dict/british-english-insane",O_RDONLY); struct stat fdstat; fstat(fd,&fdstat); words=malloc(fdstat.st_size+1); read(fd,words,fdstat.st_size); words[fdstat.st_size]=0; close(fd); int lines=2; for(char *p=words;*p;p++) if(*p=='\n') lines++; an=malloc(lines*sizeof(anatype)); hp=malloc(lines*sizeof(anatype *)); hp--; char *wp; for(cursor=words;(wp=getword());anmax++){ an[anmax].word=wp; an[anmax].next=0; an[anmax].key=strkey(wp); anaput(an+anmax); } for(anatype *ap=anaget();ap;){ anatype *op=ap,*bp=ap; for(;(ap=anaget());bp=ap){ if(strcmp(ap->key.v,op->key.v)) break; bp->next=ap; } if(op!=bp) bp->next=op; } for(int i=0;inext) { fputs(s,stdout); s=", "; fputs(bp->word,stdout); anatype *op=bp; bp=bp->next; op->next=0; } fputc('\n',stdout); } } return 0; }