%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % Spell checker % % Author: Paul Fodor (original Python version by: Peter Norvig (http://www.norvig.com/spell-correct.html)) %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% :-dynamic(probabilityModel/2). ?- load_dyn(probabilityModel). % words(+File,-Words) % % reads file from the standard input and collects all the words (transforming them into lower case format) % Note: Words sequences of characters delimited by non-characters. words(File,Words):- see(File), get_code(Char), % reads the lookaH words_with_lookup(Char,Words), seen. % words_with_lookup(+Character,-Words) % EOF ends the input. words_with_lookup(-1,[]):- !. % A letter starts a word words_with_lookup(Char,[Word|Words]):- ( (Char>64,Char<91); % A-Z (Char>96,Char<123) % a-z ), !, read_word(Char,Word,NextLookup), % get the word words_with_lookup(NextLookup,Words). % Everything else is ignored words_with_lookup(_,Words):- !, get_code(Char), words_with_lookup(Char, Words). % read_word(+Char,-Word,-NextLookup) % % reads a list of characters from standard input % A letter is added to the list read_word(Char,[LowerChar|Chars],Last):- ( (Char>64,Char<91,LowerChar is Char+32); % A-Z (Char>96,Char<123,LowerChar is Char) % a-z ), !, get_code(NextLookup), read_word(NextLookup,Chars,Last). % A non-character ends a word read_word(NC,[],NC). % train(ProbabilisticModel) % % trains a probability model for the set of words in the file 'big.txt' train(ProbabilisticModel):- words('big.txt',Words), recursive_train(Words,[],ProbabilisticModel). % recursive_train(+Words,+PartialProbabilisticModel,+ProbabilisticModel) % % constructs the probability model as facts in the database: % probabilityModel(Word,Count) recursive_train([],ProbabilisticModel,ProbabilisticModel):- !. recursive_train([Word|T],PartialProbabilisticModel,ProbabilisticModel):- member(probabilityModel(Word,Count),PartialProbabilisticModel,RestProbabilisticModel), !, NewCount is Count+1, recursive_train(T,[probabilityModel(Word,NewCount)|RestProbabilisticModel],ProbabilisticModel). recursive_train([Word|T],PartialProbabilityModel,ProbabilityModel):- recursive_train(T,[probabilityModel(Word,1)|PartialProbabilityModel],ProbabilityModel). % outputProbabilityModel(+ProbabilityModel) % % writes the probability model into a file, so we don't have to compute it every time % Example: train(ProbabilityModel),outputProbabilityModel(ProbabilityModel). outputProbabilityModel(ProbabilityModel):- tell('probabilitymodel.pl'), outputProbabilityModelWrite(ProbabilityModel), told. outputProbabilityModelWrite([]):- !. outputProbabilityModelWrite([probabilityModel(Word,Count)|T]):- write('probabilityModel(['), writeList(Word), write('],'), write(Count), write(').'), nl, outputProbabilityModelWrite(T). % writeList(L) % % writes a list writeList([]):- !. writeList([H]):- !, write(H). writeList([H|T]):- !, write(H), write(','), writeList(T). % member(+X,+L,-RestL) % % checks is an element X is a member of a list L and returns the rest of the list L member(H,[H|T],T). member(H1,[H2|T],[H2|T2]):- member(H1,T,T2). % edits1(+Word,-Set) % % computes the set of all words that are one edit away from Word edits1(Word,Set):- deletion1(Word,DelSet), transposition1(Word,TransSet), append(DelSet,TransSet,Temp1), alteration1(Word,AltSet), insertion1(Word,InsSet), append(AltSet,InsSet,Temp2), append(Temp1,Temp2,Set). % deletion1(+Word,-DelSet) % % computes the set of all words that are one delete away from Word deletion1(Word,DelSet):- findall(RestWord,member(_,Word,RestWord),DelSet). % transposition1(+Word,-TransSet) % % computes the set of all words that are one transposition away from Word % uses difference lists transposition1(Word,TransSet):- findall(Transposition,transposition(Word,Transposition),TransSet). % transposition(+Word,-Transpozition) % % computes a transposition of a word transposition([],[]). transposition([H],[H]). transposition([H1,H2|T],[H2,H1|T]). transposition([H|T],[H|T2]):- transposition(T,T2). % alteration1(+Word,-AltSet) % % computes the set of all words that are one alteration away from Word alteration1(Word,AltSet):- findall(Alternation,alteration(Word,Alternation),AltSet). % alteration(+Word,-Alternation) % % computes an alteration of a word alteration([_|T],[C|T]):- character(C). alteration([H|T1],[H|T2]):- alteration(T1,T2). % character(-C) % % returns a character in the alphabet character(C):- member(C,[97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122]). % insertion1(+Word,-InsSet) % % computes the set of all words that are one insert away from Word insertion1(Word,InsSet):- findall(Insertion,insertion(Word,Insertion),InsSet). % insertion(+Word,-Insertion) % % computes an insertion of a word insertion(L,[C|L]):- character(C). insertion([H|T1],[H|T2]):- insertion(T1,T2). % edits2(+Word,-Set) % % computes the set of all words that are two edit away from Word edits2(Word,Set):- edits1(Word,E1Set), edits2_iterate(E1Set,[],Set). % edits2_iterate(E1Set,PartialSet,Set) edits2_iterate([],Set,Set):- !. edits2_iterate([H|T],PartialSet,Set):- edits1(H,HE1Set), append(HE1Set,PartialSet,NewPartialSet), edits2_iterate(T,NewPartialSet,Set), !. % known_edits2(+Word,-Set) % % computes the set of known all words that are two edit away from Word known_edits2(Word,Set):- edits1(Word,E1Set), known_edits2_iterate(E1Set,[],Set). % known_edits2_iterate(E1Set,PartialSet,Set) known_edits2_iterate([],Set,Set):- !. known_edits2_iterate([H|T],PartialSet,Set):- edits1(H,HE1Set), known(HE1Set,KnownHE1Set), append(KnownHE1Set,PartialSet,NewPartialSet), known_edits2_iterate(T,NewPartialSet,Set), !. % known(+Words,-KnownWords) % % computes the set of known words known([],[]). known([H|T],[H|T2]):- probabilityModel(H,_), !, known(T,T2). known([_H|T],T2):- known(T,T2). % correct(+Word,-CorrectWord) % % finds the correct word for the variable Word correct(Word,Word):- known([Word],[Word]), !. correct(Word,CorrectWord):- edits1(Word,E1Set), known(E1Set,KE1Set), KE1Set\=[], !, max(KE1Set,CorrectWord). correct(Word,CorrectWord):- known_edits2(Word,E2Set), E2Set\=[], max(E2Set,CorrectWord). % max(+Set,-CorrectWord) % % selects the maximum from the probability model max(Set,CorrectWord):- max(Set,0,[],CorrectWord). % max(+Set,+TempMax,+TempCorrectWord,-CorrectWord) max([],_TempMax,CorrectWord,CorrectWord):- !. max([H|T],TempMax,_TempCorrectWord,CorrectWord):- probabilityModel(H,N), N>TempMax, !, max(T,N,H,CorrectWord). max([_H|T],TempMax,TempCorrectWord,CorrectWord):- max(T,TempMax,TempCorrectWord,CorrectWord). append([],L,L). append([X|L1],L2,[X|L12]):- append(L1,L2,L12). writeString(L):- atom_codes(A,L), write(A). member(H,[H|_T]). member(H,[_H2|T]):- member(H,T).