A Spelling Corrector in Prolog

A translation of the spell checker : http://www.norvig.com/spell-correct.html in Prolog:

Download: spell.pl . Computed probability model: probabilitymodel.pl

% words(+File,-Words)
%
%  reads file from the standard input and collects all the words (transforming them into lower case format)
%     Note: Words sequences of characters delimited by non-characters.
words(File,Words):-
  see(File),
  get_byte(Char), % reads the lookaH
  words_with_lookup(Char,Words),
  seen.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% train(ProbabilisticModel)
%
% trains a probability model for the set of words in the file 'big.txt'
train(ProbabilisticModel):-
  words('big.txt',Words),
  recursive_train(Words,[],ProbabilisticModel).

% recursive_train(+Words,+PartialProbabilisticModel,+ProbabilisticModel)
%
%  constructs the probability model as facts in the database:
%   probabilityModel(Word,Count)
recursive_train([],ProbabilisticModel,ProbabilisticModel):-
  !.
recursive_train([Word|T],PartialProbabilisticModel,ProbabilisticModel):-
  member(probabilityModel(Word,Count),PartialProbabilisticModel,RestProbabilisticModel),
  !,
  NewCount is Count+1,
  recursive_train(T,[probabilityModel(Word,NewCount)|RestProbabilisticModel],ProbabilisticModel).
recursive_train([Word|T],PartialProbabilityModel,ProbabilityModel):-
  recursive_train(T,[probabilityModel(Word,1)|PartialProbabilityModel],ProbabilityModel).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% edits1(+Word,-Set)
%
% computes the set of all words that are one edit away from Word
edits1(Word,Set):-
  deletion1(Word,DelSet),
  transposition1(Word,TransSet),
  append(DelSet,TransSet,Temp1),
  alteration1(Word,AltSet),
  insertion1(Word,InsSet),
  append(AltSet,InsSet,Temp2),
  append(Temp1,Temp2,Set).

% deletion1(+Word,-DelSet)
%
%  computes the set of all words that are one delete away from Word
deletion1(Word,DelSet):-
  findall(RestWord,member(_,Word,RestWord),DelSet).

% transposition1(+Word,-TransSet)
%
%  computes the set of all words that are one transposition away from Word
%  uses difference lists
transposition1(Word,TransSet):-
  findall(Transposition,transposition(Word,Transposition),TransSet).

% transposition(+Word,-Transpozition)
%
%  computes a transposition of a word
transposition([],[]).
transposition([H],[H]).
transposition([H1,H2|T],[H2,H1|T]).
transposition([H|T],[H|T2]):-
  transposition(T,T2).

% alteration1(+Word,-AltSet)
%
%  computes the set of all words that are one alteration away from Word
alteration1(Word,AltSet):-
  findall(Alternation,alteration(Word,Alternation),AltSet).

% alteration(+Word,-Alternation)
%
%  computes an alteration of a word
alteration([_|T],[C|T]):-
  character(C).
alteration([H|T1],[H|T2]):-
  alteration(T1,T2).

% insertion1(+Word,-InsSet)
%
%  computes the set of all words that are one insert away from Word
insertion1(Word,InsSet):-
  findall(Insertion,insertion(Word,Insertion),InsSet).

% insertion(+Word,-Insertion)
%
%  computes an insertion of a word
insertion(L,[C|L]):-
  character(C).
insertion([H|T1],[H|T2]):-
  insertion(T1,T2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% edits2(+Word,-Set)
%
% computes the set of all words that are two edit away from Word
edits2(Word,Set):-
  edits1(Word,E1Set),
  edits2_iterate(E1Set,[],Set).
% edits2_iterate(E1Set,PartialSet,Set)
edits2_iterate([],Set,Set):-
  !.
edits2_iterate([H|T],PartialSet,Set):-
  edits1(H,HE1Set),
  append(HE1Set,PartialSet,NewPartialSet),
  edits2_iterate(T,NewPartialSet,Set),
  !.

% known_edits2(+Word,-Set)
%
% computes the set of known all words that are two edit away from Word
known_edits2(Word,Set):-
  edits1(Word,E1Set),
  known_edits2_iterate(E1Set,[],Set).
% known_edits2_iterate(E1Set,PartialSet,Set)
known_edits2_iterate([],Set,Set):-
  !.
known_edits2_iterate([H|T],PartialSet,Set):-
  edits1(H,HE1Set),
  known(HE1Set,KnownHE1Set),
  append(KnownHE1Set,PartialSet,NewPartialSet),
  known_edits2_iterate(T,NewPartialSet,Set),
  !.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% known(+Words,-KnownWords)
%
% computes the set of known words
known([],[]).
known([H|T],[H|T2]):-
  probabilityModel(H,_),
  !,
  known(T,T2).
known([_H|T],T2):-
  known(T,T2).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% correct(+Word,-CorrectWord)
%
%  finds the correct word for the variable Word
correct(Word,Word):-
  known([Word],[Word]),
  !.
correct(Word,CorrectWord):-
  edits1(Word,E1Set),
  known(E1Set,KE1Set),
  KE1Set\=[],
  !,
  max(KE1Set,CorrectWord).

Examples:

17 ?- correct("surfac",S),writeString(S).
surface
S = [115, 117, 114, 102, 97, 99, 101]
18 ?- correct("stuc",S),writeString(S).
stuck
S = [115, 116, 117, 99, 107]
Paul Fodor , Stony Brook University, Date: 09/10/2007