# How to Write a Spelling Corrector

# Spelling corrector in R
# Claudio Sacchettini
#
# translated from
# How to Write a Spelling Corrector (Peter Norvig)
# http://norvig.com/spell-correct.html


# Split a text into its lowercase words.
#
# text: a character string; only its first element is tokenised.
# Returns a character vector of the maximal runs of the letters a-z found in
# the lowercased text, in order of appearance (character(0) if there are none).
words <- function(text) {
  pieces <- strsplit(tolower(text), "[^a-z]+")[[1]]
  # strsplit() emits a leading "" when the text starts with a separator;
  # drop it so the empty string is never treated as a word.
  pieces[nzchar(pieces)]
}

# Count how many times each distinct value occurs in `features`.
#
# features: a vector of observations (here: the words of the corpus).
# Returns the per-value counts as produced by tapply(), named by the distinct
# values.
train <- function(features) {
  tapply(X = features, INDEX = features, FUN = length)
}

# Build the word-frequency table NWORDS from the training corpus "big.txt".
# readChar() is documented for binary-mode connections, so the file is opened
# with "rb". The requested length is the file size in bytes, which is an upper
# bound on the number of characters, so the whole corpus is read whatever its
# size. The connection is closed even if tokenising or counting fails.
con <- file("big.txt", "rb")
NWORDS <- tryCatch(
  train(words(readChar(con, file.size("big.txt")))),
  finally = close(con)
)

# The lowercase letters a-z as one string; used to generate candidate edits.
alphabet <- paste(letters, collapse = "")

# All strings that are one edit away from `word`.
#
# An edit is a deletion of one character, a transposition of two adjacent
# characters, a replacement of one character by a letter of `alphabet`, or an
# insertion of a letter of `alphabet` at any position.
#
# word: a single character string.
# Returns a character vector of the distinct edited strings. For a non-empty
# word this includes the word itself (replacing a letter by the same letter).
edits1 <- function(word) {
  n <- nchar(word)
  cuts <- 0:n
  # Every way of splitting the word into a head and a tail (n + 1 splits).
  heads <- substring(word, 1, cuts)
  tails <- substring(word, cuts + 1, n)
  chars <- unlist(strsplit(alphabet, NULL))
  k <- length(chars)

  # Splits whose tail has at least one / at least two characters.
  has_char <- nchar(tails) > 0
  has_pair <- nchar(tails) > 1

  deletes <- paste0(heads[has_char], substring(tails[has_char], 2))
  transposes <- paste0(
    heads[has_pair],
    substring(tails[has_pair], 2, 2),
    substring(tails[has_pair], 1, 1),
    substring(tails[has_pair], 3)
  )
  replaces <- paste0(
    rep(heads[has_char], each = k),
    rep(chars, times = sum(has_char)),
    rep(substring(tails[has_char], 2), each = k)
  )
  # There are n + 1 insertion points, so the alphabet is repeated n + 1 times;
  # this keeps the empty word working (its edits are the single letters).
  inserts <- paste0(
    rep(heads, each = k),
    rep(chars, times = n + 1),
    rep(tails, each = k)
  )
  unique(c(deletes, transposes, replaces, inserts))
}

# Dictionary words that are two edits away from `word`.
#
# word: a single character string.
# Returns the distinct strings produced by applying edits1() to every result
# of edits1(word), restricted to the names of NWORDS, in order of first
# appearance.
known_edits2 <- function(word) {
  vocabulary <- names(NWORDS)
  # Compute the first-level edits once, and filter each second-level batch
  # straight away so only dictionary words are accumulated.
  first_edits <- edits1(word)
  hits <- lapply(first_edits, function(e1) {
    second_edits <- edits1(e1)
    second_edits[second_edits %in% vocabulary]
  })
  unique(as.character(unlist(hits, use.names = FALSE)))
}

# Keep only the candidates that occur in the dictionary.
#
# words: a character vector of candidate strings.
# Returns the elements of `words` that are names of NWORDS, in their original
# order (character(0) when none match).
known <- function(words) {
  in_dictionary <- is.element(words, names(NWORDS))
  words[in_dictionary]
}

# Most probable spelling correction for `word`.
#
# Candidates are taken from the first non-empty tier: the word itself if it is
# in the dictionary, else dictionary words one edit away, else dictionary
# words two edits away. If no tier yields a candidate the word is returned
# unchanged; otherwise the candidate with the highest count in NWORDS wins.
#
# word: a single character string.
# Returns a single character string.
correction <- function(word) {
  # Each tier is evaluated at most once, and the costly two-edit search only
  # runs when the cheaper tiers found nothing.
  candidates <- known(word)
  if (length(candidates) == 0) {
    candidates <- known(edits1(word))
  }
  if (length(candidates) == 0) {
    candidates <- known_edits2(word)
  }
  if (length(candidates) == 0) {
    return(word)
  }
  if (length(candidates) == 1 && candidates[1] == word) {
    return(candidates)
  }
  names(which.max(NWORDS[names(NWORDS) %in% candidates]))
}