Word Prediction using a Shiny Webapp

Feng Li
2 min read · May 19, 2022

--

Toronto, ON, Sep 6 2021

We have built bigram and trigram models (stored as data frames) in this post, so we can now predict the next word based on the user's input. This simple web app is built with the Shiny framework.

Front end:

library(shiny)

# Define the UI: a sidebar holding a short readme, a free-text input and a
# "Predict" button, and a main panel that displays the predicted next word.
shinyUI(fluidPage(

  # Application title
  titlePanel("Words Prediction!"),

  # Sidebar with usage notes and the text input
  sidebarLayout(
    sidebarPanel(
      h3("Brief Readme"),
      tags$div(class="header", checked=NA,
        # The help text is assembled with paste() only to keep source lines
        # short; HTML collapses whitespace, so it renders as one paragraph.
        tags$p(paste(
          "Input text in below text field and click \"Predict\" button or",
          "press \"Enter\", the most likely next word",
          "will be displayed at right panel. For example, input \"happy\",",
          "\"birthday\" will be",
          "predicted. Input \"let us\", \"know\" will be predicted."
        ))
      ),
      h3("Input Text (English)"),
      # "inputWords" is the id the server reads via input$inputWords
      textInput("inputWords",
                "Please input:"),
      # submitButton() holds back input updates until the button is pressed
      submitButton("Predict")
    ),

    # Main panel: shows the text the server publishes as output$value
    mainPanel(
      textOutput("value")
    )
  )
))

Server side:

library(shiny)
library(stringr)
library(stringi)
library(tm)
# Load the pre-built n-gram lookup tables once, at app start-up.
# Both data frames are queried through their "X" (prefix) and "Y" (next word)
# columns by the server logic below.
#
# The data frames are deliberately NOT attach()ed: every lookup names its data
# frame explicitly, and because both tables share the column names X and Y,
# attaching both would only make one silently mask the other on the search
# path.
trigram.df <- readRDS(file="./data/trigram.RDS")
bigram.df <- readRDS(file="./data/bigram.RDS")

# Normalise raw user input before lookup: lower-case it, drop punctuation and
# digits, turn any remaining non-alphanumeric character into a space, collapse
# runs of whitespace and trim both ends, so words are separated by exactly one
# space and there is no leading/trailing blank.
cleanInputText <- function(rawText) {
  cleaned <- tolower(rawText)
  cleaned <- removePunctuation(cleaned)
  cleaned <- removeNumbers(cleaned)
  cleaned <- str_replace_all(cleaned, "[^[:alnum:]]", " ")
  trimws(stripWhitespace(cleaned))
}

# Return the "Y" value of the first row of `ngram.df` whose "X" column equals
# `prefix`, as a character string, or NA_character_ when no row matches.
lookupNextWord <- function(ngram.df, prefix) {
  # match() gives the index of the first hit and also handles factor columns
  idx <- match(prefix, ngram.df[["X"]])
  if (is.na(idx)) {
    return(NA_character_)
  }
  as.character(ngram.df[["Y"]][idx])
}

# Define server logic: predict the next word for the text typed by the user.
shinyServer(function(input, output) {

  # Reactive prediction. Strategy:
  #   * two or more words: look up the last two words in trigram.df; if that
  #     gives nothing, fall back to the last word in bigram.df;
  #   * one word: look it up in bigram.df;
  #   * no usable words (empty or missing input): "NA".
  # Always yields a single string; "NA" is shown when nothing can be predicted.
  wordPredict <- reactive({
    rawText <- input$inputWords

    # Guard against a missing input so the conditions below never see a
    # zero-length or NA value.
    if (is.null(rawText) || length(rawText) != 1 || is.na(rawText)) {
      words <- character(0)
    } else {
      words <- strsplit(cleanInputText(rawText), " ", fixed = TRUE)[[1]]
    }
    wordCount <- length(words)

    if (wordCount == 0) {
      "NA"
    } else {
      prediction <- NA_character_

      if (wordCount >= 2) {
        lastTwo <- paste(words[wordCount - 1], words[wordCount], sep = " ")
        prediction <- lookupNextWord(trigram.df, lastTwo)
      }

      # Back off to the bigram table when the trigram lookup found nothing
      # (or when only one word was typed).
      if (is.na(prediction)) {
        prediction <- lookupNextWord(bigram.df, words[wordCount])
      }

      if (is.na(prediction)) "NA" else prediction
    }
  })

  # Publish the prediction to the textOutput("value") element of the UI.
  output$value <- renderText({
    wordPredict()
  })

})

The UI looks like the following:

Happy Reading!

--

--

Feng Li

Software Engineer, playing with Snowflake, AWS and Azure. Snowflake Data Superhero. Jogger, Hiker.