R Solution for Excel Puzzles

Numbers around us
Numbers around us
Published in
6 min readNov 27, 2023

Week 47 — Puzzles no. 329–333

Puzzles

Author: ExcelBI

Puzzles:
# 329: content file
# 330: content file
# 331: content file
# 332: content file
# 333: content file

Lets dive into solutions!

Puzzle 329

In this puzzle we were asked to generate first 50 elements of Iccanobif sequence. Wait what? It is Fibonacci backwords… and that is exactly our goal to generate Fibonacci like sequence with one constraint: next element is a sum of previous two but with digits in reversed order.
As it looks hard, it really isn’t. And this solution is so short in base R, that translating it to another syntax would be just over complicating.

Loading data and libraries

library(tidyverse)
library(readxl)
library(stringi)

test = read_excel("Iccanobif Numbers.xlsx", range = "A2:A51", col_names = FALSE) %>% pull()

Approach: Base R

reverse_digits <- function(n) {
as.numeric(stri_reverse(as.character(n)))
}

generate_iccanobif <- function(N) {
iccanobif <- c(0, 1)
for (i in 3:N) {
next_term <- reverse_digits(iccanobif[i - 1]) + reverse_digits(iccanobif[i - 2])
iccanobif <- c(iccanobif, next_term)
}
iccanobif
}

iccanobif_50 <- generate_iccanobif(50)

Validation

identical(iccanobif_50, test)
#> [1] TRUE

Puzzle 330

In puzzle #330 we have sentences in table and our goal is to return matrix populated rowwise containing only words that were longer than average word in original sentence.

Load data and libraries

library(tidyverse)
library(tidytext)
library(readxl)
library(data.table)

input = read_excel("Average Word Length.xlsx", range= "A1:A10")
test = read_excel("Average Word Length.xlsx", range = "B2:D5", col_names = F)
colnames(test) = c("1", "2", "3")

Approach 1: tidyverse

result = input %>%
mutate(number = row_number()) %>%
unnest_tokens(word, Books) %>%
group_by(number) %>%
mutate(word_len = nchar(word)) %>%
reframe(number, word, word_len, avg_len = mean(word_len)) %>%
ungroup() %>%
filter(word_len > avg_len) %>%
select(word) %>%
mutate(group = rep(1:4, each = 3)) %>%
group_by(group) %>%
mutate(row = row_number()) %>%
pivot_wider(names_from = row, values_from = word) %>%
ungroup() %>%
select(-group) %>%
mutate(across(everything(), ~ str_to_title(.x)))

Approach 2: data.table

input_dt <- setDT(copy(input))

input_dt[, number := .I]
input_dt_long <- input_dt[, .(word = unlist(strsplit(Books, " "))), by = number]
input_dt_long[, word_len := nchar(word)]
input_dt_long[, avg_len := mean(word_len), by = number]

input_dt_filtered <- input_dt_long[word_len > avg_len, .(word, number)]
input_dt_filtered[, group := rep(1:4, each = 3, length.out = .N)]
input_dt_filtered[, row := rep(1:3, times = 4, length.out = .N)]
wider_dt <- dcast(input_dt_filtered, group ~ row, value.var = "word")
setnames(wider_dt, old = names(wider_dt), new = c("group", "1", "2", "3"))

wider_dt[, group := NULL]

Approach 3: base R

input_base <- input
input_base$number = seq_along(input_base$Books)
words_list_base = strsplit(input_base$Books, " ")
words_df_base = setNames(data.frame(do.call(rbind, lapply(words_list_base, function(x) data.frame(word = x))), row.names = NULL), c("word"))
words_df_base$number = rep(input_base$number, sapply(words_list_base, length))
words_df_base$word_len = nchar(as.character(words_df_base$word))
words_df_base$avg_len = ave(words_df_base$word_len, words_df_base$number, FUN = mean)
filtered_df_base = subset(words_df_base, word_len > avg_len, select = c("word", "number"))
filtered_df_base$group = rep(1:4, each = 3, length.out = nrow(filtered_df_base))
mat = matrix(filtered_df_base$word, nrow = 4, ncol = 3, byrow = TRUE)
wider_df_base = as_tibble(as.data.frame(mat))
colnames(wider_df_base) = c("1", "2", "3")
wider_df_base[] = lapply(wider_df_base, function(x) if(is.character(x)) tools::toTitleCase(x) else x)

Validation

identical(test, result)
#> [1] TRUE

identical(test, wider_dt)
#> [1] TRUE

identical(test, wider_df_base)
#> [1] TRUE

Puzzle 331

In this puzzle we were asked to find which of given numbers can be represented as sum of two cubes.

Load data and libraries

library(tidyverse)
library(readxl)
library(data.table)

input = read_excel("Sum of Cube of Two Numbers.xlsx", range = "A1:A10")
test = read_excel("Sum of Cube of Two Numbers.xlsx",
range = "B2:D6",
col_names = c("Number", "Factor1", "Factor2"))

Approach 1: tidyverse

check_if_sum_of_cubes = function(number) {
x = floor(number^(1/3))

range = data.frame(Number = number, Factor1 = 1:x) %>%
mutate(diff = number - Factor1^3,
is_cube = round(diff^(1/3))^3 == diff) %>%
filter(is_cube) %>%
mutate(Factor2 = diff^(1/3)) %>%
slice(1) %>%
select(Number, Factor1, Factor2)
return(range)
}

result = map_dfr(input$Number, check_if_sum_of_cubes) %>% as_tibble()

Approach 2: data.table

check_if_sum_of_cubes_dt <- function(number) {
x <- floor(number^(1/3))

DT <- CJ(Factor1 = 1:x, Number = number)
DT[, `:=`(diff = Number - Factor1^3)]
DT[, `:=`(is_cube = round((diff)^(1/3))^3 == diff,
Factor2 = (diff)^(1/3))]

result <- DT[is_cube == TRUE, .(Number, Factor1, Factor2)]
result <- result[order(Factor1)][1]

return(result)
}

input_numbers_dt <- as.vector(input$Number)
result_dt <- rbindlist(lapply(input_numbers_dt, check_if_sum_of_cubes_dt))
result_dt <- as.data.frame(result_dt) %>% drop_na()

Approach 3: base R

check_if_sum_of_cubes_base <- function(number) {
x <- floor(number^(1/3))

df <- expand.grid(Factor1 = 1:x, Number = number)
df$diff <- df$Number - df$Factor1^3
df$is_cube <- round(df$diff^(1/3))^3 == df$diff
df$Factor2 <- df$diff^(1/3)

result <- df[df$is_cube, ]
result <- result[order(result$Factor1), ]
result <- result[1, c("Number", "Factor1", "Factor2")]

return(result)
}

input_numbers <- as.vector(input$Number)
result_base <- do.call("rbind", lapply(input_numbers, check_if_sum_of_cubes_base))
result_base <- as.data.frame(result_base) %>% drop_na()

Validation

Sometimes aligning all structures to be identical generate to much code. Then I we can also check it visually.

> test
# A tibble: 5 × 3
Number Factor1 Factor2
<dbl> <dbl> <dbl>
1 35 2 3
2 855 7 8
3 3744 10 14
4 300827 4 67
5 90000576 44.0 448

> result
# A tibble: 5 × 3
Number Factor1 Factor2
<dbl> <int> <dbl>
1 35 2 3
2 855 7 8
3 3744 10 14
4 300827 4 67
5 90000576 44 448

> result_base
Number Factor1 Factor2
2 35 2 3
7 855 7 8
10 3744 10 14
4 300827 4 67
44 90000576 44 448

> result_dt
Number Factor1 Factor2
1 35 2 3
2 855 7 8
3 3744 10 14
4 300827 4 67
5 90000576 44 448

Puzzle 332

One of the shortest solutions which I solved so far, but it is linked to the fact that there were new functions introduced in MS Excel. We had to find maximal and minimal value per group.

Load data and libraries

library(tidyverse)
library(readxl)
library(data.table)

input = read_excel("Max Min.xlsx", range = "A1:B20")
test = read_excel("Max Min.xlsx", range = "D2:F6")

Approach 1: tidyverse

result = input %>%
group_by(Zone) %>%
summarise(Max = max(Sales), Min = min(Sales))

Approach 2: data.table

input_dt = setDT(input)

result_dt = input_dt[, .(Max = max(Sales), Min = min(Sales)), by = Zone][order(Zone)]
result_dt = as_tibble(result_dt)

Approach 3: base R

result_base <- do.call(data.frame, aggregate(Sales ~ Zone, data = input, FUN = function(x) c(Max = max(x), Min = min(x))))
names(result_base) <- c("Zone","Max", "Min")

Validation

identical(test, result)
#> [1] TRUE

identical(test, result_dt)
#> [1] TRUE

identical(test, as_tibble(result_base))
#> [1] TRUE

Puzzle 333

As we all know words consists of letters, and in many words letters repeat itself. And that is the topic in this puzzle. We have words and we need to cut them… before second occurence of first repeating letter (bull becomes bul, excel becomes exc and so on).

Load data and libraries

library(tidyverse)
library(readxl)
library(data.table)

input = read_excel("Extract String Before a Repeated Character.xlsx", range = "A1:A10")
test = read_excel("Extract String Before a Repeated Character.xlsx", range = "B1:B10")

Approach 1: tidyverse

result = input %>%
mutate(lett = str_split(str_to_lower(String), pattern = ""),
reps = map(lett, ~ duplicated(.x)),
first_reps = map_int(reps, ~ which(.x)[1]),
`Answer Expected` = ifelse(!is.na(first_reps),
str_sub(String, 1, first_reps-1),
String)) %>%
select(`Answer Expected`)

Approach 2: data.table

input_dt <- as.data.table(input)

input_dt[, lett := strsplit(tolower(String), "")]

input_dt[, reps := lapply(lett, function(x) duplicated(x))]
input_dt[, first_reps := sapply(reps, function(x) { pos <- which(x)[1]; if (length(pos) > 0) pos else NA })]
input_dt[, `Answer Expected` := ifelse(!is.na(first_reps), substr(String, 1, first_reps - 1), String)]

result_dt <- input_dt$`Answer Expected`

Approach 3: base R

input_b <- input
input_b$lett <- strsplit(tolower(input_b$String), "")
input_b$reps <- lapply(input_b$lett, function(x) duplicated(x))
input_b$first_reps <- sapply(input_b$reps, function(x) { pos <- which(x)[1]; if (length(pos) > 0) pos else NA })
input_b$`Answer Expected` <- ifelse(!is.na(input_b$first_reps), substr(input_b$String, 1, input_b$first_reps - 1), input_b$String)
result_b <- input_b$`Answer Expected`

Validation

identical(result, test)
#> [1] TRUE

identical(result_dt, test$`Answer Expected`)
#> [1] TRUE

identical(result_b, test$`Answer Expected`)
#> [1] TRUE

Conclussion

Feel free to comment, share and contact me with advices, questions and your ideas how to improve anything.

--

--

Numbers around us
Numbers around us

Self developed analyst. BI Developer, R programmer. Delivers what you need, not what you asked for.