Module 1: Interactive Explorers
PLS 206 — Applied Multivariate Modeling
Use these interactive tools to build intuition for core statistical concepts. Adjust the controls and watch results update live — no R installation needed.
Distribution Explorer
Choose a distribution family, adjust its parameters, apply a transform, and see the histogram, boxplot, and Q–Q plot update together. The normal overlay shows whether the data (or its transform) is approximately normal.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 920
library(shiny)
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
selectInput("dist", "Distribution",
choices = c("Normal" = "norm",
"Uniform" = "unif",
"Exponential" = "exp",
"Log-normal" = "lnorm",
"t (heavy tails)" = "t")),
conditionalPanel("input.dist == 'norm'",
sliderInput("norm_mean", "Mean (μ)", -10, 10, 0, 0.5),
sliderInput("norm_sd", "Std Dev (σ)", 0.1, 5, 1, 0.1)
),
conditionalPanel("input.dist == 'unif'",
sliderInput("unif_min", "Min", -10, 10, 0, 0.5),
sliderInput("unif_max", "Max", 0, 20, 5, 0.5)
),
conditionalPanel("input.dist == 'exp'",
sliderInput("exp_rate", "Rate (λ = 1/mean)", 0.1, 5, 1, 0.1)
),
conditionalPanel("input.dist == 'lnorm'",
sliderInput("ln_meanlog", "Mean of log(x)", -2, 3, 1, 0.1),
sliderInput("ln_sdlog", "SD of log(x)", 0.1, 2, 0.5, 0.1)
),
conditionalPanel("input.dist == 't'",
sliderInput("t_df", "Degrees of freedom", 1, 30, 3, 1)
),
sliderInput("n", "Sample size (n)", 20, 1000, 200, 20),
hr(),
selectInput("transform", "Transform x",
choices = c("x (no transform)" = "none",
"log(x)" = "log",
"sqrt(x)" = "sqrt",
"x^2" = "sq",
"1/x" = "inv")),
hr(),
checkboxInput("show_code", "Show R code", FALSE),
helpText("Try: Exponential + log(x) — watch the Q–Q plot straighten out.")
),
mainPanel(
plotOutput("distPlot", height = "530px"),
verbatimTextOutput("stats"),
conditionalPanel("input.show_code == true",
verbatimTextOutput("code_out")
)
)
)
)
server <- function(input, output) {
raw_data <- reactive({
n <- input$n
switch(input$dist,
norm = rnorm(n, input$norm_mean, input$norm_sd),
unif = runif(n, min(input$unif_min, input$unif_max - 0.1),
max(input$unif_max, input$unif_min + 0.1)),
exp = rexp(n, input$exp_rate),
lnorm = rlnorm(n, input$ln_meanlog, input$ln_sdlog),
t = rt(n, input$t_df)
)
})
plot_data <- reactive({
x <- raw_data()
switch(input$transform,
none = x,
log = { v <- x[x > 0]; log(v) },
sqrt = { v <- x[x >= 0]; sqrt(v) },
sq = x^2,
inv = { v <- x[x != 0]; 1/v }
)
})
x_label <- reactive({
switch(input$transform,
none = "x", log = "log(x)", sqrt = "sqrt(x)", sq = "x\u00b2", inv = "1/x"
)
})
output$distPlot <- renderPlot({
x <- plot_data()
lbl <- x_label()
if (length(x) < 5) {
plot.new()
text(0.5, 0.5,
"Too few valid values after transform.\nTry a different distribution or transform.",
cex = 1.2)
return(invisible(NULL))
}
layout(matrix(1:3, ncol = 1), heights = c(1, 2.5, 2.5))
# 1. Boxplot
par(mar = c(1, 4, 2, 1))
boxplot(x, horizontal = TRUE,
col = adjustcolor("steelblue", 0.45), border = "#2c5f8a",
xaxt = "n",
main = if (input$transform != "none")
paste0(input$dist, " \u2192 transform: ", lbl)
else input$dist)
# 2. Histogram + normal overlay
par(mar = c(3, 4, 0.5, 1))
h <- hist(x, plot = FALSE, breaks = 30)
ymax <- max(h$density) * 1.35
hist(x, freq = FALSE, breaks = 30,
col = adjustcolor("red", 0.45), border = "white",
ylim = c(0, ymax), xlab = lbl, ylab = "Density", main = "")
mu <- mean(x); sg <- sd(x)
xs <- seq(min(x) - 0.1 * diff(range(x)),
max(x) + 0.1 * diff(range(x)), length.out = 300)
lines(xs, dnorm(xs, mu, sg), col = "#002855", lwd = 2)
abline(v = mu, col = "orange", lty = 2, lwd = 1.8)
legend("topright", bty = "n", cex = 0.8,
legend = c(sprintf("Normal(\u03bc\u0302=%.2f, \u03c3\u0302=%.2f)", mu, sg),
"Mean"),
col = c("#002855", "orange"), lty = c(1, 2), lwd = 2)
# 3. Q-Q plot
par(mar = c(4, 4, 0.5, 1))
qqnorm(x, pch = 16, cex = 0.5,
col = adjustcolor("steelblue4", 0.6),
main = "", xlab = "Theoretical normal quantiles",
ylab = paste0("Sample quantiles (", lbl, ")"))
qqline(x, col = "orange", lwd = 2)
})
output$stats <- renderPrint({
x <- plot_data()
raw <- raw_data()
lbl <- x_label()
if (length(x) < 5) {
cat("Too few valid values after transform.\n"); return(invisible(NULL))
}
n_raw <- length(raw); n_v <- length(x)
if (n_v < n_raw)
cat(sprintf("Note: %d/%d values filtered by transform (domain restriction).\n\n",
n_raw - n_v, n_raw))
q <- quantile(x, c(0.25, 0.5, 0.75))
cat(sprintf("n = %d\n", n_v))
cat(sprintf("Mean = %8.3f | Median = %.3f\n", mean(x), q[2]))
cat(sprintf("SD = %8.3f | IQR = %.3f\n", sd(x), q[3] - q[1]))
cat(sprintf("Q1 = %8.3f | Q3 = %.3f\n", q[1], q[3]))
cat(sprintf("Min = %8.3f | Max = %.3f\n\n", min(x), max(x)))
sw <- tryCatch(shapiro.test(if (n_v > 5000) sample(x, 5000) else x),
error = function(e) NULL)
if (!is.null(sw)) {
cat(sprintf("Shapiro-Wilk: W = %.4f, p = %.4f\n", sw$statistic, sw$p.value))
cat(if (sw$p.value < 0.05)
" \u2192 Evidence against normality (p < 0.05)\n"
else
" \u2192 Consistent with normality (p \u2265 0.05)\n")
}
})
output$code_out <- renderPrint({
lbl <- x_label()
n <- input$n
gen <- switch(input$dist,
norm = sprintf("x <- rnorm(%d, mean = %.1f, sd = %.1f)",
n, input$norm_mean, input$norm_sd),
unif = sprintf("x <- runif(%d, min = %.1f, max = %.1f)",
n, input$unif_min, input$unif_max),
exp = sprintf("x <- rexp(%d, rate = %.1f)", n, input$exp_rate),
lnorm = sprintf("x <- rlnorm(%d, meanlog = %.1f, sdlog = %.1f)",
n, input$ln_meanlog, input$ln_sdlog),
t = sprintf("x <- rt(%d, df = %d)", n, input$t_df)
)
trans <- switch(input$transform,
none = "", log = "x <- log(x[x > 0])", sqrt = "x <- sqrt(x[x >= 0])",
sq = "x <- x^2", inv = "x <- 1 / x[x != 0]"
)
cat("# Minimal R code to reproduce this view:\n\n")
cat(gen, "\n")
if (nchar(trans) > 0) cat(trans, "\n")
cat("\npar(mfrow = c(3, 1))\n")
cat(sprintf("boxplot(x, horizontal = TRUE, xlab = \"%s\")\n", lbl))
cat(sprintf("hist(x, freq = FALSE, breaks = 30, xlab = \"%s\")\n", lbl))
cat(sprintf("qqnorm(x); qqline(x)\n\n"))
cat("summary(x)\nshapiro.test(x)\n")
})
}
shinyApp(ui, server)
t-test Explorer
Adjust the two group means, shared SD, and sample size. Both a Welch t-test and a Wilcoxon rank-sum test run on every sample. Toggle ranked view to see what the Wilcoxon test is actually comparing.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 660
library(shiny)
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
sliderInput("mean1", "Group A mean", min = 0, max = 20, value = 5, step = 0.5),
sliderInput("mean2", "Group B mean", min = 0, max = 20, value = 7, step = 0.5),
sliderInput("sd", "Std dev (both groups)", min = 0.5, max = 8, value = 2, step = 0.25),
sliderInput("n", "Sample size per group", min = 5, max = 200, value = 20, step = 5),
hr(),
checkboxInput("show_ranks", "Show ranked values", value = FALSE),
hr(),
helpText("Increase n with fixed means — the p-value drops even though the biology hasn't changed."),
helpText("Toggle ranked view: the Wilcoxon test compares rank sums, not raw means.")
),
mainPanel(
plotOutput("tPlot", height = "280px"),
verbatimTextOutput("tResult")
)
)
)
server <- function(input, output) {
dat <- reactive({
list(
a = rnorm(input$n, input$mean1, input$sd),
b = rnorm(input$n, input$mean2, input$sd)
)
})
output$tPlot <- renderPlot({
d <- dat()
if (input$show_ranks) {
all_ranks <- rank(c(d$a, d$b))
plot_a <- all_ranks[seq_len(input$n)]
plot_b <- all_ranks[seq_len(input$n) + input$n]
ylab_text <- "Rank (pooled)"
main_text <- "Ranked values \u2014 what the Wilcoxon test compares"
} else {
plot_a <- d$a
plot_b <- d$b
ylab_text <- "Value"
main_text <- "Raw values \u2014 what the t-test compares"
}
ylo <- min(plot_a, plot_b) - 0.5
yhi <- max(plot_a, plot_b) + 0.5
par(mar = c(4, 4, 2, 1))
boxplot(list("Group A" = plot_a, "Group B" = plot_b),
col = c(adjustcolor("steelblue", 0.6),
adjustcolor("orange", 0.6)),
ylim = c(ylo, yhi),
ylab = ylab_text, main = main_text,
outline = FALSE, cex.axis = 1.2, cex.lab = 1.2)
stripchart(list("Group A" = plot_a, "Group B" = plot_b),
vertical = TRUE, method = "jitter",
jitter = 0.08, pch = 16, add = TRUE,
col = c(adjustcolor("steelblue", 0.5),
adjustcolor("orange", 0.5)))
points(1:2, c(mean(plot_a), mean(plot_b)),
pch = 18, cex = 2, col = "black")
})
output$tResult <- renderPrint({
d <- dat()
tres <- t.test(d$a, d$b)
wres <- wilcox.test(d$a, d$b, conf.int = TRUE, exact = FALSE)
eff <- abs(mean(d$a) - mean(d$b)) / input$sd
cat("--- Welch t-test (parametric) ---\n")
cat(sprintf("t = %.3f, df = %.1f, p = %.4f %s\n",
tres$statistic, tres$parameter, tres$p.value,
ifelse(tres$p.value < 0.05, "[*]", "[ ]")))
cat(sprintf("95%% CI of difference: [%.3f, %.3f]\n",
tres$conf.int[1], tres$conf.int[2]))
cat(sprintf("Cohen's d: %.3f (%s)\n\n", eff,
ifelse(eff < 0.2, "negligible",
ifelse(eff < 0.5, "small",
ifelse(eff < 0.8, "medium", "large")))))
cat("--- Wilcoxon rank-sum test (non-parametric) ---\n")
cat(sprintf("W = %.0f, p = %.4f %s\n",
wres$statistic, wres$p.value,
ifelse(wres$p.value < 0.05, "[*]", "[ ]")))
cat(sprintf("95%% CI of location shift: [%.3f, %.3f]\n",
wres$conf.int[1], wres$conf.int[2]))
cat(sprintf("\nAgree on significance: %s\n",
ifelse((tres$p.value < 0.05) == (wres$p.value < 0.05),
"YES", "NO \u2014 they disagree!")))
})
}
shinyApp(ui, server)
Regression Explorer
Control the true intercept and slope independently, add noise, shift the predictor mean, and watch the estimated line track the true one. Notice that changing slope does not change the estimated intercept — slope and intercept are estimated independently by OLS.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 680
library(shiny)
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
sliderInput("n", "Sample size (n)", min = 5, max = 300, value = 30, step = 5),
sliderInput("intercept", "True intercept (\u03b2\u2080)", min = -10, max = 10, value = 0, step = 0.5),
sliderInput("slope", "True slope (\u03b2\u2081)", min = -2, max = 2, value = 0.5, step = 0.1),
sliderInput("noise", "Noise (SD)", min = 0.1, max = 5, value = 1, step = 0.1),
sliderInput("mean_x", "Mean of x (predictor)", min = -10, max = 10, value = 0, step = 0.5),
hr(),
checkboxInput("show_code", "Show R code", FALSE),
hr(),
helpText("Change slope \u2192 estimated intercept stays put (they are estimated independently)."),
helpText("Change intercept slider \u2192 estimated intercept follows. Move mean_x to see its effect on intercept significance.")
),
mainPanel(
plotOutput("regPlot", height = "300px"),
verbatimTextOutput("regStats"),
conditionalPanel("input.show_code == true",
verbatimTextOutput("reg_code")
)
)
)
)
server <- function(input, output) {
dat <- reactive({
set.seed(42)
x <- rnorm(input$n, mean = input$mean_x)
y <- input$intercept + input$slope * x + rnorm(input$n, sd = input$noise)
data.frame(x = x, y = y)
})
output$regPlot <- renderPlot({
d <- dat()
fit <- lm(y ~ x, data = d)
par(mar = c(4, 4, 2, 1))
plot(d$x, d$y,
col = adjustcolor("steelblue", 0.6),
pch = 16, cex = 1.2,
xlab = "x", ylab = "y",
main = paste0("n=", input$n,
" \u03b2\u2080=", input$intercept,
" \u03b2\u2081=", input$slope,
" noise=", input$noise,
" mean(x)=", input$mean_x))
abline(fit, col = "#002855", lwd = 2)
abline(a = input$intercept, b = input$slope,
col = "orange", lwd = 1.5, lty = 2)
abline(v = input$mean_x, col = "gray60", lty = 3, lwd = 1.2)
legend("topleft",
legend = c("Estimated", "True", "Mean of x"),
col = c("#002855", "orange", "gray60"),
lty = c(1, 2, 3), lwd = c(2, 1.5, 1.2), bty = "n")
})
output$regStats <- renderPrint({
d <- dat()
fit <- lm(y ~ x, data = d)
s <- summary(fit)
coefs <- coef(s)
cat("--- Intercept ---\n")
cat(sprintf(" Estimated: %+.3f (true: %+.1f)\n",
coefs[1,1], input$intercept))
cat(sprintf(" p-value: %.4f %s\n", coefs[1,4],
ifelse(coefs[1,4] < 0.05, "[*]", "[ ]")))
cat("\n--- Slope ---\n")
cat(sprintf(" Estimated: %+.3f (true: %+.1f)\n",
coefs[2,1], input$slope))
cat(sprintf(" p-value: %.4f %s\n", coefs[2,4],
ifelse(coefs[2,4] < 0.05, "[*]", "[ ]")))
cat(sprintf("\nR\u00b2: %.3f\n", s$r.squared))
})
output$reg_code <- renderPrint({
cat("# Minimal R code to reproduce this view:\n\n")
cat(sprintf("set.seed(42)\n"))
cat(sprintf("x <- rnorm(%d, mean = %.1f)\n", input$n, input$mean_x))
cat(sprintf("y <- %.1f + %.1f * x + rnorm(%d, sd = %.1f)\n",
input$intercept, input$slope, input$n, input$noise))
cat("fit <- lm(y ~ x)\n")
cat("summary(fit)\n\n")
cat("plot(x, y, pch = 16, col = \"steelblue\")\n")
cat("abline(fit, col = \"#002855\", lwd = 2) # estimated\n")
cat(sprintf("abline(a = %.1f, b = %.1f, col = \"orange\", lty = 2) # true\n",
input$intercept, input$slope))
})
}
shinyApp(ui, server)
Correlation Explorer
Set the true correlation strength, sample size, and number of outliers. Watch how Pearson gets pulled by outliers while Spearman stays stable.
#| '!! shinylive warning !!': |
#| shinylive does not work in self-contained HTML documents.
#| Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 520
library(shiny)
ui <- fluidPage(
sidebarLayout(
sidebarPanel(
sliderInput("r", "True correlation (r)", min = -0.99, max = 0.99, value = 0.5, step = 0.05),
sliderInput("n", "Sample size (n)", min = 10, max = 300, value = 50, step = 10),
sliderInput("out", "Outliers", min = 0, max = 5, value = 0, step = 1),
hr(),
helpText("Add outliers to see why Spearman is more robust than Pearson.")
),
mainPanel(
plotOutput("corrPlot", height = "300px"),
verbatimTextOutput("corrResult")
)
)
)
server <- function(input, output) {
dat <- reactive({
r_val <- max(-0.99, min(0.99, input$r))
sigma <- matrix(c(1, r_val, r_val, 1), 2, 2)
L <- chol(sigma)
raw <- matrix(rnorm(input$n * 2), ncol = 2) %*% L
df <- data.frame(x = raw[,1], y = raw[,2], outlier = FALSE)
if (input$out > 0) {
outs <- data.frame(x = runif(input$out, 2, 4),
y = runif(input$out, -4, -2),
outlier = TRUE)
df <- rbind(df, outs)
}
df
})
output$corrPlot <- renderPlot({
d <- dat()
norm <- d[!d$outlier, ]
par(mar = c(4, 4, 1, 1))
plot(d$x, d$y,
col = ifelse(d$outlier, "red", adjustcolor("gray40", 0.7)),
pch = 16, cex = 1.3,
xlab = "X", ylab = "Y",
cex.axis = 1.2, cex.lab = 1.2)
if (nrow(norm) > 1)
abline(lm(y ~ x, data = norm), col = "steelblue", lwd = 2)
abline(lm(y ~ x, data = d), col = "red", lty = 2, lwd = 1.8)
legend("topleft",
legend = c("Normal pts", "Outlier", "Fit (no outliers)", "Fit (all)"),
col = c("gray40", "red", "steelblue", "red"),
pch = c(16, 16, NA, NA),
lty = c(NA, NA, 1, 2),
lwd = c(NA, NA, 2, 1.8),
bty = "n", cex = 0.9)
})
output$corrResult <- renderPrint({
d <- dat()
pear <- cor.test(d$x, d$y, method = "pearson")
spear <- cor.test(d$x, d$y, method = "spearman", exact = FALSE)
cat(sprintf("Pearson r = %+.3f (p = %.4f)\n", pear$estimate, pear$p.value))
cat(sprintf("Spearman \u03c1 = %+.3f (p = %.4f)\n", spear$estimate, spear$p.value))
cat(sprintf("\nTrue r = %.2f | n = %d | outliers = %d\n",
input$r, input$n, input$out))
})
}
shinyApp(ui, server)