Module 1: Interactive Explorers

PLS 206 — Applied Multivariate Modeling

Use these interactive tools to build intuition for core statistical concepts. Adjust the controls and watch results update live — no R installation needed.


Distribution Explorer

Choose a distribution family, adjust its parameters, apply a transform, and see the histogram, boxplot, and Q–Q plot update together. The normal overlay shows whether the data (or its transform) is approximately normal.

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 920

library(shiny)

ui <- fluidPage(
  sidebarLayout(
    sidebarPanel(
      selectInput("dist", "Distribution",
                  choices = c("Normal"          = "norm",
                              "Uniform"         = "unif",
                              "Exponential"     = "exp",
                              "Log-normal"      = "lnorm",
                              "t (heavy tails)" = "t")),

      conditionalPanel("input.dist == 'norm'",
        sliderInput("norm_mean", "Mean (μ)",    -10, 10, 0,   0.5),
        sliderInput("norm_sd",   "Std Dev (σ)", 0.1,  5, 1,   0.1)
      ),
      conditionalPanel("input.dist == 'unif'",
        sliderInput("unif_min", "Min", -10, 10, 0, 0.5),
        sliderInput("unif_max", "Max",   0, 20, 5, 0.5)
      ),
      conditionalPanel("input.dist == 'exp'",
        sliderInput("exp_rate", "Rate (λ = 1/mean)", 0.1, 5, 1, 0.1)
      ),
      conditionalPanel("input.dist == 'lnorm'",
        sliderInput("ln_meanlog", "Mean of log(x)", -2, 3,   1, 0.1),
        sliderInput("ln_sdlog",   "SD of log(x)",  0.1, 2, 0.5, 0.1)
      ),
      conditionalPanel("input.dist == 't'",
        sliderInput("t_df", "Degrees of freedom", 1, 30, 3, 1)
      ),

      sliderInput("n", "Sample size (n)", 20, 1000, 200, 20),
      hr(),

      selectInput("transform", "Transform x",
                  choices = c("x  (no transform)" = "none",
                              "log(x)"            = "log",
                              "sqrt(x)"           = "sqrt",
                              "x^2"               = "sq",
                              "1/x"               = "inv")),
      hr(),
      checkboxInput("show_code", "Show R code", FALSE),
      helpText("Try: Exponential + log(x) — watch the Q–Q plot straighten out.")
    ),

    mainPanel(
      plotOutput("distPlot", height = "530px"),
      verbatimTextOutput("stats"),
      conditionalPanel("input.show_code == true",
        verbatimTextOutput("code_out")
      )
    )
  )
)

server <- function(input, output) {

  raw_data <- reactive({
    n <- input$n
    switch(input$dist,
      norm  = rnorm(n, input$norm_mean, input$norm_sd),
      unif  = runif(n, min(input$unif_min, input$unif_max - 0.1),
                       max(input$unif_max, input$unif_min + 0.1)),
      exp   = rexp(n, input$exp_rate),
      lnorm = rlnorm(n, input$ln_meanlog, input$ln_sdlog),
      t     = rt(n, input$t_df)
    )
  })

  plot_data <- reactive({
    x <- raw_data()
    switch(input$transform,
      none = x,
      log  = { v <- x[x > 0];  log(v)    },
      sqrt = { v <- x[x >= 0]; sqrt(v)   },
      sq   = x^2,
      inv  = { v <- x[x != 0]; 1/v       }
    )
  })

  x_label <- reactive({
    switch(input$transform,
      none = "x", log = "log(x)", sqrt = "sqrt(x)", sq = "x\u00b2", inv = "1/x"
    )
  })

  output$distPlot <- renderPlot({
    x   <- plot_data()
    lbl <- x_label()

    if (length(x) < 5) {
      plot.new()
      text(0.5, 0.5,
           "Too few valid values after transform.\nTry a different distribution or transform.",
           cex = 1.2)
      return(invisible(NULL))
    }

    layout(matrix(1:3, ncol = 1), heights = c(1, 2.5, 2.5))

    # 1. Boxplot
    par(mar = c(1, 4, 2, 1))
    boxplot(x, horizontal = TRUE,
            col = adjustcolor("steelblue", 0.45), border = "#2c5f8a",
            xaxt = "n",
            main = if (input$transform != "none")
                     paste0(input$dist, "  \u2192  transform: ", lbl)
                   else input$dist)

    # 2. Histogram + normal overlay
    par(mar = c(3, 4, 0.5, 1))
    h    <- hist(x, plot = FALSE, breaks = 30)
    ymax <- max(h$density) * 1.35
    hist(x, freq = FALSE, breaks = 30,
         col = adjustcolor("red", 0.45), border = "white",
         ylim = c(0, ymax), xlab = lbl, ylab = "Density", main = "")
    mu <- mean(x); sg <- sd(x)
    xs <- seq(min(x) - 0.1 * diff(range(x)),
              max(x) + 0.1 * diff(range(x)), length.out = 300)
    lines(xs, dnorm(xs, mu, sg), col = "#002855", lwd = 2)
    abline(v = mu, col = "orange", lty = 2, lwd = 1.8)
    legend("topright", bty = "n", cex = 0.8,
           legend = c(sprintf("Normal(\u03bc\u0302=%.2f, \u03c3\u0302=%.2f)", mu, sg),
                      "Mean"),
           col = c("#002855", "orange"), lty = c(1, 2), lwd = 2)

    # 3. Q-Q plot
    par(mar = c(4, 4, 0.5, 1))
    qqnorm(x, pch = 16, cex = 0.5,
           col  = adjustcolor("steelblue4", 0.6),
           main = "", xlab = "Theoretical normal quantiles",
           ylab = paste0("Sample quantiles (", lbl, ")"))
    qqline(x, col = "orange", lwd = 2)
  })

  output$stats <- renderPrint({
    x   <- plot_data()
    raw <- raw_data()
    lbl <- x_label()

    if (length(x) < 5) {
      cat("Too few valid values after transform.\n"); return(invisible(NULL))
    }
    n_raw <- length(raw); n_v <- length(x)
    if (n_v < n_raw)
      cat(sprintf("Note: %d/%d values filtered by transform (domain restriction).\n\n",
                  n_raw - n_v, n_raw))

    q <- quantile(x, c(0.25, 0.5, 0.75))
    cat(sprintf("n = %d\n", n_v))
    cat(sprintf("Mean   = %8.3f  |  Median = %.3f\n", mean(x), q[2]))
    cat(sprintf("SD     = %8.3f  |  IQR    = %.3f\n", sd(x), q[3] - q[1]))
    cat(sprintf("Q1     = %8.3f  |  Q3     = %.3f\n", q[1], q[3]))
    cat(sprintf("Min    = %8.3f  |  Max    = %.3f\n\n", min(x), max(x)))

    sw <- tryCatch(shapiro.test(if (n_v > 5000) sample(x, 5000) else x),
                   error = function(e) NULL)
    if (!is.null(sw)) {
      cat(sprintf("Shapiro-Wilk: W = %.4f,  p = %.4f\n", sw$statistic, sw$p.value))
      cat(if (sw$p.value < 0.05)
            "  \u2192 Evidence against normality (p < 0.05)\n"
          else
            "  \u2192 Consistent with normality (p \u2265 0.05)\n")
    }
  })

  output$code_out <- renderPrint({
    lbl <- x_label()
    n   <- input$n

    gen <- switch(input$dist,
      norm  = sprintf("x <- rnorm(%d, mean = %.1f, sd = %.1f)",
                      n, input$norm_mean, input$norm_sd),
      unif  = sprintf("x <- runif(%d, min = %.1f, max = %.1f)",
                      n, input$unif_min, input$unif_max),
      exp   = sprintf("x <- rexp(%d, rate = %.1f)", n, input$exp_rate),
      lnorm = sprintf("x <- rlnorm(%d, meanlog = %.1f, sdlog = %.1f)",
                      n, input$ln_meanlog, input$ln_sdlog),
      t     = sprintf("x <- rt(%d, df = %d)", n, input$t_df)
    )
    trans <- switch(input$transform,
      none = "", log  = "x <- log(x[x > 0])", sqrt = "x <- sqrt(x[x >= 0])",
      sq   = "x <- x^2", inv  = "x <- 1 / x[x != 0]"
    )

    cat("# Minimal R code to reproduce this view:\n\n")
    cat(gen, "\n")
    if (nchar(trans) > 0) cat(trans, "\n")
    cat("\npar(mfrow = c(3, 1))\n")
    cat(sprintf("boxplot(x, horizontal = TRUE, xlab = \"%s\")\n", lbl))
    cat(sprintf("hist(x, freq = FALSE, breaks = 30, xlab = \"%s\")\n", lbl))
    cat(sprintf("qqnorm(x); qqline(x)\n\n"))
    cat("summary(x)\nshapiro.test(x)\n")
  })
}

shinyApp(ui, server)

t-test Explorer

Adjust the two group means, shared SD, and sample size. Both a Welch t-test and a Wilcoxon rank-sum test run on every sample. Toggle ranked view to see what the Wilcoxon test is actually comparing.

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 660

library(shiny)

ui <- fluidPage(
  sidebarLayout(
    sidebarPanel(
      sliderInput("mean1", "Group A mean", min = 0, max = 20, value = 5,  step = 0.5),
      sliderInput("mean2", "Group B mean", min = 0, max = 20, value = 7,  step = 0.5),
      sliderInput("sd",    "Std dev (both groups)", min = 0.5, max = 8, value = 2, step = 0.25),
      sliderInput("n",     "Sample size per group", min = 5, max = 200, value = 20, step = 5),
      hr(),
      checkboxInput("show_ranks", "Show ranked values", value = FALSE),
      hr(),
      helpText("Increase n with fixed means — the p-value drops even though the biology hasn't changed."),
      helpText("Toggle ranked view: the Wilcoxon test compares rank sums, not raw means.")
    ),
    mainPanel(
      plotOutput("tPlot", height = "280px"),
      verbatimTextOutput("tResult")
    )
  )
)

server <- function(input, output) {
  dat <- reactive({
    list(
      a = rnorm(input$n, input$mean1, input$sd),
      b = rnorm(input$n, input$mean2, input$sd)
    )
  })

  output$tPlot <- renderPlot({
    d <- dat()

    if (input$show_ranks) {
      all_ranks <- rank(c(d$a, d$b))
      plot_a    <- all_ranks[seq_len(input$n)]
      plot_b    <- all_ranks[seq_len(input$n) + input$n]
      ylab_text <- "Rank (pooled)"
      main_text <- "Ranked values \u2014 what the Wilcoxon test compares"
    } else {
      plot_a    <- d$a
      plot_b    <- d$b
      ylab_text <- "Value"
      main_text <- "Raw values \u2014 what the t-test compares"
    }

    ylo <- min(plot_a, plot_b) - 0.5
    yhi <- max(plot_a, plot_b) + 0.5
    par(mar = c(4, 4, 2, 1))
    boxplot(list("Group A" = plot_a, "Group B" = plot_b),
            col     = c(adjustcolor("steelblue", 0.6),
                        adjustcolor("orange",    0.6)),
            ylim    = c(ylo, yhi),
            ylab    = ylab_text, main = main_text,
            outline = FALSE, cex.axis = 1.2, cex.lab = 1.2)
    stripchart(list("Group A" = plot_a, "Group B" = plot_b),
               vertical = TRUE, method = "jitter",
               jitter = 0.08, pch = 16, add = TRUE,
               col = c(adjustcolor("steelblue", 0.5),
                       adjustcolor("orange",    0.5)))
    points(1:2, c(mean(plot_a), mean(plot_b)),
           pch = 18, cex = 2, col = "black")
  })

  output$tResult <- renderPrint({
    d    <- dat()
    tres <- t.test(d$a, d$b)
    wres <- wilcox.test(d$a, d$b, conf.int = TRUE, exact = FALSE)
    eff  <- abs(mean(d$a) - mean(d$b)) / input$sd

    cat("--- Welch t-test (parametric) ---\n")
    cat(sprintf("t = %.3f,  df = %.1f,  p = %.4f  %s\n",
                tres$statistic, tres$parameter, tres$p.value,
                ifelse(tres$p.value < 0.05, "[*]", "[ ]")))
    cat(sprintf("95%% CI of difference: [%.3f, %.3f]\n",
                tres$conf.int[1], tres$conf.int[2]))
    cat(sprintf("Cohen's d: %.3f  (%s)\n\n", eff,
                ifelse(eff < 0.2, "negligible",
                       ifelse(eff < 0.5, "small",
                              ifelse(eff < 0.8, "medium", "large")))))

    cat("--- Wilcoxon rank-sum test (non-parametric) ---\n")
    cat(sprintf("W = %.0f,  p = %.4f  %s\n",
                wres$statistic, wres$p.value,
                ifelse(wres$p.value < 0.05, "[*]", "[ ]")))
    cat(sprintf("95%% CI of location shift: [%.3f, %.3f]\n",
                wres$conf.int[1], wres$conf.int[2]))

    cat(sprintf("\nAgree on significance: %s\n",
                ifelse((tres$p.value < 0.05) == (wres$p.value < 0.05),
                       "YES", "NO \u2014 they disagree!")))
  })
}

shinyApp(ui, server)

Regression Explorer

Control the true intercept and slope independently, add noise, shift the predictor mean, and watch the estimated line track the true one. Notice that changing slope does not change the estimated intercept — slope and intercept are estimated independently by OLS.

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 680

library(shiny)

ui <- fluidPage(
  sidebarLayout(
    sidebarPanel(
      sliderInput("n",         "Sample size (n)",       min = 5,   max = 300, value = 30,  step = 5),
      sliderInput("intercept", "True intercept (\u03b2\u2080)", min = -10, max = 10,  value = 0,   step = 0.5),
      sliderInput("slope",     "True slope (\u03b2\u2081)",     min = -2,  max = 2,   value = 0.5, step = 0.1),
      sliderInput("noise",     "Noise (SD)",             min = 0.1, max = 5,   value = 1,   step = 0.1),
      sliderInput("mean_x",    "Mean of x (predictor)", min = -10, max = 10,  value = 0,   step = 0.5),
      hr(),
      checkboxInput("show_code", "Show R code", FALSE),
      hr(),
      helpText("Change slope \u2192 estimated intercept stays put (they are estimated independently)."),
      helpText("Change intercept slider \u2192 estimated intercept follows. Move mean_x to see its effect on intercept significance.")
    ),
    mainPanel(
      plotOutput("regPlot", height = "300px"),
      verbatimTextOutput("regStats"),
      conditionalPanel("input.show_code == true",
        verbatimTextOutput("reg_code")
      )
    )
  )
)

server <- function(input, output) {
  dat <- reactive({
    set.seed(42)
    x <- rnorm(input$n, mean = input$mean_x)
    y <- input$intercept + input$slope * x + rnorm(input$n, sd = input$noise)
    data.frame(x = x, y = y)
  })

  output$regPlot <- renderPlot({
    d   <- dat()
    fit <- lm(y ~ x, data = d)

    par(mar = c(4, 4, 2, 1))
    plot(d$x, d$y,
         col  = adjustcolor("steelblue", 0.6),
         pch  = 16, cex = 1.2,
         xlab = "x", ylab = "y",
         main = paste0("n=", input$n,
                       "  \u03b2\u2080=", input$intercept,
                       "  \u03b2\u2081=", input$slope,
                       "  noise=", input$noise,
                       "  mean(x)=", input$mean_x))
    abline(fit, col = "#002855", lwd = 2)
    abline(a = input$intercept, b = input$slope,
           col = "orange", lwd = 1.5, lty = 2)
    abline(v = input$mean_x, col = "gray60", lty = 3, lwd = 1.2)
    legend("topleft",
           legend = c("Estimated", "True", "Mean of x"),
           col    = c("#002855", "orange", "gray60"),
           lty    = c(1, 2, 3), lwd = c(2, 1.5, 1.2), bty = "n")
  })

  output$regStats <- renderPrint({
    d     <- dat()
    fit   <- lm(y ~ x, data = d)
    s     <- summary(fit)
    coefs <- coef(s)

    cat("--- Intercept ---\n")
    cat(sprintf("  Estimated: %+.3f  (true: %+.1f)\n",
                coefs[1,1], input$intercept))
    cat(sprintf("  p-value:   %.4f  %s\n", coefs[1,4],
                ifelse(coefs[1,4] < 0.05, "[*]", "[ ]")))

    cat("\n--- Slope ---\n")
    cat(sprintf("  Estimated: %+.3f  (true: %+.1f)\n",
                coefs[2,1], input$slope))
    cat(sprintf("  p-value:   %.4f  %s\n", coefs[2,4],
                ifelse(coefs[2,4] < 0.05, "[*]", "[ ]")))

    cat(sprintf("\nR\u00b2: %.3f\n", s$r.squared))
  })

  output$reg_code <- renderPrint({
    cat("# Minimal R code to reproduce this view:\n\n")
    cat(sprintf("set.seed(42)\n"))
    cat(sprintf("x   <- rnorm(%d, mean = %.1f)\n", input$n, input$mean_x))
    cat(sprintf("y   <- %.1f + %.1f * x + rnorm(%d, sd = %.1f)\n",
                input$intercept, input$slope, input$n, input$noise))
    cat("fit <- lm(y ~ x)\n")
    cat("summary(fit)\n\n")
    cat("plot(x, y, pch = 16, col = \"steelblue\")\n")
    cat("abline(fit, col = \"#002855\", lwd = 2)         # estimated\n")
    cat(sprintf("abline(a = %.1f, b = %.1f, col = \"orange\", lty = 2)  # true\n",
                input$intercept, input$slope))
  })
}

shinyApp(ui, server)

Correlation Explorer

Set the true correlation strength, sample size, and number of outliers. Watch how Pearson gets pulled by outliers while Spearman stays stable.

#| '!! shinylive warning !!': |
#|   shinylive does not work in self-contained HTML documents.
#|   Please set `embed-resources: false` in your metadata.
#| standalone: true
#| viewerHeight: 520

library(shiny)

ui <- fluidPage(
  sidebarLayout(
    sidebarPanel(
      sliderInput("r",   "True correlation (r)", min = -0.99, max = 0.99, value = 0.5, step = 0.05),
      sliderInput("n",   "Sample size (n)", min = 10, max = 300, value = 50, step = 10),
      sliderInput("out", "Outliers", min = 0, max = 5, value = 0, step = 1),
      hr(),
      helpText("Add outliers to see why Spearman is more robust than Pearson.")
    ),
    mainPanel(
      plotOutput("corrPlot", height = "300px"),
      verbatimTextOutput("corrResult")
    )
  )
)

server <- function(input, output) {
  dat <- reactive({
    r_val <- max(-0.99, min(0.99, input$r))
    sigma <- matrix(c(1, r_val, r_val, 1), 2, 2)
    L     <- chol(sigma)
    raw   <- matrix(rnorm(input$n * 2), ncol = 2) %*% L
    df    <- data.frame(x = raw[,1], y = raw[,2], outlier = FALSE)
    if (input$out > 0) {
      outs <- data.frame(x = runif(input$out, 2, 4),
                         y = runif(input$out, -4, -2),
                         outlier = TRUE)
      df <- rbind(df, outs)
    }
    df
  })

  output$corrPlot <- renderPlot({
    d    <- dat()
    norm <- d[!d$outlier, ]
    par(mar = c(4, 4, 1, 1))
    plot(d$x, d$y,
         col  = ifelse(d$outlier, "red", adjustcolor("gray40", 0.7)),
         pch  = 16, cex = 1.3,
         xlab = "X", ylab = "Y",
         cex.axis = 1.2, cex.lab = 1.2)
    if (nrow(norm) > 1)
      abline(lm(y ~ x, data = norm), col = "steelblue", lwd = 2)
    abline(lm(y ~ x, data = d), col = "red", lty = 2, lwd = 1.8)
    legend("topleft",
           legend = c("Normal pts", "Outlier", "Fit (no outliers)", "Fit (all)"),
           col    = c("gray40", "red", "steelblue", "red"),
           pch    = c(16, 16, NA, NA),
           lty    = c(NA, NA, 1, 2),
           lwd    = c(NA, NA, 2, 1.8),
           bty    = "n", cex = 0.9)
  })

  output$corrResult <- renderPrint({
    d     <- dat()
    pear  <- cor.test(d$x, d$y, method = "pearson")
    spear <- cor.test(d$x, d$y, method = "spearman", exact = FALSE)
    cat(sprintf("Pearson r  = %+.3f  (p = %.4f)\n", pear$estimate,  pear$p.value))
    cat(sprintf("Spearman \u03c1 = %+.3f  (p = %.4f)\n", spear$estimate, spear$p.value))
    cat(sprintf("\nTrue r = %.2f  |  n = %d  |  outliers = %d\n",
                input$r, input$n, input$out))
  })
}

shinyApp(ui, server)