Go alongs for categorical predictors

Part I

Where do the means show up on the jittered graph?

Can we use the means to fill in the estimates for the intercept and slope for the following model?

\[ \hat{y_i} = b_0 + b_1x_i \]

\[ \hat{y_i} = 1.062 + (0.144 \times black_i) \]

Part II

# Simple regression of lnyears on the black predictor.
lm_dummy <- lm(lnyears ~ black, data = df)

# Coefficient table with 95% confidence intervals, trimmed to the key columns.
tidy(lm_dummy, conf.int = TRUE, conf.level = 0.95) |>
  select(term, estimate, std.error, conf.low, conf.high)

# Model-level summary: R-squared and sigma only.
glance(lm_dummy) |>
  select(r.squared, sigma)

\[ \hat{y_i} = 1.062 + (0.144 \times black_i) \]

# Same regression, using the black.f version of the predictor.
lm_factor <- lm(lnyears ~ black.f, data = df)

# Coefficient table with 95% confidence intervals, trimmed to the key columns.
tidy(lm_factor, conf.int = TRUE, conf.level = 0.95) |>
  select(term, estimate, std.error, conf.low, conf.high)

# Model-level summary: R-squared and sigma only.
glance(lm_factor) |>
  select(r.squared, sigma)
# Build white.f from black.f with "Black" moved to the reference level, so the
# fitted coefficient compares the remaining level against it.
df <- mutate(df, white.f = relevel(black.f, ref = "Black"))

# Refit with the rotated reference level.
lm_factor_rotate <- lm(lnyears ~ white.f, data = df)

# Coefficient table with 95% confidence intervals, trimmed to the key columns.
tidy(lm_factor_rotate, conf.int = TRUE, conf.level = 0.95) |>
  select(term, estimate, std.error, conf.low, conf.high)

# Model-level summary: R-squared and sigma only.
glance(lm_factor_rotate) |>
  select(r.squared, sigma)

Part III

Unadjusted means

# Refit the black.f-only model that the unadjusted means are read from.
lm_factor <- lm(lnyears ~ black.f, data = df)

# One prediction per distinct value of black.f, with 95% intervals. The df
# argument is supplied from insight::get_df() on the same model. datagrid()
# stays inline inside predictions(), exactly as in the original call.
unadjusted_means <-
  predictions(
    lm_factor,
    df = insight::get_df(lm_factor),
    conf.level = 0.95,
    newdata = datagrid(black.f = unique)
  ) |>
  select(black.f, estimate, std.error, conf.low, conf.high) |>
  as_tibble()

unadjusted_means

Adjusted means

# Center primlev at its mean, so primlev.c = 0 corresponds to the mean of
# primlev.
df <- mutate(df, primlev.c = primlev - mean(primlev))

# Add the centered covariate to the black.f model.
lm_factor_primlev <- lm(lnyears ~ black.f + primlev.c, data = df)

# Full coefficient table with 95% confidence intervals (all columns kept).
tidy(lm_factor_primlev, conf.int = TRUE, conf.level = 0.95)

# Model-level summary: R-squared and sigma only.
glance(lm_factor_primlev) |>
  select(r.squared, sigma)

\[ \hat{y_i} = 1.176 + (-0.103 \times black_i) + (0.350 \times primlev.c_i) \]

# One prediction per distinct value of black.f from the covariate-adjusted
# model, with 95% intervals. grid_type = "mean_or_mode" is left to fill in the
# predictors not listed here (primlev.c). The df argument is supplied from
# insight::get_df() on the same model.
adjusted_means <-
  predictions(
    lm_factor_primlev,
    df = insight::get_df(lm_factor_primlev),
    conf.level = 0.95,
    newdata = datagrid(
      black.f = unique,
      grid_type = "mean_or_mode"
    )
  ) |>
  select(black.f, estimate, std.error, conf.low, conf.high) |>
  as_tibble()

adjusted_means

\[ \hat{y_i} = 1.176 + (-0.103 \times black_i) + (0.350 \times primlev.c_i) \]

\[ \hat{y_i} = 1.176 + (-0.103 \times 1) + (0.350 \times 0) \]

\[ \hat{y_i} = 1.073 \]

Parallel Slopes Plot

Enhanced Plot

Part IV

# Interaction model: black.f, primlev.c, and their product term.
lm_inter <- lm(lnyears ~ black.f * primlev.c, data = df)

# Coefficient table with 95% confidence intervals, trimmed to the key columns.
tidy(lm_inter, conf.int = TRUE, conf.level = 0.95) |>
  select(term, estimate, std.error, conf.low, conf.high)

# Model-level summary: R-squared and sigma only.
glance(lm_inter) |> select(r.squared, sigma)

\[ \hat{y_i} = 1.176 + (-0.103 \times black_i) + (0.351 \times primlev.c_i) + (-0.003 \times black_i \times primlev.c_i) \]

Compute the Effects of Severity for Each Group

\[ \hat{y_i} = 1.176 + (-0.103 \times black_i) + (0.351 \times primlev.c_i) + (-0.003 \times black_i \times primlev.c_i) \]

  • Equation for Black individuals:

\[ \hat{y_i} = 1.176 + (-0.103 \times 1) + (0.351 \times primlev.c_i) + (-0.003 \times 1 \times primlev.c_i) \]

\[ \hat{y_i} = 1.073 + (0.348 \times primlev.c_i) \]

  • Equation for White individuals:

\[ \hat{y_i} = 1.176 + (-0.103 \times 0) + (0.351 \times primlev.c_i) + (-0.003 \times 0 \times primlev.c_i) \]

\[ \hat{y_i} = 1.176 + (0.351 \times primlev.c_i) \]

Compare Slopes

# Slope of primlev.c from the interaction model, reported separately for each
# level of black.f.
slopes(lm_inter, variables = "primlev.c", by = "black.f") |>
  as_tibble() |>
  select(term, black.f, estimate, std.error, conf.low, conf.high)

Interaction Plot

Enhanced Plot