# Regress the review score on room type. room_type is categorical, so lm()
# expands it into indicator (dummy) columns, one per non-baseline level.
fit3 <- lm(
  formula = review_scores_rating ~ room_type,
  data = airbnb
)

# Show the coefficient table (estimate, std. error, statistic, p-value,
# confidence bounds) as a styled table.
fit3 %>%
  get_regression_table() %>%
  knitr::kable() %>%
  kable_styling()
term estimate std_error statistic p_value lower_ci upper_ci
intercept 93.202 0.137 680.602 0 92.934 93.471
room_typeHotel room -4.448 0.667 -6.666 0 -5.755 -3.140
room_typePrivate room 0.930 0.197 4.723 0 0.544 1.316
room_typeShared room -4.276 0.465 -9.203 0 -5.186 -3.365



# Place the first rows of the design matrix that lm() built for fit3 next to
# the first rows of the raw room_type column, so the dummy coding can be
# compared with the original labels row by row.
kable(
  x = list(
    head(model.matrix(fit3)),
    head(select(airbnb, room_type))
  ),
  caption = "R's representation of categorical variables vs. what we see in the data",
  valign = "t"
) %>%
  kable_styling()
R’s representation of categorical variables vs. what we see in the data
(Intercept) room_typeHotel room room_typePrivate room room_typeShared room
1 0 1 0
1 0 0 0
1 0 1 0
1 0 1 0
1 0 0 0
1 0 0 0
room_type
Private room
Entire home/apt
Private room
Private room
Entire home/apt
Entire home/apt

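Four cases (one per room type; "Entire home/apt" is the baseline level, so all of its dummy variables are 0 and its fitted value is the intercept alone):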

  1. \(\hat{f}(\text{entire home}) = \hat{\beta}_{0} + 0 + 0 + 0\)
  2. \(\hat{f}(\text{hotel}) = \hat{\beta}_{0} + \hat{\beta}_{\text{hotel}} \cdot 1 + 0 + 0\)
  3. \(\hat{f}(\text{private}) = \hat{\beta}_{0} + 0 + \hat{\beta}_{\text{private}} \cdot 1 + 0\)
  4. \(\hat{f}(\text{shared}) = \hat{\beta}_{0} + 0 + 0 + \hat{\beta}_{\text{shared}} \cdot 1\)