Week 06, clean up

d085b70b · Oliver Müller · f0d2d42f · d085b70b · d085b70b
Commit d085b70b authored 3 years ago by Oliver Müller
--- a/Week 06/house_prices_subset_selection.Rmd
+++ b/Week 06/house_prices_subset_selection.Rmd
@@ -106,7 +106,7 @@ coef(fit_stepw, 1:7)
 Create a loop to refit models and evaluate them on the test set.

 ```{r}
-for (nvars in seq(2,7)) {
+for (nvars in seq(1,7)) {
  vars <- attr(coef(fit_stepw, nvars), "names")[2:(nvars+1)]
  vars <- append(vars, "SalePrice")
  fit <- lm(SalePrice ~ ., data = select(train, vars))

--- a/Week 06/house_prices_tidymodels_lasso.Rmd
+++ b/Week 06/house_prices_tidymodels_lasso.Rmd
@@ -73,12 +73,17 @@ Next, we specify a `recipe` for data preprocessing. The function `step_other` is
 ```{r}
 house_prices_recipe <- 
  recipe(SalePrice ~ ., data = train) %>% 
-  step_normalize(all_numeric(), -all_outcomes()) %>% 
-  step_dummy(all_nominal(), -all_outcomes()) %>% 
+  step_normalize(all_numeric_predictors()) %>% 
+  step_dummy(all_nominal_predictors()) %>% 
  step_nzv(all_predictors()) %>% 
-  step_other(all_nominal(), threshold = 0.1) 
+  step_other(all_nominal_predictors(), threshold = 0.1)

-house_prices_recipe
+```
+
+`prep` and `bake` the `recipe` on the training data, storing the preprocessed result in `train_baked`.
+
+```{r}
+train_baked <- bake(prep(house_prices_recipe), new_data = train)

 ```

@@ -103,7 +108,7 @@ house_prices_fit

 ```

-And inspect the results...
+And inspect the model coefficients for the given lambda value.

 ```{r}
 house_prices_fit %>%
@@ -112,17 +117,16 @@ house_prices_fit %>%

 ```

-... and the predictive accuracy on test data.
+Let's evaluate the predictive accuracy on test data.

 ```{r}
 preds <- predict(house_prices_fit, new_data = test)
-names(preds) <- "pred"

 test_w_preds <- test %>% 
  cbind(preds)

 test_w_preds %>% 
-  yardstick::rmse(truth = SalePrice, estimate = pred)
+  yardstick::rmse(truth = SalePrice, estimate = .pred)

 ```

@@ -142,25 +146,28 @@ house_prices_wflow_tunable

 ```

-Here we define a search strategy, e.g., a `regular_grid` search over 100 candidate values between 10\^-5 and 10\^5.
+Here we define a search strategy, e.g., a regular grid search (`grid_regular`) over 1000 candidate values between 10\^-10 and 10\^10.

 ```{r}
-lambda_grid <- grid_regular(penalty(range = c(-5,5), trans = log10_trans()), levels = 100)
+lambda_grid <- grid_regular(
+  penalty(range = c(-10, 10), trans = log10_trans()), 
+  levels = 1000
+  )

 ```

-To evaluate how well a candidate value for lambda performs, we use 10-fold cross validation (`vfold_cv`).
+To evaluate how well a candidate value for lambda performs, we use 5-fold cross validation (`vfold_cv`).

 ```{r}
-folds <- vfold_cv(train, v = 10)
+folds <- vfold_cv(train, v = 5)

 ```

 Finally, we can start the tuning process by passing the workflow to the `tune_grid` function.

 ```{r}
-house_prices_wflow_tuned <- house_prices_wflow_tunable %>% 
-  tune_grid(
+house_prices_wflow_tuned <- tune_grid(
+    house_prices_wflow_tunable,
    resamples = folds,
    grid = lambda_grid
    )
@@ -170,11 +177,9 @@ house_prices_wflow_tuned <- house_prices_wflow_tunable %>%
 Collect the results.

 ```{r}
-m <- house_prices_wflow_tuned %>% 
+house_prices_wflow_tuned %>% 
  collect_metrics()

-m
-
 ```

 Plot the results.
@@ -200,7 +205,7 @@ And extract the single best value (i.e., with the lowest RMSE).

 ```{r}
 penalty_w_lowest_rmse <- house_prices_wflow_tuned %>%
-  select_best("rmse", maximize = FALSE)
+  select_best("rmse")

 penalty_w_lowest_rmse

@@ -219,7 +224,25 @@ lasso_mod_final <- finalize_workflow(
 ... and refit the model on the whole training data and evaluate on the test data.

 ```{r}
-last_fit(lasso_mod_final, data_split) %>%
-  collect_metrics()
+lasso_mod_final_fitted <- lasso_mod_final %>% 
+  fit(data = train)
+
+preds <- predict(lasso_mod_final_fitted, new_data = test)
+
+test_w_preds <- test %>% 
+  cbind(preds)
+
+test_w_preds %>% 
+  yardstick::rmse(truth = SalePrice, estimate = .pred)
+

 ```
+
+Inspect the model coefficients for the tuned lambda value.
+
+```{r}
+lasso_mod_final_fitted %>%
+  pull_workflow_fit() %>%
+  tidy()
+
+```
\ No newline at end of file