1

I have a character vector, `new_cols`, and I want to add one column per element of it to a data.frame, each filled with the same constant value. The vector can be of any length, but I can't quite grok a tidyverse way of doing this. Here is what I've tried so far:

library(dplyr)
library(purrr)

# Create a small example tibble: two numeric columns, two rows
df <- tibble(
  col1 = c(1, 2),
  col2 = c(3, 4)
)

# Names of the columns to add (neither exists in df yet)
new_cols <- c("miss1", "miss2")

# Attempt 1: mutate() with the !! operator, iterated with purrr.
# The lambda runs once per name in new_cols; each run returns df plus that
# single new column, and map_df() row-binds the results. That is why the
# output below has 4 rows, with NA wherever a column came from the other run.
map_df(new_cols, ~ {
  df %>% mutate(!!.x := "unknown")
})
#> # A tibble: 4 × 4
#>    col1  col2 miss1   miss2  
#>   <dbl> <dbl> <chr>   <chr>  
#> 1     1     3 unknown <NA>   
#> 2     2     4 unknown <NA>   
#> 3     1     3 <NA>    unknown
#> 4     2     4 <NA>    unknown


# Attempt 2: mutate() with across().
# across() selects from columns that already exist in df, so all_of(new_cols)
# fails here because miss1 and miss2 have not been created yet (see error).
df <- df %>%
  mutate(across(all_of(new_cols), ~"unknown", .names = "{.col}"))
#> Error in `mutate()`:
#> ℹ In argument: `across(all_of(new_cols), ~"unknown", .names =
#>   "{.col}")`.
#> Caused by error in `all_of()`:
#> ! Can't subset columns that don't exist.
#> ✖ Columns `miss1` and `miss2` don't exist.
#> Backtrace:
#>      ▆
#>   1. ├─df %>% mutate(across(all_of(new_cols), ~"unknown", .names = "{.col}"))
#>   2. ├─dplyr::mutate(., across(all_of(new_cols), ~"unknown", .names = "{.col}"))
#>   3. ├─dplyr:::mutate.data.frame(...)
#>   4. │ └─dplyr:::mutate_cols(.data, dplyr_quosures(...), by)
#>   5. │   ├─base::withCallingHandlers(...)
#>   6. │   └─dplyr:::mutate_col(dots[[i]], data, mask, new_columns)
#>   7. │     └─dplyr:::expand_across(dot)
#>   8. │       └─dplyr:::across_setup(...)
#>   9. │         └─tidyselect::eval_select(cols, data = data, error_call = error_call)
#>  10. │           └─tidyselect:::eval_select_impl(...)
#>  11. │             ├─tidyselect:::with_subscript_errors(...)
#>  12. │             │ └─rlang::try_fetch(...)
#>  13. │             │   └─base::withCallingHandlers(...)
#>  14. │             └─tidyselect:::vars_select_eval(...)
#>  15. │               └─tidyselect:::walk_data_tree(expr, data_mask, context_mask)
#>  16. │                 └─tidyselect:::eval_context(expr, context_mask, call = error_call)
#>  17. │                   ├─tidyselect:::with_chained_errors(...)
#>  18. │                   │ └─rlang::try_fetch(...)
#>  19. │                   │   ├─base::tryCatch(...)
#>  20. │                   │   │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  21. │                   │   │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  22. │                   │   │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>  23. │                   │   └─base::withCallingHandlers(...)
#>  24. │                   └─rlang::eval_tidy(as_quosure(expr, env), context_mask)
#>  25. ├─tidyselect::all_of(new_cols)
#>  26. │ └─tidyselect:::as_indices_impl(x, vars = vars, strict = TRUE)
#>  27. │   └─tidyselect:::chr_as_locations(x, vars, call = call, arg = arg)
#>  28. │     └─vctrs::vec_as_location(...)
#>  29. └─vctrs (local) `<fn>`()
#>  30.   └─vctrs:::stop_subscript_oob(...)
#>  31.     └─vctrs:::stop_subscript(...)
#>  32.       └─rlang::abort(...)

The following produces exactly the result I am after, but I want it to be generalizable, because `new_cols` could contain any number of column names.

# Desired result, written out by hand: mutate() and the !! operator with one
# `name := value` pair per new column (hard-coded for exactly two names)
df %>%
  mutate(
    !!new_cols[1] := "unknown", # Add first column, filled with "unknown"
    !!new_cols[2] := "unknown" # Add second column, filled with "unknown"
  )
#> # A tibble: 2 × 4
#>    col1  col2 miss1   miss2  
#>   <dbl> <dbl> <chr>   <chr>  
#> 1     1     3 unknown unknown
#> 2     2     4 unknown unknown

Created on 2023-07-07 with reprex v2.0.2

boshek
  • 4,100
  • 1
  • 31
  • 55

0 Answers0