#> List of 2
#> $ 1forge.com :List of 3
#> ..$ added : chr "2017-05-30T08:34:14.000Z"
#> ..$ preferred: chr "0.0.1"
#> ..$ versions :List of 1
#> $ 1password.com:events:List of 3
#> ..$ added : chr "2021-07-19T10:17:09.188Z"
#> ..$ preferred: chr "1.0.0"
#> ..$ versions :List of 1
(not written yet, but general idea will be…)
value
a list of lists of observations?
tidyr::unnest_longer(value)
separates into rows
value
a list of lists of variables?
tidyr::unnest_wider(value)
separates into columns
tidyr::unnest_auto()
uses a heuristic to choose between those
unnest_auto()
choose?
flowchart TD
  B{{"How many elements have names?"}}
  B -->|"none"| C["Unnest longer, drop indices"]
  B -->|"some"| C
  B -->|"all"| D{{"Are any names reused?"}}
  D -->|"yes"| E["Unnest wider"]
  D -->|"no"| F["Unnest longer, keep indices"]
unnest_auto()
miss? (1)
Column is a named list (not a list of named lists).
rlang::is_named(df$col)
dplyr::mutate(col_name = names(col))
before unnesting.
unnest_auto()
miss? (2)
One or more elements of a list column are empty.
unnest_longer()
, even if it’s just a few missing values in a column that is otherwise obviously unnest_wider-worthy.
any(lengths(df$col) == 0)
unnest_auto()
checks only on values of col
that have lengths(col) > 0
unnest_auto()
miss? (3)
One or more elements of a list column lack names, while other elements have names.
unnest_longer(col, indices_to = "descriptive_name")
as fall-through
flowchart TD
  A{{"Is the overall column named?"}}
  A -->|"yes"| AA["Create a names column"]
  AA --> B
  A -->|"no"| B
  B{{"How many non-empty elements have names?"}}
  B -->|"none"| C["Unnest longer, drop indices"]
  B -->|"some"| F
  B -->|"all"| D{{"Are any names reused?"}}
  D -->|"yes"| E["Unnest wider"]
  D -->|"no"| F["Unnest longer, keep indices"]
enframe()
#> # A tibble: 2,529 × 2
#> api_name value
#> <chr> <list>
#> 1 1forge.com <named list [3]>
#> 2 1password.com:events <named list [3]>
#> 3 1password.local:connect <named list [3]>
#> 4 6-dot-authentiqio.appspot.com <named list [3]>
#> 5 ably.io:platform <named list [3]>
#> 6 ably.net:control <named list [3]>
#> 7 abstractapi.com:geolocation <named list [3]>
#> 8 adafruit.com <named list [3]>
#> 9 adobe.com:aem <named list [3]>
#> 10 adyen.com:AccountService <named list [3]>
#> # ℹ 2,519 more rows
value
column: Unnest wider or longer?
(incorporate flowchart once it exists)
#> # A tibble: 2,529 × 4
#> api_name added preferred versions
#> <chr> <chr> <chr> <list>
#> 1 1forge.com 2017-05-30T08:34:14.000Z 0.0.1 <named list>
#> 2 1password.com:events 2021-07-19T10:17:09.188Z 1.0.0 <named list>
#> 3 1password.local:connect 2021-04-16T15:56:45.939Z 1.5.7 <named list>
#> 4 6-dot-authentiqio.appspot.com 2017-03-15T14:45:58.000Z 6 <named list>
#> 5 ably.io:platform 2019-07-13T11:28:07.000Z 1.1.0 <named list>
#> 6 ably.net:control 2021-07-26T09:45:31.536Z 1.0.14 <named list>
#> 7 abstractapi.com:geolocation 2021-04-14T17:12:40.648Z 1.0.0 <named list>
#> 8 adafruit.com 2018-02-10T10:41:43.000Z 2.0.0 <named list>
#> 9 adobe.com:aem 2019-01-03T07:01:34.000Z 3.7.1-pr… <named list>
#> 10 adyen.com:AccountService 2023-02-17T11:02:20.620Z 6 <named list>
#> # ℹ 2,519 more rows
#> # A tibble: 7,587 × 3
#> api_name value value_id
#> <chr> <named list> <chr>
#> 1 1forge.com <chr [1]> added
#> 2 1forge.com <chr [1]> preferred
#> 3 1forge.com <named list [1]> versions
#> 4 1password.com:events <chr [1]> added
#> 5 1password.com:events <chr [1]> preferred
#> 6 1password.com:events <named list [1]> versions
#> 7 1password.local:connect <chr [1]> added
#> 8 1password.local:connect <chr [1]> preferred
#> 9 1password.local:connect <named list [1]> versions
#> 10 6-dot-authentiqio.appspot.com <chr [1]> added
#> # ℹ 7,577 more rows
choose_unnest_direction()
# Based on `tidyr::unnest_auto` and `tidyr:::guess_dir`

#' Suggest the code that unnests a list column
#'
#' Inspects one column of `df` and returns, as text, the pipeline step(s)
#' suggested for rectangling it. The text is a fragment: it does not include
#' the data frame itself.
#'
#' The decision is made as follows:
#' * If the column itself is named, a `dplyr::mutate()` step that stores those
#'   names in a new column is suggested first.
#' * Empty elements are ignored when choosing a direction.
#' * No remaining element has names: `tidyr::unnest_longer()` without indices.
#' * Every remaining element has names and at least one name is shared by all
#'   of them: `tidyr::unnest_wider()`.
#' * Anything else (no shared names, a mix of named and unnamed elements, or
#'   nothing but empty elements): `tidyr::unnest_longer()` with the indices
#'   kept in a new column.
#'
#' `descriptive_name` and `descriptive_index_name` in the result are
#' placeholders that are meant to be replaced with meaningful column names.
#'
#' @param df A data frame.
#' @param col The column to inspect, as a selection understood by
#'   `tidyselect::vars_pull()`.
#'
#' @return A length-1 glue string holding the suggested step(s), joined with
#'   the native pipe.
choose_unnest_direction <- function(df, col) {
  col <- tidyselect::vars_pull(dplyr::tbl_vars(df), {{ col }})
  col_contents <- df[[col]]

  steps <- character()
  index_name <- "descriptive_name"
  if (rlang::is_named(col_contents)) {
    steps <- c(
      steps,
      glue::glue(
        "dplyr::mutate(descriptive_name = names({col}), .before = {col})"
      )
    )
    # The mutate() step above already claims `descriptive_name`. Reusing it
    # for the indices would give two columns the same name, so use a distinct
    # placeholder.
    index_name <- "descriptive_index_name"
  }

  # Only elements that contain something can tell us how the column is named.
  non_empties <- col_contents[lengths(col_contents) > 0]
  element_names <- purrr::map(non_empties, names)
  # TRUE: nothing is named; FALSE: everything is named; anything else
  # (both values, or none when every element is empty): undecided.
  lacks_names <- unique(purrr::map_lgl(element_names, is.null))

  all_named <- identical(lacks_names, FALSE)
  unnest_step <- if (identical(lacks_names, TRUE)) {
    glue::glue("tidyr::unnest_longer({col}, indices_include = FALSE)")
  } else if (
    all_named && length(purrr::reduce(element_names, intersect)) > 0
  ) {
    glue::glue("tidyr::unnest_wider({col})")
  } else {
    glue::glue("tidyr::unnest_longer({col}, indices_to = \"{index_name}\")")
  }

  glue::glue_collapse(c(steps, unnest_step), sep = " |>\n ")
}
#> # A tibble: 3,992 × 5
#> api_name added preferred versions version
#> <chr> <chr> <chr> <named list> <chr>
#> 1 1forge.com 2017-05-30T08:3… 0.0.1 <named list> 0.0.1
#> 2 1password.com:events 2021-07-19T10:1… 1.0.0 <named list> 1.0.0
#> 3 1password.local:connect 2021-04-16T15:5… 1.5.7 <named list> 1.5.7
#> 4 6-dot-authentiqio.appspot.com 2017-03-15T14:4… 6 <named list> 6
#> 5 ably.io:platform 2019-07-13T11:2… 1.1.0 <named list> 1.1.0
#> 6 ably.net:control 2021-07-26T09:4… 1.0.14 <named list> 1.0.14
#> 7 abstractapi.com:geolocation 2021-04-14T17:1… 1.0.0 <named list> 1.0.0
#> 8 adafruit.com 2018-02-10T10:4… 2.0.0 <named list> 2.0.0
#> 9 adobe.com:aem 2019-01-03T07:0… 3.7.1-pr… <named list> 3.7.1-…
#> 10 adyen.com:AccountService 2023-02-17T11:0… 6 <named list> 3
#> # ℹ 3,982 more rows
#> # A tibble: 6 × 9
#> api_name added preferred `0.0.1` `1.0.0` `1.5.7` `6`
#> <chr> <chr> <chr> <list> <list> <list> <list>
#> 1 1forge.com 2017… 0.0.1 <named list> <NULL> <NULL> <NULL>
#> 2 1password… 2021… 1.0.0 <NULL> <named list> <NULL> <NULL>
#> 3 1password… 2021… 1.5.7 <NULL> <NULL> <named list> <NULL>
#> 4 6-dot-aut… 2017… 6 <NULL> <NULL> <NULL> <named list>
#> 5 ably.io:p… 2019… 1.1.0 <NULL> <NULL> <NULL> <NULL>
#> 6 ably.net:… 2021… 1.0.14 <NULL> <NULL> <NULL> <NULL>
#> # ℹ 2 more variables: `1.1.0` <list>, `1.0.14` <list>
#> # A tibble: 2,529 × 3
#> api_name version versions
#> <chr> <chr> <named list>
#> 1 1forge.com 0.0.1 <named list [7]>
#> 2 1password.com:events 1.0.0 <named list [7]>
#> 3 1password.local:connect 1.5.7 <named list [7]>
#> 4 6-dot-authentiqio.appspot.com 6 <named list [7]>
#> 5 ably.io:platform 1.1.0 <named list [7]>
#> 6 ably.net:control 1.0.14 <named list [7]>
#> 7 abstractapi.com:geolocation 1.0.0 <named list [8]>
#> 8 adafruit.com 2.0.0 <named list [7]>
#> 9 adobe.com:aem 3.7.1-pre.0 <named list [7]>
#> 10 adyen.com:AccountService 6 <named list [7]>
#> # ℹ 2,519 more rows
#> # A tibble: 2,529 × 10
#> api_name version added info updated swaggerUrl swaggerYamlUrl
#> <chr> <chr> <chr> <list> <chr> <chr> <chr>
#> 1 1forge.com 0.0.1 2017… <named list> 2017-0… https://a… https://api.a…
#> 2 1password.com:e… 1.0.0 2021… <named list> 2023-0… https://a… https://api.a…
#> 3 1password.local… 1.5.7 2021… <named list> 2023-0… https://a… https://api.a…
#> 4 6-dot-authentiq… 6 2017… <named list> 2021-0… https://a… https://api.a…
#> 5 ably.io:platform 1.1.0 2019… <named list> 2021-0… https://a… https://api.a…
#> 6 ably.net:control 1.0.14 2021… <named list> 2021-0… https://a… https://api.a…
#> 7 abstractapi.com… 1.0.0 2021… <named list> 2021-0… https://a… https://api.a…
#> 8 adafruit.com 2.0.0 2018… <named list> 2021-0… https://a… https://api.a…
#> 9 adobe.com:aem 3.7.1-… 2019… <named list> 2023-0… https://a… https://api.a…
#> 10 adyen.com:Accou… 6 2023… <named list> 2023-0… https://a… https://api.a…
#> # ℹ 2,519 more rows
#> # ℹ 3 more variables: openapiVer <chr>, link <chr>, externalDocs <list>
#> Rows: 2,529
#> Columns: 10
#> $ api_name <chr> "1forge.com", "1password.com:events", "1password.local:…
#> $ version <chr> "0.0.1", "1.0.0", "1.5.7", "6", "1.1.0", "1.0.14", "1.0…
#> $ added <chr> "2017-05-30T08:34:14.000Z", "2021-07-19T10:17:09.188Z",…
#> $ info <list> [["contact@1forge.com", "1Forge", "http://1forge.com"]…
#> $ updated <chr> "2017-06-27T16:49:57.000Z", "2023-02-27T15:08:09.418Z",…
#> $ swaggerUrl <chr> "https://api.apis.guru/v2/specs/1forge.com/0.0.1/swagge…
#> $ swaggerYamlUrl <chr> "https://api.apis.guru/v2/specs/1forge.com/0.0.1/swagge…
#> $ openapiVer <chr> "2.0", "3.0.0", "3.0.2", "3.0.0", "3.0.1", "3.0.1", "3.…
#> $ link <chr> "https://api.apis.guru/v2/specs/1forge.com/0.0.1.json",…
#> $ externalDocs <list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, ["API …
#> tidyr::unnest_wider(info)
#> # A tibble: 2,529 × 76
#> api_name version added info_contact info_description info_title info_version
#> <chr> <chr> <chr> <list> <chr> <chr> <chr>
#> 1 1forge.c… 0.0.1 2017… <named list> "Stock and Fore… 1Forge Fi… 0.0.1
#> 2 1passwor… 1.0.0 2021… <NULL> "1Password Even… Events API 1.0.0
#> 3 1passwor… 1.5.7 2021… <named list> "REST API inter… 1Password… 1.5.7
#> 4 6-dot-au… 6 2017… <named list> "Strong authent… Authentiq… 6
#> 5 ably.io:… 1.1.0 2019… <named list> "The [REST API … Platform … 1.1.0
#> 6 ably.net… 1.0.14 2021… <named list> "Use the Contro… Control A… 1.0.14
#> 7 abstract… 1.0.0 2021… <NULL> "Abstract IP ge… IP geoloc… 1.0.0
#> 8 adafruit… 2.0.0 2018… <NULL> "### The Intern… Adafruit … 2.0.0
#> 9 adobe.co… 3.7.1-… 2019… <named list> "Swagger AEM is… Adobe Exp… 3.7.1-pre.0
#> 10 adyen.co… 6 2023… <named list> "This API is us… Account A… 6
#> # ℹ 2,519 more rows
#> # ℹ 69 more variables: `info_x-apisguru-categories` <list>,
#> # `info_x-logo` <list>, `info_x-origin` <list>, `info_x-providerName` <chr>,
#> # `info_x-serviceName` <chr>, info_license <list>, info_termsOfService <chr>,
#> # `info_x-unofficialSpec` <lgl>, `info_x-preferred` <lgl>,
#> # `info_x-publicVersion` <lgl>, `info_x-timestamp` <chr>,
#> # `info_x-description-language` <chr>, `info_x-release-note` <list>, …
hoist()
#> # A tibble: 2,529 × 11
#> api_name version added categories info updated swaggerUrl
#> <chr> <chr> <chr> <list> <list> <chr> <chr>
#> 1 1forge.com 0.0.1 2017… <chr [1]> <named list> 2017-0… https://a…
#> 2 1password.com:events 1.0.0 2021… <chr [1]> <named list> 2023-0… https://a…
#> 3 1password.local:con… 1.5.7 2021… <chr [1]> <named list> 2023-0… https://a…
#> 4 6-dot-authentiqio.a… 6 2017… <chr [1]> <named list> 2021-0… https://a…
#> 5 ably.io:platform 1.1.0 2019… <chr [1]> <named list> 2021-0… https://a…
#> 6 ably.net:control 1.0.14 2021… <chr [1]> <named list> 2021-0… https://a…
#> 7 abstractapi.com:geo… 1.0.0 2021… <chr [1]> <named list> 2021-0… https://a…
#> 8 adafruit.com 2.0.0 2018… <chr [1]> <named list> 2021-0… https://a…
#> 9 adobe.com:aem 3.7.1-… 2019… <chr [1]> <named list> 2023-0… https://a…
#> 10 adyen.com:AccountSe… 6 2023… <chr [1]> <named list> 2023-0… https://a…
#> # ℹ 2,519 more rows
#> # ℹ 4 more variables: swaggerYamlUrl <chr>, openapiVer <chr>, link <chr>,
#> # externalDocs <list>
# Pull each API's category vector out of `info`, then keep only the APIs
# whose categories include "open_data".
all_apis_preferred_wide |>
  tidyr::hoist(info, categories = "x-apisguru-categories") |>
  dplyr::filter(
    purrr::map_lgl(
      categories,
      \(api_categories) "open_data" %in% api_categories
    )
  ) |>
  dplyr::ungroup()
#> # A tibble: 318 × 11
#> api_name version added categories info updated swaggerUrl
#> <chr> <chr> <chr> <list> <list> <chr> <chr>
#> 1 amentum.space:aviat… 1.5.0 2021… <chr [2]> <named list> 2023-0… https://a…
#> 2 amentum.space:gravi… 1.1.1 2021… <chr [2]> <named list> 2021-0… https://a…
#> 3 amentum.space:space… 1.1.2 2021… <chr [2]> <named list> 2023-0… https://a…
#> 4 api.gov.uk:vehicle-… 1.1.0 2020… <chr [1]> <named list> 2021-0… https://a…
#> 5 apis.guru 2.2.0 2015… <chr [2]> <named list> 2023-0… https://a…
#> 6 apisetu.gov.in:aaha… 3.0.0 2021… <chr [1]> <named list> 2021-0… https://a…
#> 7 apisetu.gov.in:acko 3.0.0 2021… <chr [1]> <named list> 2021-0… https://a…
#> 8 apisetu.gov.in:agtr… 3.0.0 2021… <chr [1]> <named list> 2021-0… https://a…
#> 9 apisetu.gov.in:ahar… 3.0.0 2021… <chr [1]> <named list> 2021-0… https://a…
#> 10 apisetu.gov.in:aiim… 3.0.0 2021… <chr [1]> <named list> 2021-0… https://a…
#> # ℹ 308 more rows
#> # ℹ 4 more variables: swaggerYamlUrl <chr>, openapiVer <chr>, link <chr>,
#> # externalDocs <list>
unnest_auto()
goes wider, but shared names are coincidental.
DSLC.io/wapir | Jon Harmon | wapir.io