
Getting Started
getting-started.Rmd
library(countmaskr)
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#> Loading required package: tibble
#> Loading required package: tidyr
library(knitr)
Code logic plot
%%{init: {
"theme": "base",
"themeVariables": {
"fontSize": "20px",
"primaryColor": "#bbdefb",
"primaryTextColor": "#0d47a1",
"primaryBorderColor": "#1976d2",
"lineColor": "#5a5957"
},
"flowchart": { "nodeSpacing": 50, "rankSpacing": 60, "curve": "basis" }
}}%%
flowchart TD
A([frequency table
for a variable]):::gray --> B{is a primary
cell present?}:::decision
B -- No --> T([terminate]):::terminal
B -- Yes --> C[mask values of primary cells
as < threshold]:::pc
C --> D{are there 2 or more primary
cells in the same column?}:::decision
D -- Yes --> E[mask a column-wise secondary
cell in the same column
in a different row]:::csc
D -- No --> F{one-way table?}:::decision
E --> F
F -- Yes --> T
F -- No --> G{can any masked cells be calculated
from the same row across
different columns?}:::decision
G -- No --> T
G -- Yes --> H[mask a row-wise secondary cell
in a different column
in the same row]:::rsc
H --> I{can this cell be calculated in
the same column from
a different row?}:::decision
I -- No --> T
I -- Yes --> J[mask a column-wise secondary cell
in the same column
in a different row]:::csc
J --> G
classDef gray fill:#e8e7e3,stroke:#aaa,color:#333,font-weight:500
classDef terminal fill:#c8e6c9,stroke:#4caf50,color:#1b5e20,font-weight:600
classDef decision fill:#bbdefb,stroke:#1976d2,color:#0d47a1,font-weight:600
classDef pc fill:#fff9c4,stroke:#f9a825,color:#7a3000,font-weight:600
classDef csc fill:#ffe0b2,stroke:#fb8c00,color:#4e342e,font-weight:600
classDef rsc fill:#dcedc8,stroke:#8bc34a,color:#33691e,font-weight:600
One dimensional frequency table
# Load the countmaskr_data example data set.
data("countmaskr_data")

# Build a one-way frequency table in long form: one row per
# (block, Characteristics) pair, where `block` is the source column name and
# `Characteristics` is the level observed in that column. `N` is the row count.
aggregate_table <- countmaskr_data %>%
  # id and age are not tabulated.
  select(-c(id, age)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "block",
    values_to = "Characteristics",
    # Stack every column as character so columns of different types (or
    # factors with different levels) combine the way gather() combined them.
    values_transform = list(Characteristics = as.character)
  ) %>%
  # count() groups, tallies and returns an ungrouped result in one step.
  count(block, Characteristics, name = "N")
Algorithm 1
| block | Characteristics | N | N_masked |
|---|---|---|---|
| age_group | 18-29 | 243 | 243 |
| age_group | 30-39 | 198 | 198 |
| age_group | 40-49 | 215 | 215 |
| age_group | 50-64 | 323 | 323 |
| age_group | 65+ | 521 | 521 |
| ethnicity | Hispanic | 143 | 143 |
| ethnicity | Non-Hispanic | 1346 | 1,346 |
| ethnicity | Other | 11 | 11 |
| gender | Female | 728 | <730 |
| gender | Male | 763 | 763 |
| gender | Other | 9 | <11 |
| race | American Indian/ Pacific Islander | 66 | <70 |
| race | Asian | 215 | 215 |
| race | Black | 453 | 453 |
| race | Other | 6 | <11 |
| race | White | 760 | 760 |
Algorithm 2
| block | Characteristics | N | N_masked |
|---|---|---|---|
| age_group | 18-29 | 243 | 243 |
| age_group | 30-39 | 198 | 198 |
| age_group | 40-49 | 215 | 215 |
| age_group | 50-64 | 323 | 323 |
| age_group | 65+ | 521 | 521 |
| ethnicity | Hispanic | 143 | 143 |
| ethnicity | Non-Hispanic | 1346 | 1,346 |
| ethnicity | Other | 11 | 11 |
| gender | Female | 728 | 728 |
| gender | Male | 763 | >761 |
| gender | Other | 9 | <11 |
| race | American Indian/ Pacific Islander | 66 | 66 |
| race | Asian | 215 | 215 |
| race | Black | 453 | 453 |
| race | Other | 6 | <11 |
| race | White | 760 | >755 |
Algorithm 3
| block | Characteristics | N | N_masked |
|---|---|---|---|
| age_group | 18-29 | 243 | 243 |
| age_group | 30-39 | 198 | 198 |
| age_group | 40-49 | 215 | 215 |
| age_group | 50-64 | 323 | 323 |
| age_group | 65+ | 521 | 521 |
| ethnicity | Hispanic | 143 | 143 |
| ethnicity | Non-Hispanic | 1346 | 1,346 |
| ethnicity | Other | 11 | 11 |
| gender | Female | 728 | 728 |
| gender | Male | 763 | 762 |
| gender | Other | 9 | 10 |
| race | American Indian/ Pacific Islander | 66 | 66 |
| race | Asian | 215 | 214 |
| race | Black | 453 | 452 |
| race | Other | 6 | 10 |
| race | White | 760 | 758 |
Using mask_table()
mask_table() is a multi-purpose function: it masks counts in an aggregated table and can also compute the original and masked percentages.
One-way masking on the original column.
# One-way masking: the counts in `N` are masked within each `block`, and the
# masked values replace the original column.
aggregate_table %>%
  mask_table(
    group_by = "block",
    col_groups = list("N")
  ) %>%
  kable()

| block | Characteristics | N |
|---|---|---|
| age_group | 18-29 | 243 |
| age_group | 30-39 | 198 |
| age_group | 40-49 | 215 |
| age_group | 50-64 | 323 |
| age_group | 65+ | 521 |
| ethnicity | Hispanic | 143 |
| ethnicity | Non-Hispanic | 1,346 |
| ethnicity | Other | 11 |
| gender | Female | <730 |
| gender | Male | 763 |
| gender | Other | <11 |
| race | American Indian/ Pacific Islander | <70 |
| race | Asian | 215 |
| race | Black | 453 |
| race | Other | <11 |
| race | White | 760 |
One-way masking while preserving original column and creating new masked columns
The naming convention for the masked columns follows the {col}_masked pattern (for example, N becomes N_masked).
# One-way masking that keeps the original counts: with
# overwrite_columns = FALSE the result carries `N` unchanged and adds the
# masked counts as `N_masked`.
aggregate_table %>%
  mask_table(
    group_by = "block",
    col_groups = list("N"),
    overwrite_columns = FALSE
  ) %>%
  kable()

| block | Characteristics | N | N_masked |
|---|---|---|---|
| age_group | 18-29 | 243 | 243 |
| age_group | 30-39 | 198 | 198 |
| age_group | 40-49 | 215 | 215 |
| age_group | 50-64 | 323 | 323 |
| age_group | 65+ | 521 | 521 |
| ethnicity | Hispanic | 143 | 143 |
| ethnicity | Non-Hispanic | 1346 | 1,346 |
| ethnicity | Other | 11 | 11 |
| gender | Female | 728 | <730 |
| gender | Male | 763 | 763 |
| gender | Other | 9 | <11 |
| race | American Indian/ Pacific Islander | 66 | <70 |
| race | Asian | 215 | 215 |
| race | Black | 453 | 453 |
| race | Other | 6 | <11 |
| race | White | 760 | 760 |
One-way masking with original and masked percentages
The naming convention for the original and masked percentages follows the {col}_perc and {col}_perc_masked patterns.
# One-way masking with percentages: percentages = TRUE adds the original and
# masked percentage columns (`N_perc`, `N_perc_masked`) next to the counts.
# NOTE(review): the rendered table below shows both `N` and `N_masked` even
# though overwrite_columns = TRUE is passed here -- confirm whether
# percentages = TRUE implies that the original columns are preserved.
aggregate_table %>%
  mask_table(
    group_by = "block",
    col_groups = list("N"),
    overwrite_columns = TRUE,
    percentages = TRUE
  ) %>%
  kable()

| block | Characteristics | N | N_masked | N_perc | N_perc_masked |
|---|---|---|---|---|---|
| age_group | 18-29 | 243 | 243 | 16 % | 16 % |
| age_group | 30-39 | 198 | 198 | 13 % | 13 % |
| age_group | 40-49 | 215 | 215 | 14 % | 14 % |
| age_group | 50-64 | 323 | 323 | 22 % | 22 % |
| age_group | 65+ | 521 | 521 | 35 % | 35 % |
| ethnicity | Hispanic | 143 | 143 | 10 % | 10 % |
| ethnicity | Non-Hispanic | 1346 | 1,346 | 90 % | 90 % |
| ethnicity | Other | 11 | 11 | 1 % | 1 % |
| gender | Female | 728 | <730 | 49 % | <49 % |
| gender | Male | 763 | 763 | 51 % | 51 % |
| gender | Other | 9 | <11 | 1 % | masked cell |
| race | American Indian/ Pacific Islander | 66 | <70 | 4 % | <5 % |
| race | Asian | 215 | 215 | 14 % | 14 % |
| race | Black | 453 | 453 | 30 % | 30 % |
| race | Other | 6 | <11 | 0 % | masked cell |
| race | White | 760 | 760 | 51 % | 51 % |
Two-way frequency table
# Build a two-way frequency table: one row per race, one count column per
# gender, plus an `Overall` row total placed directly after `race`.
two_way_freq_table <- countmaskr_data %>%
  count(race, gender) %>%
  pivot_wider(
    names_from = gender,
    values_from = n,
    # race/gender combinations that never occur become 0 instead of NA; this
    # keeps every count column the same type, unlike a per-column ifelse().
    values_fill = 0
  ) %>%
  mutate(
    Overall = Female + Male + Other,
    .after = 1
  )
# Two-way masking: the row total and the three gender columns are passed
# together as a single col_groups entry, and the masked counts overwrite the
# original columns. No percentages are computed.
two_way_freq_table %>%
  mask_table(
    col_groups = list(c("Overall", "Female", "Male", "Other")),
    overwrite_columns = TRUE,
    percentages = FALSE
  ) %>%
  kable()

| race | Overall | Female | Male | Other |
|---|---|---|---|---|
| American Indian/ Pacific Islander | <70 | 29 | <40 | 0 |
| Asian | 215 | <100 | 118 | <11 |
| Black | 453 | <225 | 228 | <11 |
| Other | <11 | 0 | <11 | 0 |
| White | 760 | 379 | <375 | <11 |