Getting Started

library(countmaskr)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> Loading required package: tibble
#> Loading required package: tidyr
library(knitr)

Code logic plot

%%{init: { "theme": "base", "themeVariables": { "fontSize": "20px", "primaryColor": "#bbdefb", "primaryTextColor": "#0d47a1", "primaryBorderColor": "#1976d2", "lineColor": "#5a5957" }, "flowchart": { "nodeSpacing": 50, "rankSpacing": 60, "curve": "basis" } }}%% flowchart TD A([frequency table for a variable]):::gray --> B{is a primary cell present?}:::decision B -- No --> T([terminate]):::terminal B -- Yes --> C[mask values of primary cells as < threshold]:::pc C --> D{are there 2 or more primary cells in the same column?}:::decision D -- Yes --> E[mask a column-wise secondary cell in the same column in a different row]:::csc D -- No --> F{one-way table?}:::decision E --> F F -- Yes --> T F -- No --> G{can any masked cells be calculated from the same row across different columns?}:::decision G -- No --> T G -- Yes --> H[mask a row-wise secondary cell in a different column in the same row]:::rsc H --> I{can this cell be calculated in the same column from a different row?}:::decision I -- No --> T I -- Yes --> J[mask a column-wise secondary cell in the same column in a different row]:::csc J --> G classDef gray fill:#e8e7e3,stroke:#aaa,color:#333,font-weight:500 classDef terminal fill:#c8e6c9,stroke:#4caf50,color:#1b5e20,font-weight:600 classDef decision fill:#bbdefb,stroke:#1976d2,color:#0d47a1,font-weight:600 classDef pc fill:#fff9c4,stroke:#f9a825,color:#7a3000,font-weight:600 classDef csc fill:#ffe0b2,stroke:#fb8c00,color:#4e342e,font-weight:600 classDef rsc fill:#dcedc8,stroke:#8bc34a,color:#33691e,font-weight:600

One dimensional frequency table

data("countmaskr_data")

aggregate_table <- countmaskr_data %>%
  select(-c(id, age)) %>%
  gather(block, Characteristics) %>%
  group_by(block, Characteristics) %>%
  summarise(N = n()) %>%
  ungroup()

Algorithm 1

aggregate_table %>%
  group_by(block) %>%
  mutate(N_masked = mask_counts(N)) %>%
  kable()

block	Characteristics	N	N_masked
age_group	18-29	243	243
age_group	30-39	198	198
age_group	40-49	215	215
age_group	50-64	323	323
age_group	65+	521	521
ethnicity	Hispanic	143	143
ethnicity	Non-Hispanic	1346	1,346
ethnicity	Other	11	11
gender	Female	728	<730
gender	Male	763	763
gender	Other	9	<11
race	American Indian/ Pacific Islander	66	<70
race	Asian	215	215
race	Black	453	453
race	Other	6	<11
race	White	760	760

Algorithm 2

aggregate_table %>%
  group_by(block) %>%
  mutate(N_masked = mask_counts_2(N)) %>%
  kable()

block	Characteristics	N	N_masked
age_group	18-29	243	243
age_group	30-39	198	198
age_group	40-49	215	215
age_group	50-64	323	323
age_group	65+	521	521
ethnicity	Hispanic	143	143
ethnicity	Non-Hispanic	1346	1,346
ethnicity	Other	11	11
gender	Female	728	728
gender	Male	763	>761
gender	Other	9	<11
race	American Indian/ Pacific Islander	66	66
race	Asian	215	215
race	Black	453	453
race	Other	6	<11
race	White	760	>755

Algorithm 3

aggregate_table %>%
  group_by(block) %>%
  mutate(N_masked = perturb_counts(N)) %>%
  kable()

block	Characteristics	N	N_masked
age_group	18-29	243	243
age_group	30-39	198	198
age_group	40-49	215	215
age_group	50-64	323	323
age_group	65+	521	521
ethnicity	Hispanic	143	143
ethnicity	Non-Hispanic	1346	1,346
ethnicity	Other	11	11
gender	Female	728	728
gender	Male	763	762
gender	Other	9	10
race	American Indian/ Pacific Islander	66	66
race	Asian	215	214
race	Black	453	452
race	Other	6	10
race	White	760	758

Using `mask_table()`

mask_table() is a multi-tasking function which allows for masking, obtaining original and masked percentages on an aggregated table.

One-way masking on the original column.

mask_table(aggregate_table, group_by = "block", col_groups = list("N")) %>%
  kable()

block	Characteristics	N
age_group	18-29	243
age_group	30-39	198
age_group	40-49	215
age_group	50-64	323
age_group	65+	521
ethnicity	Hispanic	143
ethnicity	Non-Hispanic	1,346
ethnicity	Other	11
gender	Female	<730
gender	Male	763
gender	Other	<11
race	American Indian/ Pacific Islander	<70
race	Asian	215
race	Black	453
race	Other	<11
race	White	760

One-way masking while preserving original column and creating new masked columns

Naming convention for the masked columns follow {col}_N_masked pattern.

mask_table(
  aggregate_table,
  group_by = "block",
  col_groups = list("N"),
  overwrite_columns = FALSE
) %>%
  kable()

block	Characteristics	N	N_masked
age_group	18-29	243	243
age_group	30-39	198	198
age_group	40-49	215	215
age_group	50-64	323	323
age_group	65+	521	521
ethnicity	Hispanic	143	143
ethnicity	Non-Hispanic	1346	1,346
ethnicity	Other	11	11
gender	Female	728	<730
gender	Male	763	763
gender	Other	9	<11
race	American Indian/ Pacific Islander	66	<70
race	Asian	215	215
race	Black	453	453
race	Other	6	<11
race	White	760	760

Owo-way masking with computing original and masked percentages

Naming convention for the original and masked percentages follow {col}_perc and {col}_perc_masked pattern.

mask_table(
  aggregate_table,
  group_by = "block",
  col_groups = list("N"),
  overwrite_columns = TRUE,
  percentages = TRUE
) %>%
  kable()

block	Characteristics	N	N_masked	N_perc	N_perc_masked
age_group	18-29	243	243	16 %	16 %
age_group	30-39	198	198	13 %	13 %
age_group	40-49	215	215	14 %	14 %
age_group	50-64	323	323	22 %	22 %
age_group	65+	521	521	35 %	35 %
ethnicity	Hispanic	143	143	10 %	10 %
ethnicity	Non-Hispanic	1346	1,346	90 %	90 %
ethnicity	Other	11	11	1 %	1 %
gender	Female	728	<730	49 %	<49 %
gender	Male	763	763	51 %	51 %
gender	Other	9	<11	1 %	masked cell
race	American Indian/ Pacific Islander	66	<70	4 %	<5 %
race	Asian	215	215	14 %	14 %
race	Black	453	453	30 %	30 %
race	Other	6	<11	0 %	masked cell
race	White	760	760	51 %	51 %

Two-way frequency table

two_way_freq_table <- countmaskr_data %>%
  count(race, gender) %>%
  pivot_wider(names_from = gender, values_from = n) %>%
  mutate(
    across(all_of(c("Female", "Male", "Other")), ~ ifelse(is.na(.), 0, .)),
    Overall = Female + Male + Other,
    .after = 1
  )

mask_table(
  two_way_freq_table,
  col_groups = list(c("Overall", "Female", "Male", "Other")),
  overwrite_columns = TRUE,
  percentages = FALSE
) %>%
  kable()

race	Overall	Female	Male	Other
American Indian/ Pacific Islander	<70	29	<40	0
Asian	215	<100	118	<11
Black	453	<225	228	<11
Other	<11	0	<11	0
White	760	379	<375	<11