Filter Health Data by Demographic Variables
Source:R/sus_data_filter_demographics.R
sus_data_filter_demographics.Rd
Filters health data based on demographic characteristics such as sex, race,
age range, education level, region, and municipality. This function complements
sus_data_filter_cid() by enabling stratified analyses by population subgroups.
Usage
sus_data_filter_demographics(
df,
sex = NULL,
race = NULL,
age_range = NULL,
education = NULL,
region = NULL,
municipality_code = NULL,
drop_ignored = FALSE,
lang = "pt",
verbose = TRUE
)
Arguments
- df
A data frame containing health data.
- sex
Character vector specifying sex categories to include. Accepts values in English, Portuguese, or Spanish (e.g.,
"Male", "Masculino", "Masculino"). If NULL (default), includes all sexes.
- race
Character vector specifying race/color categories to include. Accepts IBGE standard categories in multiple languages. If
NULL (default), includes all races.
- age_range
Numeric vector of length 2 specifying the age range
c(min_age, max_age). Use Inf for no upper limit. If NULL (default), includes all ages.
- education
Character vector specifying education levels to include. If
NULL (default), includes all education levels.
- region
A string indicating a predefined group of states or regions (names are accepted in PT, EN, and ES). See the Details section below for the available groups.
- municipality_code
Character or numeric vector specifying municipality codes (IBGE 6 or 7-digit codes) to include. If
NULL (default), includes all municipalities.
- drop_ignored
Logical. If
TRUE, explicitly removes rows where demographic variables (sex, race, and education) contain missing values (NA) or DATASUS ignored codes (e.g., "9", "Ignorado"). Default is FALSE.
- lang
Character string specifying the language for messages. Options:
"en" (English), "pt" (Portuguese, default), "es" (Spanish).
- verbose
Logical. If
TRUE (default), prints a filtering summary.
Details
The function automatically detects column names in different languages and
standardizations. It handles both original DATASUS column names and standardized
names from sus_data_standardize().
Sex categories (case-insensitive):
English: "Male", "Female"
Portuguese: "Masculino", "Feminino"
Spanish: "Masculino", "Femenino"
Race/Color categories (IBGE standard):
English: "White", "Black", "Yellow", "Brown", "Indigenous"
Portuguese: "Branca", "Preta", "Amarela", "Parda", "Indigena"
Spanish: "Blanca", "Negra", "Amarilla", "Parda", "Indigena"
IBGE Macro-regions:
"norte": c("AC", "AP", "AM", "PA", "RO", "RR", "TO")
"nordeste": c("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE")
"centro_oeste": c("DF", "GO", "MT", "MS")
"sudeste": c("ES", "MG", "RJ", "SP")
"sul": c("PR", "RS", "SC")
Biomes (Ecological Borders):
"amazonia_legal": c("AC", "AP", "AM", "PA", "RO", "RR", "MT", "MA", "TO")
"mata_atlantica": c("AL", "BA", "CE", "ES", "GO", "MA", "MG", "MS", "PB", "PE", "PI", "PR", "RJ", "RN", "RS", "SC", "SE", "SP")
"caatinga": c("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE", "MG")
"cerrado": c("BA", "DF", "GO", "MA", "MG", "MS", "MT", "PA", "PI", "PR", "RO", "SP", "TO")
"pantanal": c("MT", "MS")
"pampa": c("RS")
Hydrography & Climate:
"bacia_amazonia": c("AC", "AM", "AP", "MT", "PA", "RO", "RR")
"bacia_sao_francisco": c("AL", "BA", "DF", "GO", "MG", "PE", "SE")
"bacia_parana": c("GO", "MG", "MS", "PR", "SP")
"bacia_tocantins": c("GO", "MA", "PA", "TO")
"semi_arido": c("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE", "MG")
Health, Agriculture & Geopolitics:
"matopiba": c("MA", "TO", "PI", "BA")
"arco_desmatamento": c("RO", "AC", "AM", "PA", "MT", "MA")
"dengue_hyperendemic": c("GO", "MS", "MT", "PR", "RJ", "SP")
"sudene": c("AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE", "MG", "ES")
"fronteira_brasil": c("AC", "AM", "AP", "MT", "MS", "PA", "PR", "RO", "RR", "RS", "SC")
Examples
if (FALSE) { # \dontrun{
library(climasus4r)
# Filter by sex only
df_women <- sus_data_filter_demographics(df, sex = "Female")
# Filter by age range (elderly, 65+)
df_elderly <- sus_data_filter_demographics(df, age_range = c(65, Inf))
# Filter by multiple criteria (elderly women)
df_elderly_women <- sus_data_filter_demographics(
df,
sex = c("Feminino"),
age_range = c(65, Inf),
lang = "pt"
)
# Filter by race and municipality
df_subset <- sus_data_filter_demographics(
df,
race = c("Branca", "Parda"),
municipality_code = c("3550308", "3304557"), # Sao Paulo, Rio de Janeiro
lang = "pt"
)
# Combined filtering (young children, ages 0 to 5, all sexes, North macro-region)
df_children <- sus_data_filter_demographics(
df,
age_range = c(0, 5),
region = "Norte",
lang = "en"
)
} # }