---
title: "Clean / Drop"
author: "Roland Krasser"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Clean / Drop}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
# Chunk defaults for the whole vignette: messages and warnings are
# suppressed here once instead of on every chunk (echo = TRUE is already
# the knitr default).
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE
)
```

```{r}
library(dplyr)
library(explore)
```

## Rename variable

```{r}
data <- use_data_titanic(count = FALSE)
glimpse(data)
```

```{r}
# Rename the variable Age to "age"
data <- data %>% clean_var(Age, name = "age")
glimpse(data)
```

## Replace NA values

```{r}
data <- use_data_beer()
data %>% describe(energy_kcal_100ml)
```

```{r}
# Replace NA values in energy_kcal_100ml with 42
data <- data %>% clean_var(energy_kcal_100ml, na = 42)
data %>% describe(energy_kcal_100ml)
```

## Set min max values

```{r}
data <- create_data_person()
data %>% describe(age)
```

```{r}
# Apply a minimum of 20 and a maximum of 80 to age
data <- data %>% clean_var(age, min_val = 20, max_val = 80)
data %>% describe(age)
```

## Rescale 0 to 1

```{r}
# Still using the person data created in the previous section
data %>% describe(income)
```

```{r}
# Rescale income to the range 0 to 1
data <- data %>% clean_var(income, rescale01 = TRUE)
data %>% describe(income)
```

## Cleaning text

```{r}
# Overwrite two values with untidy text (extra whitespace, upper case)
data[1, "handset"] <- " android "
data[2, "handset"] <- "ANDROID"
data %>% describe(handset)
```

```{r}
# Clean the text values in handset
data <- data %>% clean_var(handset, simplify_text = TRUE)
data %>% describe(handset)
```

## Drop variables

* `drop_var_no_variance()` Drop all variables with no variance
* `drop_var_not_numeric()` Drop all non-numeric variables
* `drop_var_low_variance()` Drop all variables with low variance
* `drop_var_by_names()` Drop variables by name
* `drop_var_with_na()` Drop all variables with NA-values

```{r}
# Describe the beer data before anything is dropped
data <- use_data_beer()
data %>% describe_tbl()
```

```{r}
data %>%
  drop_var_no_variance() %>%
  describe_tbl()
```

```{r}
data %>%
  drop_var_with_na() %>%
  describe_tbl()
```

## Drop observations

* `drop_obs_with_na()` Drop all observations with NA-values

```{r}
data %>%
  drop_obs_with_na() %>%
  describe_tbl()
```

* `drop_obs_if()` Drop all observations where expression is true

```{r}
# Distribution of type before dropping observations
data %>% count_pct(type)
```

```{r}
# Drop all observations where type is "Alkoholfrei"
data %>%
  drop_obs_if(type == "Alkoholfrei") %>%
  count_pct(type)
```