From 0b59639861386081f87487c908bf8f05fa538628 Mon Sep 17 00:00:00 2001 From: Tony Zoght Date: Fri, 20 Jan 2023 22:12:09 -0800 Subject: [PATCH 1/3] ready for review --- CONTRIBUTING.md | 75 ++++++++++++++++++++++ CONTRIBUTORS.md | 11 ++++ DESCRIPTION | 7 +- NAMESPACE | 3 + R/clean_data_frame.R | 23 +++++++ R/redact_creditcardnumber.R | 19 ++++++ R/redact_email.R | 19 ++++++ README.Rmd | 88 ++++++++++++------------- README.md | 113 +++++++++++++++++---------------- logo.png | Bin 0 -> 10169 bytes man/clean_data_frame.Rd | 31 +++++++++ man/redact_creditcardnumber.Rd | 29 +++++++++ man/redact_email.Rd | 25 ++++++++ 13 files changed, 343 insertions(+), 100 deletions(-) create mode 100644 CONTRIBUTING.md create mode 100644 CONTRIBUTORS.md create mode 100644 R/clean_data_frame.R create mode 100644 R/redact_creditcardnumber.R create mode 100644 R/redact_email.R create mode 100644 logo.png create mode 100644 man/clean_data_frame.Rd create mode 100644 man/redact_creditcardnumber.Rd create mode 100644 man/redact_email.Rd diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..86c3800 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,75 @@ +# Contributing + +Contributions are welcome, and they are greatly appreciated! Every little bit +helps, and credit will always be given. + +## Types of Contributions + +### Report Bugs + +If you are reporting a bug, please include: + +* Your operating system name and version. +* Any details about your local setup that might be helpful in troubleshooting. +* Detailed steps to reproduce the bug. + +### Fix Bugs + +Look through the GitHub issues for bugs and Project. Anything tagged with "bug" and "help +wanted" is open to whoever wants to implement it. + +### Implement Features + +Look through the GitHub issues for features. Anything tagged with "enhancement" +and "help wanted" is open to whoever wants to implement it. + +### Write Documentation + +You can never have enough documentation! 
Please feel free to contribute to any +part of the documentation, such as the official docs, docstrings, or even +on the web in blog posts, articles, and such. + +### Submit Feedback + +If you are proposing a feature: + +* Explain in detail how it would work. +* Keep the scope as narrow as possible, to make it easier to implement. +* Remember that this is a volunteer-driven project, and that contributions + are welcome + +## Get Started! + +Ready to contribute? Here's how to set up `sanityzeR` for local development. + +1. Fork and Clone a copy of `sanityzeR` locally. +2. Install locally in R studio + + ```console + library(devtools) + library(usethis) + load_all() + ``` + +3. Use `git` (or similar) to create a branch for local development and make your changes: + + ```console + $ git checkout -b name-of-your-bugfix-or-feature + ``` + +4. When you're done making changes, check that your changes conform to any code formatting requirements and pass any tests. + +5. Commit your changes and open a pull request. + +## Pull Request Guidelines + +Before you submit a pull request, check that it meets these guidelines: + +1. The pull request should include additional tests if appropriate. +2. If the pull request adds functionality, the docs should be updated. +3. The pull request should work for all currently supported operating systems and versions of R. + +## Code of Conduct + +Please note that the `sanityzeR` project is released with a +Code of Conduct. By contributing to this project you agree to abide by its terms. diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000..5cb3dad --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,11 @@ +# Contributors + +## Special thanks for all the people who had helped this project so far: + +- [Tony Zoght](https://github.com/tzoght) +- [Caesar Wong](https://github.com/caesarw0) +- [Jonah Hamilton](https://github.com/xXJohamXx) + +## I would like to join this list. How can I help the project? 
+ +For more information, please refer to our [CONTRIBUTING](CONTRIBUTING.md) guide. diff --git a/DESCRIPTION b/DESCRIPTION index d51d687..6e12c94 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,13 +5,14 @@ Authors@R: c(person(given = "Jonah", family = "Hamilton", email = "jonah.hamilton@alumni.ubc.ca", - role = c("aut", "cre")), + role = c("aut")), person(given = "Caesar", family = "Wong", - role = "ctb"), + role = c("ctb")), person(given = "Tony", family = "Zoght", - role = "ctb") + email = "tony@zoght.com", + role = c("cre")) ) Description: Data scientists often need to remove or redact Personal Identifiable Information (PII) from their data. This package provides utilities diff --git a/NAMESPACE b/NAMESPACE index 6ae9268..45d77bf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,2 +1,5 @@ # Generated by roxygen2: do not edit by hand +export(clean_data_frame) +export(redact_creditcardnumber) +export(redact_email) diff --git a/R/clean_data_frame.R b/R/clean_data_frame.R new file mode 100644 index 0000000..93c4624 --- /dev/null +++ b/R/clean_data_frame.R @@ -0,0 +1,23 @@ +#' Cleans a data.frame by redacting PII information from character vector columns +#' +#' @param df A data.frame to clean +#' @param spotters_list A list containing lists of 3 elements each: +#' 1. the redact function +#' 2. hash_spotted value to pass or 0 to keep the default +#' 3. the replace_with value or 0 to keep the default +#' +#' +#' @return A deep copy of the cleaned data.frame. 
+#' @export +#' +#' @examples +#' df <- data.frame() +#' spotters <- list() +#' spotter_1 <- list(redact_email,TRUE,0) +#' spotters <- append(spotters,list(spotter_1)) +#' df_cleaned <- clean_data_frame(df, spotters) +clean_data_frame <- function(df, spotters_list) { + # to be implemented in the next milestone + print(df) + print(spotters_list) +} diff --git a/R/redact_creditcardnumber.R b/R/redact_creditcardnumber.R new file mode 100644 index 0000000..debc618 --- /dev/null +++ b/R/redact_creditcardnumber.R @@ -0,0 +1,19 @@ +#' Redacts credit card numbers from a given string +#' +#' @param string A character vector with, at most, one element. The input string to redact credit card numbers from +#' @param hash_spotted When TRUE, the redaction of the credit cards will be a hash of the redacted text (Default False) +#' @param replace_with A character vector with, at most, one element. When hash_spotted is FALSE, this character vector will be the replacement for redacted credit card numbers. +#' +#' +#' @return A character vector. +#' @export +#' +#' @examples +#' x <- "You can use my 5567554868135971 here" +#' redact_creditcardnumber(x) +redact_creditcardnumber <- function(string, hash_spotted=FALSE, replace_with="CREDITCARD") { + # to be implemented in the next milestone + print(string) + print(hash_spotted) + print(replace_with) +} diff --git a/R/redact_email.R b/R/redact_email.R new file mode 100644 index 0000000..bf6c72e --- /dev/null +++ b/R/redact_email.R @@ -0,0 +1,19 @@ +#' Redacts email addresses from a given string +#' +#' @param string A character vector with, at most, one element. The input string to redact email addresses from +#' @param hash_spotted When TRUE, the redaction of the email addresses will be a hash of the redacted text (Default False) +#' @param replace_with A character vector with, at most, one element. When hash_spotted is FALSE, this character vector will be the replacement for redacted email addresses. +#' +#' +#' @return A character vector. 
+#' @export +#' +#' @examples +#' x <- "my email address is foo@gaga.com" +#' redact_email(x) +redact_email <- function(string, hash_spotted=FALSE, replace_with="EMAILADDRS") { + # to be implemented in the next milestone + print(string) + print(hash_spotted) + print(replace_with) +} diff --git a/README.Rmd b/README.Rmd index a07ce08..fea192a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -2,7 +2,6 @@ output: github_document --- - ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -12,24 +11,29 @@ knitr::opts_chunk$set( ) ``` -** note this is a initial version of the README file which will be updated for the future milestones** +\*\* note this is a initial version of the README file which will be updated for the future milestones\*\* # sanityzeR + +![](logo.png) + The goal of sanityzeR: Data scientists often need to remove or redact Personal Identifiable Information (PII) from their data. This package provides utilities to spot and redact PII from r data frames/Tibbles. PII can be used to uniquely identify a person. This includes names, addresses, credit card numbers, phone numbers, email addresses, and social security numbers, and therefore regulatory bodies such as the European Union's General Data Protection Regulation (GDPR) and the California Consumer Privacy Act (CCPA) require that PII be removed or redacted from data sets before they are shared an further processed -## Why `sanityzeR` ? +## Why `sanityzeR` ? + Because it's a fun name and it's a play on the word "sanitize" which is what we are doing to the data -## Similar R packages - **add to this** +## Similar R packages + +The closest R package in functionality is [**anonymizer**](https://www.rdocumentation.org/packages/anonymizer/versions/0.2.0) which is a package for finding and removing PII from text. The package is not designed to work with data frames directly and we believe that our package will be more user-friendly and intuitive as it accepts data frames directly. 
In addition, sanityzeR gives the ability for users to define new types of spotters to redact new types of PII. ## Installation @@ -50,51 +54,47 @@ library(sanityzeR) ``` ## Features and Usage -Conceptually, `sanityzeR` is a package that provides a way to remove PII from Pandas data frames. The package provides a number of default spotters, which can be used to identify PII in the data and redact them. -The main entry point to the package is the `Cleanser` class. The `Cleanser` class is used to add `Spotter`s to the cleanser, which will be used to identify PII in the data. The cleanser can then be used to cleanse the data, and redact the PII from the given data frame (all future data structures that will be suppportd by the package, in the future). **edit this as needed** +Conceptually, `sanityzeR` is a package that provides a way to remove PII from R data frames. The package provides a number of default spotters, which can be used to identify PII in the data and redact them. -The package comes with a number of default spotters, as subclassess of `Spotter`: -1. `CreditCardSpotter` - identifies credit card numbers -2. `EmailSpotter` - identifies email addresses +The library comes with two default redaction functions, `redact_creditcardnumber` and `redact_email`, which each take a character vector and redact the corresponding PII using either a constant string replacement or a hash of the redaction. -Spotters can be added to it using the `add_spotter()` method. The cleanser can then be used to cleanse data using the `cleanse()` method which takes a Pandas data frame and returns a Pandas data frame with PII redacted. +## Functions -The redaction options provided by `sanityze`` are: -1. Redact using a fixed string - The string in this case is the ID of the spotter. For example, if the spotter is an instance of `CreditCardSpotter`, the string will be `{{CREDITCARD}}`, or `{{EMAILADDRS}}` for an instance of `EmailSpotter`. -2. 
Redact using a hash of the input - The hash is computed using the `hashlib` package, and the hash function is `md5`. For example, if the spotter is an instance of `CreditCardSpotter`, the string will be `{{6a8b8c6c8c62bc939a11f36089ac75dd}}`, if the input is contains a PII `1234-5678-9012-3456`. +1. `redact_creditcardnumber()`: a function that takes a character vector (string) and redacts credit card numbers contained within that string, replacing them with either: + 1. A constant string that the user can specify + 2. A hash of the redaction (using MD5) +2. `redact_email`: a function that takes a character vector (string) and redacts email addresses contained within that string, replacing them with either: + 1. A constant string that the user can specify -## Classes and Functions -1. `Cleanser`: the main class of the package. It is used to add spotters to it, and then cleanse data using the spotters. - 1. `add_spotter()`: adds a spotter to the cleanser - 2. `remove_spotter()`: removes a spotter from the cleanser - 3. `clean()`: cleanses the data in the given data frame, and returns a new data frame with PII redacted -2. `EmailSpotter`: a spotter that identifies email addresses - 1. `getUID()`: returns the unique ID of the spotter - 2. `process()`: performs the PII matching and redaction -3. `CreditCardSpotter`: a spotter that identifies credit card numbers - 1. `getUID()`: returns the unique ID of the spotter - 2. `process()`: performs the PII matching and redaction + 2. A hash of the redaction (using MD5) +3. `clean_data_frame`: a function that takes as input the following list of arguments below and returns a deep copy of the cleaned data.frame: + 1. An input data.frame `df` to clean -> You can checkout detailed API Documentations [here](https://ubc-mds.github.io/sanityze/). + 2. A list of spotter information arguments. Each item in the list is a list of 3 elements: -Below is a simple quick start example: + 1. The redact\_\* function to use (e.g. 
`redact_creditcardnumber` ). -```python -import pandas as pd -from sanityze import Cleanser, EmailSpotter + 2. The second argument of the redact\_\* function: `hash_spotted` (TRUE or FALSE) or 0 to use the default argument. -# Create a cleanser, and don't add the default spotters -cleanser = Cleanser(include_default_spotters=False) -cleaner.add_spotter(from sanityze import Cleanser, EmailSpotter()) -cleaned_df = cleanser.clean(df) -``` + 3. The third argument of the redact\_\* function: `replace_with` (a redaction string) or 0 to use the default argument. +Below is a simple quick start example: +``` r +library(sanityzeR) +df <- data.frame() +spotters <- list() +spotter_1 <- list(redact_email,TRUE,0) +spotters <- append(spotters,spotter_1) + +df_cleaned <- clean_data_frame(df, spotters) +``` ## High-level Design -To better understand the design of the package, we have provided a high-level design document, which will be kept up to date as the package evolves. The document can be found [here](HighLevelDesign.md). + +To better understand the design of the package, we have provided a high-level design document, which will be kept up to date as the package evolves. The document can be found [here](https://github.com/UBC-MDS/sanityze/blob/main/HighLevelDesign.md). ## Contributing @@ -106,13 +106,13 @@ Interested in contributing? Check out the [contributing guidelines](CONTRIBUTING ## Credits -`sanityzeR` was created with [`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the `py-pkgs-cookiecutter` [template](https://github.com/py-pkgs/py-pkgs-cookiecutter). +`sanityzeR` was created using **devtools** and **usethis** R packages. 
## Quick Links - * [Documentation](https://ubc-mds.github.io/sanityze/) - * [Kanban Board](https://github.com/orgs/UBC-MDS/projects/15) - * [Issues](https://github.com/UBC-MDS/sanityze/issues) - * [High Level Design](HighLevelDesign.md) - * [Contributing Guidelines](CONTRIBUTING.md) - * [Code of Conduct](CODE_OF_CONDUCT.md) - * [License](LICENSE) + +- [Kanban Board](https://github.com/orgs/UBC-MDS/projects/15) +- [Issues](https://github.com/UBC-MDS/sanityzeR/issues) +- [High Level Design](https://github.com/UBC-MDS/sanityze/blob/main/HighLevelDesign.md) +- [Contributing Guidelines](CONTRIBUTING.md) +- [Code of Conduct](CODE_OF_CONDUCT.md) +- [License](LICENSE.md) diff --git a/README.md b/README.md index 741db3a..28fb6ae 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,8 @@ updated for the future milestones\*\* +![](logo.png) + The goal of sanityzeR: Data scientists often need to remove or redact Personal Identifiable @@ -27,7 +29,13 @@ what we are doing to the data ## Similar R packages -**add to this** +The closest R package in functionality is +[**anonymizer**](https://www.rdocumentation.org/packages/anonymizer/versions/0.2.0) which +is a package for finding and removing PII from text. The package is not +designed to work with data frames directly and we believe that our +package will be more user-friendly and intuitive as it accepts data +frames directly. In addition, sanityzeR gives the ability for users to +define new types of spotters to redact new types of PII. ## Installation @@ -55,60 +63,62 @@ Conceptually, `sanityzeR` is a package that provides a way to remove PII from Pandas data frames. The package provides a number of default spotters, which can be used to identify PII in the data and redact them. -The main entry point to the package is the `Cleanser` class. The -`Cleanser` class is used to add `Spotter`s to the cleanser, which will -be used to identify PII in the data. 
The cleanser can then be used to -cleanse the data, and redact the PII from the given data frame (all -future data structures that will be suppportd by the package, in the -future). **edit this as needed** - -The package comes with a number of default spotters, as subclassess of -`Spotter`: 1. `CreditCardSpotter` - identifies credit card numbers 2. -`EmailSpotter` - identifies email addresses - -Spotters can be added to it using the `add_spotter()` method. The -cleanser can then be used to cleanse data using the `cleanse()` method -which takes a Pandas data frame and returns a Pandas data frame with PII -redacted. - -The redaction options provided by -``` sanityze`` are: 1. Redact using a fixed string - The string in this case is the ID of the spotter. For example, if the spotter is an instance of ```CreditCardSpotter`, the string will be`{{CREDITCARD}}`, or`{{EMAILADDRS}}`for an instance of`EmailSpotter`. 2. Redact using a hash of the input - The hash is computed using the`hashlib`package, and the hash function is`md5`. For example, if the spotter is an instance of`CreditCardSpotter`, the string will be`{{6a8b8c6c8c62bc939a11f36089ac75dd}}`, if the input is contains a PII`1234-5678-9012-3456\`. - -## Classes and Functions - -1. `Cleanser`: the main class of the package. It is used to add - spotters to it, and then cleanse data using the spotters. - 1. `add_spotter()`: adds a spotter to the cleanser - 2. `remove_spotter()`: removes a spotter from the cleanser - 3. `clean()`: cleanses the data in the given data frame, and - returns a new data frame with PII redacted -2. `EmailSpotter`: a spotter that identifies email addresses - 1. `getUID()`: returns the unique ID of the spotter - 2. `process()`: performs the PII matching and redaction -3. `CreditCardSpotter`: a spotter that identifies credit card numbers - 1. `getUID()`: returns the unique ID of the spotter - 2. 
`process()`: performs the PII matching and redaction - -> You can checkout detailed API Documentations -> [here](https://ubc-mds.github.io/sanityze/). +The library comes with two default redaction functions +`redact_creditcardnumber` and `redact_email` and which simply takes a +character vector and redacts the corresponding PII using either a +constant string replacement or a hash of the redaction. + +## Functions + +1. `redact_creditcardnumber()`: a function that takes a character + vector (string) and redacts credit card numbers contained within + that string, replacing them with either: + 1. A constant string that the user can specify + + 2. A hash of the redaction (using MD5) +2. `redact_email`: a function that takes a character vector (string) + and redacts email addresses contained within that string, replacing + them with either: + 1. A constant string that the user can specify + + 2. A hash of the redaction (using MD5) +3. `clean_data_frame`: a function that takes as input the following + list of arguments below and returns a deep copy of the cleaned + data.frame: + 1. An input data.frame `df` to clean + + 2. A list of spotter information arguments. Each item in the list + is a list of 3 elements: + + 1. The redact\_\* function to use + (e.g. `redact_creditcardnumber` ). + + 2. The second argument of the redact\_\* function: + `hash_spotted` (TRUE or FALSE) or 0 to use the default + argument. + + 3. The third argument of the redact\_\* function: + `replace_with` (a redaction string) or 0 to use the default + argument. 
Below is a simple quick start example: -``` python -import pandas as pd -from sanityze import Cleanser, EmailSpotter +``` r +library(sanityzeR) +df <- data.frame() +spotters <- list() +spotter_1 <- list(redact_email,TRUE,0) +spotters <- append(spotters,spotter_1) -# Create a cleanser, and don't add the default spotters -cleanser = Cleanser(include_default_spotters=False) -cleaner.add_spotter(from sanityze import Cleanser, EmailSpotter()) -cleaned_df = cleanser.clean(df) +df_cleaned <- clean_data_frame(df, spotters) ``` ## High-level Design To better understand the design of the package, we have provided a high-level design document, which will be kept up to date as the package -evolves. The document can be found [here](HighLevelDesign.md). +evolves. The document can be found +[here](https://github.com/UBC-MDS/sanityze/blob/main/HighLevelDesign.md). ## Contributing @@ -124,17 +134,14 @@ It is licensed under the terms of the [MIT license](LICENSE). ## Credits -`sanityzeR` was created with -[`cookiecutter`](https://cookiecutter.readthedocs.io/en/latest/) and the -`py-pkgs-cookiecutter` -[template](https://github.com/py-pkgs/py-pkgs-cookiecutter). +`sanityzeR` was created using **devtools** and **usethis** R packages. 
## Quick Links -- [Documentation](https://ubc-mds.github.io/sanityze/) - [Kanban Board](https://github.com/orgs/UBC-MDS/projects/15) -- [Issues](https://github.com/UBC-MDS/sanityze/issues) -- [High Level Design](HighLevelDesign.md) +- [Issues](https://github.com/UBC-MDS/sanityzeR/issues) +- [High Level + Design](https://github.com/UBC-MDS/sanityze/blob/main/HighLevelDesign.md) - [Contributing Guidelines](CONTRIBUTING.md) - [Code of Conduct](CODE_OF_CONDUCT.md) -- [License](LICENSE) +- [License](LICENSE.md) diff --git a/logo.png b/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..ffa934aa15584c8420b73c34f560209e44780428 GIT binary patch literal 10169 zcmd6MbyS)rSMd~4lgeVLuv`?qCgzC9MLsjh&7Nr4Ff0C1EPWwqhu4Ez~KM}_}(p_!@x0GJi_ zGBTPjERuz`ttJ~2k@}PpeFx9!W_Nze)9SGGB^G5^man@ zHp}yDMbwi9V9jRCsv5D2ph#pLBOSDI@Pnt;YlSrwK%NMou&WWaqz5xH0_b=JpWdvk zOSKU++lx~lt#>}_Yi(TgQ33Ms!?&9))hT*d2!i?vVt5fGXX$;~k5xDM%%os!(wMTo z<{3n`et{X=wq)@+=NNd`k_IRMvFLFUCPc|8h2tv@bs%BnPIOIi=qM>&TpLnaLxUGr zkD{r0+;4P(oP^1!XO*nJ&i=Tx0qc6AVzJgwVGFmd2*;@T;|@MCoah) zYuZ1Gz5Q?>XzF^fG~E|s2+C#3&P;!C%Kk<)pd&aZRG3zWinA(y^ztg>HahVR3%pv~ z99~YZ9b#JXVTNl0Gi7327I)`i1^?A!bko|hr9rAq(j~tnkEcYHJ!L3 zN1-|Bc?}G<)Mn0(Ug4$5X4){|$TK!JS_W_j+1h^STMht22?{p%ohbtl+yWCMSKBzt zutN1P02fl10y6qLLbHJHn|(=9bvDk^^BCp;f*}MpO%x+ZJhDk$8?>xYO6^)&bkt2c z)JgzVBJxxyx^)=8Il~ZIgt_Pf3Y#SV0v_($!VMui1lukYH*yoS_6-4FbkuHiUxb0~ z0B#H^d|aT+3q|6o_t7ml+T!Hs0o^W-MfMx`DO-Y^luTSHCjZTv;hMBV`~ zi*)S``-SC;h@#2MN}-GfNwCZl)9fxY&_z@ZH;QDFOwD)DiZ2t;B|T(zz`rKE#)y~G z`R>&qyo7NQn;_}=o&2ZwPoNjd9exY>YIH|5jr51YPKzNjoH+6Ug5DlkD?*+{SfDET z9$@dq9^syV4e}*TZ2we^yg@)ENB}i6?oB*r&uPDa#d2-Xg2gd1Zlq3s`ev4Uw~zoA zQQC*)Fwu9SAC3m&H^jFfzm|L*+XTf)KwYjqX`6W4lbiSFg};^FiFJo=D55jFVSGmK z4;PW9+(fkq=T_3CqaetR-t4p9lD8DDP4bNvk_=>LU?^nYrmL5I52}|cG1-HS5vS9q zYcR?szZ?#0h-0L8W}s$lOgT+iOCGkCsiPGBl%=#L|AobjQGz+-WATvDP@Q#i-4TQx zk_owom<@*mpd3M|RYQRucnWY9tDNyiK$ntxl=_D8U(`A1wX0z`Pj0GO$kv+DNs1ib7ReX>HSgVBLUkd2U&<#N_oUg;S9}T 
zQ*yx+uvoE}s#v+WUO9~K#dzXCLGe_K{h&S6E;p4wHGM>Ew`wGQlMI(PV#-}s&EdohWm5^pt!p zQ7TC-5gaoIi-J2AP3t>QU?{j)$XE`fEBq|!;PLt~%CSfeI}!wvOb#OsQ_dMPvR1dj zo-YyW9L}6NoJSnAMlX&27&_NE)cadGJB-(UY3P8+d_Dha=sN9i>7TR+Tq_8QJzdmx zLUR7L;Ik0iSpSV^k$)+E@o0H)QGanAqDf942OJ~HJ$j*ZOr&R(40p9tgfN)t+>eS`YO+hp-e;}?yUtCc1fv=#Yu<}vF% zf<1Pq3N-2dD@QnTLwM` zL4U77?CXRo{pXr;+t-ac)kBg)U?CCDDS&B!mZUKq)lbqUup8YFog)4WA@)1NZqYE> z2(k$4@P|6&Rz_>3-Q?1Iy?jUM)*e<1_Fmk`c04E^2sebxf{H%+XVk}76B0w(KlCHu z3PuNRe~YW7pGQ0nJP)ur0~06f|Oa$&rfD=^1lMT`xjF^ z*caI~`Yg5zwr~f7EYRv57m3a*e^>l=Pe8L3+((&@Hy)wh7QR&WEyS3>V8WOr%qD^| zConZLr!uwg1pH$EU7Zq?@;ODK2)UF>HC;Jg*;%=_w7)o;gcVa8Is3rqK=>d?8f?+w z#@)+fr9aR#WacG_9i0|y3@%snoZ;m86v#q8;xSw?b_b(Q4+1LCD$dI9kME0rNg1+g-CS-x-8!z7 zxqQKQDX@bRvVZwTro*X&_4$;flyi2yuya3Ty?)l_D5q*2vk7G$lM`zcv-5>}%$G;= zYneZII?IG#uU{+^pDib>axDbgMjj=|Fkvz^GxdrhK?TyLRIrrSPmG$692N`e4p;kD zlUKub5q7Hru5O*yFN=ZYTRz1B5S>){*|`e5faZYv+vXF;%DGCe%BKg0=fMDeasKhf z_l=PI+WYz&iK^Jjqr2#HokxU5qzJSQnio`5VlVv5>eo6to@xd*w@{m;rnr5q7n9Zs za} zCi_No^~c_f&mn278E(!GVI4oc1E+K0t>!jQoqf+qjRve`GgHMj?v>iQ?alt5vEM|} zylzY`I=y!5Zg*qfjD5M(&#KO9KJ>l5;vZ=IbmTLi+3MV?@#t`WR8G^HvqyO&XcR#0 zzkEk>n|%a5>2#aE+}ky-G@f@wTRm+ha^GKDg+S5H+|ORM=K4QhhgG7b;0a=zqhitw zORxo0o;TeHrmSb6o}fYtd(Zj56lU4C^;w?1?&|}@`~!Cye_qdI7JRF4#p^J>`eFsN-a5Z;xJ{wO z6t4&tyNW(9Kk}T}|1o?FY69(=sdan}x_lPkP8Hy1vW{H% zcbHpKi#h)Cd5h1X@T>=8y*+(I^ z{@3=kPO`z|4y6YKo%jzrHut|fXC5aB6bql6A4q3Rk@s(?GcVwaS4)V2l8u@gfDKNg z0}v4i0Z4EP0saL<+5?dPK?4BH@Dcz(N`4POgO`NxM>7xc-`trzq<_=ne+8vda+f?P>|(@^yCm3jv7vioi){h?fPO zud|bjr-<(>;6Efp;Pl^W5RmR4B3_QKfCg%sbTY0U5IO-aZZ2-1I3^t(otTHUjfl3a z{J-Gvf3JXcUS4h@AP^J^<${8_Ts>?-Ji@}lAZ}g|FE1xtg45H_#mmB%)5VkFpGy9> z9$AQ|m505Xm%XbC-Cw;Hmag7juYkb6hW`Eer=JjC`~R}!;`y(z-~odEazH#>+@Sv( z%*)>9{{j2U`6uijasAVr*k5HLn)bdBCqr3#XLzf^T@&Z!<`?^ing7Q5FGv50eC-MG zka2Z}1HHumt6TmB{!ixr3H*miga0te!~35m|AX@%$iE?oXn5Gety%nSLvbE4(0{A@ zm%JG0Zv+2Fga5NP|4|F?PH{{z(7%U+wfU<4Na=<VVEarzR@XG!xVdGFsBh#R52(3uQlg8oR*roc 
z0%vBOn(W}z_lMw9KUrz^pmT{_n>oxnxBjRIhF)Q?Y=tTr~eHd@ECNa!UDi)Q-MztZoreZD2!22uVIgpHQsgi_AuKHe=4H z{?rnbY}DTFO;O>IPlZ>D=tD1);x%@w(W>?*svy$z?GDA7rpK@14T`E#hHC2+gw-i3 z_SPmTh92pX9TUJPKa%NdA*doB4?9(4X2b1s{BE1CR~jJ>kff|V6{}f3^hz@OGMd^k zhABGixq>71(pmp->!NxUnd|S5hTss1@zy%Ep^95NRRCletawnI&vqq07fNLfY%UQ| z+Yt-l%4wa`7pK_GEzVaq7a$sIB^*~uwo$`nOdMc~EaTHfXuF6C*BcS#(#?bECEEqP z1|3gn3j5lOZ-BeoM(s`KZJBYuz+Ng!+kfV2DgaGsY8j~V){Hz<0_DcyVs13F`V@<= zUk5{DG!f+e_EuTPrsT#Y^E`1A00T)u#Q7yH;vW=yZPtj%&kg%7_+g6t@g?YxTC!sA zDX)3bkPD1$A`A>(q*iw-KWPZHrSoNzCRDKT`I8)*!+sS1AuOIRpZl^WtPndv%Yfy_ z1RMP*b|uht;UbPNzh{D~-&;FM>IYPzk^{#~caFPwI@j{Xe;P^|@S{qC*RC0rr@{MO zFn(>G`jlqa64nKDBldg`+WuPB|+kQiW>1gE$Ob*Nf9Q#^53UZ*cH(t z=7~3rB0ycUO?41w5mw+M93(jC+{;VI!Nmr;_n?jP>;cP}lpIZGUV^*#(5Xxp_C-|M-*}oCb>= zI#-kJIO;=WZ6K8`6SvCsal!Yjic9I9m*z=_CGjHMuQY!ZY97Cj5~k=+hXmKUYz*=x z=W?Bpd(bmrf0Rqde3X$4Kct#WA>|aFSsmJ>mb02$NZ32M+Gt?LV<<&~W!HTGd)e@Q-@t1PT zgDk4mBe+u)c%x-SZgiq(vM{o$7la^Hq5u|+V@4695y8izV#}Yiv@FuIAM8u|7BVoV zB8)J2`=$mO@cd-9w4=XY#TE}&R*HX;m%P_3re{y1pAn%~BHOXZ(2R}P%Vdbj{a|Z- zf8qBYmQZ8&Q$T2cGUIi-9t0=D=&i;?uai#YK4lH( z8Ii|;BH`H@w|Zp;`=aHkx{NG8kVPgNvMZp9fk_Wn`mWpvI0jy~*huMCx*8WjCrloC zgmK^CkXw+~dig<`T7IsH!j+$ql_;Cn+-FvAn&Wjcpd&`m%gd$>2y?Ckz$8p;Yvr+& zjxG-2l1q_?)V;bGn_buZDCxgxzI^N9EAKgpJsEHP0WtD(AhITH&H5|lHkh{dldG&C%kQ4qG8GzN-lvk(r%n^qS6 zQ^tSEA+S~l&5t(E=ZG;Rar-nOd$dplpPe!PGjpBveAE2^b-~>( zKFbL*<|D?G=3}=oeU$5+7DrJ`ddTa~;gxP?F}%}5hK*FaDhiU}%7Z00pjwIZ2!Zl2 zT^{fU)ERH=3LozGx_PP{;!vjoPLAlnNQ9yu-QFzr&uT+x>4AQHN*U#o)S|Dm&}VH| z=y8m~;tg|=#XP0B_CG2z=u9QHzO@uDqk#mKX}LW_3P;&-?9G3-#fqQB1&V?2sx% z*)pTeV>K89iS~CR`=x=$&9sKI_F3uX04aEqKxs$4l>TCXA8lENt)6w0k6H@SO`(^R zq;Np>D)20v46R5AB$i`!MHhm4XEg6Jog8iF%?|R@kcy}E@F4Ge(B8g#a zw~zd;R?TO`6nTgW8JZlwcuE5ae+WLcD$c$#>LsFi(l_~Cc*;j^FU>u4z=$*pqNeKyPz03^KBr8 zN4$c4(vXo0UC6nN;)_X9NfQ6`enj$Ryn!|O9zyD@OFEPsX&^ZH{YclKJ+L(Wp{UOt zhp1MTu`}}XD-{{yK2FCPdP`;^mAyCJQkrz>@Yya=kvHz>vl!4~?I1D+1#L)|*S^fd zm#LgQ%7jNJ5g9CH46!$n8GbHe+R6;>GYAab#j4h2hno_i}Z&0Cc3ASvV+wEOfMv%f*ALJX+HKFbXdBLIFx 
zldt~pcY#rZ1LoH7)Hbtixmohr&4n+GD*PC&lVS?)q$)Y87~^$4Bhj{G%K(Wjrt~EG z8K2xJ#-MIjMm{7-iB;a()#&^CX*xjKLL==W0<2AM?5noi*F$Zeizz)L6lHd$bu|!e zF$?4lAJP>`v>=3_(oblZLPl0OyK2EMZ{PVv*nxLX1%$;{%~`@K~s5}wQjbvG|Q3OyUj zUi`3oHVk*@#7m~wNq<}GaJR&M^Tz!V{@xr)?4>i2(32P`Nqim(uWkGQ)OGxBONw2) zaMh2osVbEYU4-HE=o2{L`_mWWKE1?z7G`QLKm#BUH~>WGt1&0+lhV}JmlY(R@8$1< z@+hzg;KLB#yCh>u9G>L5^{PmhYZ^ttvTJ>iQ57|7} z3qXsxV?&$k0e+j+xoevrb^SI^J%8Q{A%?ejZYKy^t^V*c+Gt$wusGuD)Y-Yl(jWh! z(V@1M)DD@{t6%Wnd;sLD(}=iCgoI3}s(yDqusFZy-z)AEhvjqy|9QH!+8bFMO}Aee z6ns9V?hSRfm{=J;HEs@0_1mWnJ}oGO*8ILH?ET@IX|`Y~aI%&hD0;h^-j_K@m&LM^>l-$p7;I;R3}7kPRCN?3{1ye$AfoAC_DO~+$FewINV zXRFz=)u@4U$6Y7LQc1+!N>6X0JNttUp14$x>!xU(p;Aqj`erksew{N2^XS0?Auz@sBOD?J-%V_45v3PKl3d1gk2TIS0A7u1+sIbS zcKDWd`p~RE1jIUr1D)@W)=mPr3>j_*(B-V~o(^Ym(?4MRW1Bv!Qn^=hgDDzxTIh z2ixadnuiyuqj732IHH0Xsh`5JIJ$j0J4$6rMkP#m=O@h6RM8Gw9kw0nZKjuG=h%+k zY=?O6cfypgW?q&M!(;`PaJeWG3{Qk@#v!D10@f`7m+J8S#P5a2())u2?MX*H(P?I8 zvWAeHxuTHl(u;v&%Dv%jmRig4KW7seN2FimIwO)kj>_e!`E5oB-eg=a=c*(Ne@1rP z&p&ZJ6zp))s(Xuj&a(_%>~Ow&o^^=RRM7_=!F*Xd9@etHI?UtUx~zgtu$Oe!v$!dt zE_F*gmAQ;}*nZ;oXBuPKj^9$!YuS??eMS-e&*t8EZn&B%HPC^lhpRt8nLI&X3P}w& z=x4-4VL8jS^upb4KQ+v})8oEq#B&(ToLa`w?<&ru+~HDNza&)`+T*Tg0UIZTSWb-Yp2KmQ|O))gDr}J z_>@VbQCnKgn0m*3$dgAwP&#+#L+?IyoRx-jn7Xs|_#*xT$SL~88krL_|esF;qzMG>7b#-&OIXoW-m)qV zSHrymBCTz$V#ukM=ALR)57Tkf?{J6r6*aE$&ZQoGbLM905OcBALt2XE$LPkHVLMS6 z=r-%HobU5YIYAd_3mG#wYUcfQ=>0Ccbl%X_%G8ZUi`ge3pq9f9$S2R5g_`?xYE{2K z9%S<;V|VyzdM;>=!3t#aLLJ@Q<~#jsib3oM!{!;TUo3ByiRQG(FwmsQbL$XW8F#^LKO2pX^2wS{U7D=hULpwbx?&@~jn+YERAHbTy^*K9?f(JuP*H zLj@l67p6GH_Oq6#d!3zSX__<~?xhIbVT!#!}Cz3vJ&dmfAg*tApG9!csnEQ8>yNV?(($v>R#!Y^Ay>L1vHAJn zjITMA3@g^qT)A1P}8y2m>3qa#n}BSz@7Vpn=0KhNSW2@R7(GH^1(mP=?_jbE`57@lz zXs_0(=4;Mc-qFC@pmXHqt9y?aIMKJcwj(RJv7OLvv$~@}BXQagDU$+dU(7jV{#_!^ zl632KIf8SvnyIQHZo@Wb()5NWK&U?A7H=Od)^W}R8vwP;x_ab8u$@e|V)*#3$A9~weY=uW9_F-&u&olZH6r$Zik8#_yO%l&=h8OK$f z&*{QJzdttH&;NvlJIzhf=H@o!Zp)jfid~^8q>k9l*SHv-#pA~y3cGiqQXK)GzcYLl zR0!hM3S7==FJqt`2r=ED0Qdot4N=E?%iRz+g!EatoF}|_66s!mrzu13^^#9}Lju-m 
zys}NdGbiCZ^}b6->M?waLH8Bq*Hh;M&$VWC%3zoHMK}HBh^wgvGeNL=cRzA(zl9ko zOfhS~-Q5qGMKq!63B8bvIjsuwQW~8%)Hv1cvYzbiErw)09OOQ4m=@kvKmU%{4CS!4 z5IYw8708k;!ED0F!u=aXTF1H|OCDV=A|0nTC0$O4X)_^45}2!)CDMg${*|-fRY&9f zYnaILapmA113i5T|9-mGpDMygmrS=;v=$lM zwFxN>nU_0u?W774b$~;gTu6R5%P!vhnomU+ibuUZ1`@H&#d^+tbhe^CpuD`8)e;^i z!$x)kZfr7`2%R4hBx&od43voi8*Q*-B1yUvtfRw@kCze2t0-{5T+m_O{WO z`PL!Ie6$;HeKBm*$(Yp|ei#MW25Xyvs3dZaU)NUm(h6;YmM;{%ezwwXscVv zl~IMtAAs*ekEuOg>dHu}M>{mVOv8_YUnR&{Xcgg?y@#LJ;0W##B5EPama%fgyg;0V zA52timY7J>#e||0N5~~f=TX3SH~cFfRnTSO`;LMdR&8{+s_=^UnI0H#S>e1FByyN< z9pI-Hvu-+E^!Rd;;W0f0iu9T|@RK77mk}o90yt00;KfVi3iy`fb+1};xa1220yfN8 kV1y(;-0=Tor+z&nI^E-A=6}HXdz+{vr!M Date: Fri, 20 Jan 2023 22:14:48 -0800 Subject: [PATCH 2/3] removed tod in README --- README.Rmd | 2 -- README.md | 3 --- 2 files changed, 5 deletions(-) diff --git a/README.Rmd b/README.Rmd index fea192a..05fcfbe 100644 --- a/README.Rmd +++ b/README.Rmd @@ -11,8 +11,6 @@ knitr::opts_chunk$set( ) ``` -\*\* note this is a initial version of the README file which will be updated for the future milestones\*\* - # sanityzeR diff --git a/README.md b/README.md index 28fb6ae..8a96306 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,4 @@ -\*\* note this is a initial version of the README file which will be -updated for the future milestones\*\* - # sanityzeR From 27b7ccd569f69ad2edd86499597e8858548c80f3 Mon Sep 17 00:00:00 2001 From: Tony Zoght Date: Fri, 20 Jan 2023 22:16:55 -0800 Subject: [PATCH 3/3] added usage in README --- README.Rmd | 9 +++++++-- README.md | 24 ++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/README.Rmd b/README.Rmd index 05fcfbe..706794c 100644 --- a/README.Rmd +++ b/README.Rmd @@ -44,11 +44,16 @@ devtools::install_github("UBC-MDS/sanityzeR") ## Example -This is a basic example which shows you how to solve a common problem: **edit this** +This is a basic example which shows you how to solve a common problem: ```{r example} library(sanityzeR) -## basic example 
code +df <- data.frame() +spotters <- list() +spotter_1 <- list(redact_email,TRUE,0) +spotters <- append(spotters,spotter_1) + +df_cleaned <- clean_data_frame(df, spotters) ``` ## Features and Usage diff --git a/README.md b/README.md index 8a96306..fe0d63b 100644 --- a/README.md +++ b/README.md @@ -47,11 +47,31 @@ devtools::install_github("UBC-MDS/sanityzeR") ## Example This is a basic example which shows you how to solve a common problem: -**edit this** ``` r library(sanityzeR) -## basic example code +df <- data.frame() +spotters <- list() +spotter_1 <- list(redact_email,TRUE,0) +spotters <- append(spotters,spotter_1) + +df_cleaned <- clean_data_frame(df, spotters) +#> data frame with 0 columns and 0 rows +#> [[1]] +#> function (string, hash_spotted = FALSE, replace_with = "EMAILADDRS") +#> { +#> print(string) +#> print(hash_spotted) +#> print(replace_with) +#> } +#> +#> +#> +#> [[2]] +#> [1] TRUE +#> +#> [[3]] +#> [1] 0 ``` ## Features and Usage