From 1973553c447c8dadb7d921a3ed7d7f17256dfbf2 Mon Sep 17 00:00:00 2001 From: Sartaj Bhuvaji Date: Sun, 4 Feb 2024 09:08:15 -0800 Subject: [PATCH 1/2] Added Pypi readme and updated doc --- PYPI_README.md | 39 +++++++++++++++++++++++++++++++++++++++ README.md | 12 ++++++++---- setup.py | 8 ++++---- 3 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 PYPI_README.md diff --git a/PYPI_README.md b/PYPI_README.md new file mode 100644 index 0000000..8e14441 --- /dev/null +++ b/PYPI_README.md @@ -0,0 +1,39 @@ + + [![Build](https://github.com/SartajBhuvaji/SDGnE/actions/workflows/main.yaml/badge.svg)](https://github.com/SartajBhuvaji/SDGnE/actions/workflows/main.yaml) + + +## About +- SDGnE (Synthetic Data Generation and Evaluation) is a Python package designed to generate synthetic data and evaluate its quality using neural network models. +- This tool is intended for developers and researchers who require synthetic datasets for testing and development. +- The current version `v0.2.0` uses Autoencoders and SMOTE to generate synthetic data. + +## Getting Started +`pip install sdgne` + + ## Notebooks + To get started, we have created notebooks for the Autoencoder and SMOTE algorithms. + + ### Auto Encoder + Autoencoders are a class of neural networks designed for unsupervised learning and representing features in a smaller space. They consist of an encoder and a decoder, intending to learn the input data's compressed representation (encoding). We leverage this architecture to generate synthetic data. + + [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SartajBhuvaji/SDGnE/blob/main/notebooks/SDGnE_Autoencoder_Notebook.ipynb) + + ### SMOTE + SMOTE, abbreviated as Synthetic Minority Oversampling Technique, is used to generate synthetic data from the original dataset. Over the years, several variants of SMOTE have been developed, each tailored to specific scenarios and requirements. 
These variants employ distinct methodologies and innovations to enhance the generation of synthetic data, thereby improving model performance by ensuring a more balanced distribution of classes. We provide a few SMOTE variants for synthetic data generation. + + [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SartajBhuvaji/SDGnE/blob/main/notebooks/SDGnE_SMOTE_Notebook.ipynb) + + ### Comparison + In this notebook, we will compare the `Single Encoder Autoencoder` and the `SMOTE Algorithm` for synthetic data generation. We will generate synthetic data using both the algorithms and perform statistical evaluation. + + [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SartajBhuvaji/SDGnE/blob/main/notebooks/SDGnE_Comparison_Notebook.ipynb) + + ## Features + +- **Data Generation**: Create synthetic datasets that mimic the statistical properties of real-world data. +- **Neural Autoencoders**: Utilize various autoencoder architectures to learn data representations. +- **Evaluation Metrics**: Assess the quality of synthetic data using built-in evaluation metrics. +- **Extensibility**: Easily extend the package with custom data generators and evaluators. + + ## Links + - **Documentation**: https://seattle-university.gitbook.io/sdgne/ \ No newline at end of file diff --git a/README.md b/README.md index 1a3c51b..d34a93f 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ [![Build](https://github.com/SartajBhuvaji/SDGnE/actions/workflows/main.yaml/badge.svg)](https://github.com/SartajBhuvaji/SDGnE/actions/workflows/main.yaml) + ![PyPI](https://img.shields.io/pypi/v/sdgne?label=sdgne) + ## About - SDGnE (Synthetic Data Generation and Evaluation) is a Python package designed to generate synthetic data and evaluate its quality using neural network models. 
@@ -39,17 +41,17 @@ ### Auto Encoder Autoencoders are a class of neural networks designed for unsupervised learning and representing features in a smaller space. They consist of an encoder and a decoder, intending to learn the input data's compressed representation (encoding). We leverage this architecture to generate synthetic data. - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tfihhB8TeC47EYbNgB-uU-3p2KJLmkmC?usp=sharing) + [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SartajBhuvaji/SDGnE/blob/main/notebooks/SDGnE_Autoencoder_Notebook.ipynb) ### SMOTE SMOTE, abbreviated as Synthetic Minority Oversampling Technique, is used to generate synthetic data from the original dataset. Over the years, several variants of SMOTE have been developed, each tailored to specific scenarios and requirements. These variants employ distinct methodologies and innovations to enhance the generation of synthetic data, thereby improving model performance by ensuring a more balanced distribution of classes. We provide a few SMOTE variants for synthetic data generation. - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/18kAMJR0VBtfC3swIkbuTPWd0Sb0mDVCo?usp=sharing) + [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SartajBhuvaji/SDGnE/blob/main/notebooks/SDGnE_SMOTE_Notebook.ipynb) ### Comparison In this notebook, we will compare the `Single Encoder Autoencoder` and the `SMOTE Algorithm` for synthetic data generation. We will generate synthetic data using both the algorithms and perform statistical evaluation. 
- [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1VAM6hGRPvoLfS8x_WYDbexAc7V4I00_h?usp=sharing) + [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/SartajBhuvaji/SDGnE/blob/main/notebooks/SDGnE_Comparison_Notebook.ipynb) ## Features @@ -58,4 +60,6 @@ - **Evaluation Metrics**: Assess the quality of synthetic data using built-in evaluation metrics. - **Extensibility**: Easily extend the package with custom data generators and evaluators. - + ## Links + - **Documentation**: https://seattle-university.gitbook.io/sdgne/ + - **PyPI**: https://pypi.org/project/sdgne/ \ No newline at end of file diff --git a/setup.py b/setup.py index c1bba4f..86b3474 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ here = os.path.abspath(os.path.dirname(__file__)) -with codecs.open(os.path.join(here, "README.md"), encoding="utf-8") as fh: +with codecs.open(os.path.join(here, "PYPI_README.md"), encoding="utf-8") as fh: long_description = "\n" + fh.read() -VERSION = '0.2.0' +VERSION = '0.2.2' DESCRIPTION = 'Generates synthetic data' -LONG_DESCRIPTION = 'Generates synthetic data' +LONG_DESCRIPTION = long_description URL = "https://github.com/SartajBhuvaji" # Setting up @@ -33,7 +33,7 @@ classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", - "Programming Language :: Python :: 3.7, 3.8, 3.9, 3.10, 3.11", + "Programming Language :: Python :: 3.7", "Operating System :: Unix", "Operating System :: MacOS :: MacOS X", "Operating System :: Microsoft :: Windows", From 2edb1f55bd3bc89c15f7c95afc5b0fb648139d93 Mon Sep 17 00:00:00 2001 From: Sartaj Bhuvaji Date: Sun, 4 Feb 2024 09:15:53 -0800 Subject: [PATCH 2/2] Updated workflow --- .github/workflows/main.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 83c8119..69d94b0 100644 --- a/.github/workflows/main.yaml +++ 
b/.github/workflows/main.yaml @@ -95,4 +95,5 @@ jobs: uses: pypa/gh-action-pypi-publish@master with: user: __token__ - password: ${{ secrets.pypi_password }} \ No newline at end of file + password: ${{ secrets.pypi_password }} + repository_url: https://upload.pypi.org/legacy/ \ No newline at end of file