diff --git a/.gitignore b/.gitignore index 5694e2c2..9cf956ed 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,10 @@ *.bak *.log .pyc -pypanda/*.pyc -pypuma/*.pyc -pypanda/output* -pypuma/output* +netZooPy/netZooPy/pypanda/*.pyc +netZooPy/netZooPy/pypuma/*.pyc +netZooPy/netZooPy/pypanda/output* +netZooPy/netZooPy/pypuma/output* dist/ *.egg-info/ .DS_Store @@ -20,10 +20,10 @@ pypuma.egg* test_*.txt pypandaenv/ .idea/ -pypuma/__pycache__ -pypuma/__pycache__/* +netZooPy/netZooPy/pypuma/__pycache__ +netZooPy/netZooPy/pypuma/__pycache__/* top*genes.png Toy_Panda.pairs.txt Toy_Puma.pairs.txt test.py -files.txt \ No newline at end of file +files.txt diff --git a/UserGuide.md b/UserGuide.md index c02594a6..fac8166d 100644 --- a/UserGuide.md +++ b/UserGuide.md @@ -1,7 +1,9 @@ ## Description -Forked from [https://github.com/QuackenbushLab/pypanda](https://github.com/QuackenbushLab/pypanda), -which was based on [https://github.com/davidvi/pypanda](https://github.com/davidvi/pypanda). -Compared to QuackenbushLab/pypanda this repository adds the Python implementation of PUMA ([run_puma.py](run_puma.py) and [pypanda/puma.py](pypanda/puma.py)). +This repo is based on the following repos: +- [https://github.com/aless80/pypanda](https://github.com/aless80/pypanda), +- [https://github.com/QuackenbushLab/pypanda](https://github.com/QuackenbushLab/pypanda), +- which was based on [https://github.com/davidvi/pypanda](https://github.com/davidvi/pypanda). +- Compared to QuackenbushLab/pypanda this repository adds the Python implementation of PUMA ([run_puma.py](netZooPy/netZooPy/pypuma/run_puma.py) and [netZooPy/netZooPy/pypuma/puma.py](netZooPy/netZooPy/pypuma/puma.py)). NaN values in normalized matrices are replaced with values normalized by the overall z-score. This allows running the Toy Data provided in this repository. ## Table of Contents @@ -25,7 +27,8 @@ C and MATLAB code: [https://github.com/mararie/PUMA](https://github.com/mararie/ _Glass K, Huttenhower C, Quackenbush J, Yuan GC. 
Passing Messages Between Biological Networks to Refine Predicted Interactions, PLoS One, 2013 May 31;8(5):e64832_ Original PANDA C++ code: [http://sourceforge.net/projects/panda-net/](http://sourceforge.net/projects/panda-net/). -* **[LIONESS](https://arxiv.org/abs/1505.06440)** (Linear Interpolation to Obtain Network Estimates for Single Samples) +* **[LIONESS](https://arxiv.org/abs/1505.06440)** (Linear Interpolation to Obtain Network Estimates for Single Samples) +* **[LIONESSR](https://doi.org/10.1016/j.isci.2019.03.021)** The R version _Marieke Lydia Kuijjer, Matthew Tung,GuoCheng Yuan,John Quackenbush, Kimberly Glass. Estimating sample-specific regulatory networks_ LIONESS can be used to estimate single-sample networks using aggregate networks made with any network reconstruction algorithm (http://arxiv.org/pdf/1505.06440.pdf). @@ -61,22 +64,22 @@ Hamming distance is calculated every iteration. ## Installation -PyPanda runs on Python 2.7. You can either run the pypanda script directly (see [Usage](#usage)) or install it on your system. We recommend the following commands to install pypandas on UNIX systems: +PyPanda runs on Python 3. You can either run the pypanda script directly (see [Usage](#usage)) or install it on your system. We recommend the following commands to install pypanda on UNIX systems: #### Using a virtual environment Using [python virtual environments](http://docs.python-guide.org/en/latest/dev/virtualenvs/) is the cleanest installation method. 
Cloning git and setting up a [python virtual environment](http://docs.python-guide.org/en/latest/dev/virtualenvs/): ```no-highlight pip install --user pipenv #Make sure you have pipenv -git clone https://github.com/aless80/pypanda.git -cd pypanda +git clone https://github.com/netZoo/netZooPy.git +cd netZooPy ``` Creating a virtual environment and installing pypanda: ```no-highlight virtualenv pypandaenv #virtual environment created in a folder inside the git folder source pypandaenv/bin/activate -(pypandaenv)$ pip install -r requirements.txt -(pypandaenv)$ python setup.py install --record files.txt +(pypandaenv)$ pip3 install -r requirements.txt +(pypandaenv)$ python3 setup.py install --record files.txt ``` Uninstall pypanda from virtual environment: ```no-highlight @@ -91,9 +94,9 @@ rm -rf pypandaenv #### Using pip Never use ~~sudo pip~~. Instead you can use pip on the user's install directory: ```no-highlight -git clone https://github.com/aless80/pypanda.git +git clone https://github.com/netZoo/netZooPy.git cd pypanda -python setup.py install --user +python3 setup.py install --user #to run from the command line you will need to make pypanda executable and add the bin directory to your PATH: cd bin chmod +x pypanda @@ -104,11 +107,11 @@ source ~/.bashrc ```no-highlight pip uninstall pypanda ``` -To run pypanda from Windows (tested on Windows 10) install Git (https://git-scm.com/downloads) and Anaconda Python2.7 (https://www.continuum.io/downloads) and from the Anaconda prompt run: +To run pypanda from Windows (tested on Windows 10) install Git (https://git-scm.com/downloads) and Anaconda Python3 (https://www.continuum.io/downloads) and from the Anaconda prompt run: ```no-highlight -git clone https://github.com/aless80/pypanda.git -cd pypanda -python setup.py install +git clone https://github.com/netZoo/netZooPy.git +cd netZooPy +python3 setup.py install ``` ## Usage @@ -130,18 +133,18 @@ python run_panda.py -e ./ToyData/ToyExpressionData.txt -m 
./ToyData/ToyMotifData ``` To reconstruct a single sample Lioness Pearson correlation network (this can take some time): ```python -python run_panda.py -e ./ToyData/ToyExpressionData.txt -m ./ToyData/ToyMotifData.txt -p ./ToyData/ToyPPIData.txt -o output_panda.txt -q output_lioness.txt +python3 run_panda.py -e ../../tests/ToyData/ToyExpressionData.txt -m ../../tests/ToyData/ToyMotifData.txt -p ../../tests/ToyData/ToyPPIData.txt -o output_panda.txt -q output_lioness.txt ``` To run pypuma on toy data: ```python -python run_puma.py -e ./ToyData/ToyExpressionData.txt -m ./ToyData/ToyMotifData.txt -p ./ToyData/ToyPPIData.txt -o output_puma.txt -i ./ToyData/ToyMiRList.txt +python3 run_puma.py -e ../../tests/ToyData/ToyExpressionData.txt -m ../../tests/ToyData/ToyMotifData.txt -p ../../tests/ToyData/ToyPPIData.txt -o output_puma.txt -i ../../tests/ToyData/ToyMiRList.txt ``` To reconstruct a single sample Lioness Pearson correlation network using pypuma (this can take some time): ```python -python run_puma.py -e ./ToyData/ToyExpressionData.txt -m ./ToyData/ToyMotifData.txt -p ./ToyData/ToyPPIData.txt -i ToyData/ToyMiRList.txt -o output_puma.txt -q output_lioness.txt +python3 run_puma.py -e ../../tests/ToyData/ToyExpressionData.txt -m ../../tests/ToyData/ToyMotifData.txt -p ../../tests/ToyData/ToyPPIData.txt -i ../../tests/ToyData/ToyMiRList.txt -o output_puma.txt -q output_lioness.txt ``` -For pypuma see also [PyPuma](https://github.com/aless80/pypuma#installation). +For pypuma see also [PyPuma](https://github.com/netZooPy/netZooPy/pypuma#installation). #### Run from python Fire up your python shell or ipython notebook. Use the python installation in the virtual environment if you installed pypanda there. 
@@ -154,7 +157,7 @@ from pypanda.lioness import Lioness ``` Run the Panda algorithm, leave out motif and PPI data to use Pearson correlation network: ```python -panda_obj = Panda('ToyData/ToyExpressionData.txt', 'ToyData/ToyMotifData.txt', 'ToyData/ToyPPIData.txt', remove_missing=False) +panda_obj = Panda('../../tests/ToyData/ToyExpressionData.txt', '../../tests/ToyData/ToyMotifData.txt', '../../tests/ToyData/ToyPPIData.txt', remove_missing=False) ``` Save the results: ```python @@ -172,7 +175,7 @@ outdegree = panda_obj.return_panda_outdegree() ``` To run the Lioness algorithm for single sample networks, first run panda (or puma) using the keep_expression_matrix flag, then use Lioness as follows: ```python -panda_obj = Panda('ToyData/ToyExpressionData.txt', 'ToyData/ToyMotifData.txt', 'ToyData/ToyPPIData.txt', remove_missing=False, keep_expression_matrix=True) +panda_obj = Panda('../../tests/ToyData/ToyExpressionData.txt', '../../tests/ToyData/ToyMotifData.txt', '../../tests/ToyData/ToyPPIData.txt', remove_missing=False, keep_expression_matrix=True) lioness_obj = Lioness(panda_obj) ``` Save Lioness results: @@ -187,7 +190,7 @@ plot.top_network_plot(column= 0, top=100, file='top_100_genes.png') Run the Puma algorithm, leave out motif and PPI data to use Pearson correlation network: ```python -puma_obj = Puma('ToyData/ToyExpressionData.txt', 'ToyData/ToyMotifData.txt', 'ToyData/ToyPPIData.txt','ToyData/ToyMiRList.txt') +puma_obj = Puma('../../tests/ToyData/ToyExpressionData.txt', '../../tests/ToyData/ToyMotifData.txt', '../../tests/ToyData/ToyPPIData.txt','../../tests/ToyData/ToyMiRList.txt') ``` ## Toy data @@ -216,4 +219,4 @@ Sample1 Sample2 Sample3 Sample4 -0.117475863987 0.494923925853 0.0518448588965 -0.0584810456421 TF, Gene and Motif order is identical to the panda output file. 
-``` \ No newline at end of file +``` diff --git a/netZooPy/pypanda/__pycache__/__init__.cpython-36.pyc b/netZooPy/pypanda/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index 5c57022b..00000000 Binary files a/netZooPy/pypanda/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/netZooPy/pypanda/__pycache__/lioness.cpython-36.pyc b/netZooPy/pypanda/__pycache__/lioness.cpython-36.pyc deleted file mode 100644 index 2fdbd03e..00000000 Binary files a/netZooPy/pypanda/__pycache__/lioness.cpython-36.pyc and /dev/null differ diff --git a/netZooPy/pypanda/__pycache__/panda.cpython-36.pyc b/netZooPy/pypanda/__pycache__/panda.cpython-36.pyc deleted file mode 100644 index 77460b73..00000000 Binary files a/netZooPy/pypanda/__pycache__/panda.cpython-36.pyc and /dev/null differ diff --git a/netZooPy/pypanda/__pycache__/timer.cpython-36.pyc b/netZooPy/pypanda/__pycache__/timer.cpython-36.pyc deleted file mode 100644 index 31922f19..00000000 Binary files a/netZooPy/pypanda/__pycache__/timer.cpython-36.pyc and /dev/null differ diff --git a/netZooPy/pypanda/lioness.py b/netZooPy/pypanda/lioness.py index ee74db44..9bb2f05b 100755 --- a/netZooPy/pypanda/lioness.py +++ b/netZooPy/pypanda/lioness.py @@ -2,8 +2,8 @@ import os, os.path import numpy as np -from .panda import Panda -from .timer import Timer +from panda import Panda +from timer import Timer class Lioness(Panda): diff --git a/netZooPy/pypanda/panda.py b/netZooPy/pypanda/panda.py index 85415927..d024ca1f 100755 --- a/netZooPy/pypanda/panda.py +++ b/netZooPy/pypanda/panda.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np from scipy.stats import zscore -from .timer import Timer +from timer import Timer class Panda(object): """ Using PANDA to infer gene regulatory network. 
@@ -327,7 +327,7 @@ def split_label(label): plt.savefig(file, dpi=300) return None - def return_panda_indegree(self): + def return_panda_indegree(self): '''Return Panda indegree.''' #subset_indegree = self.export_panda_results.loc[:,['gene','force']] export_panda_results_pd = pd.DataFrame(self.export_panda_results,columns=['tf','gene','motif','force']) @@ -335,6 +335,7 @@ def return_panda_indegree(self): subset_indegree['force']=pd.to_numeric(subset_indegree.force) self.panda_indegree = subset_indegree.groupby('gene').sum() return self.panda_indegree + def return_panda_outdegree(self): '''Return Panda outdegree.''' export_panda_results_pd = pd.DataFrame(self.export_panda_results,columns=['tf','gene','motif','force']) diff --git a/netZooPy/pypanda/run_lioness.py b/netZooPy/pypanda/run_lioness.py index 2399394a..c9fe7bca 100755 --- a/netZooPy/pypanda/run_lioness.py +++ b/netZooPy/pypanda/run_lioness.py @@ -16,7 +16,7 @@ """ import sys import getopt -import pypanda +from lioness import Lioness def main(argv): #Create variables @@ -76,7 +76,7 @@ def main(argv): # Run panda print('Start LIONESS run ...') - L = pypanda.Lioness(expression_data, motif, ppi, panda_net, start=start, end=end, save_dir=save_dir, save_fmt=save_fmt) + L = Lioness(expression_data, motif, ppi, panda_net, start=start, end=end, save_dir=save_dir, save_fmt=save_fmt) print('All done!') if __name__ == '__main__': diff --git a/netZooPy/pypanda/run_panda.py b/netZooPy/pypanda/run_panda.py index 9faf236f..834741a4 100755 --- a/netZooPy/pypanda/run_panda.py +++ b/netZooPy/pypanda/run_panda.py @@ -14,7 +14,7 @@ """ import sys import getopt -import pypanda +from panda import Panda def main(argv): #Create variables @@ -58,7 +58,7 @@ def main(argv): # Run PANDA print('Start Panda run ...') - panda_obj = pypanda.Panda(expression_data, motif, ppi, save_tmp=True, remove_missing=rm_missing, keep_expression_matrix=bool(lioness_file)) + panda_obj = Panda(expression_data, motif, ppi, save_tmp=True, 
remove_missing=rm_missing, keep_expression_matrix=bool(lioness_file)) #panda_obj = pypanda.Panda(expression_data, motif, None, save_tmp=True, remove_missing=rm_missing) #panda_obj = pypanda.Panda(None, motif, ppi, save_tmp=True, remove_missing=rm_missing) #panda_obj = pypanda.Panda(None, motif, None, save_tmp=True, remove_missing=rm_missing) @@ -69,7 +69,7 @@ def main(argv): #outdegree = panda_obj.return_panda_outdegree() if lioness_file: - from pypanda.lioness import Lioness + from lioness import Lioness lioness_obj = Lioness(panda_obj) lioness_obj.save_lioness_results(lioness_file) print('All done!') diff --git a/netZooPy/pypuma/lioness_for_puma.py b/netZooPy/pypuma/lioness_for_puma.py index 64f67d91..9bfe9604 100755 --- a/netZooPy/pypuma/lioness_for_puma.py +++ b/netZooPy/pypuma/lioness_for_puma.py @@ -2,8 +2,8 @@ import os, os.path import numpy as np -from .puma import Puma -from .timer import Timer +from puma import Puma +from timer import Timer class Lioness(Puma): """Using LIONESS to infer single-sample gene regulatory networks. @@ -86,4 +86,4 @@ def save_lioness_results(self, file='lioness.txt'): '''Write lioness results to file.''' #self.lioness_network.to_csv(file, index=False, header=False, sep="\t") np.savetxt(file, self.lioness_network, delimiter="\t",header="") - return None \ No newline at end of file + return None diff --git a/netZooPy/pypuma/puma.py b/netZooPy/pypuma/puma.py index 57446bb9..7a05732d 100755 --- a/netZooPy/pypuma/puma.py +++ b/netZooPy/pypuma/puma.py @@ -5,7 +5,7 @@ import pandas as pd import numpy as np from scipy.stats import zscore -from .timer import Timer +from timer import Timer class Puma(object): """ Using PUMA to infer gene regulatory network. 
diff --git a/netZooPy/pypuma/run_puma.py b/netZooPy/pypuma/run_puma.py index a96515df..70f57ef6 100755 --- a/netZooPy/pypuma/run_puma.py +++ b/netZooPy/pypuma/run_puma.py @@ -16,7 +16,7 @@ """ import sys import getopt -import pypuma +from puma import Puma def main(argv): #Create variables @@ -64,14 +64,14 @@ def main(argv): # Run PUMA print('Start Puma run ...') - puma_obj = pypuma.Puma(expression_data, motif, ppi, miR, save_tmp=True, remove_missing=rm_missing, keep_expression_matrix=bool(lioness_file)) + puma_obj = Puma(expression_data, motif, ppi, miR, save_tmp=True, remove_missing=rm_missing, keep_expression_matrix=bool(lioness_file)) puma_obj.save_puma_results(output_file) #puma_obj.top_network_plot(top=100, file='puma_top100genes.png') #indegree = puma_obj.return_panda_indegree() #outdegree = puma_obj.return_panda_outdegree() if lioness_file: - from pypuma.lioness_for_puma import Lioness + from lioness_for_puma import Lioness lioness_obj = Lioness(puma_obj) lioness_obj.save_lioness_results(lioness_file) print('All done!')