Skip to content

Commit

Permalink
Develop (#25)
Browse files Browse the repository at this point in the history
* Visualization (#7) squash commit

* issue not yet resolved, but moving to better test env

* broken visualizer

* time series output present

* Histogram resolved

* sub-class bokeh_visualizer created, configured through property, ipy notebook test runner created

'

* Datagatherer (#22)

* #15 inserted real code test

* sqlite3 added

* #15 datagathererinput

* #15 datagathererinput

* #18 test updated

* #21, will revert post commit

* reverted

* reverted

* #18 unit tested

* #18 fixed failing test_read_sql_from_empty_table

* #18 fixed test_read_sql_from_populated_table

* #18 fixed test_read_sql_from_populated_table
  • Loading branch information
ZNevzz authored Jul 20, 2020
1 parent d8e9ceb commit 4a95115
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 51 deletions.
164 changes: 131 additions & 33 deletions octopy_predictor/src/datagatherer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,82 @@
import pandas as pd
from io import StringIO
from collections import namedtuple
import sqlite3


# TODO
#from util import logit
#import util
# from util import logit
# import util


# CONSTANTS

FILE = 'FILE'
SQL = 'SQL'
FILE_PATH = 'FILEPATH'
CONNECTION = 'CONN'
QUERY = 'SQL'
# NOTE(review): name is misspelled ("PARAMERTERS") but kept for
# backward compatibility with any existing importers.
QUERY_PARAMERTERS = 'SQLPARAMS'


class DataGathererInput(object):
    """Validated bag of configuration values for one DataGatherer source.

    Usage
    -----
    gatherer_input = DataGathererInput(SQL)
    gatherer_input.add(CONNECTION, conn_uri)
    gatherer_input.add(QUERY, 'SELECT ...')
    """

    FILE_CONSTRAINTS = [FILE_PATH]
    SQL_CONSTRAINTS = [CONNECTION, QUERY, QUERY_PARAMERTERS]

    # Maps a gatherer type to the keys that may be stored for it.
    CONSTRAINTS = {
        FILE: FILE_CONSTRAINTS,
        SQL: SQL_CONSTRAINTS
    }

    def __init__(self, type: str):
        """
        Parameters
        ----------
        type : str
            TYPE of DataGatherer; must be one of CONSTRAINTS' keys.

        Raises
        ------
        ValueError
            If *type* is not a known gatherer type.  (Implements the
            original "TODO Throw error" instead of silently passing.)

        Returns
        -------
        None.
        """
        if type not in DataGathererInput.CONSTRAINTS:
            raise ValueError(
                "Unknown DataGatherer type: %r (expected one of %s)"
                % (type, sorted(DataGathererInput.CONSTRAINTS)))
        self.type = type
        self.values = {}

    def add(self, key: str, value):
        """Store *value* under *key* when the key is valid for this type.

        Parameters
        ----------
        key : str
            Valid keys are those listed in CONSTRAINTS for this type.
        value : any
            Value corresponding to key.

        Returns
        -------
        None.  Invalid keys are silently ignored (original behaviour
        preserved).
        """
        if key in DataGathererInput.CONSTRAINTS[self.type]:
            self.values[key] = value


'''
CONSTANTS
Expand Down Expand Up @@ -49,80 +121,84 @@ class DataGatherer(object):
"""docstring for DataGatherer
DataGatherer is responsible to fetch data from multiple sources
and convert it to a specific type using provided Adapters
The defaul Adapter is DataFrame
The default Adapter is DataFrame
"""
def __init__(self, arg = None):

def __init__(self, arg=None):

super(DataGatherer, self).__init__()
self.arg = arg

#@logit
# @logit
@staticmethod
def _read_from_file(file):
_file_content = None
try:
_file_content = file.read()
#util.debug_store['file_content at datagatherer'] = _file_content
# util.debug_store['file_content'] = _file_content
except IOError as io_error:
#util.debug_store['io_error at datagatherer'] = io_error.__traceback__
# util.debug_store['io_error'] = io_error.__traceback__
raise io_error
else:
return _file_content
#@logit

# @logit
@staticmethod
def _determine_resource(path):
def determine_resource(path):
resource_type, file_type = None, None

# resource type
resource_type = 'web' if path.startswith('http') else 'local'

# file type
try:
file_extension_index = path.rindex('.')
except ValueError as val_error:
# TODO: message = invalid path
raise val_error
else:
file_type = path[file_extension_index + 1 :]
file_type = path[file_extension_index + 1:]
finally:
FileResource = namedtuple('FileResource', 'resource_type file_type')
return FileResource(resource_type = resource_type, file_type = file_type)


#@logit
FileResource = namedtuple('FileResource',
'resource_type file_type')
return FileResource(resource_type=resource_type,
file_type=file_type)

# @logit
@staticmethod
def _read_from_path(path):
'''
read data from a file available at given path
'''
df = pd.DataFrame()
metadata = _determine_resource(path)
metadata = DataGatherer.determine_resource(path)

if metadata.resource_type == 'local':

if metadata.file_type == 'csv':
df = pd.read_csv(path)

elif metadata.resource_type == 'web':

if metadata.file_type == 'csv':
df = pd.read_csv(path)

return df

#@logit
def read(self, path = None, file = None, sql = None):

# @logit
def read(self, path=None, file=None, sql=None):

'''
read receives either path or file. If received both, file is given priority
read receives either path or file.
If received both, file is given priority
'''
try:
try:
df = None
if path is None:
file_content = self._read_from_file(file)
#util.debug_store['StringIO(file_content) at datagatherer'] = StringIO(file_content)
# util.debug_store['S'] = StringIO(file_content)
df = pd.read_csv(StringIO(file_content))

elif file is None:
df = pd.read_csv(path)
else:
Expand All @@ -134,5 +210,27 @@ def read(self, path = None, file = None, sql = None):
print('Exception occured while loading data')
raise exception
finally:
#util.debug_store['df at datagatherer'] = df.to_json(orient='columns')
# util.debug_store['df'] = df.to_json(orient='columns')
return df

def read_sql(self, gatherer_input: DataGathererInput):
    """
    Execute the configured SQL query and return the result.

    Parameters
    ----------
    gatherer_input : DataGathererInput
        Contains the values required to execute the SQL QUERY:
        CONNECTION (a sqlite URI string) and QUERY (the SQL text).

    Returns
    -------
    df : DataFrame
        Result of the SQL QUERY.
    """
    # TODO Move all connections to application start-up
    conn = sqlite3.connect(gatherer_input.values[CONNECTION], uri=True)
    try:
        df = pd.read_sql_query(gatherer_input.values[QUERY], con=conn)
    finally:
        # Fix: the original never closed the connection (resource leak).
        conn.close()
    return df
88 changes: 70 additions & 18 deletions octopy_predictor/tests/test_datagatherer.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,102 @@
# -*- coding: utf-8 -*-

import unittest
import sqlite3

import pandas as pd
import numpy as np
from octopy_predictor.src.datagatherer import *

def _create_test_connection():
    """Create and return a fresh, private in-memory SQLite connection.

    Fix: the original created the connection but never returned it, so
    callers received None.
    """
    # TODO create test dataframes and corresponding tables in sqlite3
    return sqlite3.connect(':memory:')


# Shared-cache in-memory database URI: every connection opened with
# uri=True against this string sees the same database, which lets the
# tests and DataGatherer.read_sql share state without touching disk.
conn = 'file::memory:?cache=shared'


class DataGathererTest(unittest.TestCase):
    """Test cases for DataGatherer"""

    def setUp(self):
        # One gatherer per test, shared via the fixture instead of being
        # re-created inside every test method.
        self.gatherer = DataGatherer()

    def test_read_sql_from_empty_table(self):
        """
        Test read_sql()
        given a gatherer_input with SQL gatherer values
        and an empty in-memory database
        when read_sql is called
        then an empty dataframe should be returned
        """
        # Keep `c` open so the shared-cache in-memory db stays alive for
        # the connection read_sql opens against the same URI.
        c = sqlite3.connect(conn, uri=True)
        c.execute('drop table if exists test')
        gatherer_input = DataGathererInput(SQL)
        gatherer_input.add(QUERY, "SELECT * FROM sqlite_master")
        gatherer_input.add(CONNECTION, conn)

        df = self.gatherer.read_sql(gatherer_input)

        self.assertIsNotNone(df)
        self.assertTrue(df.empty)

    def test_read_sql_from_populated_table(self):
        """
        Test read_sql
        given a gatherer_input with SQL gatherer values
        and a populated in-memory database
        when read_sql is called
        then a dataframe with the table contents should be returned
        """
        expected_df = pd.DataFrame(np.reshape(np.arange(10), (2, 5)))
        c = sqlite3.connect(conn, uri=True)
        expected_df.to_sql('test', con=c, if_exists='replace', index=False)

        gatherer_input = DataGathererInput(SQL)
        gatherer_input.add(QUERY, "SELECT * FROM test")
        gatherer_input.add(CONNECTION, conn)

        df = self.gatherer.read_sql(gatherer_input)

        self.assertIsNotNone(df)
        self.assertFalse(df.empty, "df is empty")
        self.assertEqual(expected_df.shape, df.shape)


class DataGathererInputTest(unittest.TestCase):
    """Test cases for DataGathererInput"""

    def test_SQL_inputs(self):
        """
        given: input is SQL
        when: DataGathererInput is created
        then: all parameters required for SQL datagatherer should be available
        """
        test_conn = _create_test_connection()

        expected = {
            'type': SQL,
            CONNECTION: test_conn
        }

        # Renamed from `input` to avoid shadowing the builtin.
        gatherer_input = DataGathererInput(SQL)
        gatherer_input.add(CONNECTION, test_conn)

        self.assertIsNotNone(gatherer_input.values)
        self.assertEqual(gatherer_input.type, expected['type'])
        self.assertEqual(gatherer_input.values[CONNECTION],
                         expected[CONNECTION],
                         "expected does not match actual")

if __name__ == '__main__':
    # unittest.main()  # run every TestCase in the module instead
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(DataGathererTest)
    unittest.TextTestRunner().run(suite)

0 comments on commit 4a95115

Please sign in to comment.