From 3b1bbda3b3a3199b987eed84cc34cd710be4dff6 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Wed, 16 Dec 2020 11:08:04 +0500 Subject: [PATCH 01/14] refactor common handler utilities out --- ipinfo/handler.py | 62 +++++++---------------------------- ipinfo/handler_async.py | 64 ++++++++----------------------------- ipinfo/handler_utils.py | 51 +++++++++++++++++++++++++++++ tests/handler_async_test.py | 6 ++-- tests/handler_test.py | 6 ++-- 5 files changed, 84 insertions(+), 105 deletions(-) create mode 100644 ipinfo/handler_utils.py diff --git a/ipinfo/handler.py b/ipinfo/handler.py index 3e7489d..ddd8f6d 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -12,6 +12,7 @@ from .cache.default import DefaultCache from .details import Details from .exceptions import RequestQuotaExceededError +from . import handler_utils class Handler: @@ -20,10 +21,8 @@ class Handler: Instantiates and maintains access to cache. """ - API_URL = "https://ipinfo.io" CACHE_MAXSIZE = 4096 CACHE_TTL = 60 * 60 * 24 - COUNTRY_FILE_DEFAULT = "countries.json" REQUEST_TIMEOUT_DEFAULT = 2 def __init__(self, access_token=None, **kwargs): @@ -34,7 +33,9 @@ def __init__(self, access_token=None, **kwargs): self.access_token = access_token # load countries file - self.countries = self._read_country_names(kwargs.get("countries_file")) + self.countries = handler_utils.read_country_names( + kwargs.get("countries_file") + ) # setup req opts self.request_options = kwargs.get("request_options", {}) @@ -55,7 +56,7 @@ def __init__(self, access_token=None, **kwargs): def getDetails(self, ip_address=None): """Get details for specified IP address as a Details object.""" raw_details = self._requestDetails(ip_address) - self._format_details(raw_details) + handler_utils.format_details(raw_details, self.countries) return Details(raw_details) def getBatchDetails(self, ip_addresses): @@ -80,8 +81,8 @@ def getBatchDetails(self, ip_addresses): lookup_addresses.append(ip_address) # Do the lookup - url = self.API_URL + "/batch" - headers = self._get_headers() + url = handler_utils.API_URL + "/batch" + headers = handler_utils.get_headers(self.access_token) headers["content-type"] = "application/json" response = requests.post( url, json=lookup_addresses, headers=headers, **self.request_options @@ -101,7 +102,7 @@ def getBatchDetails(self, ip_addresses): # Format every result for detail in result.values(): if isinstance(detail, dict): - self._format_details(detail) + handler_utils.format_details(detail, self.countries) return result @@ -117,12 +118,14 @@ def _requestDetails(self, ip_address=None): ip_address = ip_address.exploded if ip_address not in self.cache: - url = self.API_URL + url = handler_utils.API_URL if ip_address: url += "/" + ip_address response = requests.get( - url, headers=self._get_headers(), **self.request_options + url, + headers=handler_utils.get_headers(self.access_token), + **self.request_options ) if response.status_code == 429: raise RequestQuotaExceededError() @@ -130,44 +133,3 @@ def _requestDetails(self, ip_address=None): self.cache[ip_address] = response.json() return self.cache[ip_address] - - def _get_headers(self): - """Built headers for request to IPinfo API.""" - headers = { - "user-agent": "IPinfoClient/Python{version}/4.0.0".format( - version=sys.version_info[0] - ), - "accept": "application/json", - } - - if self.access_token: - headers["authorization"] = "Bearer {}".format(self.access_token) - - return headers - - def _format_details(self, details): - details["country_name"] = 
self.countries.get(details.get("country")) - details["latitude"], details["longitude"] = self._read_coords( - details.get("loc") - ) - - def _read_coords(self, location): - lat, lon = None, None - coords = tuple(location.split(",")) if location else "" - if len(coords) == 2 and coords[0] and coords[1]: - lat, lon = coords[0], coords[1] - return lat, lon - - def _read_country_names(self, countries_file=None): - """ - Read list of countries from specified country file or - default file. - """ - if not countries_file: - countries_file = os.path.join( - os.path.dirname(__file__), self.COUNTRY_FILE_DEFAULT - ) - with open(countries_file) as f: - countries_json = f.read() - - return json.loads(countries_json) diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py index 29eaed3..c5be14a 100644 --- a/ipinfo/handler_async.py +++ b/ipinfo/handler_async.py @@ -12,6 +12,7 @@ from .cache.default import DefaultCache from .details import Details from .exceptions import RequestQuotaExceededError +from . import handler_utils class AsyncHandler: @@ -20,10 +21,8 @@ class AsyncHandler: Instantiates and maintains access to cache. """ - API_URL = "https://ipinfo.io" CACHE_MAXSIZE = 4096 CACHE_TTL = 60 * 60 * 24 - COUNTRY_FILE_DEFAULT = "countries.json" REQUEST_TIMEOUT_DEFAULT = 2 def __init__(self, access_token=None, **kwargs): @@ -34,7 +33,9 @@ def __init__(self, access_token=None, **kwargs): self.access_token = access_token # load countries file - self.countries = self._read_country_names(kwargs.get("countries_file")) + self.countries = handler_utils.read_country_names( + kwargs.get("countries_file") + ) # setup req opts self.request_options = kwargs.get("request_options", {}) @@ -95,10 +96,10 @@ async def getDetails(self, ip_address=None): return Details(self.cache[ip_address]) # not in cache; do http req - url = self.API_URL + url = handler_utils.API_URL if ip_address: url += "/" + ip_address - headers = self._get_headers() + headers = handler_utils.get_headers(self.access_token) async with self.httpsess.get(url, headers=headers) as resp: if resp.status == 429: raise RequestQuotaExceededError() @@ -106,7 +107,7 @@ async def getDetails(self, ip_address=None): raw_details = await resp.json() # format & cache - self._format_details(raw_details) + handler_utils.format_details(raw_details, self.countries) self.cache[ip_address] = raw_details return Details(raw_details) @@ -139,11 +140,13 @@ async def getBatchDetails(self, ip_addresses): return result # do http req - url = self.API_URL + "/batch" - headers = self._get_headers() + url = handler_utils.API_URL + "/batch" + headers = handler_utils.get_headers(self.access_token) headers["content-type"] = "application/json" async with self.httpsess.post( - url, data=json.dumps(lookup_addresses), headers=headers + url, + data=json.dumps(lookup_addresses), + headers=headers ) as resp: if resp.status == 429: raise RequestQuotaExceededError() @@ -153,7 +156,7 @@ async def getBatchDetails(self, ip_addresses): # format & fill up cache for ip_address, details in json_resp.items(): if isinstance(details, dict): - self._format_details(details) + handler_utils.format_details(details, self.countries) self.cache[ip_address] = details # merge cached results with new lookup @@ -168,44 +171,3 @@ def _ensure_aiohttp_ready(self): timeout = aiohttp.ClientTimeout(total=self.request_options["timeout"]) self.httpsess = aiohttp.ClientSession(timeout=timeout) - - def _get_headers(self): - """Built headers for request to IPinfo API.""" - headers = { - "user-agent": 
"IPinfoClient/Python{version}/4.0.0".format( - version=sys.version_info[0] - ), - "accept": "application/json", - } - - if self.access_token: - headers["authorization"] = "Bearer {}".format(self.access_token) - - return headers - - def _format_details(self, details): - details["country_name"] = self.countries.get(details.get("country")) - details["latitude"], details["longitude"] = self._read_coords( - details.get("loc") - ) - - def _read_coords(self, location): - lat, lon = None, None - coords = tuple(location.split(",")) if location else "" - if len(coords) == 2 and coords[0] and coords[1]: - lat, lon = coords[0], coords[1] - return lat, lon - - def _read_country_names(self, countries_file=None): - """ - Read list of countries from specified country file or - default file. - """ - if not countries_file: - countries_file = os.path.join( - os.path.dirname(__file__), self.COUNTRY_FILE_DEFAULT - ) - with open(countries_file) as f: - countries_json = f.read() - - return json.loads(countries_json) diff --git a/ipinfo/handler_utils.py b/ipinfo/handler_utils.py new file mode 100644 index 0000000..5c8d75e --- /dev/null +++ b/ipinfo/handler_utils.py @@ -0,0 +1,51 @@ +""" +Utilities used in handlers. +""" + +import json +import os +import sys + +API_URL = "https://ipinfo.io" +COUNTRY_FILE_DEFAULT = "countries.json" + +def get_headers(access_token): + """Build headers for request to IPinfo API.""" + headers = { + "user-agent": "IPinfoClient/Python{version}/4.0.0".format( + version=sys.version_info[0] + ), + "accept": "application/json", + } + + if access_token: + headers["authorization"] = "Bearer {}".format(access_token) + + return headers + +def format_details(details, countries): + details["country_name"] = countries.get(details.get("country")) + details["latitude"], details["longitude"] = read_coords( + details.get("loc") + ) + +def read_coords(location): + lat, lon = None, None + coords = tuple(location.split(",")) if location else "" + if len(coords) == 2 and coords[0] and coords[1]: + lat, lon = coords[0], coords[1] + return lat, lon + +def read_country_names(countries_file=None): + """ + Read list of countries from specified country file or + default file. 
+    """
+    if not countries_file:
+        countries_file = os.path.join(
+            os.path.dirname(__file__), COUNTRY_FILE_DEFAULT
+        )
+    with open(countries_file) as f:
+        countries_json = f.read()
+
+    return json.loads(countries_json)
diff --git a/tests/handler_async_test.py b/tests/handler_async_test.py
index 9f43efd..3b180ac 100644
--- a/tests/handler_async_test.py
+++ b/tests/handler_async_test.py
@@ -3,6 +3,7 @@
 from ipinfo.cache.default import DefaultCache
 from ipinfo.details import Details
 from ipinfo.handler_async import AsyncHandler
+from ipinfo import handler_utils
 
 import pytest
 
@@ -20,7 +21,7 @@ async def test_init():
 async def test_headers():
     token = "mytesttoken"
     handler = AsyncHandler(token)
-    headers = handler._get_headers()
+    headers = handler_utils.get_headers(token)
     await handler.deinit()
 
     assert "user-agent" in headers
@@ -78,7 +79,8 @@ async def test_get_details(n):
 
     domains = details.domains
     assert domains["ip"] == "8.8.8.8"
-    assert domains["total"] == 12988
+    # NOTE: actual number changes too much
+    assert "total" in domains
     assert len(domains["domains"]) == 5
 
     await handler.deinit()
diff --git a/tests/handler_test.py b/tests/handler_test.py
index 9cd7936..5741a5d 100644
--- a/tests/handler_test.py
+++ b/tests/handler_test.py
@@ -5,6 +5,7 @@
 from ipinfo.cache.default import DefaultCache
 from ipinfo.details import Details
 from ipinfo.handler import Handler
+from ipinfo import handler_utils
 
 import pytest
 
@@ -19,7 +20,7 @@ def test_init():
 def test_headers():
     token = "mytesttoken"
     handler = Handler(token)
-    headers = handler._get_headers()
+    headers = handler_utils.get_headers(token)
 
     assert "user-agent" in headers
     assert "accept" in headers
@@ -75,7 +76,8 @@ def test_get_details(n):
 
     domains = details.domains
     assert domains["ip"] == "8.8.8.8"
-    assert domains["total"] == 12988
+    # NOTE: actual number changes too much
+    assert "total" in domains
     assert len(domains["domains"]) == 5
 
 

From 4e346ea6cd04a0562b9d54759a5fdd3a80fc6ece Mon Sep 17 00:00:00 2001
From: Uman Shahzad
Date: Wed, 16 Dec 2020 11:12:05 +0500
Subject: [PATCH 02/14] comments for util funcs

---
 ipinfo/handler_utils.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/ipinfo/handler_utils.py b/ipinfo/handler_utils.py
index 5c8d75e..42b7045 100644
--- a/ipinfo/handler_utils.py
+++ b/ipinfo/handler_utils.py
@@ -24,12 +24,23 @@ def get_headers(access_token):
     return headers
 
 def format_details(details, countries):
+    """
+    Format details given a countries object.
+
+    The countries object can be retrieved from read_country_names.
+    """
     details["country_name"] = countries.get(details.get("country"))
     details["latitude"], details["longitude"] = read_coords(
         details.get("loc")
     )
 
 def read_coords(location):
+    """
+    Given a location of the form `lat,lon`, returns the latitude and
+    longitude as a tuple.
+
+    Returns None for each tuple item if the form is invalid.
+    """
     lat, lon = None, None
     coords = tuple(location.split(",")) if location else ""
     if len(coords) == 2 and coords[0] and coords[1]:
         lat, lon = coords[0], coords[1]
     return lat, lon

From c9cabb1547984c161e722556f04217776d216df5 Mon Sep 17 00:00:00 2001
From: Uman Shahzad
Date: Wed, 16 Dec 2020 11:15:44 +0500
Subject: [PATCH 03/14] note most recent changes to changelog

---
 CHANGELOG.md | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a8d361e..8f6d005 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,12 +1,19 @@
 # IPInfo Changelog
 
+## 4.1.0 (WIP)
+
+- Most private functions on all handlers (i.e. those that start with `_`) are
+  now moved to `ipinfo.handler_utils`.
+ ## 4.0.0 #### Breaking Changes - [PR #32](https://github.com/ipinfo/python/pull/32) - All EOL Python versions are no longer supported; currently, Python 3.6 or greater is now **required**. - An asynchronous handler is available from `getHandlerAsync` which returns an `AsyncHandler` which uses **aiohttp**. + All EOL Python versions are no longer supported; currently, Python 3.6 or + greater is now **required**. + An asynchronous handler is available from `getHandlerAsync` which returns an + `AsyncHandler` which uses **aiohttp**. ## 3.0.0 From 74f7ec5f6e875661cd7fa8cba4e05970413cb5d5 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Wed, 16 Dec 2020 11:38:04 +0500 Subject: [PATCH 04/14] update requirements --- requirements.in | 8 ++++---- requirements.txt | 36 ++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/requirements.in b/requirements.in index 8146e26..95babb6 100644 --- a/requirements.in +++ b/requirements.in @@ -1,10 +1,10 @@ # base requests>=2.18.4 -cachetools==4.1.1 -aiohttp<=4 +cachetools==4.2.0 +aiohttp>=3.0.0,<=4 # dev -pytest==6.1.2 +pytest==6.2.1 pytest-asyncio==0.14.0 -pip-tools==5.3.1 +pip-tools==5.4.0 black==20.8b1 diff --git a/requirements.txt b/requirements.txt index 9ed6927..5e560b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,35 +4,35 @@ # # pip-compile --no-emit-index-url --no-emit-trusted-host # -aiohttp==3.7.2 # via -r requirements.in -appdirs==1.4.3 # via black +aiohttp==3.7.3 # via -r requirements.in +appdirs==1.4.4 # via black async-timeout==3.0.1 # via aiohttp -attrs==19.3.0 # via aiohttp, pytest +attrs==20.3.0 # via aiohttp, pytest black==20.8b1 # via -r requirements.in -cachetools==4.1.1 # via -r requirements.in -certifi==2019.9.11 # via requests +cachetools==4.2.0 # via -r requirements.in +certifi==2020.12.5 # via requests chardet==3.0.4 # via aiohttp, requests click==7.1.2 # via black, pip-tools -idna==2.8 # via requests, yarl +idna==2.10 # via requests, yarl iniconfig==1.1.1 # via pytest -multidict==5.0.0 # via aiohttp, yarl +multidict==5.1.0 # via aiohttp, yarl mypy-extensions==0.4.3 # via black -packaging==20.4 # via pytest -pathspec==0.8.0 # via black -pip-tools==5.3.1 # via -r requirements.in -pluggy==0.13.0 # via pytest -py==1.9.0 # via pytest +packaging==20.8 # via pytest +pathspec==0.8.1 # via black +pip-tools==5.4.0 # via -r requirements.in +pluggy==0.13.1 # via pytest +py==1.10.0 # via pytest pyparsing==2.4.7 # via packaging pytest-asyncio==0.14.0 # via -r requirements.in -pytest==6.1.2 # via -r requirements.in, pytest-asyncio -regex==2020.10.28 # via black -requests==2.22.0 # via -r requirements.in -six==1.12.0 # via packaging, pip-tools +pytest==6.2.1 # via -r requirements.in, pytest-asyncio +regex==2020.11.13 # via black +requests==2.25.0 # via -r requirements.in +six==1.15.0 # via pip-tools toml==0.10.2 # via black, pytest typed-ast==1.4.1 # via black typing-extensions==3.7.4.3 # via aiohttp, black -urllib3==1.25.6 # via requests -yarl==1.6.2 # via aiohttp +urllib3==1.26.2 # via requests +yarl==1.6.3 # via aiohttp # The following packages are considered to be unsafe in a requirements file: # pip From 7cfafa9f7bb27abfeb131f977e0c48e28c8388ce Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Wed, 16 Dec 2020 11:44:32 +0500 Subject: [PATCH 05/14] put SDK version in a common place --- ipinfo/handler_utils.py | 7 +++++-- ipinfo/version.py | 1 + setup.py | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 ipinfo/version.py diff --git a/ipinfo/handler_utils.py 
b/ipinfo/handler_utils.py index 42b7045..f8680a3 100644 --- a/ipinfo/handler_utils.py +++ b/ipinfo/handler_utils.py @@ -6,14 +6,17 @@ import os import sys +from .version import SDK_VERSION + API_URL = "https://ipinfo.io" COUNTRY_FILE_DEFAULT = "countries.json" def get_headers(access_token): """Build headers for request to IPinfo API.""" headers = { - "user-agent": "IPinfoClient/Python{version}/4.0.0".format( - version=sys.version_info[0] + "user-agent": "IPinfoClient/Python{version}/{sdk_version}".format( + version=sys.version_info[0], + sdk_version=SDK_VERSION ), "accept": "application/json", } diff --git a/ipinfo/version.py b/ipinfo/version.py new file mode 100644 index 0000000..ed2023b --- /dev/null +++ b/ipinfo/version.py @@ -0,0 +1 @@ +SDK_VERSION = '4.1.0' diff --git a/setup.py b/setup.py index e68f85b..f7716c9 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,7 @@ from setuptools import setup +from ipinfo.version import SDK_VERSION + long_description = """ The official Python library for IPinfo. @@ -10,7 +12,7 @@ setup( name="ipinfo", - version="4.0.0", + version=SDK_VERSION, description="Official Python library for IPInfo", long_description=long_description, url="https://github.com/ipinfo/python", From 5d363eaba4d074b7c1bae81427201d1605a22a84 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Wed, 16 Dec 2020 11:53:40 +0500 Subject: [PATCH 06/14] address https://github.com/ipinfo/python/issues/33 also get rid of useless tests that only mock stuff. --- ipinfo/handler.py | 58 +++++++++++++++++++---------------------- ipinfo/handler_async.py | 8 +++--- tests/handler_test.py | 51 ------------------------------------ 3 files changed, 31 insertions(+), 86 deletions(-) diff --git a/ipinfo/handler.py b/ipinfo/handler.py index ddd8f6d..96aa4a9 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -55,9 +55,33 @@ def __init__(self, access_token=None, **kwargs): def getDetails(self, ip_address=None): """Get details for specified IP address as a Details object.""" - raw_details = self._requestDetails(ip_address) - handler_utils.format_details(raw_details, self.countries) - return Details(raw_details) + # If the supplied IP address uses the objects defined in the built-in + # module ipaddress extract the appropriate string notation before + # formatting the URL. + if isinstance(ip_address, IPv4Address) or isinstance( + ip_address, IPv6Address + ): + ip_address = ip_address.exploded + + if ip_address in self.cache: + return Details(self.cache[ip_address]) + + # not in cache; do http req + url = handler_utils.API_URL + if ip_address: + url += "/" + ip_address + headers = handler_utils.get_headers(self.access_token) + response = requests.get(url, headers=headers, **self.request_options) + if response.status_code == 429: + raise RequestQuotaExceededError() + response.raise_for_status() + details = response.json() + + # format & cache + handler_utils.format_details(details, self.countries) + self.cache[ip_address] = details + + return Details(details) def getBatchDetails(self, ip_addresses): """Get details for a batch of IP addresses at once.""" @@ -105,31 +129,3 @@ def getBatchDetails(self, ip_addresses): handler_utils.format_details(detail, self.countries) return result - - def _requestDetails(self, ip_address=None): - """Get IP address data by sending request to IPinfo API.""" - - # If the supplied IP address uses the objects defined in the built-in - # module ipaddress extract the appropriate string notation before - # formatting the URL. 
-        if isinstance(ip_address, IPv4Address) or isinstance(
-            ip_address, IPv6Address
-        ):
-            ip_address = ip_address.exploded
-
-        if ip_address not in self.cache:
-            url = handler_utils.API_URL
-            if ip_address:
-                url += "/" + ip_address
-
-            response = requests.get(
-                url,
-                headers=handler_utils.get_headers(self.access_token),
-                **self.request_options
-            )
-            if response.status_code == 429:
-                raise RequestQuotaExceededError()
-            response.raise_for_status()
-            self.cache[ip_address] = response.json()
-
-        return self.cache[ip_address]
diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py
index c5be14a..6a6e6eb 100644
--- a/ipinfo/handler_async.py
+++ b/ipinfo/handler_async.py
@@ -104,13 +104,13 @@ async def getDetails(self, ip_address=None):
         if resp.status == 429:
             raise RequestQuotaExceededError()
         resp.raise_for_status()
-        raw_details = await resp.json()
+        details = await resp.json()
 
         # format & cache
-        handler_utils.format_details(raw_details, self.countries)
-        self.cache[ip_address] = raw_details
+        handler_utils.format_details(details, self.countries)
+        self.cache[ip_address] = details
 
-        return Details(raw_details)
+        return Details(details)
diff --git a/tests/handler_test.py b/tests/handler_test.py
index 5741a5d..9f4369c 100644
--- a/tests/handler_test.py
+++ b/tests/handler_test.py
@@ -102,54 +102,3 @@ def test_get_batch_details(n):
         assert "privacy" in d
         assert "abuse" in d
         assert "domains" in d
-
-
-def test_builtin_ip_types():
-    handler = Handler()
-    fake_details = {"country": "US", "ip": "127.0.0.1", "loc": "12.34,56.78"}
-
-    handler._requestDetails = lambda x: fake_details
-
-    details = handler.getDetails(IPv4Address(fake_details["ip"]))
-    assert isinstance(details, Details)
-    assert details.country == fake_details["country"]
-    assert details.country_name == "United States"
-    assert details.ip == fake_details["ip"]
-    assert details.loc == fake_details["loc"]
-    assert details.longitude == "56.78"
-    assert details.latitude == "12.34"
-
-
-def test_json_serialization():
-    handler = Handler()
-    fake_details = {
-        "asn": {
-            "asn": "AS20001",
-            "domain": "twcable.com",
-            "name": "Time Warner Cable Internet LLC",
-            "route": "104.172.0.0/14",
-            "type": "isp",
-        },
-        "city": "Los Angeles",
-        "company": {
-            "domain": "twcable.com",
-            "name": "Time Warner Cable Internet LLC",
-            "type": "isp",
-        },
-        "country": "US",
-        "country_name": "United States",
-        "hostname": "cpe-104-175-221-247.socal.res.rr.com",
-        "ip": "104.175.221.247",
-        "loc": "34.0293,-118.3570",
-        "latitude": "34.0293",
-        "longitude": "-118.3570",
-        "phone": "323",
-        "postal": "90016",
-        "region": "California",
-    }
-
-    handler._requestDetails = lambda x: fake_details
-
-    details = handler.getDetails(fake_details["ip"])
-    assert isinstance(details, Details)
-    assert json.dumps(details.all)

From 14d1cebe743b4dcec6d3574f4c6f350b34a800e4 Mon Sep 17 00:00:00 2001
From: Uman Shahzad
Date: Wed, 16 Dec 2020 14:21:26 +0500
Subject: [PATCH 07/14] basic batch improvements. chunking happens now; input
 list size limit doesn't exist.
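
The chunking this patch introduces boils down to slicing the input list
in `batch_size` steps. As a rough standalone sketch (illustrative only;
the actual code below also merges cached results and fills the cache):

    # illustration, not part of the patch itself
    def batches(ips, batch_size=1000):
        for i in range(0, len(ips), batch_size):
            # each chunk becomes one POST to the /batch endpoint
            yield ips[i : i + batch_size]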
--- ipinfo/handler.py | 74 +++++++++++++++++++++++-------------- ipinfo/handler_async.py | 70 ++++++++++++++++++++++++----------- ipinfo/handler_utils.py | 18 ++++++--- ipinfo/version.py | 2 +- tests/handler_async_test.py | 32 +++++++++++----- tests/handler_test.py | 31 ++++++++++++---- 6 files changed, 155 insertions(+), 72 deletions(-) diff --git a/ipinfo/handler.py b/ipinfo/handler.py index 96aa4a9..0bd627b 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -83,15 +83,28 @@ def getDetails(self, ip_address=None): return Details(details) - def getBatchDetails(self, ip_addresses): - """Get details for a batch of IP addresses at once.""" + def getBatchDetails(self, ip_addresses, batch_size=None): + """ + Get details for a batch of IP addresses at once. + + There is no specified limit to the number of IPs this function can + accept; it can handle as much as the user can fit in RAM (along with + all of the response data, which is at least a magnitude larger than the + input list). + + The batch size can be adjusted with `batch_size` but is clipped to (and + also defaults to) `handler_utils.BATCH_MAX_SIZE`. + """ + if batch_size == None: + batch_size = handler_utils.BATCH_MAX_SIZE + result = {} - # Pre-populate with anything we've got in the cache, and keep around + # pre-populate with anything we've got in the cache, and keep around # the IPs not in the cache. lookup_addresses = [] for ip_address in ip_addresses: - # If the supplied IP address uses the objects defined in the + # if the supplied IP address uses the objects defined in the # built-in module ipaddress extract the appropriate string notation # before formatting the URL. if isinstance(ip_address, IPv4Address) or isinstance( @@ -104,28 +117,35 @@ def getBatchDetails(self, ip_addresses): else: lookup_addresses.append(ip_address) - # Do the lookup - url = handler_utils.API_URL + "/batch" - headers = handler_utils.get_headers(self.access_token) - headers["content-type"] = "application/json" - response = requests.post( - url, json=lookup_addresses, headers=headers, **self.request_options - ) - if response.status_code == 429: - raise RequestQuotaExceededError() - response.raise_for_status() - - # Fill up cache - json_response = response.json() - for ip_address, details in json_response.items(): - self.cache[ip_address] = details - - # Merge cached results with new lookup - result.update(json_response) - - # Format every result - for detail in result.values(): - if isinstance(detail, dict): - handler_utils.format_details(detail, self.countries) + # loop over batch chunks and do lookup for each. 
+ for i in range(0, len(ip_addresses), batch_size): + chunk = ip_addresses[i : i + batch_size] + + # lookup + url = handler_utils.API_URL + "/batch" + headers = handler_utils.get_headers(self.access_token) + headers["content-type"] = "application/json" + response = requests.post( + url, + json=lookup_addresses, + headers=headers, + **self.request_options + ) + if response.status_code == 429: + raise RequestQuotaExceededError() + response.raise_for_status() + + # fill cache + json_response = response.json() + for ip_address, details in json_response.items(): + self.cache[ip_address] = details + + # merge cached results with new lookup + result.update(json_response) + + # format all + for detail in result.values(): + if isinstance(detail, dict): + handler_utils.format_details(detail, self.countries) return result diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py index 6a6e6eb..5a97e6b 100644 --- a/ipinfo/handler_async.py +++ b/ipinfo/handler_async.py @@ -3,6 +3,7 @@ """ from ipaddress import IPv4Address, IPv6Address +import asyncio import json import os import sys @@ -112,10 +113,26 @@ async def getDetails(self, ip_address=None): return Details(details) - async def getBatchDetails(self, ip_addresses): - """Get details for a batch of IP addresses at once.""" + async def getBatchDetails(self, ip_addresses, batch_size=None): + """ + Get details for a batch of IP addresses at once. + + There is no specified limit to the number of IPs this function can + accept; it can handle as much as the user can fit in RAM (along with + all of the response data, which is at least a magnitude larger than the + input list). + + The batch size can be adjusted with `batch_size` but is clipped to (and + also defaults to) `handler_utils.BATCH_MAX_SIZE`. + + The concurrency level is currently unadjustable; coroutines will be + created and consumed for all batches at once. + """ self._ensure_aiohttp_ready() + if batch_size == None: + batch_size = handler_utils.BATCH_MAX_SIZE + result = {} # Pre-populate with anything we've got in the cache, and keep around @@ -135,32 +152,41 @@ async def getBatchDetails(self, ip_addresses): else: lookup_addresses.append(ip_address) - # all in cache - return early. - if len(lookup_addresses) == 0: - return result - - # do http req - url = handler_utils.API_URL + "/batch" - headers = handler_utils.get_headers(self.access_token) - headers["content-type"] = "application/json" - async with self.httpsess.post( - url, - data=json.dumps(lookup_addresses), - headers=headers - ) as resp: + # loop over batch chunks and prepare coroutines for each. + reqs = [] + for i in range(0, len(ip_addresses), batch_size): + chunk = ip_addresses[i : i + batch_size] + + # all in cache - return early. 
+ if len(lookup_addresses) == 0: + return result + + # do http req + url = handler_utils.API_URL + "/batch" + headers = handler_utils.get_headers(self.access_token) + headers["content-type"] = "application/json" + reqs.append( + self.httpsess.post( + url, data=json.dumps(lookup_addresses), headers=headers + ) + ) + + resps = await asyncio.gather(*reqs) + for resp in resps: + # gather data if resp.status == 429: raise RequestQuotaExceededError() resp.raise_for_status() json_resp = await resp.json() - # format & fill up cache - for ip_address, details in json_resp.items(): - if isinstance(details, dict): - handler_utils.format_details(details, self.countries) - self.cache[ip_address] = details + # format & fill up cache + for ip_address, details in json_resp.items(): + if isinstance(details, dict): + handler_utils.format_details(details, self.countries) + self.cache[ip_address] = details - # merge cached results with new lookup - result.update(json_resp) + # merge cached results with new lookup + result.update(json_resp) return result diff --git a/ipinfo/handler_utils.py b/ipinfo/handler_utils.py index f8680a3..e9e7124 100644 --- a/ipinfo/handler_utils.py +++ b/ipinfo/handler_utils.py @@ -8,15 +8,22 @@ from .version import SDK_VERSION +# Base URL to make requests against. API_URL = "https://ipinfo.io" + +# Used to transform incoming responses with country abbreviations into the full +# expanded country name, e.g. "PK" -> "Pakistan". COUNTRY_FILE_DEFAULT = "countries.json" +# The max amount of IPs allowed by the API per batch request. +BATCH_MAX_SIZE = 1000 + + def get_headers(access_token): """Build headers for request to IPinfo API.""" headers = { "user-agent": "IPinfoClient/Python{version}/{sdk_version}".format( - version=sys.version_info[0], - sdk_version=SDK_VERSION + version=sys.version_info[0], sdk_version=SDK_VERSION ), "accept": "application/json", } @@ -26,6 +33,7 @@ def get_headers(access_token): return headers + def format_details(details, countries): """ Format details given a countries object. @@ -33,9 +41,8 @@ def format_details(details, countries): The countries object can be retrieved from read_country_names. 
""" details["country_name"] = countries.get(details.get("country")) - details["latitude"], details["longitude"] = read_coords( - details.get("loc") - ) + details["latitude"], details["longitude"] = read_coords(details.get("loc")) + def read_coords(location): """ @@ -50,6 +57,7 @@ def read_coords(location): lat, lon = coords[0], coords[1] return lat, lon + def read_country_names(countries_file=None): """ Read list of countries from specified country file or diff --git a/ipinfo/version.py b/ipinfo/version.py index ed2023b..dcdc22e 100644 --- a/ipinfo/version.py +++ b/ipinfo/version.py @@ -1 +1 @@ -SDK_VERSION = '4.1.0' +SDK_VERSION = "4.1.0" diff --git a/tests/handler_async_test.py b/tests/handler_async_test.py index 3b180ac..7db234a 100644 --- a/tests/handler_async_test.py +++ b/tests/handler_async_test.py @@ -29,9 +29,8 @@ async def test_headers(): assert "authorization" in headers -@pytest.mark.parametrize("n", range(5)) @pytest.mark.asyncio -async def test_get_details(n): +async def test_get_details(): token = os.environ.get("IPINFO_TOKEN", "") handler = AsyncHandler(token) details = await handler.getDetails("8.8.8.8") @@ -86,22 +85,30 @@ async def test_get_details(n): await handler.deinit() -@pytest.mark.parametrize("n", range(5)) -@pytest.mark.asyncio -async def test_get_batch_details(n): +############# +# BATCH TESTS +############# + +_batch_ip_addrs = ["1.1.1.1", "8.8.8.8", "9.9.9.9"] + + +def _prepare_batch_test(): + """Helper for preparing batch test cases.""" token = os.environ.get("IPINFO_TOKEN", "") if not token: pytest.skip("token required for batch tests") handler = AsyncHandler(token) - ips = ["1.1.1.1", "8.8.8.8", "9.9.9.9"] - details = await handler.getBatchDetails(ips) + return handler, token, _batch_ip_addrs + +def _check_batch_details(ips, details, token): + """Helper for batch tests.""" for ip in ips: assert ip in details d = details[ip] assert d["ip"] == ip - assert d["country"] == "US" - assert d["country_name"] == "United States" + assert "country" in d + assert "country_name" in d if token: assert "asn" in d assert "company" in d @@ -109,4 +116,11 @@ async def test_get_batch_details(n): assert "abuse" in d assert "domains" in d + +@pytest.mark.parametrize("batch_size", [None, 2, 3]) +@pytest.mark.asyncio +async def test_get_batch_details(batch_size): + handler, token, ips = _prepare_batch_test() + details = await handler.getBatchDetails(ips, batch_size=batch_size) + _check_batch_details(ips, details, token) await handler.deinit() diff --git a/tests/handler_test.py b/tests/handler_test.py index 9f4369c..078e0f9 100644 --- a/tests/handler_test.py +++ b/tests/handler_test.py @@ -27,8 +27,7 @@ def test_headers(): assert "authorization" in headers -@pytest.mark.parametrize("n", range(5)) -def test_get_details(n): +def test_get_details(): token = os.environ.get("IPINFO_TOKEN", "") handler = Handler(token) details = handler.getDetails("8.8.8.8") @@ -81,24 +80,40 @@ def test_get_details(n): assert len(domains["domains"]) == 5 -@pytest.mark.parametrize("n", range(5)) -def test_get_batch_details(n): +############# +# BATCH TESTS +############# + +_batch_ip_addrs = ["1.1.1.1", "8.8.8.8", "9.9.9.9"] + + +def _prepare_batch_test(): + """Helper for preparing batch test cases.""" token = os.environ.get("IPINFO_TOKEN", "") if not token: pytest.skip("token required for batch tests") handler = Handler(token) - ips = ["1.1.1.1", "8.8.8.8", "9.9.9.9"] - details = handler.getBatchDetails(ips) + return handler, token, _batch_ip_addrs + +def _check_batch_details(ips, details, token): + 
"""Helper for batch tests.""" for ip in ips: assert ip in details d = details[ip] assert d["ip"] == ip - assert d["country"] == "US" - assert d["country_name"] == "United States" + assert "country" in d + assert "country_name" in d if token: assert "asn" in d assert "company" in d assert "privacy" in d assert "abuse" in d assert "domains" in d + + +@pytest.mark.parametrize("batch_size", [None, 2, 3]) +def test_get_batch_details(batch_size): + handler, token, ips = _prepare_batch_test() + details = handler.getBatchDetails(ips, batch_size=batch_size) + _check_batch_details(ips, details, token) From 901fcafde43fcef69784a20fd05aa25b34da0bbc Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 18:06:34 +0500 Subject: [PATCH 08/14] support scaling/hard timeouts & graceful-fail for sync batch --- ipinfo/exceptions.py | 6 +++ ipinfo/handler.py | 105 +++++++++++++++++++++++++++++++--------- ipinfo/handler_async.py | 28 ++++++----- ipinfo/handler_utils.py | 12 +++++ 4 files changed, 117 insertions(+), 34 deletions(-) diff --git a/ipinfo/exceptions.py b/ipinfo/exceptions.py index 2cebde1..4b8e031 100644 --- a/ipinfo/exceptions.py +++ b/ipinfo/exceptions.py @@ -7,3 +7,9 @@ class RequestQuotaExceededError(Exception): """Error indicating that users monthly request quota has been passed.""" pass + + +class TimeoutExceededError(Exception): + """Error indicating that some timeout has been exceeded.""" + + pass diff --git a/ipinfo/handler.py b/ipinfo/handler.py index 0bd627b..f7f312e 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -6,12 +6,22 @@ import json import os import sys +import time import requests from .cache.default import DefaultCache from .details import Details -from .exceptions import RequestQuotaExceededError +from .exceptions import RequestQuotaExceededError, TimeoutExceededError +from .handler_utils import ( + API_URL, + COUNTRY_FILE_DEFAULT, + BATCH_MAX_SIZE, + CACHE_MAXSIZE, + CACHE_TTL, + REQUEST_TIMEOUT_DEFAULT, + BATCH_REQ_TIMEOUT_DEFAULT, +) from . import handler_utils @@ -21,10 +31,6 @@ class Handler: Instantiates and maintains access to cache. """ - CACHE_MAXSIZE = 4096 - CACHE_TTL = 60 * 60 * 24 - REQUEST_TIMEOUT_DEFAULT = 2 - def __init__(self, access_token=None, **kwargs): """ Initialize the Handler object with country name list and the @@ -40,7 +46,7 @@ def __init__(self, access_token=None, **kwargs): # setup req opts self.request_options = kwargs.get("request_options", {}) if "timeout" not in self.request_options: - self.request_options["timeout"] = self.REQUEST_TIMEOUT_DEFAULT + self.request_options["timeout"] = REQUEST_TIMEOUT_DEFAULT # setup cache if "cache" in kwargs: @@ -48,13 +54,18 @@ def __init__(self, access_token=None, **kwargs): else: cache_options = kwargs.get("cache_options", {}) if "maxsize" not in cache_options: - cache_options["maxsize"] = self.CACHE_MAXSIZE + cache_options["maxsize"] = CACHE_MAXSIZE if "ttl" not in cache_options: - cache_options["ttl"] = self.CACHE_TTL + cache_options["ttl"] = CACHE_TTL self.cache = DefaultCache(**cache_options) - def getDetails(self, ip_address=None): - """Get details for specified IP address as a Details object.""" + def getDetails(self, ip_address=None, timeout=None): + """ + Get details for specified IP address as a Details object. + + If `timeout` is not `None`, it will override the client-level timeout + just for this operation. + """ # If the supplied IP address uses the objects defined in the built-in # module ipaddress extract the appropriate string notation before # formatting the URL. 
@@ -66,12 +77,17 @@ def getDetails(self, ip_address=None): if ip_address in self.cache: return Details(self.cache[ip_address]) + # prepare req http opts + req_opts = {**self.request_options} + if timeout is not None: + req_opts["timeout"] = timeout + # not in cache; do http req - url = handler_utils.API_URL + url = API_URL if ip_address: url += "/" + ip_address headers = handler_utils.get_headers(self.access_token) - response = requests.get(url, headers=headers, **self.request_options) + response = requests.get(url, headers=headers, **req_opts) if response.status_code == 429: raise RequestQuotaExceededError() response.raise_for_status() @@ -83,7 +99,14 @@ def getDetails(self, ip_address=None): return Details(details) - def getBatchDetails(self, ip_addresses, batch_size=None): + def getBatchDetails( + self, + ip_addresses, + batch_size=None, + timeout_per_batch=BATCH_REQ_TIMEOUT_DEFAULT, + timeout_total=None, + raise_on_fail=True, + ): """ Get details for a batch of IP addresses at once. @@ -92,11 +115,26 @@ def getBatchDetails(self, ip_addresses, batch_size=None): all of the response data, which is at least a magnitude larger than the input list). + The input list is broken up into batches to abide by API requirements. The batch size can be adjusted with `batch_size` but is clipped to (and - also defaults to) `handler_utils.BATCH_MAX_SIZE`. + also defaults to) `BATCH_MAX_SIZE`. + + For each batch, `timeout_per_batch` indicates the maximum seconds to + spend waiting for the HTTP request to complete. If any batch fails with + this timeout, the whole operation fails. + Defaults to `BATCH_REQ_TIMEOUT_DEFAULT` seconds. + + `timeout_total` is a seconds-denominated hard-timeout for the time + spent in HTTP operations; regardless of whether all batches have + succeeded so far, if `timeout_total` is reached, the whole operation + will fail. Defaults to being turned off. + + `raise_on_fail`, if turned off, will return any result retrieved so far + rather than raise an exception when errors occur, including timeout and + quota errors. Defaults to on. """ if batch_size == None: - batch_size = handler_utils.BATCH_MAX_SIZE + batch_size = BATCH_MAX_SIZE result = {} @@ -117,23 +155,44 @@ def getBatchDetails(self, ip_addresses, batch_size=None): else: lookup_addresses.append(ip_address) + # prepare req http options + req_opts = {**self.request_options, "timeout": timeout_per_batch} + + if timeout_total is not None: + start_time = time.time() + # loop over batch chunks and do lookup for each. for i in range(0, len(ip_addresses), batch_size): + # quit if total timeout is reached. 
+ if ( + timeout_total is not None + and time.time() - start_time > timeout_total + ): + if raise_on_fail: + raise TimeoutExceededError() + else: + return result + chunk = ip_addresses[i : i + batch_size] # lookup - url = handler_utils.API_URL + "/batch" + url = API_URL + "/batch" headers = handler_utils.get_headers(self.access_token) headers["content-type"] = "application/json" response = requests.post( - url, - json=lookup_addresses, - headers=headers, - **self.request_options + url, json=lookup_addresses, headers=headers, **req_opts ) - if response.status_code == 429: - raise RequestQuotaExceededError() - response.raise_for_status() + + # fail on bad status codes + try: + if response.status_code == 429: + raise RequestQuotaExceededError() + response.raise_for_status() + except Exception as e: + if raise_on_fail: + raise e + else: + return result # fill cache json_response = response.json() diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py index 5a97e6b..24b5ed6 100644 --- a/ipinfo/handler_async.py +++ b/ipinfo/handler_async.py @@ -13,6 +13,15 @@ from .cache.default import DefaultCache from .details import Details from .exceptions import RequestQuotaExceededError +from .handler_utils import ( + API_URL, + COUNTRY_FILE_DEFAULT, + BATCH_MAX_SIZE, + CACHE_MAXSIZE, + CACHE_TTL, + REQUEST_TIMEOUT_DEFAULT, + BATCH_REQ_TIMEOUT_DEFAULT, +) from . import handler_utils @@ -22,10 +31,6 @@ class AsyncHandler: Instantiates and maintains access to cache. """ - CACHE_MAXSIZE = 4096 - CACHE_TTL = 60 * 60 * 24 - REQUEST_TIMEOUT_DEFAULT = 2 - def __init__(self, access_token=None, **kwargs): """ Initialize the Handler object with country name list and the @@ -41,7 +46,7 @@ def __init__(self, access_token=None, **kwargs): # setup req opts self.request_options = kwargs.get("request_options", {}) if "timeout" not in self.request_options: - self.request_options["timeout"] = self.REQUEST_TIMEOUT_DEFAULT + self.request_options["timeout"] = REQUEST_TIMEOUT_DEFAULT # setup aiohttp self.httpsess = None @@ -52,9 +57,9 @@ def __init__(self, access_token=None, **kwargs): else: cache_options = kwargs.get("cache_options", {}) if "maxsize" not in cache_options: - cache_options["maxsize"] = self.CACHE_MAXSIZE + cache_options["maxsize"] = CACHE_MAXSIZE if "ttl" not in cache_options: - cache_options["ttl"] = self.CACHE_TTL + cache_options["ttl"] = CACHE_TTL self.cache = DefaultCache(**cache_options) async def init(self): @@ -97,7 +102,7 @@ async def getDetails(self, ip_address=None): return Details(self.cache[ip_address]) # not in cache; do http req - url = handler_utils.API_URL + url = API_URL if ip_address: url += "/" + ip_address headers = handler_utils.get_headers(self.access_token) @@ -122,8 +127,9 @@ async def getBatchDetails(self, ip_addresses, batch_size=None): all of the response data, which is at least a magnitude larger than the input list). + The input list is broken up into batches to abide by API requirements. The batch size can be adjusted with `batch_size` but is clipped to (and - also defaults to) `handler_utils.BATCH_MAX_SIZE`. + also defaults to) `BATCH_MAX_SIZE`. The concurrency level is currently unadjustable; coroutines will be created and consumed for all batches at once. 
@@ -131,7 +137,7 @@ async def getBatchDetails(self, ip_addresses, batch_size=None): self._ensure_aiohttp_ready() if batch_size == None: - batch_size = handler_utils.BATCH_MAX_SIZE + batch_size = BATCH_MAX_SIZE result = {} @@ -162,7 +168,7 @@ async def getBatchDetails(self, ip_addresses, batch_size=None): return result # do http req - url = handler_utils.API_URL + "/batch" + url = API_URL + "/batch" headers = handler_utils.get_headers(self.access_token) headers["content-type"] = "application/json" reqs.append( diff --git a/ipinfo/handler_utils.py b/ipinfo/handler_utils.py index e9e7124..341f41c 100644 --- a/ipinfo/handler_utils.py +++ b/ipinfo/handler_utils.py @@ -18,6 +18,18 @@ # The max amount of IPs allowed by the API per batch request. BATCH_MAX_SIZE = 1000 +# The default max size of the cache in terms of number of items. +CACHE_MAXSIZE = 4096 + +# The default TTL of the cache in seconds +CACHE_TTL = 60 * 60 * 24 + +# The default request timeout for per-IP requests. +REQUEST_TIMEOUT_DEFAULT = 2 + +# The default request timeout for batch requests. +BATCH_REQ_TIMEOUT_DEFAULT = 5 + def get_headers(access_token): """Build headers for request to IPinfo API.""" From 91fc4a63dd58ea095d514d89c38bf5fc1f622955 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 18:11:19 +0500 Subject: [PATCH 09/14] prepare changelog for release after async is ready --- CHANGELOG.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f6d005..d9524b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,20 @@ # IPInfo Changelog -## 4.1.0 (WIP) +## 4.1.0 - Most private functions on all handlers (i.e. those that start with `_`) are now moved to `ipinfo.handler_utils`. +- All constants that existed on handlers (i.e. `REQUEST_TIMEOUT_DEFAULT`) are + now moved to `ipinfo.handler_utils`. +- Both the sync and async handlers have the following improvements: + - `timeout` can be specified as a keyword-arg to getDetails to optionally + override the client-level timeout. + - getBatchDetails now has no limit to the size of the `ip_addresses` input + list. It will chunk the list internally and make requests against the + batch endpoint in a way that doesn't exceed the API's own limits. + - getBatchDetails now accepts the new options `batch_size`, + `timeout_per_batch`, `timeout_total` and `raise_on_fail`. Please see the + documentation for details on what each of these do. ## 4.0.0 From 62fe3edda4985da31969848c33ed33f6ad34b4d7 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 18:18:01 +0500 Subject: [PATCH 10/14] clarify docs and changelog --- CHANGELOG.md | 4 ++++ ipinfo/handler.py | 11 +++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9524b4..9bf6d2e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,14 @@ ## 4.1.0 +- The SDK version is available via `ipinfo.version` as `SDK_VERSION`. - Most private functions on all handlers (i.e. those that start with `_`) are now moved to `ipinfo.handler_utils`. - All constants that existed on handlers (i.e. `REQUEST_TIMEOUT_DEFAULT`) are now moved to `ipinfo.handler_utils`. +- Cache behavior for the synchronous handler is a bit different now; the item + actually cached is the item _after_ formatting is complete, rather than + before. - Both the sync and async handlers have the following improvements: - `timeout` can be specified as a keyword-arg to getDetails to optionally override the client-level timeout. 
diff --git a/ipinfo/handler.py b/ipinfo/handler.py index f7f312e..aef6b10 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -116,8 +116,9 @@ def getBatchDetails( input list). The input list is broken up into batches to abide by API requirements. - The batch size can be adjusted with `batch_size` but is clipped to (and - also defaults to) `BATCH_MAX_SIZE`. + The batch size can be adjusted with `batch_size` but is clipped to + `BATCH_MAX_SIZE`. + Defaults to `BATCH_MAX_SIZE`. For each batch, `timeout_per_batch` indicates the maximum seconds to spend waiting for the HTTP request to complete. If any batch fails with @@ -127,11 +128,13 @@ def getBatchDetails( `timeout_total` is a seconds-denominated hard-timeout for the time spent in HTTP operations; regardless of whether all batches have succeeded so far, if `timeout_total` is reached, the whole operation - will fail. Defaults to being turned off. + will fail by raising `TimeoutExceededError`. + Defaults to being turned off. `raise_on_fail`, if turned off, will return any result retrieved so far rather than raise an exception when errors occur, including timeout and - quota errors. Defaults to on. + quota errors. + Defaults to on. """ if batch_size == None: batch_size = BATCH_MAX_SIZE From d8151060f0717504fc292c2237ff24563303d441 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 18:52:22 +0500 Subject: [PATCH 11/14] fixes & impl for async --- ipinfo/handler.py | 23 +++++++++++------- ipinfo/handler_async.py | 54 +++++++++++++++++++++++++++++------------ 2 files changed, 53 insertions(+), 24 deletions(-) diff --git a/ipinfo/handler.py b/ipinfo/handler.py index aef6b10..2dc082f 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -116,7 +116,7 @@ def getBatchDetails( input list). The input list is broken up into batches to abide by API requirements. - The batch size can be adjusted with `batch_size` but is clipped to + The batch size can be adjusted with `batch_size` but is clipped to `BATCH_MAX_SIZE`. Defaults to `BATCH_MAX_SIZE`. @@ -158,14 +158,22 @@ def getBatchDetails( else: lookup_addresses.append(ip_address) - # prepare req http options - req_opts = {**self.request_options, "timeout": timeout_per_batch} + # all in cache - return early. + if len(lookup_addresses) == 0: + return result + # do start timer if necessary if timeout_total is not None: start_time = time.time() + # prepare req http options + req_opts = {**self.request_options, "timeout": timeout_per_batch} + # loop over batch chunks and do lookup for each. - for i in range(0, len(ip_addresses), batch_size): + url = API_URL + "/batch" + headers = handler_utils.get_headers(self.access_token) + headers["content-type"] = "application/json" + for i in range(0, len(lookup_addresses), batch_size): # quit if total timeout is reached. 
if ( timeout_total is not None @@ -176,14 +184,11 @@ def getBatchDetails( else: return result - chunk = ip_addresses[i : i + batch_size] + chunk = lookup_addresses[i : i + batch_size] # lookup - url = API_URL + "/batch" - headers = handler_utils.get_headers(self.access_token) - headers["content-type"] = "application/json" response = requests.post( - url, json=lookup_addresses, headers=headers, **req_opts + url, json=chunk, headers=headers, **req_opts ) # fail on bad status codes diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py index 24b5ed6..8c210bf 100644 --- a/ipinfo/handler_async.py +++ b/ipinfo/handler_async.py @@ -118,7 +118,14 @@ async def getDetails(self, ip_address=None): return Details(details) - async def getBatchDetails(self, ip_addresses, batch_size=None): + async def getBatchDetails( + self, + ip_addresses, + batch_size=None, + timeout_per_batch=BATCH_REQ_TIMEOUT_DEFAULT, + timeout_total=None, + raise_on_fail=True, + ): """ Get details for a batch of IP addresses at once. @@ -158,31 +165,48 @@ async def getBatchDetails(self, ip_addresses, batch_size=None): else: lookup_addresses.append(ip_address) + # all in cache - return early. + if len(lookup_addresses) == 0: + return result + + # do start timer if necessary + if timeout_total is not None: + start_time = time.time() + # loop over batch chunks and prepare coroutines for each. + url = API_URL + "/batch" + headers = handler_utils.get_headers(self.access_token) + headers["content-type"] = "application/json" reqs = [] - for i in range(0, len(ip_addresses), batch_size): - chunk = ip_addresses[i : i + batch_size] - - # all in cache - return early. - if len(lookup_addresses) == 0: - return result + for i in range(0, len(lookup_addresses), batch_size): + chunk = lookup_addresses[i : i + batch_size] # do http req - url = API_URL + "/batch" - headers = handler_utils.get_headers(self.access_token) - headers["content-type"] = "application/json" reqs.append( self.httpsess.post( - url, data=json.dumps(lookup_addresses), headers=headers + url, + data=json.dumps(chunk), + headers=headers, + timeout=timeout_per_batch, ) ) - resps = await asyncio.gather(*reqs) + resps = await asyncio.wait_for( + asyncio.gather(*reqs, return_exceptions=raise_on_fail), + timeout_total + ) for resp in resps: # gather data - if resp.status == 429: - raise RequestQuotaExceededError() - resp.raise_for_status() + try: + if resp.status == 429: + raise RequestQuotaExceededError() + resp.raise_for_status() + except Exception as e: + if raise_on_fail: + raise e + else: + return result + json_resp = await resp.json() # format & fill up cache From c3e58552c49d66cd85a01425d444c324c205638b Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 18:54:43 +0500 Subject: [PATCH 12/14] fix docs, add support for sync timeout override --- ipinfo/handler_async.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py index 8c210bf..de0c160 100644 --- a/ipinfo/handler_async.py +++ b/ipinfo/handler_async.py @@ -86,7 +86,7 @@ async def deinit(self): await self.httpsess.close() self.httpsess = None - async def getDetails(self, ip_address=None): + async def getDetails(self, ip_address=None, timeout=None): """Get details for specified IP address as a Details object.""" self._ensure_aiohttp_ready() @@ -106,7 +106,10 @@ async def getDetails(self, ip_address=None): if ip_address: url += "/" + ip_address headers = handler_utils.get_headers(self.access_token) - async with 
self.httpsess.get(url, headers=headers) as resp: + req_opts = {} + if timeout is not None: + req_opts["timeout"] = timeout + async with self.httpsess.get(url, headers=headers, **req_opts) as resp: if resp.status == 429: raise RequestQuotaExceededError() resp.raise_for_status() @@ -135,8 +138,25 @@ async def getBatchDetails( input list). The input list is broken up into batches to abide by API requirements. - The batch size can be adjusted with `batch_size` but is clipped to (and - also defaults to) `BATCH_MAX_SIZE`. + The batch size can be adjusted with `batch_size` but is clipped to + `BATCH_MAX_SIZE`. + Defaults to `BATCH_MAX_SIZE`. + + For each batch, `timeout_per_batch` indicates the maximum seconds to + spend waiting for the HTTP request to complete. If any batch fails with + this timeout, the whole operation fails. + Defaults to `BATCH_REQ_TIMEOUT_DEFAULT` seconds. + + `timeout_total` is a seconds-denominated hard-timeout for the time + spent in HTTP operations; regardless of whether all batches have + succeeded so far, if `timeout_total` is reached, the whole operation + will fail by raising `TimeoutExceededError`. + Defaults to being turned off. + + `raise_on_fail`, if turned off, will return any result retrieved so far + rather than raise an exception when errors occur, including timeout and + quota errors. + Defaults to on. The concurrency level is currently unadjustable; coroutines will be created and consumed for all batches at once. From 4347bfddd5134502cc5182f7fb305141e175e740 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 21:00:18 +0500 Subject: [PATCH 13/14] test cases and more robust err handling --- ipinfo/handler.py | 12 ++-- ipinfo/handler_async.py | 110 +++++++++++++++++++++++------------- ipinfo/handler_utils.py | 10 ++++ tests/handler_async_test.py | 14 ++++- tests/handler_test.py | 12 +++- 5 files changed, 109 insertions(+), 49 deletions(-) diff --git a/ipinfo/handler.py b/ipinfo/handler.py index 2dc082f..c957fb6 100644 --- a/ipinfo/handler.py +++ b/ipinfo/handler.py @@ -179,10 +179,9 @@ def getBatchDetails( timeout_total is not None and time.time() - start_time > timeout_total ): - if raise_on_fail: - raise TimeoutExceededError() - else: - return result + return handler_utils.return_or_fail( + raise_on_fail, TimeoutExceededError(), result + ) chunk = lookup_addresses[i : i + batch_size] @@ -197,10 +196,7 @@ def getBatchDetails( raise RequestQuotaExceededError() response.raise_for_status() except Exception as e: - if raise_on_fail: - raise e - else: - return result + return handler_utils.return_or_fail(raise_on_fail, e, result) # fill cache json_response = response.json() diff --git a/ipinfo/handler_async.py b/ipinfo/handler_async.py index de0c160..fab7d51 100644 --- a/ipinfo/handler_async.py +++ b/ipinfo/handler_async.py @@ -7,12 +7,13 @@ import json import os import sys +import time import aiohttp from .cache.default import DefaultCache from .details import Details -from .exceptions import RequestQuotaExceededError +from .exceptions import RequestQuotaExceededError, TimeoutExceededError from .handler_utils import ( API_URL, COUNTRY_FILE_DEFAULT, @@ -197,49 +198,80 @@ async def getBatchDetails( url = API_URL + "/batch" headers = handler_utils.get_headers(self.access_token) headers["content-type"] = "application/json" - reqs = [] - for i in range(0, len(lookup_addresses), batch_size): - chunk = lookup_addresses[i : i + batch_size] - - # do http req - reqs.append( - self.httpsess.post( - url, - data=json.dumps(chunk), - headers=headers, - 
timeout=timeout_per_batch, - ) + + # prepare coroutines that will make reqs and update results. + reqs = [ + self._do_batch_req( + lookup_addresses[i : i + batch_size], + url, + headers, + timeout_per_batch, + raise_on_fail, + result, + ) + for i in range(0, len(lookup_addresses), batch_size) + ] + + try: + _, pending = await asyncio.wait( + {*reqs}, + timeout=timeout_total, + return_when=asyncio.FIRST_EXCEPTION, ) - resps = await asyncio.wait_for( - asyncio.gather(*reqs, return_exceptions=raise_on_fail), - timeout_total - ) - for resp in resps: - # gather data - try: - if resp.status == 429: - raise RequestQuotaExceededError() - resp.raise_for_status() - except Exception as e: - if raise_on_fail: - raise e - else: - return result - - json_resp = await resp.json() - - # format & fill up cache - for ip_address, details in json_resp.items(): - if isinstance(details, dict): - handler_utils.format_details(details, self.countries) - self.cache[ip_address] = details - - # merge cached results with new lookup - result.update(json_resp) + # if all done, return result. + if len(pending) == 0: + return result + + # if some had a timeout, first cancel timed out stuff and wait for + # cleanup. then exit with return_or_fail. + for co in pending: + try: + co.cancel() + await co + except asyncio.CancelledError: + pass + + return handler_utils.return_or_fail( + raise_on_fail, TimeoutExceededError(), result + ) + except Exception as e: + return handler_utils.return_or_fail(raise_on_fail, e, result) return result + async def _do_batch_req( + self, chunk, url, headers, timeout_per_batch, raise_on_fail, result + ): + """ + Coroutine which will do the actual POST request for getBatchDetails. + """ + resp = await self.httpsess.post( + url, + data=json.dumps(chunk), + headers=headers, + timeout=timeout_per_batch, + ) + + # gather data + try: + if resp.status == 429: + raise RequestQuotaExceededError() + resp.raise_for_status() + except Exception as e: + return handler_utils.return_or_fail(raise_on_fail, e, None) + + json_resp = await resp.json() + + # format & fill up cache + for ip_address, details in json_resp.items(): + if isinstance(details, dict): + handler_utils.format_details(details, self.countries) + self.cache[ip_address] = details + + # merge cached results with new lookup + result.update(json_resp) + def _ensure_aiohttp_ready(self): """Ensures aiohttp internal state is initialized.""" if self.httpsess: diff --git a/ipinfo/handler_utils.py b/ipinfo/handler_utils.py index 341f41c..0328406 100644 --- a/ipinfo/handler_utils.py +++ b/ipinfo/handler_utils.py @@ -83,3 +83,13 @@ def read_country_names(countries_file=None): countries_json = f.read() return json.loads(countries_json) + + +def return_or_fail(raise_on_fail, e, v): + """ + Either throws `e` if `raise_on_fail` or else returns `v`. 
+ """ + if raise_on_fail: + raise e + else: + return v diff --git a/tests/handler_async_test.py b/tests/handler_async_test.py index 7db234a..aa3b710 100644 --- a/tests/handler_async_test.py +++ b/tests/handler_async_test.py @@ -4,6 +4,7 @@ from ipinfo.details import Details from ipinfo.handler_async import AsyncHandler from ipinfo import handler_utils +import ipinfo import pytest @@ -117,10 +118,21 @@ def _check_batch_details(ips, details, token): assert "domains" in d -@pytest.mark.parametrize("batch_size", [None, 2, 3]) +@pytest.mark.parametrize("batch_size", [None, 1, 2, 3]) @pytest.mark.asyncio async def test_get_batch_details(batch_size): handler, token, ips = _prepare_batch_test() details = await handler.getBatchDetails(ips, batch_size=batch_size) _check_batch_details(ips, details, token) await handler.deinit() + + +@pytest.mark.parametrize("batch_size", [None, 1, 2, 3]) +@pytest.mark.asyncio +async def test_get_batch_details_total_timeout(batch_size): + handler, token, ips = _prepare_batch_test() + with pytest.raises(ipinfo.exceptions.TimeoutExceededError): + await handler.getBatchDetails( + ips, batch_size=batch_size, timeout_total=0.001 + ) + await handler.deinit() diff --git a/tests/handler_test.py b/tests/handler_test.py index 078e0f9..5389a74 100644 --- a/tests/handler_test.py +++ b/tests/handler_test.py @@ -6,6 +6,7 @@ from ipinfo.details import Details from ipinfo.handler import Handler from ipinfo import handler_utils +import ipinfo import pytest @@ -112,8 +113,17 @@ def _check_batch_details(ips, details, token): assert "domains" in d -@pytest.mark.parametrize("batch_size", [None, 2, 3]) +@pytest.mark.parametrize("batch_size", [None, 1, 2, 3]) def test_get_batch_details(batch_size): handler, token, ips = _prepare_batch_test() details = handler.getBatchDetails(ips, batch_size=batch_size) _check_batch_details(ips, details, token) + + +@pytest.mark.parametrize("batch_size", [1, 2]) +def test_get_batch_details_total_timeout(batch_size): + handler, token, ips = _prepare_batch_test() + with pytest.raises(ipinfo.exceptions.TimeoutExceededError): + handler.getBatchDetails( + ips, batch_size=batch_size, timeout_total=0.001 + ) From 6f3651683ae9fd55c78b8ccac1d0c7e933b52855 Mon Sep 17 00:00:00 2001 From: Uman Shahzad Date: Mon, 21 Dec 2020 21:10:03 +0500 Subject: [PATCH 14/14] update readme with input size info --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5c1dd3d..0a9306b 100644 --- a/README.md +++ b/README.md @@ -218,9 +218,10 @@ The file must be a `.json` file with the following structure: ### Batch Operations -Looking up a single IP at a time can be slow. It could be done concurrently from -the client side, but IPinfo supports a batch endpoint to allow you to group -together IPs and let us handle retrieving details for them in bulk for you. +Looking up a single IP at a time can be slow. It could be done concurrently +from the client side, but IPinfo supports a batch endpoint to allow you to +group together IPs and let us handle retrieving details for them in bulk for +you. ```python >>> import ipinfo, pprint @@ -256,6 +257,9 @@ together IPs and let us handle retrieving details for them in bulk for you. 'timezone': 'America/Los_Angeles'}} ``` +The input size is not limited, as the interface will chunk operations for you +behind the scenes. + Please see [the official documentation](https://ipinfo.io/developers/batch) for more information and limitations.