From 94f53940ab268dfca955396acc20c65448f75c8f Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 4 May 2020 14:25:47 +0200 Subject: [PATCH 01/46] AdobeReader migration to 2.0 API v0 --- nck/clients/adobe_client.py | 64 ++++++++++ nck/helpers/adobe_helper_2_0.py | 75 +++++++++++ nck/readers/__init__.py | 2 + nck/readers/adobe_reader_2_0.py | 216 ++++++++++++++++++++++++++++++++ 4 files changed, 357 insertions(+) create mode 100644 nck/clients/adobe_client.py create mode 100644 nck/helpers/adobe_helper_2_0.py create mode 100644 nck/readers/adobe_reader_2_0.py diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py new file mode 100644 index 00000000..ee0aaf64 --- /dev/null +++ b/nck/clients/adobe_client.py @@ -0,0 +1,64 @@ +# PRELIMINARY STEPS TO FOLLOW TO GET JWT CREDENTIALS +# - Get developper access to Adobe Analytics: +# https://helpx.adobe.com/enterprise/using/manage-developers.html +# - Create an integration to Adobe Analytics on Adobe I/O Console: +# https://console.adobe.io/ + +import logging +import datetime +import requests +import jwt + +IMS_HOST = "ims-na1.adobelogin.com" +IMS_EXCHANGE = "https://ims-na1.adobelogin.com/ims/exchange/jwt" +DISCOVERY_URL = "https://analytics.adobe.io/discovery/me" + +logging.basicConfig(level="INFO") +logger = logging.getLogger() + +class JWTClient(): + """ + Following the steps described in this repo: + https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python + """ + + def __init__(self, api_key, tech_account_id, org_id, client_secret, metascopes, private_key_path): + self.api_key = api_key + self.tech_account_id = tech_account_id + self.org_id = org_id + self.client_secret = client_secret + self.metascopes = metascopes + self.private_key_path = private_key_path + + #Creating jwt_token attribute + logging.info("Getting jwt_token.") + with open(self.private_key_path, 'r') as file: + private_key = file.read() + self.jwt_token = jwt.encode({ + "exp": datetime.datetime.utcnow() + 
datetime.timedelta(seconds=30), + "iss": self.org_id, + "sub": self.tech_account_id, + f"https://{IMS_HOST}/s/{self.metascopes}": True, + "aud": f"https://{IMS_HOST}/c/{self.api_key}" + }, private_key, algorithm='RS256') + + #Creating access_token attribute + logging.info("Getting access_token.") + post_body = { + "client_id": self.api_key, + "client_secret": self.client_secret, + "jwt_token": self.jwt_token + } + response = requests.post(IMS_EXCHANGE, data=post_body) + self.access_token = response.json()["access_token"] + + #Creating global_company_id attribute + logging.info("Getting global_company_id.") + response = requests.get( + DISCOVERY_URL, + headers={ + "Authorization": f"Bearer {self.access_token}", + "x-api-key": self.api_key + } + ) + self.global_company_id = response.json().get("imsOrgs")[0].get("companies")[0].get("globalCompanyId") \ No newline at end of file diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py new file mode 100644 index 00000000..3435ce79 --- /dev/null +++ b/nck/helpers/adobe_helper_2_0.py @@ -0,0 +1,75 @@ +def build_request_headers(jwt_client): + """ + Building headers to authenticate with the Reporting API. 
+ Input: JWTClient object + """ + + return { + "Accept": "application/json", + "Authorization": "Bearer {}".format(jwt_client.access_token), + "Content-Type": "application/json", + "x-api-key": jwt_client.api_key, + "x-proxy-global-company-id": jwt_client.global_company_id + } + +def add_metrics_container_to_report_description(rep_desc,dimensions,breakdown_item_ids,metrics): + """ + Filling the metricContainer section of a report description: + - Creates 1 filter per dimension breakdown x metric + - Applies filters to each metric + """ + + nb_breakdowns = len(breakdown_item_ids) + nb_metrics = len(metrics) + + rep_desc["metricContainer"]["metricFilters"] = [ + { + "id": i+j*nb_breakdowns, + "type": "breakdown", + "dimension": f"variables/{dimensions[i]}", + "itemId": breakdown_item_ids[i] + } + for j in range(nb_metrics) for i in range(nb_breakdowns)] + + rep_desc["metricContainer"]["metrics"] = [ + { + "id": f"metrics/{metrics[j]}", + "filters": [i+j*nb_breakdowns for i in range(nb_breakdowns)] + } + for j in range(nb_metrics)] + + return rep_desc + +def get_node_values_from_response(response): + """ + Extracting dimension values from a report response, + and returning them into a dictionnary of nodes: {name_itemId: value} + For instance: {'daterangeday_1200201': 'Mar 1, 2020'} + """ + + name = response["columns"]["dimension"]["id"].split("/")[1] + values = [row["value"] for row in response["rows"]] + item_ids = [row["itemId"] for row in response["rows"]] + + return {"{}_{}".format(name,item_id): value for (item_id,value) in zip(item_ids,values)} + +def get_item_ids_from_nodes(list_of_strings): + """ + Extacting item_ids from a list of nodes, + each node being expressed as 'name_itemId' + """ + + return [string.split("_")[1] for string in list_of_strings if string] + +def parse_response(response,metrics,parent_dim_parsed): + """ + Parsing a raw JSON response into the following format: + {dimension: value, metric: value} (1 dictionnary per row) + """ + + dimension 
= response["columns"]["dimension"]["id"].split("variables/")[1] + + for row in response["rows"]: + parsed_row_metrics = {m:v for m, v in zip(metrics,row["data"])} + parsed_row = {**parent_dim_parsed, dimension:row["value"], **parsed_row_metrics} + yield parsed_row \ No newline at end of file diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 8c5375a8..aaef68c3 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -30,6 +30,7 @@ from nck.readers.ga_reader import ga from nck.readers.search_console_reader import search_console from nck.readers.adobe_reader import adobe +from nck.readers.adobe_reader_2_0 import adobe_2_0 from nck.readers.radarly_reader import radarly from nck.readers.yandex_campaign_reader import yandex_campaigns from nck.readers.yandex_statistics_reader import yandex_statistics @@ -48,6 +49,7 @@ ga, search_console, adobe, + adobe_2_0, radarly, yandex_campaigns, yandex_statistics diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py new file mode 100644 index 00000000..d4ce5dc9 --- /dev/null +++ b/nck/readers/adobe_reader_2_0.py @@ -0,0 +1,216 @@ +import click +import json +import logging +import datetime +import requests +from itertools import chain +from nck.utils.args import extract_args +from nck.commands.command import processor +from nck.readers.reader import Reader +from nck.clients.adobe_client import JWTClient +from nck.streams.json_stream import JSONStream +from nck.helpers.adobe_helper_2_0 import ( + build_request_headers, + add_metrics_container_to_report_description, + get_node_values_from_response, + get_item_ids_from_nodes, + parse_response +) + +DATEFORMAT = "%Y-%m-%dT%H:%M:%S" + +logging.basicConfig(level="INFO") +logger = logging.getLogger() + +@click.command(name="read_adobe_2_0") +@click.option("--adobe-api-key", required=True) +@click.option("--adobe-tech-account-id", required=True) +@click.option("--adobe-org-id", required=True) +@click.option("--adobe-client-secret", 
required=True) +@click.option("--adobe-metascopes", required=True) +@click.option("--adobe-private-key-path", required=True) +@click.option("--adobe-date-start", required=True, type=click.DateTime()) +@click.option("--adobe-date-stop", required=True, type=click.DateTime()) +@click.option("--adobe-report-suite-id", required=True) +@click.option("--adobe-dimensions", required=True, multiple=True) +@click.option("--adobe-metrics", required=True, multiple=True) +@processor("adobe_api_key","adobe_tech_account_id","adobe_org_id","adobe_client_secret","adobe_metascopes","adobe_private_key_path") +def adobe_2_0(**kwargs): + return AdobeReader_2_0(**extract_args("adobe_", kwargs)) + +class AdobeReader_2_0(Reader): + + def __init__( + self, + api_key, + tech_account_id, + org_id, + client_secret, + metascopes, + private_key_path, + date_start, + date_stop, + report_suite_id, + dimensions, + metrics + ): + # We should probably define a method to create the jwt_client within the NewAdobeReader + self.jwt_client = JWTClient(api_key, tech_account_id, org_id, client_secret, metascopes, private_key_path) + self.date_start = date_start + self.date_stop = date_stop + datetime.timedelta(days=1) + self.report_suite_id = report_suite_id + self.dimensions = dimensions + self.metrics = metrics + self.node_values = {} + + def build_date_range(self): + return f"{self.date_start.strftime(DATEFORMAT)}/{self.date_stop.strftime(DATEFORMAT)}" + + def build_report_description(self,breakdown_item_ids,metrics): + """ + Building a report description, to be passed as a parameter to the Reporting API. 
+ Documentation: + - https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-multiple-breakdowns.md + - https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md + """ + + rep_desc = { + "rsid": self.report_suite_id, + "globalFilters": [{"type": "dateRange","dateRange": self.build_date_range()}], + "metricContainer": {}, + "dimension": "variables/{}".format(self.dimensions[len(breakdown_item_ids)]), + "settings": {"countRepeatInstances": "true","limit":"500"} + } + + rep_desc = add_metrics_container_to_report_description( + rep_desc = rep_desc, + dimensions = self.dimensions, + breakdown_item_ids = breakdown_item_ids, + metrics = metrics + ) + + return rep_desc + + def get_report_page(self,rep_desc,page_nb=0): + """ + Getting a single report page, and returning it into a raw JSON format. + """ + + rep_desc["settings"]["page"] = page_nb + + response = requests.post( + "https://analytics.adobe.io/api/{}/reports".format(self.jwt_client.global_company_id), + headers = build_request_headers(self.jwt_client), + data = json.dumps(rep_desc) + ) + + return response.json() + + def get_parsed_report(self,rep_desc,metrics,parent_dim_parsed): + """ + Iterating over report pages, parsing them, and returning a list of iterators, + containing dictonnary-formatted records: {dimension: value, metric: value} + + The parent_dim_parsed argument (a dictionnary: {dimension: value}) + should be passed if the request includes multiple dimension breakdowns, + so that we can add their values to output records. 
+ """ + + logging.info(f"Getting report: {rep_desc}") + + first_response = self.get_report_page(rep_desc=rep_desc,page_nb=0) + all_responses = [parse_response(first_response,metrics,parent_dim_parsed)] + + if first_response['totalPages']>1: + for page_nb in range(1,first_response['totalPages']): + next_response = reader.get_report_page(rep_desc=rep_desc,page_nb=page_nb) + all_responses += [parse_response(next_response,metrics,parent_dim_parsed)] + + return chain(*all_responses) + + def get_node_values(self,breakdown_item_ids): + """ + Extracting dimension values from a full report response (all pages), + and returning them into a dictionnary of nodes: {name_itemId: value} + For instance: {'daterangeday_1200001': 'Jan 1, 2020'} + """ + + rep_desc = self.build_report_description(breakdown_item_ids=breakdown_item_ids,metrics=["visits"]) + first_response = self.get_report_page(rep_desc=rep_desc,page_nb=0) + node_values = get_node_values_from_response(first_response) + + if first_response['totalPages']>1: + for page_nb in range(1,first_response['totalPages']): + node_values += get_node_values_from_response(reader.get_report_page(rep_desc=rep_desc,page_nb=page_nb)) + + return node_values + + def add_child_nodes_to_graph(self,graph,node,path_to_node): + """ + Adding child nodes to Adobe graph, at two levels: + parent_node: [child_node_0, child_node_1, child_node_2] + child_node_0: [] + child_node_1: [] + child_node_2: [] + """ + + breakdown_item_ids = get_item_ids_from_nodes(path_to_node) + child_node_values = self.get_node_values(breakdown_item_ids) + self.node_values.update(child_node_values) + + graph[node] = [n for n in child_node_values] + for n in child_node_values: + graph[n] = [] + + return graph + + def read(self,graph=None,node=None): + """ + Exploring Adobe graph using a DFS (Deep-First-Search) algorithm. 
+ """ + + global visited + global path_to_node + + if graph: + + # If remaining node children to explore: add node children to graph + if len(path_to_node) < len(self.dimensions)-1: + + graph = self.add_child_nodes_to_graph(graph,node,path_to_node) + + # If no remaining node children to explore: get report + if len(path_to_node) == len(self.dimensions)-1: + + parent_dim_parsed = {node.split("_")[0]:self.node_values[node] for node in path_to_node} + breakdown_item_ids = get_item_ids_from_nodes(path_to_node) + rep_desc = self.build_report_description(breakdown_item_ids=breakdown_item_ids,metrics=self.metrics) + data = self.get_parsed_report(rep_desc=rep_desc,metrics=self.metrics,parent_dim_parsed=parent_dim_parsed) + + def result_generator(): + yield from data + + yield JSONStream("adobe_results", result_generator()) + + else: + # Create graph and add first level of nodes + graph, node, path_to_node, visited = {}, "root", [], [] + graph = self.add_child_nodes_to_graph(graph=graph,node=node,path_to_node=path_to_node) + + # Add node to visited + if node not in visited: + visited.append(node) + + # Update unvisited_childs + unvisited_childs = [child_node for child_node in graph[node] if child_node not in visited] + + # Read through node children + for child_node in unvisited_childs: + path_to_node.append(child_node) + yield from self.read(graph=graph,node=child_node) + path_to_node.remove(child_node) + + # Remove local_root_node children from visited + if path_to_node != []: + local_root_node = path_to_node[-1] + visited = [n for n in visited if n not in graph[local_root_node]] \ No newline at end of file From 2ed39c6ea23e18661b8c705ad7319d980e471350 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 4 May 2020 14:30:10 +0200 Subject: [PATCH 02/46] AdobeReader migration to 2.0 API v0 --- nck/helpers/adobe_helper_2_0.py | 2 +- nck/readers/adobe_reader_2_0.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nck/helpers/adobe_helper_2_0.py 
b/nck/helpers/adobe_helper_2_0.py index 3435ce79..badd4235 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/nck/helpers/adobe_helper_2_0.py @@ -12,7 +12,7 @@ def build_request_headers(jwt_client): "x-proxy-global-company-id": jwt_client.global_company_id } -def add_metrics_container_to_report_description(rep_desc,dimensions,breakdown_item_ids,metrics): +def add_metric_container_to_report_description(rep_desc,dimensions,breakdown_item_ids,metrics): """ Filling the metricContainer section of a report description: - Creates 1 filter per dimension breakdown x metric diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index d4ce5dc9..00b6e66d 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -11,7 +11,7 @@ from nck.streams.json_stream import JSONStream from nck.helpers.adobe_helper_2_0 import ( build_request_headers, - add_metrics_container_to_report_description, + add_metric_container_to_report_description, get_node_values_from_response, get_item_ids_from_nodes, parse_response @@ -82,7 +82,7 @@ def build_report_description(self,breakdown_item_ids,metrics): "settings": {"countRepeatInstances": "true","limit":"500"} } - rep_desc = add_metrics_container_to_report_description( + rep_desc = add_metric_container_to_report_description( rep_desc = rep_desc, dimensions = self.dimensions, breakdown_item_ids = breakdown_item_ids, From ad471bd04eda3ab0ad8095d7c622a9ad8b6b5385 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 11 May 2020 17:42:49 +0200 Subject: [PATCH 03/46] FacebookReader refactoring V0 --- nck/helpers/facebook_helper.py | 269 ++++++++++------------ nck/readers/README.md | 131 +++++++++-- nck/readers/__init__.py | 4 +- nck/readers/facebook_reader.py | 315 +++++++++++++++----------- nck/streams/normalized_json_stream.py | 5 + 5 files changed, 411 insertions(+), 313 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index 8dc822f2..1495ef72 100644 --- 
a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -15,160 +15,125 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -from facebook_business.adobjects.adsinsights import AdsInsights - -BREAKDOWNS_POSSIBLE_VALUES = [v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__")] - -ACTION_BREAKDOWNS_POSSIBLE_VALUES = [ - v for k, v in AdsInsights.ActionBreakdowns.__dict__.items() if not k.startswith("__") -] - -AD_OBJECT_TYPES = ["adaccount", "campaign", "adset", "ad", "user"] -LEVELS_POSSIBLE_VALUES = ["ad", "adset", "campaign", "account"] - -CMP_POSSIBLE_VALUES = [ - "account_id", - "adlabels", - "bid_strategy", - "boosted_object_id", - "brand_lift_studies", - "budget_rebalance_flag", - "budget_remaining", - "buying_type", - "can_create_brand_lift_study", - "can_use_spend_cap", - "configured_status", - "created_time", - "daily_budget", - "effective_status", - "id", - "issues_info", - "last_budget_toggling_time", - "lifetime_budget", - "name", - "objective", - "pacing_type", - "promoted_object", - "recommendations", - "source_campaign", - "source_campaign_id", - "spend_cap", - "start_time", - "status", - "stop_time", - "topline_id", - "updated_time", -] +from facebook_business.adobjects.adsinsights import AdsInsights -# should have done this list comprehension selection but -# some of the fields are obsolet and doesn't work, i took the most important -# ADS_POSSIBLE_VALUES = [v for k,v in AdSet.Field.__dict__.items() if not k.startswith("__")] -ADS_POSSIBLE_VALUES = [ - "account_id", - "adlabels", - "asset_feed_id", - "budget_remaining", - "campaign", - "campaign_id", - "configured_status", - "created_time", - "creative_sequence", - "daily_budget", - "end_time", - "lifetime_budget", - "lifetime_imps", - "lifetime_min_spend_target", - "lifetime_spend_cap", - 
"name", - "pacing_type", - "source_adset", - "source_adset_id", - "start_time", - "status", -] +FACEBOOK_OBJECTS = ["creative", "ad", "adset", "campaign", "account"] DATE_PRESETS = [v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__")] -DESIRED_FIELDS = { - "date_start": "date_start", - "date_stop": "date_stop", - "account_name": "account_name", - "account_id": "account_id", - "ad_id": "ad_id", - "ad_name": "ad_name", - "adset_id": "adset_id", - "adset_name": "adset_name", - "campaign_id": "campaign_id", - "campaign_name": "campaign_name", - "clicks": "clicks", - "link_clicks": "inline_link_clicks", - "outbound_clicks": ("outbound_clicks", "outbound_click"), - "impressions": "impressions", - "post_engagement": ("actions", "post_engagement"), - "purchases": ("actions", "omni_purchase"), - "website_purchases": ("actions", "offsite_conversion.fb_pixel_purchase"), - "purchases_conversion_value": ("action_values", "offsite_conversion.fb_pixel_purchase"), - "website_purchases_conversion_value": ("action_values", "omni_purchase"), - "website_purchase_roas": ("website_purchase_roas", "offsite_conversion.fb_pixel_purchase"), - "objective": "objective", - "reach": "reach", - "spend": "spend", - "video_plays_3s": ("actions", "video_view"), - "video_plays": ("video_play_actions", "video_view"), - "video_plays_100p": ("video_p100_watched_actions", "video_view"), - "video_plays_95p": ("video_p95_watched_actions", "video_view"), - "video_plays_75p": ("video_p75_watched_actions", "video_view"), - "video_plays_50p": ("video_p50_watched_actions", "video_view"), - "video_plays_25p": ("video_p25_watched_actions", "video_view"), - "age": "age", - "gender": "gender", - "account_currency": "account_currency", - "frequency": "frequency", - "buying_type": "buying_type", - "video_p100_watched_actions": "video_p100_watched_actions", - "video_p75_watched_actions": "video_p75_watched_actions", - "video_p25_watched_actions": "video_p25_watched_actions", - 
"video_p50_watched_actions": "video_p50_watched_actions", - "video_thruplay_watched_actions": "video_thruplay_watched_actions", - "conversions": "conversions", - "status": "status", - "lifetime_budget": "lifetime_budget", - "budget_remaining": "budget_remaining", - "name": "name", - "id": "id", - "start_time": "start_time", - "stop_time": "end_time", - "daily_budget": "daily_budget", - "device_platform": "device_platform", - "platform_position": "platform_position", - "publisher_platform": "publisher_platform", - "impression_device": "impression_device", - "link_url_asset": {"value": "website_url"}, -} - - -def get_field_value(row, field): - if is_url_asset(field): - return extract_special_field(row, field) - return ( - row.get(DESIRED_FIELDS[field], None) - if isinstance(DESIRED_FIELDS[field], str) - else get_nested_field_value(row, field) - ) - - -def extract_special_field(row, field): - dic = DESIRED_FIELDS[field] - return row.get(field, {}).get(dic.get("value"), None) - - -def is_url_asset(field): - return field == "link_url_asset" - - -def get_nested_field_value(row, field): - if DESIRED_FIELDS[field][0] not in row: - return None - nested_field = next((x for x in row[DESIRED_FIELDS[field][0]] if x["action_type"] == DESIRED_FIELDS[field][1]), {}) - return nested_field["value"] if nested_field else None +BREAKDOWNS = [v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__")] + +ACTION_BREAKDOWNS = [v for k, v in AdsInsights.ActionBreakdowns.__dict__.items() if not k.startswith("__")] + +def get_action_breakdown_filters(field_path): + """ + Extracts action breakdown filters from a field path, + and returns them as a dictionnary. 
+ + For instance: + 'actions[action_type:video_view][action_type:post_engagement][action_device:iphone]' + returns: + {'action_type':['video_view','post_engagement'], + 'action_device':['iphone']} + """ + filters = {} + for path_item in field_path: + if ":" in path_item: + action_breakdown = path_item.split(":")[0] + action_breakdown_value = path_item.split(":")[1] + if action_breakdown not in filters: + filters[action_breakdown] = [action_breakdown_value] + else: + filters[action_breakdown].append(action_breakdown_value) + return filters + +def format_field_path(field_path): + """ + Formats a field_path back into a field. + + For instance: + ['actions', 'action_type:post_engagement'] + returns: + 'actions[action_type:post_engagement]' + """ + if len(field_path)==1: + return field_path[0] + else: + return "".join([field_path[0]] + ["[{}]".format(element) for element in field_path[1:]]) + +def check_if_obj_meets_action_breakdown_filters(obj, filters): + """ + Checks if a nested action breakdown object + meets the conditions defined by action breakdown filters. + + For instance, if action breakdown filters are: + {'action_type': ['post_engagement', 'video_view'] + 'action_device': ['iphone']} + Outputs will be: + - {'action_type':'post_engagement', 'action_device':'iphone', 'value':12}: True + - {'action_type':'video_view', 'action_device':'iphone', 'value':12}: True + - {'action_type':'post_engagement', 'action_device':'desktop', 'value':12}: False + """ + obj_meets_all_filters = True + for action_breakdown in filters: + obj_meets_filter = obj[action_breakdown] in filters[action_breakdown] + obj_meets_all_filters = obj_meets_all_filters and obj_meets_filter + if obj_meets_all_filters is False: + break + return obj_meets_all_filters + +def get_action_breakdown_value(obj, visited, action_breakdowns): + """ + Extracts the action breakdown value + of a nested action breakdown object. 
+ + For instance: + {actions: [{'action_type':'video_view', 'action_device':'iphone', 'value':'12'}]} + Here, the nested action_breakdown object is: + {'action_type':'video_view', 'action_device':'iphone', 'value':'12'} + returns: + {'actions[action_type:video_view][action_device:iphone]': '12'} + """ + obj_action_breakdown = ["{}:{}".format(action_breakdown,obj[action_breakdown]) for action_breakdown in action_breakdowns if action_breakdown in obj] + return {format_field_path(visited + obj_action_breakdown): obj["value"]} + +def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filters=None): + """ + Extracts action breakdown values from a list of nested action breakdown objects, + only if they meet the conditions defined by action breakdown filters. + """ + action_breakdown_values = {} + for obj in resp_obj: + if filters: + if check_if_obj_meets_action_breakdown_filters(obj, filters): + action_breakdown_values = {**action_breakdown_values, **get_action_breakdown_value(obj, visited, action_breakdowns)} + else: + action_breakdown_values = {**action_breakdown_values, **get_action_breakdown_value(obj, visited, action_breakdowns)} + return action_breakdown_values + +def get_field_values(resp_obj, field_path, action_breakdowns, visited=None): + """ + Recursive function extracting (and formating) the values + of a requested field from an API response and a field path. 
+ """ + path_item = field_path[0] + remaining_path_items = len(field_path)-1 + + if visited is None: + visited = [path_item] + else: + visited.append(path_item) + + if path_item in resp_obj: + if remaining_path_items == 0: + if isinstance(resp_obj[path_item],str): + return {format_field_path(visited): resp_obj[path_item]} + if isinstance(resp_obj[path_item],list): + return get_all_action_breakdown_values(resp_obj[path_item], visited, action_breakdowns) + else: + return get_field_values(resp_obj[path_item], field_path[1:], action_breakdowns, visited) + else: + if all(":" in f for f in field_path): + filters = get_action_breakdown_filters(field_path) + return get_all_action_breakdown_values(resp_obj, visited[:-1], action_breakdowns, filters) \ No newline at end of file diff --git a/nck/readers/README.md b/nck/readers/README.md index fd730835..35477830 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -10,46 +10,127 @@ Each reader role is to read data from external source and transform it into a St 4. Reference click command into [commands list](./__init__.py) 5. Update current README.md - ## Facebook Reader -- Example +#### Quickstart + +The Facebook Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Object Nodes** (to retrieve configuration data). 
+ +*Example of Facebook Ad Insights Request* +``` +python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-breakdown age --facebook-breakdown gender --facebook-action-breakdown action_type --facebook-field ad_id --facebook-field ad_name --facebook-field impressions --facebook-field clicks --facebook-field actions[action_type:post_engagement] --facebook-field actions[action_type:video_view] --facebook-field age --facebook-field gender --facebook-time-increment 1 --facebook-start-date 2020-01-01 --facebook-end-date 2020-01-03 write_console +``` + +*Example of Facebook Object Node Request* +``` +python nck/entrypoint.py read_facebook --facebook-access-token --facebook-object-id --facebook-ad-insights False --facebook-level ad --facebook-field id --facebook-field creative[id] --facebook-add-date-to-report True --facebook-start-date 2020-01-01 --facebook-end-date 2019-01-01 write_console +``` -The following command retrieves some insights of every Ads in the Facebook account thanks to -a Facebook App whose access_token is . +#### Parameters +|CLI option|Documentation| +|:--|:--| +|`--facebook-app-id`|**[Not mandatory if Facebook Access Token is provided]** Facebook App ID.| +|`--facebook-app-secret`|**[Not mandatory if Facebook Access Token is provided]** Facebook App Secret.| +|`--facebook-access-token`|Facebook App Access Token.| +|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. *Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign, account.*| +|`--facebook-object-id`|ID of the root Facebook Object used to make the request.| +|`--facebook-level`|Granularity of the API response. 
*Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign or account.*| +|`--facebook-ad-insights`|*True* (default) if *Facebook Ad Insights* request, *False* if *Facebook Object Nodes* request.| +|`--facebook-field`|Fields to be retrieved.| +|`--facebook-start-date`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** Start date of the requested time range.| +|`--facebook-end-date`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** End date of the requested time range.| +|`--facebook-date-preset`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** Relative time range. Ignored if a specific *--facebook-start-date* and *--facebook-end-date* are specified.| +|`--facebook-time-increment`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** Splits the results into smaller time slices within the specified time range.| +|`--facebook-add-date-to-report`|**[Preferred]** *True* if you wish to add the date of the request to each response record, *False* otherwise (default).| +|`--facebook-breakdown`|**[Specific to *Facebook Ad Insights* Requests]** How to break down the result.| +|`--facebook-action-breakdown`|**[Specific to *Facebook Ad Insights* Requests]** How to break down action results.| + +#### Additional details for a relevant use of the Facebook Reader + +1. Select the appropriate `--facebook-level` + +|If Facebook Object Type is...|Facebook Level can be...| +|:--|:--| +|`account`|*account, campaign, adset, ad, creative*| +|`campaign`|*campaign, adset, ad*| +|`adset`|*adset, ad, creative*| +|`ad`|*ad, creative*| +|`creative`|*creative*| + +2. Format Facebook Reader response using `--facebook-field` + +2.1. 
The list of **applicable fields** can be found on the links below: + +- **Facebook Ad Insights Request**: [all fields](https://developers.facebook.com/docs/marketing-api/insights/parameters/v7.0) +- **Facebook Object Nodes Request**: [Account-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-account), [Campaign-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign-group), [Adset-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign), [Ad-level fields](https://developers.facebook.com/docs/marketing-api/reference/adgroup), [Creative-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-creative) + +2.2. If you want to select **a nested field value**, simply indicate the path to this value within the request field. + +*Facebook Reader Request* ``` -python nck/entrypoint.py read_facebook --facebook-access-token --facebook-ad-object-id --facebook-breakdown gender --facebook-level ad --facebook-start-date 2019-01-01 --facebook-end-date 2019-01-01 --facebook-field date_start --facebook-field date_stop --facebook-field account_currency --facebook-field account_id --facebook-field account_name --facebook-field ad_id --facebook-field ad_name --facebook-field adset_id --facebook-field adset_name --facebook-field campaign_id --facebook-field campaign_name --facebook-field clicks --facebook-field impressions --facebook-desired-field date_start --facebook-desired-field date_stop --facebook-desired-field account_name --facebook-desired-field account_id --facebook-desired-field ad_id --facebook-desired-field ad_namefacebook-desired-field clicks --facebook-desired-field impressions write_console +--facebook-field object_story_spec[video_data][call_to_action][value][link] ``` -The report below is the output of the command. 
You can easily store it in GCS or Biquery thanks to the corresponding -writers([GCS writer](../writers/gcs_writer.py), [BQ writer](../writers/bigquery_writer.py)): +*API Response* +``` +"object_story_spec": { + "video_data": { + "call_to_action": { + "type": "LEARN_MORE", + "value": { + "link": "http://artefact.com", + "link_format": "VIDEO_LPP" + } + } + } +} +``` + +*Facebook Reader Response* ``` { - "date_start": "2019-01-05", - "date_stop": "2019-01-05", - "account_name": "example_name" - "account_id": "0000000000" - "ad_id": "00000000000", - "ad_name": "example_name", - "clicks": "1", - "impressions": "100" + "object_story_spec[video_data][call_to_action][value][link]": "https://www.artefact.com" } ``` -See the [documentation here](https://developers.facebook.com/docs/marketing-api/insights/#marketing-api-quickstart "Create a Facebook App") -to create a Facebook App and an access token. -- Parameters of the Facebook Readers +(2.3) **Action Breakdown filters** can be applied to the fields of ***Facebook Ad Insights* Requests** using the following syntax: `[:]`. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. 
-| --facebook-app-id | --facebook-app-secret | --facebook-access-token | --facebook-ad-object-id | --facebook-ad-object-type | --facebook-breakdown | --facebook-action-breakdown | --facebook-ad-insights | --facebook-level | --facebook-time-increment | --facebook-field | --facebook-desired-field | --facebook-start-date | --facebook-end-date | --facebook-date-preset | --facebook-request-date -|:-----------------:|:---------------------:|:-----------------------:|:-----------------------:|:-------------------------:|:--------------------:|:---------------------------:|:----------------------:|:-------------------:|:-------------------------:|:----------------:|:------------------------:|:---------------------:|:-------------------:|:----------------------:|:----------------------:| -|Facebook App ID |Facebook App ID| Facebook App access token|Object ID to request (account ID, campaign ID, ...)|Object type (account, campaign, ...)|List of breakdowns for the request|List of action-breakdowns for the request|If set to true, request insights|Represents the granularity of result|Time increment|List of fields to request|Desired fields in the output report |Start date of period|End date of period|Preset period|If set to true, the date of the request will appear in the report +*Facebook Reader Request* +``` +--facebook-action-breakdown action_type +--facebook-field actions[action_type:video_view][action_type:post_engagement] +``` -See the documents below for a better understanding of the parameters: -- [Facebook API Insights documentation](https://developers.facebook.com/docs/marketing-api/insights) -- [API Reference for Ad Insights](https://developers.facebook.com/docs/marketing-api/reference/adgroup/insights/) -- [Available Fields for Nautilus](../helpers/facebook_helper.py) +*API Response* +``` +"actions": [ + { + "action_type": "video_view", + "value": "17" + }, + { + "action_type": "link_click", + "value": "8" + }, + { + "action_type": "post_engagement", + "value": "25" 
+ }, + { + "action_type": "page_engagement", + "value": "12" + } +] +``` +*Facebook Reader Response* +``` +{ + "actions[action_type:video_view]": "17", + "actions[action_type:post_engagement]": "25", +} +``` ## Google Readers diff --git a/nck/readers/__init__.py b/nck/readers/__init__.py index 7d9993bb..0cc93eea 100644 --- a/nck/readers/__init__.py +++ b/nck/readers/__init__.py @@ -25,7 +25,7 @@ from nck.readers.oracle_reader import oracle from nck.readers.gsheets_reader import gsheets from nck.readers.salesforce_reader import salesforce -from nck.readers.facebook_reader import facebook_marketing +from nck.readers.facebook_reader import facebook from nck.readers.dbm_reader import dbm from nck.readers.dcm_reader import dcm from nck.readers.ga_reader import ga @@ -43,7 +43,7 @@ google_ads, s3, sa360_reader, - facebook_marketing, + facebook, oracle, dbm, dcm, diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 3e0f4fc3..21d41e08 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -15,10 +15,11 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ import logging import click -from itertools import chain +import re from click import ClickException from datetime import datetime @@ -28,73 +29,73 @@ from nck.utils.retry import retry from nck.streams.normalized_json_stream import NormalizedJSONStream from nck.helpers.facebook_helper import ( - AD_OBJECT_TYPES, - BREAKDOWNS_POSSIBLE_VALUES, - LEVELS_POSSIBLE_VALUES, + FACEBOOK_OBJECTS, DATE_PRESETS, - DESIRED_FIELDS, - get_field_value, + BREAKDOWNS, + ACTION_BREAKDOWNS, + get_action_breakdown_filters, + get_field_values, + format_field_path ) from facebook_business.api import FacebookAdsApi from facebook_business.adobjects.adaccount import AdAccount -from facebook_business.adobjects.adset import AdSet from facebook_business.adobjects.campaign import Campaign +from facebook_business.adobjects.adset import AdSet +from facebook_business.adobjects.ad import Ad +from facebook_business.adobjects.adcreative import AdCreative DATEFORMAT = "%Y-%m-%d" +OBJECT_CREATION_MAPPING = { + "account": AdAccount, + "campaign": Campaign, + "adset": AdSet, + "ad": Ad, + "creative": AdCreative +} + +EDGE_MAPPING = { + "account": ["campaign", "adset", "ad", "creative"], + "campaign": ["adset", "ad"], + "adset": ["ad", "creative"], + "ad": ["creative"] +} def check_object_id(ctx, param, values): try: [int(value) for value in values] return values except ValueError: - raise ClickException("Wrong format. Account ID should only contains digits") - + raise ClickException("Wrong format. 
Ad object IDs should only contains digits.") @click.command(name="read_facebook") @click.option("--facebook-app-id", default="", help="Not mandatory for AdsInsights reporting if access-token provided") +@click.option("--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided") +@click.option("--facebook-access-token", required=True) +@click.option("--facebook-object-id", required=True, multiple=True, callback=check_object_id) +@click.option("--facebook-object-type",type=click.Choice(FACEBOOK_OBJECTS),default="account") +@click.option("--facebook-level", type=click.Choice(FACEBOOK_OBJECTS), default="ad", help="Granularity of result") @click.option( - "--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided" + "--facebook-ad-insights", + type=click.BOOL, + default=True, + help="https://developers.facebook.com/docs/marketing-api/insights", ) -@click.option("--facebook-access-token", required=True) -@click.option("--facebook-ad-object-id", required=True, multiple=True, callback=check_object_id) -@click.option("--facebook-ad-object-type", type=click.Choice(AD_OBJECT_TYPES), default=AD_OBJECT_TYPES[0]) @click.option( "--facebook-breakdown", multiple=True, - type=click.Choice(BREAKDOWNS_POSSIBLE_VALUES), + type=click.Choice(BREAKDOWNS), help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns/", ) -# At this time, the Facebook connector only handle the action-breakdown "action_type" @click.option( "--facebook-action-breakdown", multiple=True, - type=click.Choice("action_type"), - default=["action_type"], + type=click.Choice(ACTION_BREAKDOWNS), help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns#actionsbreakdown", ) -@click.option( - "--facebook-ad-insights", - type=click.BOOL, - default=True, - help="https://developers.facebook.com/docs/marketing-api/insights", -) -@click.option( - "--facebook-level", - 
type=click.Choice(LEVELS_POSSIBLE_VALUES), - default=LEVELS_POSSIBLE_VALUES[0], - help="Represents the granularity of result", -) +@click.option("--facebook-field", multiple=True, help="API fields, following Artefact format") @click.option("--facebook-time-increment") -@click.option("--facebook-field", multiple=True, help="Facebook API fields for the request") -@click.option( - "--facebook-desired-field", - multiple=True, - type=click.Choice(list(DESIRED_FIELDS.keys())), - help="Desired fields to get in the output report." - "https://developers.facebook.com/docs/marketing-api/insights/parameters/v5.0#fields", -) @click.option("--facebook-start-date", type=click.DateTime()) @click.option("--facebook-end-date", type=click.DateTime()) @click.option("--facebook-date-preset", type=click.Choice(DATE_PRESETS)) @@ -105,29 +106,23 @@ def check_object_id(ctx, param, values): help="If set to true, the date of the request will appear in the report", ) @processor("facebook_app_secret", "facebook_access_token") -def facebook_marketing(**kwargs): - # Should add later all the check restrictions on fields/parameters/breakdowns of the API following the value of - # object type, see more on : - # ---https://developers.facebook.com/docs/marketing-api/insights/breakdowns - # ---https://developers.facebook.com/docs/marketing-api/insights - return FacebookMarketingReader(**extract_args("facebook_", kwargs)) +def facebook(**kwargs): + return FacebookReader(**extract_args("facebook_", kwargs)) - -class FacebookMarketingReader(Reader): +class FacebookReader(Reader): def __init__( self, app_id, app_secret, access_token, - ad_object_id, - ad_object_type, + object_id, + object_type, + level, + ad_insights, breakdown, action_breakdown, - ad_insights, - level, - time_increment, field, - desired_field, + time_increment, start_date, end_date, date_preset, @@ -136,125 +131,177 @@ def __init__( self.app_id = app_id self.app_secret = app_secret self.access_token = access_token - self.ad_object_ids = 
ad_object_id - self.ad_object_type = ad_object_type + + self.object_ids = object_id + self.object_type = object_type + self.level = level + + self.ad_insights = ad_insights self.breakdowns = list(breakdown) self.action_breakdowns = list(action_breakdown) - self.ad_insights = ad_insights - self.level = level - self.time_increment = time_increment or False self.fields = list(field) - self.desired_fields = list(desired_field) + self._field_paths = [re.split(r"[][]+",f.strip("]")) for f in self.fields] + self._api_fields = list({f[0] for f in self._field_paths if f[0] not in self.breakdowns}) + + self.time_increment = time_increment or False self.start_date = start_date self.end_date = end_date self.date_preset = date_preset self.add_date_to_report = add_date_to_report - @retry - def run_query_on_fb_account_obj(self, params, ad_object_id): - account = AdAccount("act_" + ad_object_id) - for el in account.get_insights(params=params): - yield el + # Check input parameters - @retry - def run_query_on_fb_account_obj_conf(self, params, ad_object_id): - if ad_object_id.startswith("act_"): - raise ClickException("Wrong format. Account ID should only contains digits") - account = AdAccount("act_" + ad_object_id) - campaigns = account.get_campaigns() - for el in chain( - *[self.run_query_on_fb_campaign_obj_conf(params, campaign.get("id")) for campaign in campaigns] - ): - yield el + if (self.level != self.object_type) and (self.level not in EDGE_MAPPING[self.object_type]): + raise ClickException("Wrong query. Asked level ({}) is not compatible with object type ({}). 
Please choose level from: {}".format(self.level,self.object_type,[self.object_type]+EDGE_MAPPING[self.object_type])) - @retry - def run_query_on_fb_campaign_obj_conf(self, params, ad_object_id): - campaign = Campaign(ad_object_id) - if self.level == LEVELS_POSSIBLE_VALUES[2]: - val_cmp = campaign.api_get(fields=self.desired_fields, params=params) - yield val_cmp - - elif self.level == LEVELS_POSSIBLE_VALUES[1]: - for el in chain( - *[self.run_query_on_fb_adset_obj_conf(params, adset.get("id")) for adset in campaign.get_ad_sets()] - ): - yield el - else: - raise ClickException( - "Received level: " + self.level + ". Available levels are " + repr(LEVELS_POSSIBLE_VALUES[1:3]) - ) + if self.ad_insights: - @retry - def run_query_on_fb_adset_obj_conf(self, params, ad_object_id, level): - adset = AdSet(ad_object_id) - if level == LEVELS_POSSIBLE_VALUES[1]: - val_adset = adset.api_get(fields=self.desired_fields, params=params) - yield val_adset - else: - raise ClickException("Adset setup is available at 'adset' level. Received level: " + self.level) + if self.level == "creative" or self.object_type == 'creative': + raise ClickException("Wrong query. The 'creative' level is not available in AdInsights queries. Accepted levels: {}".format(FACEBOOK_OBJECTS[1:])) + + missing_breakdowns = {f[0] for f in self._field_paths if (f[0] in BREAKDOWNS) and (f[0] not in self.breakdowns)} + if missing_breakdowns != set(): + raise ClickException("Wrong query. Please add to Breakdowns: {}".format(missing_breakdowns)) + + missing_action_breakdowns = {flt for f in self._field_paths for flt in get_action_breakdown_filters(f) + if flt not in self.action_breakdowns} + if missing_action_breakdowns != set(): + raise ClickException("Wrong query. Please add to Action Breakdowns: {}".format(missing_action_breakdowns)) + + elif not self.ad_insights and (self.breakdowns!=[] or self.action_breakdowns!=[]): + raise ClickException("Wrong query. 
Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns.") def get_params(self): - params = { - "action_breakdowns": self.action_breakdowns, - "fields": self.fields, - "breakdowns": self.breakdowns, - "level": self.level, - } - self.add_period_to_parameters(params) + """ + Build the request parameters that will be sent to the API: + - If AdInsights query: all levels accept parameters + - If Facebook Object Node query: only the Campaign, AdSet or Ad objects accept parameters + """ + params = {} + + if self.ad_insights: + + params["breakdowns"] = self.breakdowns + params["action_breakdowns"] = self.action_breakdowns + params["level"] = self.level + self.add_period_to_params(params) + + else: + if self.level in ["campaign","adset","ad"]: + self.add_period_to_params(params) + return params - def add_period_to_parameters(self, params): + def add_period_to_params(self, params): + """ + Adding the time_increment, time_range and/or date_preset keys to parameters. + """ if self.time_increment: params["time_increment"] = self.time_increment if self.start_date and self.end_date: - logging.info("Date format used for request : start_date and end_date") - params["time_range"] = self.create_time_range(self.start_date, self.end_date) + logging.info("Date format used for request: start_date and end_date") + params["time_range"] = self.create_time_range() elif self.date_preset: - logging.info("Date format used for request : date_preset") + logging.info("Date format used for request: date_preset") params["date_preset"] = self.date_preset else: logging.warning("No date range provided - Last 30 days by default") - logging.warning( - "https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters" - ) + logging.warning("https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters") + + def create_time_range(self): + return {"since": self.start_date.strftime(DATEFORMAT), "until": 
self.end_date.strftime(DATEFORMAT)} + + def create_object(self, object_id): + """ + Create a Facebook object based on the provided object_type and object_id. + """ + if self.object_type == "account": + object_id = "act_" + object_id + obj = OBJECT_CREATION_MAPPING[self.object_type](object_id) - @staticmethod - def create_time_range(start_date, end_date): - return {"since": start_date.strftime(DATEFORMAT), "until": end_date.strftime(DATEFORMAT)} + return obj + + @retry + def query_ad_insights(self, fields, params, object_id): + """ + AdInsights documentation: + https://developers.facebook.com/docs/marketing-api/insights + """ + # Step 1 - Create Facebook object + obj = self.create_object(object_id) + + # Step 2 - Run AdInsights query on Facebook object + for element in obj.get_insights(fields=fields, params=params): + yield element + + @retry + def query_object_node(self, fields, params, object_id): + """ + Supported Facebook Object Nodes: AdAccount, Campaign, AdSet, Ad and AdCreative + Documentation: https://developers.facebook.com/docs/marketing-api/reference/ + """ + # Step 1 - Create Facebook object + obj = self.create_object(object_id) + + # Step 2 - Run Facebook Object Node query on the Facebook object itself, + # or on one of its edges (depending on the specified level) + if self.level == self.object_type: + yield obj.api_get(fields=fields, params=params) + else: + EDGE_QUERY_MAPPING = { + "campaign": obj.get_campaigns(), + "adset": obj.get_ad_sets(), + "ad": obj.get_ads(), + "creative": obj.get_ad_creatives() + } + edge_objs = EDGE_QUERY_MAPPING[self.level] + for element in [edge_obj.api_get(fields=fields, params=params) for edge_obj in edge_objs]: + yield element def format_and_yield(self, record): - report = {field: get_field_value(record, field) for field in self.desired_fields} + """ + Parse a single record into an {item: value} dictionnary. 
+ """ + report = {} + + for field_path in self._field_paths: + field_values = get_field_values(record, field_path, self.action_breakdowns, visited=None) + if field_values: + report = {**report, **field_values} + if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) + yield report def result_generator(self, data): + """ + Parse all records into an {item: value} dictionnary. + """ for record in data: yield from self.format_and_yield(record.export_all_data()) - - def get_data(self): - for object_id in self.ad_object_ids: - yield from self.get_data_for_object(object_id) - - def get_data_for_object(self, ad_object_id): + + def get_data_for_object(self, object_id): + """ + Run an API query (AdInsights or Facebook Object Node) on a single object_id. + """ params = self.get_params() + if self.ad_insights: - query_mapping = {AD_OBJECT_TYPES[0]: self.run_query_on_fb_account_obj} + data = self.query_ad_insights(self._api_fields, params, object_id) else: - query_mapping = { - AD_OBJECT_TYPES[0]: self.run_query_on_fb_account_obj_conf, - AD_OBJECT_TYPES[1]: self.run_query_on_fb_campaign_obj_conf, - AD_OBJECT_TYPES[2]: self.run_query_on_fb_adset_obj_conf, - } - try: - query = query_mapping[self.ad_object_type] - data = query(params, ad_object_id) - except KeyError: - raise ClickException("`{}` is not a valid adObject type".format(self.ad_object_type)) + data = self.query_object_node(self._api_fields, params, object_id) + yield from self.result_generator(data) + def get_data(self): + """ + Run API queries on all object_ids. 
+ """ + for object_id in self.object_ids: + yield from self.get_data_for_object(object_id) + def read(self): + FacebookAdsApi.init(self.app_id, self.app_secret, self.access_token) - yield NormalizedJSONStream( - "results_" + self.ad_object_type + "_" + "_".join(self.ad_object_ids), self.get_data() - ) + yield NormalizedJSONStream("results_" + self.object_type + "_" + "_".join(self.object_ids), self.get_data()) \ No newline at end of file diff --git a/nck/streams/normalized_json_stream.py b/nck/streams/normalized_json_stream.py index 6dfa89e6..164b5e83 100644 --- a/nck/streams/normalized_json_stream.py +++ b/nck/streams/normalized_json_stream.py @@ -47,4 +47,9 @@ def _normalize_key(key): .replace(":", "_") .replace("/", "_") .replace("\\", "_") + .replace("][", "_") + .replace("[", "_") + .replace("]", "_") + .replace(".", "_") + .strip("_") ) From 5708da146428338970eac72456324c4b0dc0bb3a Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 11 May 2020 17:50:23 +0200 Subject: [PATCH 04/46] Fix README.md --- nck/readers/README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 35477830..b4fb853e 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -30,21 +30,21 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |CLI option|Documentation| |:--|:--| -|`--facebook-app-id`|**[Not mandatory if Facebook Access Token is provided]** Facebook App ID.| -|`--facebook-app-secret`|**[Not mandatory if Facebook Access Token is provided]** Facebook App Secret.| +|`--facebook-app-id`|Facebook App ID. *Not mandatory if Facebook Access Token is provided.*| +|`--facebook-app-secret`|Facebook App Secret. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-access-token`|Facebook App Access Token.| |`--facebook-object-type`|Nature of the root Facebook Object used to make the request. 
*Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign, account.*| |`--facebook-object-id`|ID of the root Facebook Object used to make the request.| -|`--facebook-level`|Granularity of the API response. *Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign or account.*| +|`--facebook-level`|Granularity of the response. *Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign or account.*| |`--facebook-ad-insights`|*True* (default) if *Facebook Ad Insights* request, *False* if *Facebook Object Nodes* request.| |`--facebook-field`|Fields to be retrieved.| -|`--facebook-start-date`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** Start date of the requested time range.| -|`--facebook-end-date`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** End date of the requested time range.| -|`--facebook-date-preset`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** Relative time range. 
Ignored if a specific *--facebook-start date* and *--facebook-end-date* are specified.| -|`--facebook-time-increment`|**[Specific to *Facebook Ad Insights* Requests, and to *Facebook Object Nodes* requests at the Campaign, Adset and Ad levels]** Cuts the results between smaller time slices within the specified time range.| -|`--facebook-add-date-to-report`|**[Prefered ]** *True* if you wish to add the date of the request to each response record, *False* otherwise (default).| -|`--facebook-breakdown`|**[Specific to *Facebook Ad Insights* Requests]** How to break down the result.| -|`--facebook-action-breakdown`|**[Specific to *Facebook Ad Insights* Requests]** How to break down action results.| +|`--facebook-start-date`|Start date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| +|`--facebook-end-date`|End date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| +|`--facebook-date-preset`|Relative time range. Ignored if *--facebook-start date* and *--facebook-end-date* are specified. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| +|`--facebook-time-increment`|Cuts the results between smaller time slices within the specified time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| +|`--facebook-add-date-to-report`|*True* if you wish to add the date of the request to each response record, *False* otherwise (default).| +|`--facebook-breakdown`|How to break down the result. *This parameter is only relevant for Facebook Ad Insights Requests.*| +|`--facebook-action-breakdown`|How to break down action results. 
*This parameter is only relevant for Facebook Ad Insights Requests.*| #### Additional details for a relevant use of the Facebook Reader From a695af83e38c9b8d9759723e116b8a011cc1058c Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 11 May 2020 17:53:24 +0200 Subject: [PATCH 05/46] Fix README.md --- nck/readers/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index b4fb853e..39dc368e 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -48,7 +48,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- #### Additional details for a relevant use of the Facebook Reader -1. Select the appropriate `--facebook-level` +**ADVICE #1: Make sure to select the appropriate `--facebook-level`** |If Facebook Object Type is...|Facebook Level can be...| |:--|:--| @@ -58,7 +58,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |`ad`|*ad, creative*| |`creative`|*creative*| -2. Format Facebook Reader response using `--facebook-fields` +**ADVICE #2: Format Facebook Reader response using `--facebook-fields`** 2.1. The list of **applicable fields** can be found on the links below: @@ -94,7 +94,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- } ``` -(2.3) **Action Breakdown filters** can be applied to the fields of ***Facebook Ad Insights* Requests** using the following syntax: `[:]`. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. +(2.3) **Action Breakdown filters** can be applied to the fields of ***Facebook Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. 
*Facebook Reader Request* ``` From 7e2a08a8911d983c2875f735d379d9b64f0b480a Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 11 May 2020 18:46:28 +0200 Subject: [PATCH 06/46] Fix README.md --- nck/readers/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 39dc368e..59667ee7 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -79,7 +79,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- "call_to_action": { "type": "LEARN_MORE", "value": { - "link": "http://artefact.com", + "link": "http://www.artefact.com", "link_format": "VIDEO_LPP" } } From 3a2f1db1cea57355ef8acb61c795ae8755b8d840 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 12 May 2020 10:58:35 +0200 Subject: [PATCH 07/46] Adding input checks --- nck/readers/facebook_reader.py | 43 ++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 21d41e08..1279ed69 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -168,14 +168,22 @@ def __init__( if missing_action_breakdowns != set(): raise ClickException("Wrong query. Please add to Action Breakdowns: {}".format(missing_action_breakdowns)) - elif not self.ad_insights and (self.breakdowns!=[] or self.action_breakdowns!=[]): - raise ClickException("Wrong query. Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns.") + else: + + if self.breakdowns!=[] or self.action_breakdowns!=[]: + raise ClickException("Wrong query. Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns.") + + if self.level not in ["campaign","adset","ad"] and ((self.start_date and self.end_date) or self.date_preset): + raise ClickException("Wrong query. 
Facebook Object Node queries only accept the time_range and date_preset parameters at the 'campaign', 'adset' or 'ad' levels.") + + if self.time_increment: + raise ClickException("Wrong query. Facebook Object Node queries do not accept the time_increment parameter.") def get_params(self): """ Build the request parameters that will be sent to the API: - - If AdInsights query: all levels accept parameters - - If Facebook Object Node query: only the Campaign, AdSet or Ad objects accept parameters + - If AdInsights query: breakdown, action_breakdowns, level, time_range and date_preset + - If Facebook Object Node query at the campaign, adset or ad level: time_range and date_preset """ params = {} @@ -194,19 +202,24 @@ def get_params(self): def add_period_to_params(self, params): """ - Adding the time_increment, time_range and/or date_preset keys to parameters. + Add the time_increment, time_range and/or date_preset keys to parameters. + - time_increment: available in AdInsights queries + - time_range and date_preset: available in AdInsights queries, + and in Facebook Object Node queries at the campaign, adset or ad levels only """ - if self.time_increment: + if self.ad_insights and self.time_increment: params["time_increment"] = self.time_increment - if self.start_date and self.end_date: - logging.info("Date format used for request: start_date and end_date") - params["time_range"] = self.create_time_range() - elif self.date_preset: - logging.info("Date format used for request: date_preset") - params["date_preset"] = self.date_preset - else: - logging.warning("No date range provided - Last 30 days by default") - logging.warning("https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters") + + if self.ad_insights or self.level in ["campaign","adset","ad"]: + if self.start_date and self.end_date: + logging.info("Date format used for request: start_date and end_date") + params["time_range"] = self.create_time_range() + elif self.date_preset: 
+ logging.info("Date format used for request: date_preset") + params["date_preset"] = self.date_preset + else: + logging.warning("No date range provided - Last 30 days by default") + logging.warning("https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters") def create_time_range(self): return {"since": self.start_date.strftime(DATEFORMAT), "until": self.end_date.strftime(DATEFORMAT)} From 3de6177930a9dd7ed3326fb5be0555ebdf0febf9 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 12 May 2020 11:01:02 +0200 Subject: [PATCH 08/46] Fix README.md typo --- nck/readers/README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 59667ee7..eda41c0b 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -14,7 +14,7 @@ Each reader role is to read data from external source and transform it into a St #### Quickstart -The Facebook Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Object Nodes** (to retrieve configuration data). +The Facebook Reader handles calls to 2 endpoints of the Facebook Marketing API: **Facebook Ad Insights** (to retrieve performance data), and **Facebook Object Node** (to retrieve configuration data). *Example of Facebook Ad Insights Request* ``` @@ -33,22 +33,22 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |`--facebook-app-id`|Facebook App ID. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-app-secret`|Facebook App Secret. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-access-token`|Facebook App Access Token.| -|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. 
*Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign, account.*| +|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. *Supported values: creative (available only for Facebook Object Node requests), ad (default), adset, campaign, account.*| |`--facebook-object-id`|ID of the root Facebook Object used to make the request.| -|`--facebook-level`|Granularity of the response. *Supported values: creative (available only for Facebook Object Nodes requests), ad (default), adset, campaign or account.*| -|`--facebook-ad-insights`|*True* (default) if *Facebook Ad Insights* request, *False* if *Facebook Object Nodes* request.| +|`--facebook-level`|Granularity of the response. *Supported values: creative (available only for Facebook Object Node requests), ad (default), adset, campaign or account.*| +|`--facebook-ad-insights`|*True* (default) if *Facebook Ad Insights* request, *False* if *Facebook Object Node* request.| |`--facebook-field`|Fields to be retrieved.| -|`--facebook-start-date`|Start date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| -|`--facebook-end-date`|End date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| -|`--facebook-date-preset`|Relative time range. Ignored if *--facebook-start date* and *--facebook-end-date* are specified. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| -|`--facebook-time-increment`|Cuts the results between smaller time slices within the specified time range. 
*This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Nodes requests at the Campaign, Adset and Ad levels.*| +|`--facebook-start-date`|Start date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-end-date`|End date of the requested time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-date-preset`|Relative time range. Ignored if *--facebook-start-date* and *--facebook-end-date* are specified. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| +|`--facebook-time-increment`|Cuts the results between smaller time slices within the specified time range. *This parameter is only relevant for Facebook Ad Insights Requests, and Facebook Object Node requests at the Campaign, Adset and Ad levels.*| |`--facebook-add-date-to-report`|*True* if you wish to add the date of the request to each response record, *False* otherwise (default).| |`--facebook-breakdown`|How to break down the result. *This parameter is only relevant for Facebook Ad Insights Requests.*| |`--facebook-action-breakdown`|How to break down action results. 
*This parameter is only relevant for Facebook Ad Insights Requests.*| #### Additional details for a relevant use of the Facebook Reader -**ADVICE #1: Make sure to select the appropriate `--facebook-level`** +**#1: Make sure to select the appropriate `--facebook-level`** |If Facebook Object Type is...|Facebook Level can be...| |:--|:--| @@ -58,12 +58,12 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |`ad`|*ad, creative*| |`creative`|*creative*| -**ADVICE #2: Format Facebook Reader response using `--facebook-fields`** +**#2: Format Facebook Reader response using `--facebook-field`** 2.1. The list of **applicable fields** can be found on the links below: - **Facebook Ad Insights Request**: [all fields](https://developers.facebook.com/docs/marketing-api/insights/parameters/v7.0) -- **Facebook Object Nodes Request**: [Account-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-account), [Campaign-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign-group), [Adset-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign), [Ad-level fields](https://developers.facebook.com/docs/marketing-api/reference/adgroup), [Creative-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-creative) +- **Facebook Object Node Request**: [Account-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-account), [Campaign-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign-group), [Adset-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-campaign), [Ad-level fields](https://developers.facebook.com/docs/marketing-api/reference/adgroup), [Creative-level fields](https://developers.facebook.com/docs/marketing-api/reference/ad-creative) 2.2. If you want to select **a nested field value**, simply indicate the path to this value within the request field. 
From 17e1199214583ce55708c96ad84c1de0674d5c64 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 12 May 2020 20:30:28 +0200 Subject: [PATCH 09/46] Adding FacebookReader tests --- tests/readers/test_facebook_reader.py | 215 +++++++++++++++++--------- 1 file changed, 146 insertions(+), 69 deletions(-) diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/test_facebook_reader.py index cd96685b..036ba961 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/test_facebook_reader.py @@ -15,88 +15,165 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + from unittest import TestCase, mock -from freezegun import freeze_time +from parameterized import parameterized +from click import ClickException -from nck.readers.facebook_reader import FacebookMarketingReader +from nck.readers.facebook_reader import FacebookReader from facebook_business.api import FacebookAdsApi from facebook_business.adobjects.adsinsights import AdsInsights - +from facebook_business.adobjects.ad import Ad class FacebookReaderTest(TestCase): - DATEFORMAT = "%Y-%m-%d" - def mock_facebook_reader(self, **kwargs): - for param, value in kwargs.items(): - setattr(self, param, value) + DATEFORMAT = "%Y-%m-%d" kwargs = { - "ad_insights": True, "app_id": "", "app_secret": "", - "access_token": "", - "ad_object_ids": "123456789", - "recurse_level": 0, - "ad_object_type": "adaccount", - "desired_fields": ["date_start", "impressions"], - "add_date_to_report": False, + "access_token": "123456789", + "object_id": "123456789", + "object_type": "account", + "level": "ad", + "ad_insights": True, + "breakdown": [], + "action_breakdown": [], + "field": [], + "time_increment": None, + "start_date": None, + "end_date": None, + "date_preset": None, + "add_date_to_report": False } - 
@mock.patch("nck.readers.facebook_reader.FacebookMarketingReader.run_query_on_fb_account_obj") - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - @mock.patch.object(FacebookMarketingReader, "get_params", lambda *args: None) - @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) - def test_read_empty_data(self, mock_query): - reader = FacebookMarketingReader(**self.kwargs) - mock_query.return_value = [] - if len(list(reader.read())) > 1: - assert False, "Data is not empty" - - @mock.patch("nck.readers.facebook_reader.FacebookMarketingReader.run_query_on_fb_account_obj") - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - @mock.patch.object(FacebookMarketingReader, "get_params", lambda *args: None) + @parameterized.expand( + [ + ("incompatible_level",{"object_type": "ad", "level": "account"}), + ("missing_breakdown", {"ad_insights": True, "field": ["age"], "breakdown": []}), + ("missing_action_breakdown",{"ad_insights": True, "field": ["actions[action_type:link_click]"], "action_breakdown": []}), + ("creative_level_for_adinsights_query", {"ad_insights": True, "object_type": "creative", "level": "creative"}), + ("breakdown_for_object_node_query", {"ad_insights": False, "breakdown": ["age"]}), + ("action_breakdown_for_object_node_query", {"ad_insights": False, "action_breakdown": ["action_type"]}), + ("time_range_for_object_node_query", {"ad_insights": False, "level": "account", "start_date": "2020-01-01", "end_date": "2020-01-01"}), + ("date_preset_for_object_node_query", {"ad_insights": False, "level": "account", "date_preset": "last_30d"}), + ("time_increment_for_object_node_query", {"ad_insights": False, "time_increment": "1"}) + ] + ) + def test_refuse_invalid_input(self, name, parameters): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update(parameters) + with self.assertRaises(ClickException): + FacebookReader(**temp_kwargs) + + def test_get_api_fields(self): + temp_kwargs = 
self.kwargs.copy() + temp_kwargs.update( + { + "field": ["impressions", "link_url_asset[website_url]", "actions[action_type:link_click]"], + "breakdown": ["link_url_asset"], + "action_breakdown": ["action_type"] + } + ) + expected = ["impressions", "actions"] + assert set(FacebookReader(**temp_kwargs)._api_fields) == set(expected) + + def test_get_field_paths(self): + + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "field": ["impressions", "link_url_asset[website_url]", "actions[action_type:link_click]"], + "breakdown": ["link_url_asset"], + "action_breakdown": ["action_type"] + } + ) + expected = [["impressions"], ["link_url_asset", "website_url"], ["actions", "action_type:link_click"]] + assert FacebookReader(**temp_kwargs)._field_paths == expected + + @mock.patch("nck.readers.facebook_reader.FacebookReader.query_ad_insights") + @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) - def test_read_data(self, mock_query): - reader = FacebookMarketingReader(**self.kwargs) + def test_read_with_ad_insights_query(self, mock_query_ad_insights): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"ad_insights": True, "field": ["date_start", "impressions"]}) + row1, row2 = AdsInsights(), AdsInsights() - row1.set_data({"date_start": "2019-01-01", "impressions": "1"}) - row2.set_data({"date_start": "2019-01-01", "impressions": "2"}) - mock_query.return_value = [row1, row2] - - expected = [{"date_start": "2019-01-01", "impressions": "1"}, {"date_start": "2019-01-01", "impressions": "2"}] - - data = next(reader.read()) - assert len(list(data.readlines())) != 0 - data = next(reader.read()) - for record, output in zip(data.readlines(), iter(expected)): - assert record == output - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - def test_format_standard_field(self): - kwargs = {"desired_fields": ["clicks", "gender", "impressions"], 
"add_date_to_report": False} - record = {"clicks": "0", "date_start": "2020-01-01", "gender": "unknown", "impressions": "300"} - expected = {"clicks": "0", "gender": "unknown", "impressions": "300"} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - def test_format_nested_field(self): - kwargs = {"desired_fields": ["outbound_clicks"], "add_date_to_report": False} - record = {"outbound_clicks": [{"action_type": "outbound_click", "value": "1"}]} - expected = {"outbound_clicks": "1"} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - def test_format_field_not_in_report(self): - kwargs = {"desired_fields": ["age", "outbound_clicks"], "add_date_to_report": False} - record = {"gender": "unknown"} - expected = {"age": None, "outbound_clicks": None} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected - - @mock.patch.object(FacebookMarketingReader, "__init__", mock_facebook_reader) - @freeze_time("2020-01-01") - def test_add_date_to_report_in_report(self): - kwargs = {"desired_fields": ["clicks"], "add_date_to_report": True} - record = {"clicks": "0", "date_start": "2020-01-01"} - expected = {"clicks": "0", "date": "2020-01-01"} - assert next(FacebookMarketingReader(**kwargs).format_and_yield(record)) == expected + row1.set_data({"date_start": "2020-01-01", "impressions": "1"}) + row2.set_data({"date_start": "2020-01-01", "impressions": "2"}) + mock_query_ad_insights.return_value = [row1, row2] + + data = next(FacebookReader(**temp_kwargs).read()) + expected = [{"date_start": "2020-01-01", "impressions": "1"}, {"date_start": "2020-01-01", "impressions": "2"}] + + for record, report in zip(data.readlines(), iter(expected)): + assert record == report + + # Test fails, but I didn't manage to get why: any idea? 
+ @mock.patch("nck.readers.facebook_reader.FacebookReader.query_object_node") + @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) + @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) + def test_read_with_object_node_query(self, mock_query_object_node): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"ad_insights": False, "field": ["id", "status"]}) + + row1, row2 = Ad(), Ad() + row1.set_data({"id": "123456789", "status": "ACTIVE"}) + row2.set_data({"id": "987654321", "status": "PAUSED"}) + mock_query_object_node.return_value = [row1, row2] + + data = next(FacebookReader(**temp_kwargs).read()) + expected = [{"id": "123456789", "status": "ACTIVE"}, {"id": "987654321", "status": "PAUSED"}] + + for record, report in zip(data.readlines(), iter(expected)): + assert record == report + + @parameterized.expand( + [ + ( + "simple_field", + {"field":["impressions"], "action_breakdown":[]}, + {"impressions": "10314"}, + {"impressions": "10314"} + ), + ( + "nested_field", + {"field":["creative[id]"], "action_breakdown":[]}, + {"creative": {"id": "123456789"}}, + {"creative[id]": "123456789"} + ), + ( + "action_breakdown_field_without_filters", + {"field":["actions"], "action_breakdown":["action_type", "action_device"]}, + {"actions": [ + {"action_type": "link_click", "value": "0"}, + {"action_type": "post_engagement", "value": "1"} + ] + }, + {"actions[action_type:link_click]": "0", "actions[action_type:post_engagement]": "1"} + ), + ( + "action_breakdown_field_with_filters", + {"field":["actions[action_type:link_click][action_device:iphone]"],"action_breakdown":["action_type", "action_device"]}, + {"actions": [ + {"action_type": "link_click", "action_device":"iphone", "value": "0"}, + {"action_type": "post_engagement", "action_device":"iphone", "value": "1"}, + {"action_type": "link_click", "action_device":"desktop", "value": "2"}, + {"action_type": "post_engagement", "action_device":"desktop", "value": "3"}, + ] + }, + 
{"actions[action_type:link_click][action_device:iphone]": "0"} + ), + ( + "field_not_in_record", + {"field":["impressions", "clicks"], "action_breakdown":[]}, + {"impressions": "1"}, + {"impressions": "1"} + ) + ] + ) + def test_format_and_yield(self, name, parameters, record, expected): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update(parameters) + assert next(FacebookReader(**temp_kwargs).format_and_yield(record)) == expected \ No newline at end of file From 7bde5f49d1c03d6bdfb7c8c9bda42f5c4a84b7b3 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Wed, 13 May 2020 09:48:02 +0200 Subject: [PATCH 10/46] Updating README.md --- nck/readers/README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index eda41c0b..29cc4649 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -79,7 +79,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- "call_to_action": { "type": "LEARN_MORE", "value": { - "link": "http://www.artefact.com", + "link": "https://www.artefact.com", "link_format": "VIDEO_LPP" } } @@ -89,9 +89,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- *Facebook Reader Response* ``` -{ - "object_story_spec[video_data][call_to_action][value][link]": "https://www.artefact.com" -} +{"object_story_spec[video_data][call_to_action][value][link]": "https://www.artefact.com"} ``` (2.3) **Action Breakdown filters** can be applied to the fields of ***Facebook Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. 
@@ -126,10 +124,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- *Facebook Reader Response* ``` -{ - "actions[action_type:video_view]": "17", - "actions[action_type:post_engagement]": "25", -} +{"actions[action_type:video_view]": "17", "actions[action_type:post_engagement]": "25"} ``` ## Google Readers From 0f3c8015a189b5b7a3f8251d572977cce21b1bd3 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 12:02:48 +0200 Subject: [PATCH 11/46] Adding throttle and code formatting --- nck/helpers/adobe_helper_2_0.py | 44 ++++--- nck/readers/adobe_reader_2_0.py | 199 ++++++++++++++++++++++---------- 2 files changed, 166 insertions(+), 77 deletions(-) diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py index badd4235..c6466202 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/nck/helpers/adobe_helper_2_0.py @@ -9,10 +9,13 @@ def build_request_headers(jwt_client): "Authorization": "Bearer {}".format(jwt_client.access_token), "Content-Type": "application/json", "x-api-key": jwt_client.api_key, - "x-proxy-global-company-id": jwt_client.global_company_id + "x-proxy-global-company-id": jwt_client.global_company_id, } -def add_metric_container_to_report_description(rep_desc,dimensions,breakdown_item_ids,metrics): + +def add_metric_container_to_report_description( + rep_desc, dimensions, breakdown_item_ids, metrics +): """ Filling the metricContainer section of a report description: - Creates 1 filter per dimension breakdown x metric @@ -24,22 +27,26 @@ def add_metric_container_to_report_description(rep_desc,dimensions,breakdown_ite rep_desc["metricContainer"]["metricFilters"] = [ { - "id": i+j*nb_breakdowns, + "id": i + j * nb_breakdowns, "type": "breakdown", "dimension": f"variables/{dimensions[i]}", - "itemId": breakdown_item_ids[i] + "itemId": breakdown_item_ids[i], } - for j in range(nb_metrics) for i in range(nb_breakdowns)] + for j in range(nb_metrics) + for i in range(nb_breakdowns) + ] 
rep_desc["metricContainer"]["metrics"] = [ { "id": f"metrics/{metrics[j]}", - "filters": [i+j*nb_breakdowns for i in range(nb_breakdowns)] + "filters": [i + j * nb_breakdowns for i in range(nb_breakdowns)], } - for j in range(nb_metrics)] + for j in range(nb_metrics) + ] return rep_desc + def get_node_values_from_response(response): """ Extracting dimension values from a report response, @@ -51,25 +58,34 @@ def get_node_values_from_response(response): values = [row["value"] for row in response["rows"]] item_ids = [row["itemId"] for row in response["rows"]] - return {"{}_{}".format(name,item_id): value for (item_id,value) in zip(item_ids,values)} + return { + "{}_{}".format(name, item_id): value + for (item_id, value) in zip(item_ids, values) + } + def get_item_ids_from_nodes(list_of_strings): """ Extacting item_ids from a list of nodes, each node being expressed as 'name_itemId' """ - + return [string.split("_")[1] for string in list_of_strings if string] -def parse_response(response,metrics,parent_dim_parsed): + +def parse_response(response, metrics, parent_dim_parsed): """ Parsing a raw JSON response into the following format: {dimension: value, metric: value} (1 dictionnary per row) """ - + dimension = response["columns"]["dimension"]["id"].split("variables/")[1] for row in response["rows"]: - parsed_row_metrics = {m:v for m, v in zip(metrics,row["data"])} - parsed_row = {**parent_dim_parsed, dimension:row["value"], **parsed_row_metrics} - yield parsed_row \ No newline at end of file + parsed_row_metrics = {m: v for m, v in zip(metrics, row["data"])} + parsed_row = { + **parent_dim_parsed, + dimension: row["value"], + **parsed_row_metrics, + } + yield parsed_row diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 00b6e66d..b6deb053 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -3,7 +3,9 @@ import logging import datetime import requests +import time from itertools import chain + from 
nck.utils.args import extract_args from nck.commands.command import processor from nck.readers.reader import Reader @@ -14,14 +16,17 @@ add_metric_container_to_report_description, get_node_values_from_response, get_item_ids_from_nodes, - parse_response + parse_response, ) DATEFORMAT = "%Y-%m-%dT%H:%M:%S" +API_WINDOW_DURATION = 6 +API_REQUESTS_OVER_WINDOW_LIMIT = 12 logging.basicConfig(level="INFO") logger = logging.getLogger() + @click.command(name="read_adobe_2_0") @click.option("--adobe-api-key", required=True) @click.option("--adobe-tech-account-id", required=True) @@ -34,12 +39,19 @@ @click.option("--adobe-report-suite-id", required=True) @click.option("--adobe-dimensions", required=True, multiple=True) @click.option("--adobe-metrics", required=True, multiple=True) -@processor("adobe_api_key","adobe_tech_account_id","adobe_org_id","adobe_client_secret","adobe_metascopes","adobe_private_key_path") +@processor( + "adobe_api_key", + "adobe_tech_account_id", + "adobe_org_id", + "adobe_client_secret", + "adobe_metascopes", + "adobe_private_key_path", +) def adobe_2_0(**kwargs): return AdobeReader_2_0(**extract_args("adobe_", kwargs)) -class AdobeReader_2_0(Reader): +class AdobeReader_2_0(Reader): def __init__( self, api_key, @@ -52,21 +64,28 @@ def __init__( date_stop, report_suite_id, dimensions, - metrics + metrics, ): # We should probably define a method to create the jwt_client within the NewAdobeReader - self.jwt_client = JWTClient(api_key, tech_account_id, org_id, client_secret, metascopes, private_key_path) + self.jwt_client = JWTClient( + api_key, + tech_account_id, + org_id, + client_secret, + metascopes, + private_key_path, + ) self.date_start = date_start self.date_stop = date_stop + datetime.timedelta(days=1) self.report_suite_id = report_suite_id self.dimensions = dimensions self.metrics = metrics self.node_values = {} - + def build_date_range(self): return f"{self.date_start.strftime(DATEFORMAT)}/{self.date_stop.strftime(DATEFORMAT)}" - def 
build_report_description(self,breakdown_item_ids,metrics): + def build_report_description(self, breakdown_item_ids, metrics): """ Building a report description, to be passed as a parameter to the Reporting API. Documentation: @@ -75,38 +94,75 @@ def build_report_description(self,breakdown_item_ids,metrics): """ rep_desc = { - "rsid": self.report_suite_id, - "globalFilters": [{"type": "dateRange","dateRange": self.build_date_range()}], - "metricContainer": {}, - "dimension": "variables/{}".format(self.dimensions[len(breakdown_item_ids)]), - "settings": {"countRepeatInstances": "true","limit":"500"} - } + "rsid": self.report_suite_id, + "globalFilters": [ + {"type": "dateRange", "dateRange": self.build_date_range()} + ], + "metricContainer": {}, + "dimension": "variables/{}".format( + self.dimensions[len(breakdown_item_ids)] + ), + "settings": {"countRepeatInstances": "true", "limit": "500"}, + } rep_desc = add_metric_container_to_report_description( - rep_desc = rep_desc, - dimensions = self.dimensions, - breakdown_item_ids = breakdown_item_ids, - metrics = metrics + rep_desc=rep_desc, + dimensions=self.dimensions, + breakdown_item_ids=breakdown_item_ids, + metrics=metrics, ) return rep_desc - - def get_report_page(self,rep_desc,page_nb=0): + + def get_report_page(self, rep_desc, page_nb=0): """ Getting a single report page, and returning it into a raw JSON format. 
""" + global tracker + + # Pause if API rate limit is enforced (12 requests every 6 seconds) + + current_time = time.time() + tracker.append(current_time) + tracker_over_window = [ + t for t in tracker if t >= (current_time - API_WINDOW_DURATION) + ] + + if len(tracker_over_window) >= API_REQUESTS_OVER_WINDOW_LIMIT: + sleep_time = tracker_over_window[0] + API_WINDOW_DURATION - current_time + logging.warning( + "Throttling activated: sleeping for {} seconds...".format(sleep_time) + ) + time.sleep(sleep_time) + + # Make request rep_desc["settings"]["page"] = page_nb - - response = requests.post( - "https://analytics.adobe.io/api/{}/reports".format(self.jwt_client.global_company_id), - headers = build_request_headers(self.jwt_client), - data = json.dumps(rep_desc) - ) + report_available = False - return response.json() - - def get_parsed_report(self,rep_desc,metrics,parent_dim_parsed): + while not report_available: + + response = requests.post( + "https://analytics.adobe.io/api/{}/reports".format( + self.jwt_client.global_company_id + ), + headers=build_request_headers(self.jwt_client), + data=json.dumps(rep_desc), + ).json() + + if response.get("message") == "Too many requests": + logging.warning( + "Throttling activated: sleeping for {} seconds...".format( + API_WINDOW_DURATION + ) + ) + time.sleep(API_WINDOW_DURATION) + else: + report_available = True + + return response + + def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed): """ Iterating over report pages, parsing them, and returning a list of iterators, containing dictonnary-formatted records: {dimension: value, metric: value} @@ -118,34 +174,41 @@ def get_parsed_report(self,rep_desc,metrics,parent_dim_parsed): logging.info(f"Getting report: {rep_desc}") - first_response = self.get_report_page(rep_desc=rep_desc,page_nb=0) - all_responses = [parse_response(first_response,metrics,parent_dim_parsed)] + first_response = self.get_report_page(rep_desc) + all_responses = [parse_response(first_response, 
metrics, parent_dim_parsed)] + + if first_response["totalPages"] > 1: + for page_nb in range(1, first_response["totalPages"]): + next_response = self.get_report_page(rep_desc, page_nb) + all_responses += [ + parse_response(next_response, metrics, parent_dim_parsed) + ] - if first_response['totalPages']>1: - for page_nb in range(1,first_response['totalPages']): - next_response = reader.get_report_page(rep_desc=rep_desc,page_nb=page_nb) - all_responses += [parse_response(next_response,metrics,parent_dim_parsed)] - return chain(*all_responses) - - def get_node_values(self,breakdown_item_ids): + + def get_node_values(self, breakdown_item_ids): """ Extracting dimension values from a full report response (all pages), and returning them into a dictionnary of nodes: {name_itemId: value} For instance: {'daterangeday_1200001': 'Jan 1, 2020'} """ - rep_desc = self.build_report_description(breakdown_item_ids=breakdown_item_ids,metrics=["visits"]) - first_response = self.get_report_page(rep_desc=rep_desc,page_nb=0) + rep_desc = self.build_report_description( + breakdown_item_ids=breakdown_item_ids, metrics=["visits"] + ) + first_response = self.get_report_page(rep_desc) node_values = get_node_values_from_response(first_response) - if first_response['totalPages']>1: - for page_nb in range(1,first_response['totalPages']): - node_values += get_node_values_from_response(reader.get_report_page(rep_desc=rep_desc,page_nb=page_nb)) - + if first_response["totalPages"] > 1: + for page_nb in range(1, first_response["totalPages"]): + next_node_values = get_node_values_from_response( + self.get_report_page(rep_desc, page_nb) + ) + node_values.update(next_node_values) + return node_values - - def add_child_nodes_to_graph(self,graph,node,path_to_node): + + def add_child_nodes_to_graph(self, graph, node, path_to_node): """ Adding child nodes to Adobe graph, at two levels: parent_node: [child_node_0, child_node_1, child_node_2] @@ -164,53 +227,63 @@ def 
add_child_nodes_to_graph(self,graph,node,path_to_node): return graph - def read(self,graph=None,node=None): + def result_generator(self, data): + yield from data + + def read_through_graph(self, graph=None, node=None): """ Exploring Adobe graph using a DFS (Deep-First-Search) algorithm. """ global visited global path_to_node + global tracker if graph: # If remaining node children to explore: add node children to graph - if len(path_to_node) < len(self.dimensions)-1: + if len(path_to_node) < len(self.dimensions) - 1: - graph = self.add_child_nodes_to_graph(graph,node,path_to_node) + graph = self.add_child_nodes_to_graph(graph, node, path_to_node) # If no remaining node children to explore: get report - if len(path_to_node) == len(self.dimensions)-1: + if len(path_to_node) == len(self.dimensions) - 1: - parent_dim_parsed = {node.split("_")[0]:self.node_values[node] for node in path_to_node} + parent_dim_parsed = { + node.split("_")[0]: self.node_values[node] for node in path_to_node + } breakdown_item_ids = get_item_ids_from_nodes(path_to_node) - rep_desc = self.build_report_description(breakdown_item_ids=breakdown_item_ids,metrics=self.metrics) - data = self.get_parsed_report(rep_desc=rep_desc,metrics=self.metrics,parent_dim_parsed=parent_dim_parsed) + rep_desc = self.build_report_description( + breakdown_item_ids, self.metrics + ) + data = self.get_parsed_report(rep_desc, self.metrics, parent_dim_parsed) - def result_generator(): - yield from data - - yield JSONStream("adobe_results", result_generator()) + yield from self.result_generator(data) else: # Create graph and add first level of nodes - graph, node, path_to_node, visited = {}, "root", [], [] - graph = self.add_child_nodes_to_graph(graph=graph,node=node,path_to_node=path_to_node) - + graph, node, path_to_node, visited, tracker = {}, "root", [], [], [] + graph = self.add_child_nodes_to_graph(graph, node, path_to_node) + # Add node to visited if node not in visited: visited.append(node) - + # Update 
unvisited_childs - unvisited_childs = [child_node for child_node in graph[node] if child_node not in visited] + unvisited_childs = [ + child_node for child_node in graph[node] if child_node not in visited + ] # Read through node children for child_node in unvisited_childs: path_to_node.append(child_node) - yield from self.read(graph=graph,node=child_node) + yield from self.read_through_graph(graph=graph, node=child_node) path_to_node.remove(child_node) # Remove local_root_node children from visited if path_to_node != []: local_root_node = path_to_node[-1] - visited = [n for n in visited if n not in graph[local_root_node]] \ No newline at end of file + visited = [n for n in visited if n not in graph[local_root_node]] + + def read(self): + yield JSONStream("adobe_results", self.read_through_graph()) From 8275d94eee284078f887d289df135b4f61733a5a Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 14:35:07 +0200 Subject: [PATCH 12/46] Fix code formatting --- nck/helpers/facebook_helper.py | 74 ++++++++++----- nck/readers/facebook_reader.py | 158 ++++++++++++++++++++++++--------- 2 files changed, 168 insertions(+), 64 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index 1495ef72..df513d0c 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -20,17 +20,26 @@ FACEBOOK_OBJECTS = ["creative", "ad", "adset", "campaign", "account"] -DATE_PRESETS = [v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__")] +DATE_PRESETS = [ + v for k, v in AdsInsights.DatePreset.__dict__.items() if not k.startswith("__") +] -BREAKDOWNS = [v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__")] +BREAKDOWNS = [ + v for k, v in AdsInsights.Breakdowns.__dict__.items() if not k.startswith("__") +] + +ACTION_BREAKDOWNS = [ + v + for k, v in AdsInsights.ActionBreakdowns.__dict__.items() + if not k.startswith("__") +] -ACTION_BREAKDOWNS = [v for k, v in 
AdsInsights.ActionBreakdowns.__dict__.items() if not k.startswith("__")] def get_action_breakdown_filters(field_path): """ Extracts action breakdown filters from a field path, and returns them as a dictionnary. - + For instance: 'actions[action_type:video_view][action_type:post_engagement][action_device:iphone]' returns: @@ -45,28 +54,32 @@ def get_action_breakdown_filters(field_path): if action_breakdown not in filters: filters[action_breakdown] = [action_breakdown_value] else: - filters[action_breakdown].append(action_breakdown_value) + filters[action_breakdown].append(action_breakdown_value) return filters + def format_field_path(field_path): """ Formats a field_path back into a field. - + For instance: ['actions', 'action_type:post_engagement'] returns: 'actions[action_type:post_engagement]' """ - if len(field_path)==1: + if len(field_path) == 1: return field_path[0] else: - return "".join([field_path[0]] + ["[{}]".format(element) for element in field_path[1:]]) + return "".join( + [field_path[0]] + ["[{}]".format(element) for element in field_path[1:]] + ) + def check_if_obj_meets_action_breakdown_filters(obj, filters): """ Checks if a nested action breakdown object meets the conditions defined by action breakdown filters. - + For instance, if action breakdown filters are: {'action_type': ['post_engagement', 'video_view'] 'action_device': ['iphone']} @@ -83,11 +96,12 @@ def check_if_obj_meets_action_breakdown_filters(obj, filters): break return obj_meets_all_filters + def get_action_breakdown_value(obj, visited, action_breakdowns): """ Extracts the action breakdown value of a nested action breakdown object. 
- + For instance: {actions: [{'action_type':'video_view', 'action_device':'iphone', 'value':'12'}]} Here, the nested action_breakdown object is: @@ -95,9 +109,14 @@ def get_action_breakdown_value(obj, visited, action_breakdowns): returns: {'actions[action_type:video_view][action_device:iphone]': '12'} """ - obj_action_breakdown = ["{}:{}".format(action_breakdown,obj[action_breakdown]) for action_breakdown in action_breakdowns if action_breakdown in obj] + obj_action_breakdown = [ + "{}:{}".format(action_breakdown, obj[action_breakdown]) + for action_breakdown in action_breakdowns + if action_breakdown in obj + ] return {format_field_path(visited + obj_action_breakdown): obj["value"]} + def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filters=None): """ Extracts action breakdown values from a list of nested action breakdown objects, @@ -107,33 +126,46 @@ def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filter for obj in resp_obj: if filters: if check_if_obj_meets_action_breakdown_filters(obj, filters): - action_breakdown_values = {**action_breakdown_values, **get_action_breakdown_value(obj, visited, action_breakdowns)} + action_breakdown_values = { + **action_breakdown_values, + **get_action_breakdown_value(obj, visited, action_breakdowns), + } else: - action_breakdown_values = {**action_breakdown_values, **get_action_breakdown_value(obj, visited, action_breakdowns)} + action_breakdown_values = { + **action_breakdown_values, + **get_action_breakdown_value(obj, visited, action_breakdowns), + } return action_breakdown_values + def get_field_values(resp_obj, field_path, action_breakdowns, visited=None): """ Recursive function extracting (and formating) the values of a requested field from an API response and a field path. 
""" path_item = field_path[0] - remaining_path_items = len(field_path)-1 - + remaining_path_items = len(field_path) - 1 + if visited is None: visited = [path_item] else: visited.append(path_item) - + if path_item in resp_obj: if remaining_path_items == 0: - if isinstance(resp_obj[path_item],str): + if isinstance(resp_obj[path_item], str): return {format_field_path(visited): resp_obj[path_item]} - if isinstance(resp_obj[path_item],list): - return get_all_action_breakdown_values(resp_obj[path_item], visited, action_breakdowns) + if isinstance(resp_obj[path_item], list): + return get_all_action_breakdown_values( + resp_obj[path_item], visited, action_breakdowns + ) else: - return get_field_values(resp_obj[path_item], field_path[1:], action_breakdowns, visited) + return get_field_values( + resp_obj[path_item], field_path[1:], action_breakdowns, visited + ) else: if all(":" in f for f in field_path): filters = get_action_breakdown_filters(field_path) - return get_all_action_breakdown_values(resp_obj, visited[:-1], action_breakdowns, filters) \ No newline at end of file + return get_all_action_breakdown_values( + resp_obj, visited[:-1], action_breakdowns, filters + ) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 1279ed69..6d89f332 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -35,7 +35,6 @@ ACTION_BREAKDOWNS, get_action_breakdown_filters, get_field_values, - format_field_path ) from facebook_business.api import FacebookAdsApi @@ -52,16 +51,17 @@ "campaign": Campaign, "adset": AdSet, "ad": Ad, - "creative": AdCreative + "creative": AdCreative, } EDGE_MAPPING = { "account": ["campaign", "adset", "ad", "creative"], "campaign": ["adset", "ad"], "adset": ["ad", "creative"], - "ad": ["creative"] + "ad": ["creative"], } + def check_object_id(ctx, param, values): try: [int(value) for value in values] @@ -69,13 +69,31 @@ def check_object_id(ctx, param, values): except ValueError: raise 
ClickException("Wrong format. Ad object IDs should only contains digits.") + @click.command(name="read_facebook") -@click.option("--facebook-app-id", default="", help="Not mandatory for AdsInsights reporting if access-token provided") -@click.option("--facebook-app-secret", default="", help="Not mandatory for AdsInsights reporting if access-token provided") +@click.option( + "--facebook-app-id", + default="", + help="Not mandatory for AdsInsights reporting if access-token provided", +) +@click.option( + "--facebook-app-secret", + default="", + help="Not mandatory for AdsInsights reporting if access-token provided", +) @click.option("--facebook-access-token", required=True) -@click.option("--facebook-object-id", required=True, multiple=True, callback=check_object_id) -@click.option("--facebook-object-type",type=click.Choice(FACEBOOK_OBJECTS),default="account") -@click.option("--facebook-level", type=click.Choice(FACEBOOK_OBJECTS), default="ad", help="Granularity of result") +@click.option( + "--facebook-object-id", required=True, multiple=True, callback=check_object_id +) +@click.option( + "--facebook-object-type", type=click.Choice(FACEBOOK_OBJECTS), default="account" +) +@click.option( + "--facebook-level", + type=click.Choice(FACEBOOK_OBJECTS), + default="ad", + help="Granularity of result", +) @click.option( "--facebook-ad-insights", type=click.BOOL, @@ -94,7 +112,9 @@ def check_object_id(ctx, param, values): type=click.Choice(ACTION_BREAKDOWNS), help="https://developers.facebook.com/docs/marketing-api/insights/breakdowns#actionsbreakdown", ) -@click.option("--facebook-field", multiple=True, help="API fields, following Artefact format") +@click.option( + "--facebook-field", multiple=True, help="API fields, following Artefact format" +) @click.option("--facebook-time-increment") @click.option("--facebook-start-date", type=click.DateTime()) @click.option("--facebook-end-date", type=click.DateTime()) @@ -109,6 +129,7 @@ def check_object_id(ctx, param, values): def 
facebook(**kwargs): return FacebookReader(**extract_args("facebook_", kwargs)) + class FacebookReader(Reader): def __init__( self, @@ -135,13 +156,15 @@ def __init__( self.object_ids = object_id self.object_type = object_type self.level = level - + self.ad_insights = ad_insights self.breakdowns = list(breakdown) self.action_breakdowns = list(action_breakdown) self.fields = list(field) - self._field_paths = [re.split(r"[][]+",f.strip("]")) for f in self.fields] - self._api_fields = list({f[0] for f in self._field_paths if f[0] not in self.breakdowns}) + self._field_paths = [re.split(r"[][]+", f.strip("]")) for f in self.fields] + self._api_fields = list( + {f[0] for f in self._field_paths if f[0] not in self.breakdowns} + ) self.time_increment = time_increment or False self.start_date = start_date @@ -149,35 +172,72 @@ def __init__( self.date_preset = date_preset self.add_date_to_report = add_date_to_report - # Check input parameters + # Check input parameters - if (self.level != self.object_type) and (self.level not in EDGE_MAPPING[self.object_type]): - raise ClickException("Wrong query. Asked level ({}) is not compatible with object type ({}). Please choose level from: {}".format(self.level,self.object_type,[self.object_type]+EDGE_MAPPING[self.object_type])) + if (self.level != self.object_type) and ( + self.level not in EDGE_MAPPING[self.object_type] + ): + raise ClickException( + "Wrong query. Asked level ({}) is not compatible with object type ({}). Please choose level from: {}".format( + self.level, + self.object_type, + [self.object_type] + EDGE_MAPPING[self.object_type], + ) + ) if self.ad_insights: - if self.level == "creative" or self.object_type == 'creative': - raise ClickException("Wrong query. The 'creative' level is not available in AdInsights queries. 
Accepted levels: {}".format(FACEBOOK_OBJECTS[1:])) - - missing_breakdowns = {f[0] for f in self._field_paths if (f[0] in BREAKDOWNS) and (f[0] not in self.breakdowns)} + if self.level == "creative" or self.object_type == "creative": + raise ClickException( + "Wrong query. The 'creative' level is not available in AdInsights queries. Accepted levels: {}".format( + FACEBOOK_OBJECTS[1:] + ) + ) + + missing_breakdowns = { + f[0] + for f in self._field_paths + if (f[0] in BREAKDOWNS) and (f[0] not in self.breakdowns) + } if missing_breakdowns != set(): - raise ClickException("Wrong query. Please add to Breakdowns: {}".format(missing_breakdowns)) - - missing_action_breakdowns = {flt for f in self._field_paths for flt in get_action_breakdown_filters(f) - if flt not in self.action_breakdowns} + raise ClickException( + "Wrong query. Please add to Breakdowns: {}".format( + missing_breakdowns + ) + ) + + missing_action_breakdowns = { + flt + for f in self._field_paths + for flt in get_action_breakdown_filters(f) + if flt not in self.action_breakdowns + } if missing_action_breakdowns != set(): - raise ClickException("Wrong query. Please add to Action Breakdowns: {}".format(missing_action_breakdowns)) + raise ClickException( + "Wrong query. Please add to Action Breakdowns: {}".format( + missing_action_breakdowns + ) + ) else: - if self.breakdowns!=[] or self.action_breakdowns!=[]: - raise ClickException("Wrong query. Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns.") - - if self.level not in ["campaign","adset","ad"] and ((self.start_date and self.end_date) or self.date_preset): - raise ClickException("Wrong query. Facebook Object Node queries only accept the time_range and date_preset parameters at the 'campaign', 'adset' or 'ad' levels.") + if self.breakdowns != [] or self.action_breakdowns != []: + raise ClickException( + "Wrong query. Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns." 
+ ) + + if self.level not in ["campaign", "adset", "ad"] and ( + (self.start_date and self.end_date) or self.date_preset + ): + raise ClickException( + "Wrong query. Facebook Object Node queries only accept the time_range\ + and date_preset parameters at the 'campaign', 'adset' or 'ad' levels." + ) if self.time_increment: - raise ClickException("Wrong query. Facebook Object Node queries do not accept the time_increment parameter.") + raise ClickException( + "Wrong query. Facebook Object Node queries do not accept the time_increment parameter." + ) def get_params(self): """ @@ -195,7 +255,7 @@ def get_params(self): self.add_period_to_params(params) else: - if self.level in ["campaign","adset","ad"]: + if self.level in ["campaign", "adset", "ad"]: self.add_period_to_params(params) return params @@ -210,7 +270,7 @@ def add_period_to_params(self, params): if self.ad_insights and self.time_increment: params["time_increment"] = self.time_increment - if self.ad_insights or self.level in ["campaign","adset","ad"]: + if self.ad_insights or self.level in ["campaign", "adset", "ad"]: if self.start_date and self.end_date: logging.info("Date format used for request: start_date and end_date") params["time_range"] = self.create_time_range() @@ -219,17 +279,22 @@ def add_period_to_params(self, params): params["date_preset"] = self.date_preset else: logging.warning("No date range provided - Last 30 days by default") - logging.warning("https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters") - + logging.warning( + "https://developers.facebook.com/docs/marketing-api/reference/ad-account/insights#parameters" + ) + def create_time_range(self): - return {"since": self.start_date.strftime(DATEFORMAT), "until": self.end_date.strftime(DATEFORMAT)} + return { + "since": self.start_date.strftime(DATEFORMAT), + "until": self.end_date.strftime(DATEFORMAT), + } def create_object(self, object_id): """ Create a Facebook object based on the provided object_type 
and object_id. """ if self.object_type == "account": - object_id = "act_" + object_id + object_id = "act_" + object_id obj = OBJECT_CREATION_MAPPING[self.object_type](object_id) return obj @@ -246,7 +311,7 @@ def query_ad_insights(self, fields, params, object_id): # Step 2 - Run AdInsights query on Facebook object for element in obj.get_insights(fields=fields, params=params): yield element - + @retry def query_object_node(self, fields, params, object_id): """ @@ -265,10 +330,12 @@ def query_object_node(self, fields, params, object_id): "campaign": obj.get_campaigns(), "adset": obj.get_ad_sets(), "ad": obj.get_ads(), - "creative": obj.get_ad_creatives() + "creative": obj.get_ad_creatives(), } edge_objs = EDGE_QUERY_MAPPING[self.level] - for element in [edge_obj.api_get(fields=fields, params=params) for edge_obj in edge_objs]: + for element in [ + edge_obj.api_get(fields=fields, params=params) for edge_obj in edge_objs + ]: yield element def format_and_yield(self, record): @@ -278,13 +345,15 @@ def format_and_yield(self, record): report = {} for field_path in self._field_paths: - field_values = get_field_values(record, field_path, self.action_breakdowns, visited=None) + field_values = get_field_values( + record, field_path, self.action_breakdowns, visited=None + ) if field_values: report = {**report, **field_values} if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) - + yield report def result_generator(self, data): @@ -293,7 +362,7 @@ def result_generator(self, data): """ for record in data: yield from self.format_and_yield(record.export_all_data()) - + def get_data_for_object(self, object_id): """ Run an API query (AdInsights or Facebook Object Node) on a single object_id. 
@@ -317,4 +386,7 @@ def get_data(self): def read(self): FacebookAdsApi.init(self.app_id, self.app_secret, self.access_token) - yield NormalizedJSONStream("results_" + self.object_type + "_" + "_".join(self.object_ids), self.get_data()) \ No newline at end of file + yield NormalizedJSONStream( + "results_" + self.object_type + "_" + "_".join(self.object_ids), + self.get_data(), + ) From 42ac3ede22da6c6a4986959e95b16e9ad642679f Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 14:50:34 +0200 Subject: [PATCH 13/46] Fix AdobeReader formatting --- nck/readers/adobe_reader.py | 39 ++++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/nck/readers/adobe_reader.py b/nck/readers/adobe_reader.py index 5957a88a..e61cafbf 100644 --- a/nck/readers/adobe_reader.py +++ b/nck/readers/adobe_reader.py @@ -98,8 +98,12 @@ def build_report_description(self): "reportDescription": { "source": "warehouse", "reportSuiteID": self.kwargs.get("report_suite_id"), - "elements": [{"id": el} for el in self.kwargs.get("report_element_id", [])], - "metrics": [{"id": mt} for mt in self.kwargs.get("report_metric_id", [])] + "elements": [ + {"id": el} for el in self.kwargs.get("report_element_id", []) + ], + "metrics": [ + {"id": mt} for mt in self.kwargs.get("report_metric_id", []) + ], } } self.set_date_gran_report_desc(report_description) @@ -109,7 +113,12 @@ def build_report_description(self): def get_days_delta(self): days_range = self.kwargs.get("day_range") - delta_mapping = {"PREVIOUS_DAY": 1, "LAST_7_DAYS": 7, "LAST_30_DAYS": 30, "LAST_90_DAYS": 90} + delta_mapping = { + "PREVIOUS_DAY": 1, + "LAST_7_DAYS": 7, + "LAST_30_DAYS": 30, + "LAST_90_DAYS": 90, + } try: days_delta = delta_mapping[days_range] except KeyError: @@ -126,15 +135,21 @@ def set_date_range_report_desc(self, report_description): else: end_date = datetime.datetime.now().date() start_date = end_date - datetime.timedelta(days=self.get_days_delta()) - 
report_description["reportDescription"]["dateFrom"] = start_date.strftime("%Y-%m-%d") - report_description["reportDescription"]["dateTo"] = end_date.strftime("%Y-%m-%d") + report_description["reportDescription"]["dateFrom"] = start_date.strftime( + "%Y-%m-%d" + ) + report_description["reportDescription"]["dateTo"] = end_date.strftime( + "%Y-%m-%d" + ) def set_date_gran_report_desc(self, report_description): """ Adds the dateGranularity parameter to a reportDescription. """ if self.kwargs.get("date_granularity", None) is not None: - report_description["reportDescription"]["dateGranularity"] = self.kwargs.get("date_granularity") + report_description["reportDescription"][ + "dateGranularity" + ] = self.kwargs.get("date_granularity") @retry def query_report(self): @@ -145,7 +160,9 @@ def query_report(self): - Output: reportID, to be passed to the Report.Get method - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Queue.md """ - query_report = self.request(api="Report", method="Queue", data=self.build_report_description()) + query_report = self.request( + api="Report", method="Queue", data=self.build_report_description() + ) return query_report @retry @@ -157,14 +174,18 @@ def get_report(self, report_id, page_number=1): - Output: reportResponse containing the requested report data - Doc: https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_Get.md """ - request_f = lambda: self.request(api="Report", method="Get", data={"reportID": report_id, "page": page_number}) + request_f = lambda: self.request( + api="Report", + method="Get", + data={"reportID": report_id, "page": page_number}, + ) response = request_f() idx = 1 while response.get("error") == "report_not_ready": logging.info(f"waiting {idx} s for report to be ready") sleep(idx + 1) if idx + 1 > MAX_WAIT_REPORT_DELAY: - raise ReportNotReadyError(f"waited too long for report to be ready") + raise ReportNotReadyError("waited too long for 
report to be ready") idx = idx * 2 response = request_f() return response From fbe1cd0632b864fc5658ec9b192dc35ac8266c98 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 14:55:05 +0200 Subject: [PATCH 14/46] Fix tests code formatting --- tests/readers/test_facebook_reader.py | 161 +++++++++++++++++++------- 1 file changed, 121 insertions(+), 40 deletions(-) diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/test_facebook_reader.py index 036ba961..47c6b46d 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/test_facebook_reader.py @@ -26,6 +26,7 @@ from facebook_business.adobjects.adsinsights import AdsInsights from facebook_business.adobjects.ad import Ad + class FacebookReaderTest(TestCase): DATEFORMAT = "%Y-%m-%d" @@ -45,20 +46,53 @@ class FacebookReaderTest(TestCase): "start_date": None, "end_date": None, "date_preset": None, - "add_date_to_report": False + "add_date_to_report": False, } @parameterized.expand( [ - ("incompatible_level",{"object_type": "ad", "level": "account"}), - ("missing_breakdown", {"ad_insights": True, "field": ["age"], "breakdown": []}), - ("missing_action_breakdown",{"ad_insights": True, "field": ["actions[action_type:link_click]"], "action_breakdown": []}), - ("creative_level_for_adinsights_query", {"ad_insights": True, "object_type": "creative", "level": "creative"}), - ("breakdown_for_object_node_query", {"ad_insights": False, "breakdown": ["age"]}), - ("action_breakdown_for_object_node_query", {"ad_insights": False, "action_breakdown": ["action_type"]}), - ("time_range_for_object_node_query", {"ad_insights": False, "level": "account", "start_date": "2020-01-01", "end_date": "2020-01-01"}), - ("date_preset_for_object_node_query", {"ad_insights": False, "level": "account", "date_preset": "last_30d"}), - ("time_increment_for_object_node_query", {"ad_insights": False, "time_increment": "1"}) + ("incompatible_level", {"object_type": "ad", "level": "account"}), + ( + 
"missing_breakdown", + {"ad_insights": True, "field": ["age"], "breakdown": []}, + ), + ( + "missing_action_breakdown", + { + "ad_insights": True, + "field": ["actions[action_type:link_click]"], + "action_breakdown": [], + }, + ), + ( + "creative_level_for_adinsights_query", + {"ad_insights": True, "object_type": "creative", "level": "creative"}, + ), + ( + "breakdown_for_object_node_query", + {"ad_insights": False, "breakdown": ["age"]}, + ), + ( + "action_breakdown_for_object_node_query", + {"ad_insights": False, "action_breakdown": ["action_type"]}, + ), + ( + "time_range_for_object_node_query", + { + "ad_insights": False, + "level": "account", + "start_date": "2020-01-01", + "end_date": "2020-01-01", + }, + ), + ( + "date_preset_for_object_node_query", + {"ad_insights": False, "level": "account", "date_preset": "last_30d"}, + ), + ( + "time_increment_for_object_node_query", + {"ad_insights": False, "time_increment": "1"}, + ), ] ) def test_refuse_invalid_input(self, name, parameters): @@ -71,25 +105,37 @@ def test_get_api_fields(self): temp_kwargs = self.kwargs.copy() temp_kwargs.update( { - "field": ["impressions", "link_url_asset[website_url]", "actions[action_type:link_click]"], + "field": [ + "impressions", + "link_url_asset[website_url]", + "actions[action_type:link_click]", + ], "breakdown": ["link_url_asset"], - "action_breakdown": ["action_type"] + "action_breakdown": ["action_type"], } ) expected = ["impressions", "actions"] assert set(FacebookReader(**temp_kwargs)._api_fields) == set(expected) - + def test_get_field_paths(self): - + temp_kwargs = self.kwargs.copy() temp_kwargs.update( { - "field": ["impressions", "link_url_asset[website_url]", "actions[action_type:link_click]"], + "field": [ + "impressions", + "link_url_asset[website_url]", + "actions[action_type:link_click]", + ], "breakdown": ["link_url_asset"], - "action_breakdown": ["action_type"] + "action_breakdown": ["action_type"], } ) - expected = [["impressions"], ["link_url_asset", 
"website_url"], ["actions", "action_type:link_click"]] + expected = [ + ["impressions"], + ["link_url_asset", "website_url"], + ["actions", "action_type:link_click"], + ] assert FacebookReader(**temp_kwargs)._field_paths == expected @mock.patch("nck.readers.facebook_reader.FacebookReader.query_ad_insights") @@ -97,7 +143,9 @@ def test_get_field_paths(self): @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) def test_read_with_ad_insights_query(self, mock_query_ad_insights): temp_kwargs = self.kwargs.copy() - temp_kwargs.update({"ad_insights": True, "field": ["date_start", "impressions"]}) + temp_kwargs.update( + {"ad_insights": True, "field": ["date_start", "impressions"]} + ) row1, row2 = AdsInsights(), AdsInsights() row1.set_data({"date_start": "2020-01-01", "impressions": "1"}) @@ -105,7 +153,10 @@ def test_read_with_ad_insights_query(self, mock_query_ad_insights): mock_query_ad_insights.return_value = [row1, row2] data = next(FacebookReader(**temp_kwargs).read()) - expected = [{"date_start": "2020-01-01", "impressions": "1"}, {"date_start": "2020-01-01", "impressions": "2"}] + expected = [ + {"date_start": "2020-01-01", "impressions": "1"}, + {"date_start": "2020-01-01", "impressions": "2"}, + ] for record, report in zip(data.readlines(), iter(expected)): assert record == report @@ -124,8 +175,11 @@ def test_read_with_object_node_query(self, mock_query_object_node): mock_query_object_node.return_value = [row1, row2] data = next(FacebookReader(**temp_kwargs).read()) - expected = [{"id": "123456789", "status": "ACTIVE"}, {"id": "987654321", "status": "PAUSED"}] - + expected = [ + {"id": "123456789", "status": "ACTIVE"}, + {"id": "987654321", "status": "PAUSED"}, + ] + for record, report in zip(data.readlines(), iter(expected)): assert record == report @@ -133,47 +187,74 @@ def test_read_with_object_node_query(self, mock_query_object_node): [ ( "simple_field", - {"field":["impressions"], "action_breakdown":[]}, + {"field": ["impressions"], 
"action_breakdown": []}, + {"impressions": "10314"}, {"impressions": "10314"}, - {"impressions": "10314"} ), ( "nested_field", - {"field":["creative[id]"], "action_breakdown":[]}, + {"field": ["creative[id]"], "action_breakdown": []}, {"creative": {"id": "123456789"}}, - {"creative[id]": "123456789"} + {"creative[id]": "123456789"}, ), ( "action_breakdown_field_without_filters", - {"field":["actions"], "action_breakdown":["action_type", "action_device"]}, - {"actions": [ + { + "field": ["actions"], + "action_breakdown": ["action_type", "action_device"], + }, + { + "actions": [ {"action_type": "link_click", "value": "0"}, - {"action_type": "post_engagement", "value": "1"} + {"action_type": "post_engagement", "value": "1"}, ] }, - {"actions[action_type:link_click]": "0", "actions[action_type:post_engagement]": "1"} + { + "actions[action_type:link_click]": "0", + "actions[action_type:post_engagement]": "1", + }, ), ( "action_breakdown_field_without_filters", - {"field":["actions[action_type:link_click][action_device:iphone]"],"action_breakdown":["action_type", "action_device"]}, - {"actions": [ - {"action_type": "link_click", "action_device":"iphone", "value": "0"}, - {"action_type": "post_engagement", "action_device":"iphone", "value": "1"}, - {"action_type": "link_click", "action_device":"desktop", "value": "2"}, - {"action_type": "post_engagement", "action_device":"desktop", "value": "3"}, + { + "field": ["actions[action_type:link_click][action_device:iphone]"], + "action_breakdown": ["action_type", "action_device"], + }, + { + "actions": [ + { + "action_type": "link_click", + "action_device": "iphone", + "value": "0", + }, + { + "action_type": "post_engagement", + "action_device": "iphone", + "value": "1", + }, + { + "action_type": "link_click", + "action_device": "desktop", + "value": "2", + }, + { + "action_type": "post_engagement", + "action_device": "desktop", + "value": "3", + }, ] }, - {"actions[action_type:link_click][action_device:iphone]": "0"} + 
{"actions[action_type:link_click][action_device:iphone]": "0"}, ), ( "field_not_in_record", - {"field":["impressions", "clicks"], "action_breakdown":[]}, + {"field": ["impressions", "clicks"], "action_breakdown": []}, {"impressions": "1"}, - {"impressions": "1"} - ) + {"impressions": "1"}, + ), ] ) def test_format_and_yield(self, name, parameters, record, expected): temp_kwargs = self.kwargs.copy() temp_kwargs.update(parameters) - assert next(FacebookReader(**temp_kwargs).format_and_yield(record)) == expected \ No newline at end of file + assert next(FacebookReader(**temp_kwargs).format_and_yield(record)) == expected From 362297e7d7d76709e3f275cb3e01f3d961816a4b Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 15:42:31 +0200 Subject: [PATCH 15/46] Adding f-strings and removing usage of non-boolean variables as booleans --- nck/helpers/facebook_helper.py | 34 +++++++++++++--------------------- nck/readers/facebook_reader.py | 24 ++++++++---------------- 2 files changed, 21 insertions(+), 37 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index df513d0c..a918ba96 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -49,8 +49,7 @@ def get_action_breakdown_filters(field_path): filters = {} for path_item in field_path: if ":" in path_item: - action_breakdown = path_item.split(":")[0] - action_breakdown_value = path_item.split(":")[1] + action_breakdown, action_breakdown_value = path_item.split(":") if action_breakdown not in filters: filters[action_breakdown] = [action_breakdown_value] else: @@ -70,9 +69,7 @@ def format_field_path(field_path): if len(field_path) == 1: return field_path[0] else: - return "".join( - [field_path[0]] + ["[{}]".format(element) for element in field_path[1:]] - ) + return "".join([field_path[0]] + [f"[{element}]" for element in field_path[1:]]) def check_if_obj_meets_action_breakdown_filters(obj, filters): @@ -110,35 +107,33 @@ def 
get_action_breakdown_value(obj, visited, action_breakdowns): {'actions[action_type:video_view][action_device:iphone]': '12'} """ obj_action_breakdown = [ - "{}:{}".format(action_breakdown, obj[action_breakdown]) + f"{action_breakdown}:{obj[action_breakdown]}" for action_breakdown in action_breakdowns if action_breakdown in obj ] return {format_field_path(visited + obj_action_breakdown): obj["value"]} -def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filters=None): +def get_all_action_breakdown_values(resp_obj, visited, action_breakdowns, filters={}): """ Extracts action breakdown values from a list of nested action breakdown objects, only if they meet the conditions defined by action breakdown filters. """ action_breakdown_values = {} for obj in resp_obj: - if filters: + if filters != {}: if check_if_obj_meets_action_breakdown_filters(obj, filters): - action_breakdown_values = { - **action_breakdown_values, - **get_action_breakdown_value(obj, visited, action_breakdowns), - } + action_breakdown_values.update( + get_action_breakdown_value(obj, visited, action_breakdowns) + ) else: - action_breakdown_values = { - **action_breakdown_values, - **get_action_breakdown_value(obj, visited, action_breakdowns), - } + action_breakdown_values.update( + get_action_breakdown_value(obj, visited, action_breakdowns) + ) return action_breakdown_values -def get_field_values(resp_obj, field_path, action_breakdowns, visited=None): +def get_field_values(resp_obj, field_path, action_breakdowns, visited=[]): """ Recursive function extracting (and formating) the values of a requested field from an API response and a field path. 
@@ -146,10 +141,7 @@ def get_field_values(resp_obj, field_path, action_breakdowns, visited=None): path_item = field_path[0] remaining_path_items = len(field_path) - 1 - if visited is None: - visited = [path_item] - else: - visited.append(path_item) + visited.append(path_item) if path_item in resp_obj: if remaining_path_items == 0: diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 6d89f332..3a17f04f 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -178,20 +178,16 @@ def __init__( self.level not in EDGE_MAPPING[self.object_type] ): raise ClickException( - "Wrong query. Asked level ({}) is not compatible with object type ({}). Please choose level from: {}".format( - self.level, - self.object_type, - [self.object_type] + EDGE_MAPPING[self.object_type], - ) + f"Wrong query. Asked level ({self.level}) is not compatible with object type ({self.object_type}).\ + Please choose level from: {[self.object_type] + EDGE_MAPPING[self.object_type]}" ) if self.ad_insights: if self.level == "creative" or self.object_type == "creative": raise ClickException( - "Wrong query. The 'creative' level is not available in AdInsights queries. Accepted levels: {}".format( - FACEBOOK_OBJECTS[1:] - ) + f"Wrong query. The 'creative' level is not available in AdInsights queries.\ + Accepted levels: {FACEBOOK_OBJECTS[1:]}" ) missing_breakdowns = { @@ -201,9 +197,7 @@ def __init__( } if missing_breakdowns != set(): raise ClickException( - "Wrong query. Please add to Breakdowns: {}".format( - missing_breakdowns - ) + f"Wrong query. Please add to Breakdowns: {missing_breakdowns}" ) missing_action_breakdowns = { @@ -214,9 +208,7 @@ def __init__( } if missing_action_breakdowns != set(): raise ClickException( - "Wrong query. Please add to Action Breakdowns: {}".format( - missing_action_breakdowns - ) + f"Wrong query. 
Please add to Action Breakdowns: {missing_action_breakdowns}" ) else: @@ -346,10 +338,10 @@ def format_and_yield(self, record): for field_path in self._field_paths: field_values = get_field_values( - record, field_path, self.action_breakdowns, visited=None + record, field_path, self.action_breakdowns, visited=[] ) if field_values: - report = {**report, **field_values} + report.update(field_values) if self.add_date_to_report: report["date"] = datetime.today().strftime(DATEFORMAT) From 991c2762c82a78efa5751e09c8f122b4a7f8bd3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20B=C3=A9ranger?= <56034720+gabrielleberanger@users.noreply.github.com> Date: Thu, 14 May 2020 16:28:23 +0200 Subject: [PATCH 16/46] Update check_if_obj_meets_action_breakdown_filters Co-authored-by: vivienmorlet --- nck/helpers/facebook_helper.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index a918ba96..eb009ef5 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -85,13 +85,10 @@ def check_if_obj_meets_action_breakdown_filters(obj, filters): - {'action_type':'video_view', 'action_device':'iphone', 'value':12}: True - {'action_type':'post_engagement', 'action_device':'desktop', 'value':12}: False """ - obj_meets_all_filters = True for action_breakdown in filters: - obj_meets_filter = obj[action_breakdown] in filters[action_breakdown] - obj_meets_all_filters = obj_meets_all_filters and obj_meets_filter - if obj_meets_all_filters is False: - break - return obj_meets_all_filters + if obj[action_breakdown] not in filters[action_breakdown]: + return False + return True def get_action_breakdown_value(obj, visited, action_breakdowns): From 121ffc1f0cb1b1fd906109e482bfdd4d765baee5 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 16:34:42 +0200 Subject: [PATCH 17/46] Updating get_action_breakdown_filters --- nck/helpers/facebook_helper.py | 5 +---- 1 file 
changed, 1 insertion(+), 4 deletions(-) diff --git a/nck/helpers/facebook_helper.py b/nck/helpers/facebook_helper.py index eb009ef5..f52c6f02 100644 --- a/nck/helpers/facebook_helper.py +++ b/nck/helpers/facebook_helper.py @@ -50,10 +50,7 @@ def get_action_breakdown_filters(field_path): for path_item in field_path: if ":" in path_item: action_breakdown, action_breakdown_value = path_item.split(":") - if action_breakdown not in filters: - filters[action_breakdown] = [action_breakdown_value] - else: - filters[action_breakdown].append(action_breakdown_value) + filters.setdefault(action_breakdown, []).append(action_breakdown_value) return filters From 8bf04d39f4b7f81f17931c7b5062083742f453e9 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 16:55:28 +0200 Subject: [PATCH 18/46] Updating regex --- nck/readers/facebook_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 3a17f04f..3d6e6c69 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -161,7 +161,7 @@ def __init__( self.breakdowns = list(breakdown) self.action_breakdowns = list(action_breakdown) self.fields = list(field) - self._field_paths = [re.split(r"[][]+", f.strip("]")) for f in self.fields] + self._field_paths = [re.split(r"[\]\[]+", f.strip("]")) for f in self.fields] self._api_fields = list( {f[0] for f in self._field_paths if f[0] not in self.breakdowns} ) From 0194058598a0f1a1eb4891727f9ec7c63c91e11d Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 18:26:05 +0200 Subject: [PATCH 19/46] Using unittest convention --- tests/readers/test_facebook_reader.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/test_facebook_reader.py index 47c6b46d..b2a555d9 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/test_facebook_reader.py @@ -115,7 
+115,7 @@ def test_get_api_fields(self): } ) expected = ["impressions", "actions"] - assert set(FacebookReader(**temp_kwargs)._api_fields) == set(expected) + self.assertEqual(set(FacebookReader(**temp_kwargs)._api_fields), set(expected)) def test_get_field_paths(self): @@ -136,7 +136,7 @@ def test_get_field_paths(self): ["link_url_asset", "website_url"], ["actions", "action_type:link_click"], ] - assert FacebookReader(**temp_kwargs)._field_paths == expected + self.assertEqual(FacebookReader(**temp_kwargs)._field_paths, expected) @mock.patch("nck.readers.facebook_reader.FacebookReader.query_ad_insights") @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) @@ -159,7 +159,7 @@ def test_read_with_ad_insights_query(self, mock_query_ad_insights): ] for record, report in zip(data.readlines(), iter(expected)): - assert record == report + self.assertEqual(record, report) # Test fails, but I didn't manage to get why: any idea? @mock.patch("nck.readers.facebook_reader.FacebookReader.query_object_node") @@ -181,7 +181,7 @@ def test_read_with_object_node_query(self, mock_query_object_node): ] for record, report in zip(data.readlines(), iter(expected)): - assert record == report + self.assertEqual(record, report) @parameterized.expand( [ @@ -257,4 +257,6 @@ def test_read_with_object_node_query(self, mock_query_object_node): def test_format_and_yield(self, name, parameters, record, expected): temp_kwargs = self.kwargs.copy() temp_kwargs.update(parameters) - assert next(FacebookReader(**temp_kwargs).format_and_yield(record)) == expected + self.assertEqual( + next(FacebookReader(**temp_kwargs).format_and_yield(record)), expected + ) From ad83353d13afa7656c03498afce0e7e08df0f66d Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 18:30:48 +0200 Subject: [PATCH 20/46] Fixing README.md --- nck/readers/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 
29cc4649..8213fc8d 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -52,11 +52,11 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |If Facebook Object Type is...|Facebook Level can be...| |:--|:--| -|`account`|*account, campaign, adset, ad, creative*| -|`campaign`|*campaign, adset, ad*| -|`adset`|*adset, ad, creative*| -|`ad`|*ad, creative*| -|`creative`|*creative*| +|`account`|account, campaign, adset, ad, creative| +|`campaign`|campaign, adset, ad| +|`adset`|adset, ad, creative| +|`ad`|ad, creative| +|`creative`|creative| **#2: Format Facebook Reader response using `--facebook-fields`** @@ -89,7 +89,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- *Facebook Reader Response* ``` -{"object_story_spec[video_data][call_to_action][value][link]": "https://www.artefact.com"} +{"object_story_spec_video_data_call_to_action_value_link": "https://www.artefact.com"} ``` (2.3) **Action Breakdown filters** can be applied to the fields of ***Facebook Ad Insights* Requests** using the following syntax: [:]. You can combine multiple Action Breakdown filters on the same field by adding them in cascade next to each other. 
@@ -124,7 +124,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- *Facebook Reader Response* ``` -{"actions[action_type:video_view]": "17", "actions[action_type:post_engagement]": "25"} +{"actions_action_type_video_view": "17", "actions_action_type_post_engagement": "25"} ``` ## Google Readers From 0027b1fcd4ded1be24ed94475143c43d0761be7d Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 14 May 2020 19:18:36 +0200 Subject: [PATCH 21/46] Fixing test_read_with_object_node_query --- tests/readers/test_facebook_reader.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/test_facebook_reader.py index b2a555d9..affdfa1e 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/test_facebook_reader.py @@ -35,7 +35,7 @@ class FacebookReaderTest(TestCase): "app_id": "", "app_secret": "", "access_token": "123456789", - "object_id": "123456789", + "object_id": ["123456789"], "object_type": "account", "level": "ad", "ad_insights": True, @@ -161,7 +161,6 @@ def test_read_with_ad_insights_query(self, mock_query_ad_insights): for record, report in zip(data.readlines(), iter(expected)): self.assertEqual(record, report) - # Test fails, but I didn't manage to get why: any idea? 
@mock.patch("nck.readers.facebook_reader.FacebookReader.query_object_node") @mock.patch.object(FacebookReader, "get_params", lambda *args: {}) @mock.patch.object(FacebookAdsApi, "init", lambda *args: None) @@ -171,7 +170,7 @@ def test_read_with_object_node_query(self, mock_query_object_node): row1, row2 = Ad(), Ad() row1.set_data({"id": "123456789", "status": "ACTIVE"}) - row1.set_data({"id": "987654321", "status": "PAUSED"}) + row2.set_data({"id": "987654321", "status": "PAUSED"}) mock_query_object_node.return_value = [row1, row2] data = next(FacebookReader(**temp_kwargs).read()) From f6d06443df2023ae0624a9745aeaa150e1762acd Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 18 May 2020 10:25:41 +0200 Subject: [PATCH 22/46] Minor fixes --- nck/clients/adobe_client.py | 49 ++++++++++----- nck/helpers/adobe_helper_2_0.py | 17 ++++- nck/readers/adobe_reader_2_0.py | 108 ++++++++++++++++++-------------- 3 files changed, 110 insertions(+), 64 deletions(-) diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py index ee0aaf64..e5ad1d15 100644 --- a/nck/clients/adobe_client.py +++ b/nck/clients/adobe_client.py @@ -16,13 +16,22 @@ logging.basicConfig(level="INFO") logger = logging.getLogger() -class JWTClient(): + +class JWTClient: """ Following the steps described in this repo: https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python """ - def __init__(self, api_key, tech_account_id, org_id, client_secret, metascopes, private_key_path): + def __init__( + self, + api_key, + tech_account_id, + org_id, + client_secret, + metascopes, + private_key_path, + ): self.api_key = api_key self.tech_account_id = tech_account_id self.org_id = org_id @@ -30,35 +39,41 @@ def __init__(self, api_key, tech_account_id, org_id, client_secret, metascopes, self.metascopes = metascopes self.private_key_path = private_key_path - #Creating jwt_token attribute + # Creating jwt_token attribute logging.info("Getting jwt_token.") - with 
open(self.private_key_path, 'r') as file: + with open(self.private_key_path, "r") as file: private_key = file.read() - self.jwt_token = jwt.encode({ - "exp": datetime.datetime.utcnow() + datetime.timedelta(seconds=30), - "iss": self.org_id, - "sub": self.tech_account_id, - f"https://{IMS_HOST}/s/{self.metascopes}": True, - "aud": f"https://{IMS_HOST}/c/{self.api_key}" - }, private_key, algorithm='RS256') + self.jwt_token = jwt.encode( + { + "exp": datetime.datetime.utcnow() + datetime.timedelta(seconds=30), + "iss": self.org_id, + "sub": self.tech_account_id, + f"https://{IMS_HOST}/s/{self.metascopes}": True, + "aud": f"https://{IMS_HOST}/c/{self.api_key}", + }, + private_key, + algorithm="RS256", + ) - #Creating access_token attribute + # Creating access_token attribute logging.info("Getting access_token.") post_body = { "client_id": self.api_key, "client_secret": self.client_secret, - "jwt_token": self.jwt_token + "jwt_token": self.jwt_token, } response = requests.post(IMS_EXCHANGE, data=post_body) self.access_token = response.json()["access_token"] - #Creating global_company_id attribute + # Creating global_company_id attribute logging.info("Getting global_company_id.") response = requests.get( DISCOVERY_URL, headers={ "Authorization": f"Bearer {self.access_token}", - "x-api-key": self.api_key - } + "x-api-key": self.api_key, + }, + ) + self.global_company_id = ( + response.json().get("imsOrgs")[0].get("companies")[0].get("globalCompanyId") ) - self.global_company_id = response.json().get("imsOrgs")[0].get("companies")[0].get("globalCompanyId") \ No newline at end of file diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py index c6466202..ad4ce268 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/nck/helpers/adobe_helper_2_0.py @@ -1,3 +1,14 @@ +from datetime import datetime + + +def format_date(date_string): + """ + Input: "Jan 1, 2020" + Output: "2020-01-01" + """ + return datetime.strptime(date_string, "%b %d, 
%Y").strftime("%Y-%m-%d") + + def build_request_headers(jwt_client): """ Building headers to authenticate with the Reporting API. @@ -14,7 +25,7 @@ def build_request_headers(jwt_client): def add_metric_container_to_report_description( - rep_desc, dimensions, breakdown_item_ids, metrics + rep_desc, dimensions, metrics, breakdown_item_ids ): """ Filling the metricContainer section of a report description: @@ -88,4 +99,8 @@ def parse_response(response, metrics, parent_dim_parsed): dimension: row["value"], **parsed_row_metrics, } + parsed_row = { + k: (format_date(v) if k == "daterangeday" else v) + for k, v in parsed_row.items() + } yield parsed_row diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index b6deb053..25b05d75 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -1,6 +1,6 @@ +import logging import click import json -import logging import datetime import requests import time @@ -37,8 +37,8 @@ @click.option("--adobe-date-start", required=True, type=click.DateTime()) @click.option("--adobe-date-stop", required=True, type=click.DateTime()) @click.option("--adobe-report-suite-id", required=True) -@click.option("--adobe-dimensions", required=True, multiple=True) -@click.option("--adobe-metrics", required=True, multiple=True) +@click.option("--adobe-dimension", required=True, multiple=True) +@click.option("--adobe-metric", required=True, multiple=True) @processor( "adobe_api_key", "adobe_tech_account_id", @@ -63,10 +63,10 @@ def __init__( date_start, date_stop, report_suite_id, - dimensions, - metrics, + dimension, + metric, ): - # We should probably define a method to create the jwt_client within the NewAdobeReader + # JWT authentification will be changed to OAth authentification self.jwt_client = JWTClient( api_key, tech_account_id, @@ -78,14 +78,15 @@ def __init__( self.date_start = date_start self.date_stop = date_stop + datetime.timedelta(days=1) self.report_suite_id = report_suite_id - 
self.dimensions = dimensions - self.metrics = metrics + self.dimensions = list(dimension) + self.metrics = list(metric) + self.ingestion_tracker = [] self.node_values = {} def build_date_range(self): return f"{self.date_start.strftime(DATEFORMAT)}/{self.date_stop.strftime(DATEFORMAT)}" - def build_report_description(self, breakdown_item_ids, metrics): + def build_report_description(self, metrics, breakdown_item_ids=[]): """ Building a report description, to be passed as a parameter to the Reporting API. Documentation: @@ -102,59 +103,61 @@ def build_report_description(self, breakdown_item_ids, metrics): "dimension": "variables/{}".format( self.dimensions[len(breakdown_item_ids)] ), - "settings": {"countRepeatInstances": "true", "limit": "500"}, + "settings": {"countRepeatInstances": "true", "limit": "5000"}, } rep_desc = add_metric_container_to_report_description( rep_desc=rep_desc, dimensions=self.dimensions, - breakdown_item_ids=breakdown_item_ids, metrics=metrics, + breakdown_item_ids=breakdown_item_ids, ) return rep_desc - def get_report_page(self, rep_desc, page_nb=0): + def throttle(self): """ - Getting a single report page, and returning it into a raw JSON format. + Monitoring API rate limit (12 requests every 6 seconds). 
""" - global tracker - - # Pause if API rate limit is enforced (12 requests every 6 seconds) current_time = time.time() - tracker.append(current_time) - tracker_over_window = [ - t for t in tracker if t >= (current_time - API_WINDOW_DURATION) + self.ingestion_tracker.append(current_time) + window_ingestion_tracker = [ + t + for t in self.ingestion_tracker + if t >= (current_time - API_WINDOW_DURATION) ] - if len(tracker_over_window) >= API_REQUESTS_OVER_WINDOW_LIMIT: - sleep_time = tracker_over_window[0] + API_WINDOW_DURATION - current_time + if len(window_ingestion_tracker) >= API_REQUESTS_OVER_WINDOW_LIMIT: + sleep_time = ( + window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time + ) logging.warning( - "Throttling activated: sleeping for {} seconds...".format(sleep_time) + f"Throttling activated: sleeping for {sleep_time} seconds..." ) time.sleep(sleep_time) - # Make request + def get_report_page(self, rep_desc, page_nb=0): + """ + Getting a single report page, and returning it into a raw JSON format. + """ + self.throttle() rep_desc["settings"]["page"] = page_nb - report_available = False + # As throttling failed occasionnaly, we had to include a back-up check + report_available = False while not report_available: response = requests.post( - "https://analytics.adobe.io/api/{}/reports".format( - self.jwt_client.global_company_id - ), + f"https://analytics.adobe.io/api/{self.jwt_client.global_company_id}/reports", headers=build_request_headers(self.jwt_client), data=json.dumps(rep_desc), ).json() if response.get("message") == "Too many requests": logging.warning( - "Throttling activated: sleeping for {} seconds...".format( - API_WINDOW_DURATION - ) + f"Throttling activated: sleeping for {API_WINDOW_DURATION} seconds..." 
) time.sleep(API_WINDOW_DURATION) else: @@ -162,7 +165,7 @@ def get_report_page(self, rep_desc, page_nb=0): return response - def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed): + def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): """ Iterating over report pages, parsing them, and returning a list of iterators, containing dictonnary-formatted records: {dimension: value, metric: value} @@ -194,7 +197,7 @@ def get_node_values(self, breakdown_item_ids): """ rep_desc = self.build_report_description( - breakdown_item_ids=breakdown_item_ids, metrics=["visits"] + metrics=["visits"], breakdown_item_ids=breakdown_item_ids ) first_response = self.get_report_page(rep_desc) node_values = get_node_values_from_response(first_response) @@ -230,41 +233,46 @@ def add_child_nodes_to_graph(self, graph, node, path_to_node): def result_generator(self, data): yield from data + def read_one_dimension(self): + """ + If the requests includes only one dimension, it can be made straight away. + """ + + rep_desc = self.build_report_description(self.metrics) + data = self.get_parsed_report(rep_desc, self.metrics) + yield from self.result_generator(data) + def read_through_graph(self, graph=None, node=None): """ - Exploring Adobe graph using a DFS (Deep-First-Search) algorithm. + If the request includes more than one dimension, it can be made + by exploring Adobe graph with a DFS (Deep-First-Search) algorithm. 
""" global visited global path_to_node - global tracker - if graph: + if not graph: + # Create graph and add first level of nodes + graph, node, path_to_node, visited = {}, "root", [], [] + graph = self.add_child_nodes_to_graph(graph, node, path_to_node) + else: # If remaining node children to explore: add node children to graph if len(path_to_node) < len(self.dimensions) - 1: - graph = self.add_child_nodes_to_graph(graph, node, path_to_node) # If no remaining node children to explore: get report if len(path_to_node) == len(self.dimensions) - 1: - parent_dim_parsed = { node.split("_")[0]: self.node_values[node] for node in path_to_node } breakdown_item_ids = get_item_ids_from_nodes(path_to_node) rep_desc = self.build_report_description( - breakdown_item_ids, self.metrics + self.metrics, breakdown_item_ids ) data = self.get_parsed_report(rep_desc, self.metrics, parent_dim_parsed) - yield from self.result_generator(data) - else: - # Create graph and add first level of nodes - graph, node, path_to_node, visited, tracker = {}, "root", [], [], [] - graph = self.add_child_nodes_to_graph(graph, node, path_to_node) - # Add node to visited if node not in visited: visited.append(node) @@ -274,7 +282,7 @@ def read_through_graph(self, graph=None, node=None): child_node for child_node in graph[node] if child_node not in visited ] - # Read through node children + # Read through child node children for child_node in unvisited_childs: path_to_node.append(child_node) yield from self.read_through_graph(graph=graph, node=child_node) @@ -286,4 +294,12 @@ def read_through_graph(self, graph=None, node=None): visited = [n for n in visited if n not in graph[local_root_node]] def read(self): - yield JSONStream("adobe_results", self.read_through_graph()) + + if len(self.dimensions) == 1: + yield JSONStream( + "results_" + self.report_suite_id, self.read_one_dimension() + ) + elif len(self.dimensions) > 1: + yield JSONStream( + "results_" + self.report_suite_id, self.read_through_graph() + ) 
From 81db24acbfed1f657dac35ac30dca6e45588178f Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 18 May 2020 10:42:50 +0200 Subject: [PATCH 23/46] Adding tests --- tests/readers/test_adobe_reader_2_0.py | 337 +++++++++++++++++++++++++ 1 file changed, 337 insertions(+) create mode 100644 tests/readers/test_adobe_reader_2_0.py diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/test_adobe_reader_2_0.py new file mode 100644 index 00000000..a324d9a4 --- /dev/null +++ b/tests/readers/test_adobe_reader_2_0.py @@ -0,0 +1,337 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +from nck.readers.adobe_reader_2_0 import AdobeReader_2_0 +from unittest import TestCase, mock + +import datetime + + +class AdobeReaderTest_2_0(TestCase): + + kwargs = { + "api_key": "", + "tech_account_id": "", + "org_id": "", + "client_secret": "", + "metascopes": "", + "private_key_path": "", + "date_start": datetime.date(2020, 1, 1), + "date_stop": datetime.date(2020, 1, 2), + "report_suite_id": "XXXXXXXXX", + "dimension": [], + "metric": [], + } + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + def test_build_date_range(self, mock_jwt_client): + output = AdobeReader_2_0(**self.kwargs).build_date_range() + expected = "2020-01-01T00:00:00/2020-01-03T00:00:00" + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + def test_build_report_description_one_dimension(self, mock_jwt_client): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"dimension": ["daterangeday"]}) + metrics = ["visits", "bounces"] + + output = AdobeReader_2_0(**temp_kwargs).build_report_description(metrics) + expected = { + "rsid": "XXXXXXXXX", + "globalFilters": [ + { + "type": "dateRange", + "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00", + } + ], + "metricContainer": { + "metricFilters": [], + "metrics": [ + {"id": "metrics/visits", "filters": []}, + {"id": "metrics/bounces", "filters": []}, + ], + }, + "dimension": "variables/daterangeday", + "settings": {"countRepeatInstances": "true", "limit": "500"}, + } + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + def test_build_report_description_multiple_dimensions(self, mock_jwt_client): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update({"dimension": ["daterangeday", "campaign", "pagename"]}) + metrics = ["visits", "bounces"] + breakdown_item_ids = ["000000000", "111111111"] + + output = AdobeReader_2_0(**temp_kwargs).build_report_description( + metrics, 
breakdown_item_ids + ) + expected = { + "rsid": "XXXXXXXXX", + "globalFilters": [ + { + "type": "dateRange", + "dateRange": "2020-01-01T00:00:00/2020-01-03T00:00:00", + } + ], + "metricContainer": { + "metricFilters": [ + { + "id": 0, + "type": "breakdown", + "dimension": "variables/daterangeday", + "itemId": "000000000", + }, + { + "id": 1, + "type": "breakdown", + "dimension": "variables/campaign", + "itemId": "111111111", + }, + { + "id": 2, + "type": "breakdown", + "dimension": "variables/daterangeday", + "itemId": "000000000", + }, + { + "id": 3, + "type": "breakdown", + "dimension": "variables/campaign", + "itemId": "111111111", + }, + ], + "metrics": [ + {"id": "metrics/visits", "filters": [0, 1]}, + {"id": "metrics/bounces", "filters": [2, 3]}, + ], + }, + "dimension": "variables/pagename", + "settings": {"countRepeatInstances": "true", "limit": "500"}, + } + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_report_page", + side_effect=[ + { + "totalPages": 2, + "firstPage": True, + "lastPage": False, + "columns": {"dimension": {"id": "variables/daterangeday"}}, + "rows": [ + {"itemId": "1200201", "value": "Jan 1, 2020", "data": [11, 21]}, + {"itemId": "1200202", "value": "Jan 2, 2020", "data": [12, 22]}, + ], + }, + { + "totalPages": 2, + "firstPage": False, + "lastPage": True, + "columns": {"dimension": {"id": "variables/daterangeday"}}, + "rows": [ + {"itemId": "1200203", "value": "Jan 3, 2020", "data": [13, 23]}, + {"itemId": "1200204", "value": "Jan 4, 2020", "data": [14, 24]}, + ], + }, + ], + ) + def test_get_parsed_report(self, mock_jwt_client, mock_get_report_page): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "dimension": ["daterangeday"], + "date_start": datetime.date(2020, 1, 1), + "date_stop": datetime.date(2020, 1, 4), + } + ) + metrics = ["visits", "bounces"] + + output = 
AdobeReader_2_0(**temp_kwargs).get_parsed_report({}, metrics) + expected = [ + {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, + {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, + {"daterangeday": "2020-01-03", "visits": 13, "bounces": 23}, + {"daterangeday": "2020-01-04", "visits": 14, "bounces": 24}, + ] + for output_record, expected_record in zip(output, expected): + self.assertEqual(output_record, expected_record) + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_node_values", + return_value={ + "lasttouchchannel_1": "Paid Search", + "lasttouchchannel_2": "Natural_Search", + }, + ) + def test_add_child_nodes_to_graph(self, mock_jwt_client, mock_get_node_values): + graph = { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": [], + "daterangeday_1200202": [], + } + node = "daterangeday_1200201" + path_to_node = ["daterangeday_1200201"] + + output = AdobeReader_2_0(**self.kwargs).add_child_nodes_to_graph( + graph, node, path_to_node + ) + expected = { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": ["lasttouchchannel_1", "lasttouchchannel_2"], + "daterangeday_1200202": [], + "lasttouchchannel_1": [], + "lasttouchchannel_2": [], + } + self.assertEqual(output, expected) + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", + return_value=[ + {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, + {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, + ], + ) + def test_read_one_dimension_reports(self, mock_jwt_client, mock_get_parsed_report): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + {"dimension": ["daterangeday"], "metric": ["visits", "bounces"]} + ) + + output = next(AdobeReader_2_0(**temp_kwargs).read()) + expected = [ + 
{"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, + {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, + ] + for output_record, expected_output in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_output) + + @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.add_child_nodes_to_graph", + side_effect=[ + { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": [], + "daterangeday_1200202": [], + }, + { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": ["lasttouchchannel_1"], + "daterangeday_1200202": [], + "lasttouchchannel_1": [], + }, + { + "root": ["daterangeday_1200201", "daterangeday_1200202"], + "daterangeday_1200201": ["lasttouchchannel_1"], + "daterangeday_1200202": ["lasttouchchannel_2"], + "lasttouchchannel_1": [], + "lasttouchchannel_2": [], + }, + ], + ) + @mock.patch( + "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", + side_effect=[ + [ + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_1", + "visits": 11, + "bounces": 21, + }, + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_2", + "visits": 12, + "bounces": 22, + }, + ], + [ + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_1", + "visits": 13, + "bounces": 23, + }, + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_2", + "visits": 14, + "bounces": 24, + }, + ], + ], + ) + def test_read_multiple_dimension_reports( + self, mock_jwt_client, mock_add_child_nodes_to_graph, mock_get_parsed_report + ): + temp_kwargs = self.kwargs.copy() + temp_kwargs.update( + { + "dimension": ["daterangeday", "lastouchchannel", "campaign"], + "metric": ["visits", "bounces"], + } + ) + reader = 
AdobeReader_2_0(**temp_kwargs) + reader.node_values = { + "daterangeday_1200201": "Jan 1, 2020", + "daterangeday_1200202": "Jan 2, 2020", + "lasttouchchannel_1": "Paid Search", + "lasttouchchannel_2": "Natural Search", + } + output = next(reader.read()) + expected = [ + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_1", + "visits": 11, + "bounces": 21, + }, + { + "daterangeday": "2020-01-01", + "lastouchchannel": "Paid Search", + "campaign": "Campaign_2", + "visits": 12, + "bounces": 22, + }, + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_1", + "visits": 13, + "bounces": 23, + }, + { + "daterangeday": "2020-01-02", + "lastouchchannel": "Natural Search", + "campaign": "Campaign_2", + "visits": 14, + "bounces": 24, + }, + ] + for output_record, expected_record in zip(output.readlines(), iter(expected)): + self.assertEqual(output_record, expected_record) From c8567fed98cfa3ca2b1159b9f68b178b4a486933 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 18 May 2020 17:59:07 +0200 Subject: [PATCH 24/46] Minor fixes --- nck/clients/adobe_client.py | 22 +++++++++--- nck/readers/adobe_reader_2_0.py | 47 +++++++++++++++++++------- tests/readers/test_adobe_reader_2_0.py | 8 ++--- 3 files changed, 56 insertions(+), 21 deletions(-) diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py index e5ad1d15..3ed5406f 100644 --- a/nck/clients/adobe_client.py +++ b/nck/clients/adobe_client.py @@ -1,8 +1,20 @@ -# PRELIMINARY STEPS TO FOLLOW TO GET JWT CREDENTIALS -# - Get developper access to Adobe Analytics: -# https://helpx.adobe.com/enterprise/using/manage-developers.html -# - Create an integration to Adobe Analytics on Adobe I/O Console: -# https://console.adobe.io/ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under 
the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. import logging import datetime diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 25b05d75..189832df 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -1,3 +1,21 @@ +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ import logging import click import json @@ -34,11 +52,11 @@ @click.option("--adobe-client-secret", required=True) @click.option("--adobe-metascopes", required=True) @click.option("--adobe-private-key-path", required=True) -@click.option("--adobe-date-start", required=True, type=click.DateTime()) -@click.option("--adobe-date-stop", required=True, type=click.DateTime()) @click.option("--adobe-report-suite-id", required=True) @click.option("--adobe-dimension", required=True, multiple=True) @click.option("--adobe-metric", required=True, multiple=True) +@click.option("--adobe-start-date", required=True, type=click.DateTime()) +@click.option("--adobe-end-date", required=True, type=click.DateTime()) @processor( "adobe_api_key", "adobe_tech_account_id", @@ -60,11 +78,11 @@ def __init__( client_secret, metascopes, private_key_path, - date_start, - date_stop, report_suite_id, dimension, metric, + start_date, + end_date, ): # JWT authentification will be changed to OAth authentification self.jwt_client = JWTClient( @@ -75,16 +93,16 @@ def __init__( metascopes, private_key_path, ) - self.date_start = date_start - self.date_stop = date_stop + datetime.timedelta(days=1) self.report_suite_id = report_suite_id self.dimensions = list(dimension) self.metrics = list(metric) + self.start_date = start_date + self.end_date = end_date + datetime.timedelta(days=1) self.ingestion_tracker = [] self.node_values = {} def build_date_range(self): - return f"{self.date_start.strftime(DATEFORMAT)}/{self.date_stop.strftime(DATEFORMAT)}" + return f"{self.start_date.strftime(DATEFORMAT)}/{self.end_date.strftime(DATEFORMAT)}" def build_report_description(self, metrics, breakdown_item_ids=[]): """ @@ -100,9 +118,7 @@ def build_report_description(self, metrics, breakdown_item_ids=[]): {"type": "dateRange", "dateRange": self.build_date_range()} ], "metricContainer": {}, - "dimension": "variables/{}".format( - self.dimensions[len(breakdown_item_ids)] - ), + "dimension": 
f"variables/{self.dimensions[len(breakdown_item_ids)]}", "settings": {"countRepeatInstances": "true", "limit": "5000"}, } @@ -145,7 +161,7 @@ def get_report_page(self, rep_desc, page_nb=0): self.throttle() rep_desc["settings"]["page"] = page_nb - # As throttling failed occasionnaly, we had to include a back-up check + # As throttling failed occasionnaly (with no obvious reason), we had to include a back-up check report_available = False while not report_available: @@ -175,7 +191,12 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): so that we can add their values to output records. """ - logging.info(f"Getting report: {rep_desc}") + report_info = { + "parent_dim": parent_dim_parsed, + "dim": rep_desc["dimension"].split("variables/")[1], + "metrics": metrics, + } + logging.info(f"Getting report: {report_info}") first_response = self.get_report_page(rep_desc) all_responses = [parse_response(first_response, metrics, parent_dim_parsed)] @@ -220,6 +241,8 @@ def add_child_nodes_to_graph(self, graph, node, path_to_node): child_node_2: [] """ + logging.info(f"Adding child nodes of '{node}' to graph.") + breakdown_item_ids = get_item_ids_from_nodes(path_to_node) child_node_values = self.get_node_values(breakdown_item_ids) self.node_values.update(child_node_values) diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/test_adobe_reader_2_0.py index a324d9a4..3d31e062 100644 --- a/tests/readers/test_adobe_reader_2_0.py +++ b/tests/readers/test_adobe_reader_2_0.py @@ -31,11 +31,11 @@ class AdobeReaderTest_2_0(TestCase): "client_secret": "", "metascopes": "", "private_key_path": "", - "date_start": datetime.date(2020, 1, 1), - "date_stop": datetime.date(2020, 1, 2), "report_suite_id": "XXXXXXXXX", "dimension": [], "metric": [], + "start_date": datetime.date(2020, 1, 1), + "end_date": datetime.date(2020, 1, 2), } @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) @@ -157,8 +157,8 @@ def test_get_parsed_report(self, 
mock_jwt_client, mock_get_report_page): temp_kwargs.update( { "dimension": ["daterangeday"], - "date_start": datetime.date(2020, 1, 1), - "date_stop": datetime.date(2020, 1, 4), + "start_date": datetime.date(2020, 1, 1), + "end_date": datetime.date(2020, 1, 4), } ) metrics = ["visits", "bounces"] From e21cb3b9e2287bc06d5ed5106a8df6a53be6d0b5 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 18 May 2020 18:00:32 +0200 Subject: [PATCH 25/46] Updating Adobe documentation --- README.md | 3 +- nck/readers/README.md | 76 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 91f5dde7..a3d9f4ff 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,8 @@ Nautilus connectors kit is a tool which aim is getting raw data from different s - SalesForce - MySQL - Radarly -- Adobe Analytics 1.4 +- Adobe Analytics 1.4 +- Adobe Analytics 2.0 ### Writers diff --git a/nck/readers/README.md b/nck/readers/README.md index b34fc950..82c48b00 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -228,6 +228,82 @@ Detailed version [here](https://tech.yandex.com/direct/doc/reports/spec-docpage/ | `--yandex-date-start` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-stop` and `--yandex-date-range` set to `CUSTOM_DATE`. | | `--yandex-date-stop` | (Optional) Selects data on a specific period of time. Combined with `--yandex-date-start` and `--yandex-date-range` set to `CUSTOM_DATE`. | +## Adobe Analytics Readers + +As of May 2020 (last update of this section of the documentation), **two versions of Adobe Analytics Reporting API are coexisting: 1.4 and 2.0**. As some functionalities of API 1.4 have not been made available in API 2.0 yet (Data Warehouse reports in particular), our Adobe Analytics Readers are also available in these two versions. 
+ +### Adobe Analytics Reader 1.4 + +#### How to obtain credentials + +Our Adobe Analytics Reader 1.4 uses the **WSSE authentification framework**. This authentification framework is now deprecated, so you won't be able to generate new WSSE authentification credentials (Username, Password) on Adobe Developper Console if you don't already have them. + +#### Quickstart + +Call example to Adobe Analytics Reader 1.4, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +``` +python nck/entrypoint.py read_adobe --adobe-username --adobe-password --adobe-report-suite-id --adobe-date-granularity day --adobe-report-element-id trackingcode --adobe-report-metric-id visits --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 write_console +``` + +#### Parameters + +|CLI option|Documentation| +|--|--| +|`--adobe-username`|Username used for WSSE authentification| +|`--adobe-password`|Password used for WSSE authentification| +|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to True, the below parameters should be left empty.| +|`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| +|`--adobe-report-element-id`|ID of the element (i.e. dimension) to include in the report| +|`--adobe-report-metric-id`|ID of the metric to include in the report| +|`--adobe-date-granularity`|Granularity of the report. 
*Possible values: PREVIOUS_DAY, LAST_30_DAYS, LAST_7_DAYS, LAST_90_DAYS*| +|`--adobe-start-date`|Start date of the report (format: YYYY-MM-DD)| +|`--adobe-end-date`|End date of the report (format: YYYY-MM-DD)| + +#### Additional information +- **The full list of available elements and metrics** can be retrieved with the [GetElements](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetElements.md) and [GetMetrics](https://github.com/AdobeDocs/analytics-1.4-apis/blob/master/docs/reporting-api/methods/r_GetMetrics.md) methods. +- **Adobe Analytics Reader 1.4 requests Data Warehouse reports** (the "source" parameter is set to "warehouse" in the report description), allowing it to efficiently process multiple-dimension requests. +- **If you need further information**, the documentation of Adobe APIs 1.4 can be found [here](https://github.com/AdobeDocs/analytics-1.4-apis). + +### Adobe Analytics Reader 2.0 + +#### How to obtain credentials + +Adobe Analytics Reader 2.0 uses the **JWT authentification framework**. +- Get developper access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) +- Create a Service Account integration to Adobe Analytics on [Adobe Developper Console](https://console.adobe.io/) +- This integration will generate your JWT authentification credentials (API Key, Technical Account ID, Organization ID, Client Secret and Metascopes), to be passed to Adobe Analytics Reader 2.0. 
+ +#### Quickstart + +Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: + +``` +python nck/entrypoint.py read_adobe_2_0 --adobe-api-key --adobe-tech-account-id --adobe-org-id --adobe-client-secret --adobe-metascopes --adobe-private-key-path --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 --adobe-report-suite-id --adobe-dimension daterangeday --adobe-dimension campaign --adobe-metric visits write_console +``` + +#### Parameters + +|CLI option|Documentation| +|--|--| +|`--adobe-api-key`|API Key, generated after creating the Service Account Integration on Adobe Developper Console| +|`--adobe-tech-account-id`|Technical Account ID, generated after creating the Service Account Integration on Adobe Developper Console| +|`--adobe-org-id`|Organization ID, generated after creating the Service Account Integration on Adobe Developper Console| +|`--adobe-client-secret`|Client Secret, generated after creating the Service Account Integration on Adobe Developper Console| +|`--adobe-private-key-path`|Path to the private.key file, that you had to provide to create the Service Account Integration on Adobe Developper Console| +|`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| +|`--adobe-dimension`|Dimension to include in the report| +|`--adobe-metric`|Metric to include in the report| +|`--adobe-start-date`|Start date of the report (format: YYYY-MM-DD)| +|`--adobe-end-date`|End date of the report (format: YYYY-MM-DD)| + +#### Additional information + +- **In API 2.0, dimension and metric names are slightly different from API 1.4**. 
To get new metric and dimension names and reproduce the behavior of the Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allows you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. +- **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring `--adobe-dimension daterangeday` will produce a report with a day granularity. +- **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are indicated on the "Current limitations" section of [this page](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/migration-guide.md)). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. +- **If you need any further information**, the documentation of Adobe APIs 2.0 can be found [here](https://github.com/AdobeDocs/analytics-2.0-apis). + ### Troubleshooting You encountered and you don't know what 's going on. You may find an answer in the troubleshooting guide below. 
From fd217d7a33af62721e12b0ecdbabcc4cb9cb3f9f Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 18 May 2020 18:31:19 +0200 Subject: [PATCH 26/46] Adding jwt to requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 71a60889..4a25a239 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,4 +60,5 @@ Unidecode==1.1.1 uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 -googleads==22.0.0 \ No newline at end of file +googleads==22.0.0 +jwt==1.0.0 \ No newline at end of file From 320c9c619cd7e2abba21b63dec0b068a3d102423 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Mon, 18 May 2020 18:32:06 +0200 Subject: [PATCH 27/46] Fixing tests --- tests/readers/test_adobe_reader_2_0.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/test_adobe_reader_2_0.py index 3d31e062..22d55897 100644 --- a/tests/readers/test_adobe_reader_2_0.py +++ b/tests/readers/test_adobe_reader_2_0.py @@ -67,7 +67,7 @@ def test_build_report_description_one_dimension(self, mock_jwt_client): ], }, "dimension": "variables/daterangeday", - "settings": {"countRepeatInstances": "true", "limit": "500"}, + "settings": {"countRepeatInstances": "true", "limit": "5000"}, } self.assertEqual(output, expected) @@ -122,7 +122,7 @@ def test_build_report_description_multiple_dimensions(self, mock_jwt_client): ], }, "dimension": "variables/pagename", - "settings": {"countRepeatInstances": "true", "limit": "500"}, + "settings": {"countRepeatInstances": "true", "limit": "5000"}, } self.assertEqual(output, expected) @@ -163,7 +163,9 @@ def test_get_parsed_report(self, mock_jwt_client, mock_get_report_page): ) metrics = ["visits", "bounces"] - output = AdobeReader_2_0(**temp_kwargs).get_parsed_report({}, metrics) + output = AdobeReader_2_0(**temp_kwargs).get_parsed_report( + {"dimension": "variables/daterangeday"}, metrics + ) 
expected = [ {"daterangeday": "2020-01-01", "visits": 11, "bounces": 21}, {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, From be5b306bf3cec32962e9338e6961930da0cd4542 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 19 May 2020 14:29:19 +0200 Subject: [PATCH 28/46] Improving authentification --- nck/clients/adobe_client.py | 46 +++++++++++++------------- nck/helpers/adobe_helper_2_0.py | 15 --------- nck/readers/README.md | 15 +++++---- nck/readers/adobe_reader_2_0.py | 33 +++++++++--------- tests/readers/test_adobe_reader_2_0.py | 36 ++++++++++---------- 5 files changed, 65 insertions(+), 80 deletions(-) diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py index 3ed5406f..7105d87a 100644 --- a/nck/clients/adobe_client.py +++ b/nck/clients/adobe_client.py @@ -29,27 +29,28 @@ logger = logging.getLogger() -class JWTClient: +class AdobeClient: """ - Following the steps described in this repo: + Create an Adobe Client for JWT Authentification, + following the steps described in this repo: https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python """ def __init__( self, - api_key, + client_id, + client_secret, tech_account_id, org_id, - client_secret, - metascopes, private_key_path, + global_company_id, ): - self.api_key = api_key + self.client_id = client_id + self.client_secret = client_secret self.tech_account_id = tech_account_id self.org_id = org_id - self.client_secret = client_secret - self.metascopes = metascopes self.private_key_path = private_key_path + self.global_company_id = global_company_id # Creating jwt_token attribute logging.info("Getting jwt_token.") @@ -60,8 +61,8 @@ def __init__( "exp": datetime.datetime.utcnow() + datetime.timedelta(seconds=30), "iss": self.org_id, "sub": self.tech_account_id, - f"https://{IMS_HOST}/s/{self.metascopes}": True, - "aud": f"https://{IMS_HOST}/c/{self.api_key}", + f"https://{IMS_HOST}/s/ent_analytics_bulk_ingest_sdk": True, + "aud": 
f"https://{IMS_HOST}/c/{self.client_id}", }, private_key, algorithm="RS256", @@ -70,22 +71,21 @@ def __init__( # Creating access_token attribute logging.info("Getting access_token.") post_body = { - "client_id": self.api_key, + "client_id": self.client_id, "client_secret": self.client_secret, "jwt_token": self.jwt_token, } response = requests.post(IMS_EXCHANGE, data=post_body) self.access_token = response.json()["access_token"] - # Creating global_company_id attribute - logging.info("Getting global_company_id.") - response = requests.get( - DISCOVERY_URL, - headers={ - "Authorization": f"Bearer {self.access_token}", - "x-api-key": self.api_key, - }, - ) - self.global_company_id = ( - response.json().get("imsOrgs")[0].get("companies")[0].get("globalCompanyId") - ) + def build_request_headers(self): + """ + Build request headers to be used to interract with Adobe Analytics APIs 2.0. + """ + return { + "Accept": "application/json", + "Authorization": f"Bearer {self.access_token}", + "Content-Type": "application/json", + "x-api-key": self.client_id, + "x-proxy-global-company-id": self.global_company_id, + } diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py index ad4ce268..f612356c 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/nck/helpers/adobe_helper_2_0.py @@ -9,21 +9,6 @@ def format_date(date_string): return datetime.strptime(date_string, "%b %d, %Y").strftime("%Y-%m-%d") -def build_request_headers(jwt_client): - """ - Building headers to authenticate with the Reporting API. 
- Input: JWTClient object - """ - - return { - "Accept": "application/json", - "Authorization": "Bearer {}".format(jwt_client.access_token), - "Content-Type": "application/json", - "x-api-key": jwt_client.api_key, - "x-proxy-global-company-id": jwt_client.global_company_id, - } - - def add_metric_container_to_report_description( rep_desc, dimensions, metrics, breakdown_item_ids ): diff --git a/nck/readers/README.md b/nck/readers/README.md index 6733397b..06b18d66 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -328,25 +328,26 @@ python nck/entrypoint.py read_adobe --adobe-username --adobe-passwor Adobe Analytics Reader 2.0 uses the **JWT authentification framework**. - Get developper access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) - Create a Service Account integration to Adobe Analytics on [Adobe Developper Console](https://console.adobe.io/) -- This integration will generate your JWT authentification credentials (API Key, Technical Account ID, Organization ID, Client Secret and Metascopes), to be passed to Adobe Analytics Reader 2.0. +- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID and Organization ID) to retrieve your Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me': [example code](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/examples/jwt/python/ims_client.py)). All these parameters will be passed to Adobe Analytics Reader 2.0. 
#### Quickstart Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: ``` -python nck/entrypoint.py read_adobe_2_0 --adobe-api-key --adobe-tech-account-id --adobe-org-id --adobe-client-secret --adobe-metascopes --adobe-private-key-path --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 --adobe-report-suite-id --adobe-dimension daterangeday --adobe-dimension campaign --adobe-metric visits write_console +python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key-path --adobe-global-company-id --adobe-report-suite-id --adobe-dimension daterangeday --adobe-dimension campaign --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 --adobe-metric visits write_console ``` #### Parameters |CLI option|Documentation| |--|--| -|`--adobe-api-key`|API Key, generated after creating the Service Account Integration on Adobe Developper Console| -|`--adobe-tech-account-id`|Technical Account ID, generated after creating the Service Account Integration on Adobe Developper Console| -|`--adobe-org-id`|Organization ID, generated after creating the Service Account Integration on Adobe Developper Console| -|`--adobe-client-secret`|Client Secret, generated after creating the Service Account Integration on Adobe Developper Console| -|`--adobe-private-key-path`|Path to the private.key file, that you had to provide to create the Service Account Integration on Adobe Developper Console| +|`--adobe-client-id`|Client ID, that you can find in the integration section on Adobe Developper Console| +|`--adobe-client-secret`|Client Secret, that you can find in the integration section on Adobe Developper Console| +|`--adobe-tech-account-id`|Technical Account ID, that you can find in the integration section on Adobe Developper Console| +|`--adobe-org-id`|Organization ID, that you can find in the integration 
section on Adobe Developper Console| +|`--adobe-private-key-path`|Path to the private.key file, that you had to provide to create the integration| +|`--adobe-global-company-id`|Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')| |`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| |`--adobe-dimension`|Dimension to include in the report| |`--adobe-metric`|Metric to include in the report| diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 189832df..527ac0eb 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -27,10 +27,9 @@ from nck.utils.args import extract_args from nck.commands.command import processor from nck.readers.reader import Reader -from nck.clients.adobe_client import JWTClient +from nck.clients.adobe_client import AdobeClient from nck.streams.json_stream import JSONStream from nck.helpers.adobe_helper_2_0 import ( - build_request_headers, add_metric_container_to_report_description, get_node_values_from_response, get_item_ids_from_nodes, @@ -46,23 +45,22 @@ @click.command(name="read_adobe_2_0") -@click.option("--adobe-api-key", required=True) +@click.option("--adobe-client-id", required=True) +@click.option("--adobe-client-secret", required=True) @click.option("--adobe-tech-account-id", required=True) @click.option("--adobe-org-id", required=True) -@click.option("--adobe-client-secret", required=True) -@click.option("--adobe-metascopes", required=True) @click.option("--adobe-private-key-path", required=True) +@click.option("--adobe-global-company-id", required=True) @click.option("--adobe-report-suite-id", required=True) @click.option("--adobe-dimension", required=True, multiple=True) @click.option("--adobe-metric", required=True, multiple=True) @click.option("--adobe-start-date", required=True, type=click.DateTime()) @click.option("--adobe-end-date", required=True, type=click.DateTime()) @processor( - "adobe_api_key", + "adobe_client_id", + 
"adobe_client_secret", "adobe_tech_account_id", "adobe_org_id", - "adobe_client_secret", - "adobe_metascopes", "adobe_private_key_path", ) def adobe_2_0(**kwargs): @@ -72,26 +70,25 @@ def adobe_2_0(**kwargs): class AdobeReader_2_0(Reader): def __init__( self, - api_key, + client_id, + client_secret, tech_account_id, org_id, - client_secret, - metascopes, private_key_path, + global_company_id, report_suite_id, dimension, metric, start_date, end_date, ): - # JWT authentification will be changed to OAth authentification - self.jwt_client = JWTClient( - api_key, + self.adobe_client = AdobeClient( + client_id, + client_secret, tech_account_id, org_id, - client_secret, - metascopes, private_key_path, + global_company_id, ) self.report_suite_id = report_suite_id self.dimensions = list(dimension) @@ -166,8 +163,8 @@ def get_report_page(self, rep_desc, page_nb=0): while not report_available: response = requests.post( - f"https://analytics.adobe.io/api/{self.jwt_client.global_company_id}/reports", - headers=build_request_headers(self.jwt_client), + f"https://analytics.adobe.io/api/{self.adobe_client.global_company_id}/reports", + headers=self.adobe_client.build_request_headers(), data=json.dumps(rep_desc), ).json() diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/test_adobe_reader_2_0.py index 22d55897..306dcd1e 100644 --- a/tests/readers/test_adobe_reader_2_0.py +++ b/tests/readers/test_adobe_reader_2_0.py @@ -25,12 +25,12 @@ class AdobeReaderTest_2_0(TestCase): kwargs = { - "api_key": "", + "client_id": "", + "client_secret": "", "tech_account_id": "", "org_id": "", - "client_secret": "", - "metascopes": "", "private_key_path": "", + "global_company_id": "", "report_suite_id": "XXXXXXXXX", "dimension": [], "metric": [], @@ -38,14 +38,14 @@ class AdobeReaderTest_2_0(TestCase): "end_date": datetime.date(2020, 1, 2), } - @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) - def test_build_date_range(self, mock_jwt_client): + 
@mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + def test_build_date_range(self, mock_adobe_client): output = AdobeReader_2_0(**self.kwargs).build_date_range() expected = "2020-01-01T00:00:00/2020-01-03T00:00:00" self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) - def test_build_report_description_one_dimension(self, mock_jwt_client): + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + def test_build_report_description_one_dimension(self, mock_adobe_client): temp_kwargs = self.kwargs.copy() temp_kwargs.update({"dimension": ["daterangeday"]}) metrics = ["visits", "bounces"] @@ -71,8 +71,8 @@ def test_build_report_description_one_dimension(self, mock_jwt_client): } self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) - def test_build_report_description_multiple_dimensions(self, mock_jwt_client): + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) + def test_build_report_description_multiple_dimensions(self, mock_adobe_client): temp_kwargs = self.kwargs.copy() temp_kwargs.update({"dimension": ["daterangeday", "campaign", "pagename"]}) metrics = ["visits", "bounces"] @@ -126,7 +126,7 @@ def test_build_report_description_multiple_dimensions(self, mock_jwt_client): } self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) @mock.patch( "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_report_page", side_effect=[ @@ -152,7 +152,7 @@ def test_build_report_description_multiple_dimensions(self, mock_jwt_client): }, ], ) - def test_get_parsed_report(self, mock_jwt_client, mock_get_report_page): + def test_get_parsed_report(self, mock_adobe_client, mock_get_report_page): temp_kwargs = self.kwargs.copy() 
temp_kwargs.update( { @@ -175,7 +175,7 @@ def test_get_parsed_report(self, mock_jwt_client, mock_get_report_page): for output_record, expected_record in zip(output, expected): self.assertEqual(output_record, expected_record) - @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) @mock.patch( "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_node_values", return_value={ @@ -183,7 +183,7 @@ def test_get_parsed_report(self, mock_jwt_client, mock_get_report_page): "lasttouchchannel_2": "Natural_Search", }, ) - def test_add_child_nodes_to_graph(self, mock_jwt_client, mock_get_node_values): + def test_add_child_nodes_to_graph(self, mock_adobe_client, mock_get_node_values): graph = { "root": ["daterangeday_1200201", "daterangeday_1200202"], "daterangeday_1200201": [], @@ -204,7 +204,7 @@ def test_add_child_nodes_to_graph(self, mock_jwt_client, mock_get_node_values): } self.assertEqual(output, expected) - @mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) @mock.patch( "nck.readers.adobe_reader_2_0.AdobeReader_2_0.get_parsed_report", return_value=[ @@ -212,7 +212,9 @@ def test_add_child_nodes_to_graph(self, mock_jwt_client, mock_get_node_values): {"daterangeday": "2020-01-02", "visits": 12, "bounces": 22}, ], ) - def test_read_one_dimension_reports(self, mock_jwt_client, mock_get_parsed_report): + def test_read_one_dimension_reports( + self, mock_adobe_client, mock_get_parsed_report + ): temp_kwargs = self.kwargs.copy() temp_kwargs.update( {"dimension": ["daterangeday"], "metric": ["visits", "bounces"]} @@ -226,7 +228,7 @@ def test_read_one_dimension_reports(self, mock_jwt_client, mock_get_parsed_repor for output_record, expected_output in zip(output.readlines(), iter(expected)): self.assertEqual(output_record, expected_output) - 
@mock.patch("nck.clients.adobe_client.JWTClient.__init__", return_value=None) + @mock.patch("nck.clients.adobe_client.AdobeClient.__init__", return_value=None) @mock.patch( "nck.readers.adobe_reader_2_0.AdobeReader_2_0.add_child_nodes_to_graph", side_effect=[ @@ -288,7 +290,7 @@ def test_read_one_dimension_reports(self, mock_jwt_client, mock_get_parsed_repor ], ) def test_read_multiple_dimension_reports( - self, mock_jwt_client, mock_add_child_nodes_to_graph, mock_get_parsed_report + self, mock_adobe_client, mock_add_child_nodes_to_graph, mock_get_parsed_report ): temp_kwargs = self.kwargs.copy() temp_kwargs.update( From cc8c92cb63ce680b5eb1bd6c1af6419e014c623e Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 19 May 2020 15:42:36 +0200 Subject: [PATCH 29/46] Minor authentification fixes --- nck/clients/adobe_client.py | 18 ++++++------------ nck/readers/adobe_reader_2_0.py | 12 ++++-------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py index 7105d87a..550d4f1a 100644 --- a/nck/clients/adobe_client.py +++ b/nck/clients/adobe_client.py @@ -31,26 +31,20 @@ class AdobeClient: """ - Create an Adobe Client for JWT Authentification, - following the steps described in this repo: + Create an Adobe Client for JWT Authentification. 
+ Doc: https://github.com/AdobeDocs/adobeio-auth/blob/stage/JWT/JWT.md + Most of the code is taken from this repo: https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python """ def __init__( - self, - client_id, - client_secret, - tech_account_id, - org_id, - private_key_path, - global_company_id, + self, client_id, client_secret, tech_account_id, org_id, private_key_path, ): self.client_id = client_id self.client_secret = client_secret self.tech_account_id = tech_account_id self.org_id = org_id self.private_key_path = private_key_path - self.global_company_id = global_company_id # Creating jwt_token attribute logging.info("Getting jwt_token.") @@ -78,7 +72,7 @@ def __init__( response = requests.post(IMS_EXCHANGE, data=post_body) self.access_token = response.json()["access_token"] - def build_request_headers(self): + def build_request_headers(self, global_company_id): """ Build request headers to be used to interract with Adobe Analytics APIs 2.0. """ @@ -87,5 +81,5 @@ def build_request_headers(self): "Authorization": f"Bearer {self.access_token}", "Content-Type": "application/json", "x-api-key": self.client_id, - "x-proxy-global-company-id": self.global_company_id, + "x-proxy-global-company-id": global_company_id, } diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 527ac0eb..2766b2f2 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -83,13 +83,9 @@ def __init__( end_date, ): self.adobe_client = AdobeClient( - client_id, - client_secret, - tech_account_id, - org_id, - private_key_path, - global_company_id, + client_id, client_secret, tech_account_id, org_id, private_key_path, ) + self.global_company_id = global_company_id self.report_suite_id = report_suite_id self.dimensions = list(dimension) self.metrics = list(metric) @@ -163,8 +159,8 @@ def get_report_page(self, rep_desc, page_nb=0): while not report_available: response = requests.post( - 
f"https://analytics.adobe.io/api/{self.adobe_client.global_company_id}/reports", - headers=self.adobe_client.build_request_headers(), + f"https://analytics.adobe.io/api/{self.global_company_id}/reports", + headers=self.adobe_client.build_request_headers(self.global_company_id), data=json.dumps(rep_desc), ).json() From 24e51bb14c652118e0465ee091a89d19f7a60a8b Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 21 May 2020 05:36:28 +0200 Subject: [PATCH 30/46] Minor fixes --- nck/helpers/adobe_helper_2_0.py | 39 +++++++++++++++++++++++---------- nck/readers/README.md | 20 ++++++++--------- nck/readers/adobe_reader_2_0.py | 26 +++++++--------------- 3 files changed, 45 insertions(+), 40 deletions(-) diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py index f612356c..d29d8005 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/nck/helpers/adobe_helper_2_0.py @@ -1,12 +1,22 @@ -from datetime import datetime - +# GNU Lesser General Public License v3.0 only +# Copyright (C) 2020 Artefact +# licence-information@artefact.com +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 3 of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
-def format_date(date_string): - """ - Input: "Jan 1, 2020" - Output: "2020-01-01" - """ - return datetime.strptime(date_string, "%b %d, %Y").strftime("%Y-%m-%d") +from datetime import datetime def add_metric_container_to_report_description( @@ -54,10 +64,7 @@ def get_node_values_from_response(response): values = [row["value"] for row in response["rows"]] item_ids = [row["itemId"] for row in response["rows"]] - return { - "{}_{}".format(name, item_id): value - for (item_id, value) in zip(item_ids, values) - } + return {f"{name}_{item_id}": value for (item_id, value) in zip(item_ids, values)} def get_item_ids_from_nodes(list_of_strings): @@ -69,6 +76,14 @@ def get_item_ids_from_nodes(list_of_strings): return [string.split("_")[1] for string in list_of_strings if string] +def format_date(date_string): + """ + Input: "Jan 1, 2020" + Output: "2020-01-01" + """ + return datetime.strptime(date_string, "%b %d, %Y").strftime("%Y-%m-%d") + + def parse_response(response, metrics, parent_dim_parsed): """ Parsing a raw JSON response into the following format: diff --git a/nck/readers/README.md b/nck/readers/README.md index 06b18d66..ff5d02d4 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -292,7 +292,7 @@ As of May 2020 (last update of this section of the documentation), **two version #### How to obtain credentials -Our Adobe Analytics Reader 1.4 uses the **WSSE authentification framework**. This authentification framework is now deprecated, so you won't be able to generate new WSSE authentification credentials (Username, Password) on Adobe Developper Console if you don't already have them. +Our Adobe Analytics Reader 1.4 uses the **WSSE authentication framework**. This authentication framework is now deprecated, so you won't be able to generate new WSSE authentication credentials (Username, Password) on Adobe Developper Console if you don't already have them. 
#### Quickstart @@ -306,9 +306,9 @@ python nck/entrypoint.py read_adobe --adobe-username --adobe-passwor |CLI option|Documentation| |--|--| -|`--adobe-username`|Username used for WSSE authentification| -|`--adobe-password`|Password used for WSSE authentification| -|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to True, the below parameters should be left empty.| +|`--adobe-username`|Username used for WSSE authentication| +|`--adobe-password`|Password used for WSSE authentication| +|`--adobe-list-report-suite`|Should be set to *True* if you wish to request the list of available Adobe Report Suites (*default: False*). If set to *True*, the below parameters should be left empty.| |`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| |`--adobe-report-element-id`|ID of the element (i.e. dimension) to include in the report| |`--adobe-report-metric-id`|ID of the metric to include in the report| @@ -325,10 +325,10 @@ python nck/entrypoint.py read_adobe --adobe-username --adobe-passwor #### How to obtain credentials -Adobe Analytics Reader 2.0 uses the **JWT authentification framework**. +Adobe Analytics Reader 2.0 uses the **JWT authentication framework**. - Get developper access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) - Create a Service Account integration to Adobe Analytics on [Adobe Developper Console](https://console.adobe.io/) -- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID and Organization ID) to retrieve your Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me': [example code](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/examples/jwt/python/ims_client.py)). All these parameters will be passed to Adobe Analytics Reader 2.0. 
+- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me'). All these parameters will be passed to Adobe Analytics Reader 2.0. #### Quickstart @@ -342,10 +342,10 @@ python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-cl |CLI option|Documentation| |--|--| -|`--adobe-client-id`|Client ID, that you can find in the integration section on Adobe Developper Console| -|`--adobe-client-secret`|Client Secret, that you can find in the integration section on Adobe Developper Console| -|`--adobe-tech-account-id`|Technical Account ID, that you can find in the integration section on Adobe Developper Console| -|`--adobe-org-id`|Organization ID, that you can find in the integration section on Adobe Developper Console| +|`--adobe-client-id`|Client ID, that you can find on Adobe Developper Console| +|`--adobe-client-secret`|Client Secret, that you can find on Adobe Developper Console| +|`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developper Console| +|`--adobe-org-id`|Organization ID, that you can find on Adobe Developper Console| |`--adobe-private-key-path`|Path to the private.key file, that you had to provide to create the integration| |`--adobe-global-company-id`|Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')| |`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 2766b2f2..093f0903 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -24,6 +24,7 @@ import time from itertools import chain +from nck.utils.retry import retry from nck.utils.args import extract_args from nck.commands.command import processor from nck.readers.reader import Reader @@ -146,6 +147,7 @@ def throttle(self): ) time.sleep(sleep_time) + @retry 
def get_report_page(self, rep_desc, page_nb=0): """ Getting a single report page, and returning it into a raw JSON format. @@ -154,25 +156,13 @@ def get_report_page(self, rep_desc, page_nb=0): self.throttle() rep_desc["settings"]["page"] = page_nb - # As throttling failed occasionnaly (with no obvious reason), we had to include a back-up check - report_available = False - while not report_available: - - response = requests.post( - f"https://analytics.adobe.io/api/{self.global_company_id}/reports", - headers=self.adobe_client.build_request_headers(self.global_company_id), - data=json.dumps(rep_desc), - ).json() - - if response.get("message") == "Too many requests": - logging.warning( - f"Throttling activated: sleeping for {API_WINDOW_DURATION} seconds..." - ) - time.sleep(API_WINDOW_DURATION) - else: - report_available = True + response = requests.post( + f"https://analytics.adobe.io/api/{self.global_company_id}/reports", + headers=self.adobe_client.build_request_headers(self.global_company_id), + data=json.dumps(rep_desc), + ) - return response + return response.json() def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): """ From bb265e9a9539041c11bc47d7ab59af2928609f78 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Tue, 26 May 2020 19:58:37 +0200 Subject: [PATCH 31/46] test auto publish NCK v1.0 --- .env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.env b/.env index 438ecf16..7c30bc2c 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connector-kit -DOCKER_TAG=1.4.0 +DOCKER_IMAGE=nautilus-connectors-kit +DOCKER_TAG=v1.1 DOCKER_REGISTRY=eu.gcr.io From 7f680422b6adcff10004ab206586aae55f41e0f2 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Tue, 26 May 2020 21:41:03 +0200 Subject: [PATCH 32/46] test auto publish NCK v1.0 --- .github/workflows/buildtogcp.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/buildtogcp.yml 
b/.github/workflows/buildtogcp.yml index 602177d6..4ebf4ccf 100644 --- a/.github/workflows/buildtogcp.yml +++ b/.github/workflows/buildtogcp.yml @@ -34,13 +34,12 @@ on: # Environment variables available to all jobs and steps in this workflow env: - GCP_PROJECT: ${{ secrets.GCP_PROJECT }} GCP_EMAIL: ${{ secrets.GCP_EMAIL }} PROJECT_ID: ${{ secrets.PROJECT_ID }} DOCKER_TAG: ${{ github.run_id }} DOCKER_REGISTRY: ${{ secrets.DOCKER_REGISTRY }} DOCKER_IMAGE: ${{ secrets.DOCKER_IMAGE }}-${{ github.ref }} - + CLOUDSDK_PYTHON_SITEPACKAGES: 1 jobs: setup-build-publish: From 994b85f629c7ad676620b0a0c45be98a58fc00de Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Tue, 26 May 2020 21:45:35 +0200 Subject: [PATCH 33/46] test auto publish NCK v1.0 --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9c7782e3..93a8e207 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,4 +60,5 @@ Unidecode==1.1.1 uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 -googleads==22.0.0 \ No newline at end of file +googleads==22.0.0 +pyOpenSSL==19.0.0 \ No newline at end of file From ee4c2c27c8a2434820fb2e7a500bf865b4070410 Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Tue, 26 May 2020 23:21:55 +0200 Subject: [PATCH 34/46] test auto publish NCK v1.0 --- .github/workflows/buildtogcp.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/buildtogcp.yml b/.github/workflows/buildtogcp.yml index 4ebf4ccf..6504815d 100644 --- a/.github/workflows/buildtogcp.yml +++ b/.github/workflows/buildtogcp.yml @@ -34,6 +34,7 @@ on: # Environment variables available to all jobs and steps in this workflow env: + GCP_PROJECT: ${{ secrets.GCP_PROJECT }} GCP_EMAIL: ${{ secrets.GCP_EMAIL }} PROJECT_ID: ${{ secrets.PROJECT_ID }} DOCKER_TAG: ${{ github.run_id }} @@ -45,7 +46,7 @@ jobs: setup-build-publish: if: github.event.pull_request.merged == true name: Setup, Build, Publish - runs-on: 
ubuntu-latest + runs-on: ubuntu-16.04 steps: - name: Checkout @@ -54,7 +55,7 @@ jobs: # Setup gcloud CLI - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master with: - version: '270.0.0' + version: '290.0.1' service_account_email: ${{ secrets.GCP_EMAIL }} service_account_key: ${{ secrets.GCP_KEY }} From c0863912598fc69d791dfffdb727486622e1035c Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 27 May 2020 00:03:14 +0200 Subject: [PATCH 35/46] test auto publish NCK v1.0 --- .github/workflows/buildtogcp.yml | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/.github/workflows/buildtogcp.yml b/.github/workflows/buildtogcp.yml index 6504815d..92ebf6b0 100644 --- a/.github/workflows/buildtogcp.yml +++ b/.github/workflows/buildtogcp.yml @@ -34,7 +34,6 @@ on: # Environment variables available to all jobs and steps in this workflow env: - GCP_PROJECT: ${{ secrets.GCP_PROJECT }} GCP_EMAIL: ${{ secrets.GCP_EMAIL }} PROJECT_ID: ${{ secrets.PROJECT_ID }} DOCKER_TAG: ${{ github.run_id }} @@ -46,12 +45,22 @@ jobs: setup-build-publish: if: github.event.pull_request.merged == true name: Setup, Build, Publish - runs-on: ubuntu-16.04 + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v1 + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + # Setup gcloud CLI - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master with: From f162e3d6d19731573615bdb10149aa529dc921be Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 27 May 2020 00:13:24 +0200 Subject: [PATCH 36/46] final test auto publish NCK v1.0 --- .github/workflows/buildtogcp.yml | 10 ---------- requirements.txt | 3 +-- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/.github/workflows/buildtogcp.yml b/.github/workflows/buildtogcp.yml index 92ebf6b0..7b59a7e2 
100644 --- a/.github/workflows/buildtogcp.yml +++ b/.github/workflows/buildtogcp.yml @@ -51,16 +51,6 @@ jobs: - name: Checkout uses: actions/checkout@v1 - - name: Set up Python 3.8 - uses: actions/setup-python@v1 - with: - python-version: 3.8 - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - # Setup gcloud CLI - uses: GoogleCloudPlatform/github-actions/setup-gcloud@master with: diff --git a/requirements.txt b/requirements.txt index 93a8e207..9c7782e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -60,5 +60,4 @@ Unidecode==1.1.1 uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 -googleads==22.0.0 -pyOpenSSL==19.0.0 \ No newline at end of file +googleads==22.0.0 \ No newline at end of file From cedfee4e76fda1ea79a185f8206d0969c22db2d6 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Wed, 27 May 2020 18:38:08 +0200 Subject: [PATCH 37/46] Replacing private_key_path param by private_key --- nck/clients/adobe_client.py | 16 ++--- nck/readers/README.md | 6 +- nck/readers/adobe_reader_2_0.py | 97 +++++++++++++++++++++----- requirements.txt | 2 +- tests/readers/test_adobe_reader_2_0.py | 2 +- 5 files changed, 93 insertions(+), 30 deletions(-) diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py index 550d4f1a..a3c69d92 100644 --- a/nck/clients/adobe_client.py +++ b/nck/clients/adobe_client.py @@ -17,13 +17,14 @@ # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
import logging -import datetime +from datetime import datetime, timedelta import requests import jwt +from nck.utils.retry import retry + IMS_HOST = "ims-na1.adobelogin.com" IMS_EXCHANGE = "https://ims-na1.adobelogin.com/ims/exchange/jwt" -DISCOVERY_URL = "https://analytics.adobe.io/discovery/me" logging.basicConfig(level="INFO") logger = logging.getLogger() @@ -37,28 +38,27 @@ class AdobeClient: https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python """ + @retry def __init__( - self, client_id, client_secret, tech_account_id, org_id, private_key_path, + self, client_id, client_secret, tech_account_id, org_id, private_key, ): self.client_id = client_id self.client_secret = client_secret self.tech_account_id = tech_account_id self.org_id = org_id - self.private_key_path = private_key_path + self.private_key = private_key # Creating jwt_token attribute logging.info("Getting jwt_token.") - with open(self.private_key_path, "r") as file: - private_key = file.read() self.jwt_token = jwt.encode( { - "exp": datetime.datetime.utcnow() + datetime.timedelta(seconds=30), + "exp": datetime.utcnow() + timedelta(seconds=30), "iss": self.org_id, "sub": self.tech_account_id, f"https://{IMS_HOST}/s/ent_analytics_bulk_ingest_sdk": True, "aud": f"https://{IMS_HOST}/c/{self.client_id}", }, - private_key, + self.private_key, algorithm="RS256", ) diff --git a/nck/readers/README.md b/nck/readers/README.md index ff5d02d4..9262f2e0 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -335,7 +335,7 @@ Adobe Analytics Reader 2.0 uses the **JWT authentication framework**. 
Call example to Adobe Analytics Reader 2.0, getting the number of visits per day and tracking code for a specified Report Suite, between 2020-01-01 and 2020-01-31: ``` -python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key-path --adobe-global-company-id --adobe-report-suite-id --adobe-dimension daterangeday --adobe-dimension campaign --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 --adobe-metric visits write_console +python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-client-secret --adobe-tech-account-id --adobe-org-id --adobe-private-key --adobe-global-company-id --adobe-report-suite-id --adobe-dimension daterangeday --adobe-dimension campaign --adobe-start-date 2020-01-01 --adobe-end-date 2020-01-31 --adobe-metric visits write_console ``` #### Parameters @@ -346,7 +346,7 @@ python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-cl |`--adobe-client-secret`|Client Secret, that you can find on Adobe Developper Console| |`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developper Console| |`--adobe-org-id`|Organization ID, that you can find on Adobe Developper Console| -|`--adobe-private-key-path`|Path to the private.key file, that you had to provide to create the integration| +|`--adobe-private-key-path`|Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| |`--adobe-global-company-id`|Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')| |`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| |`--adobe-dimension`|Dimension to include in the report| @@ -356,7 +356,7 @@ python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-cl #### Additional information -- **In API 2.0, dimension and metric names are slightly different from API 1.4**. 
To get new metric and dimension names and reproduce the behavior Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. +- **In API 2.0, dimension and metric names are slightly different from API 1.4**. To get new metric and dimension names and reproduce the behavior of Adobe Analytics UI as closely as possible, [enable the Debugger feature in Adobe Analytics Workspace](https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md): it allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. - **In API 2.0, the date granularity parameter was removed, and should now be handled as a dimension**: a request featuring `--adobe-dimension daterangeday` will produce a report with a day granularity. - **API 2.0 does not feature Data Warehouse reports yet** (along with other features, that are indicated on the "Current limitations" section of [this page](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/migration-guide.md)). For this reason, if you wish to collect multiple-dimension reports, Adobe Analytics Reader 1.4 might be a more efficient solution in terms of processing time. - **If you need any further information**, the documentation of Adobe APIs 2.0 can be found [here](https://github.com/AdobeDocs/analytics-2.0-apis). 
diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 093f0903..f8ef2cc2 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -19,10 +19,10 @@ import logging import click import json -import datetime import requests import time from itertools import chain +from datetime import timedelta from nck.utils.retry import retry from nck.utils.args import extract_args @@ -45,24 +45,86 @@ logger = logging.getLogger() +def format_key_if_needed(ctx, param, value): + """ + In some cases, newlines are escaped when passed as a click.option(). + This callback corrects this unexpected behaviour. + """ + return value.replace("\\n", "\n") + + @click.command(name="read_adobe_2_0") -@click.option("--adobe-client-id", required=True) -@click.option("--adobe-client-secret", required=True) -@click.option("--adobe-tech-account-id", required=True) -@click.option("--adobe-org-id", required=True) -@click.option("--adobe-private-key-path", required=True) -@click.option("--adobe-global-company-id", required=True) -@click.option("--adobe-report-suite-id", required=True) -@click.option("--adobe-dimension", required=True, multiple=True) -@click.option("--adobe-metric", required=True, multiple=True) -@click.option("--adobe-start-date", required=True, type=click.DateTime()) -@click.option("--adobe-end-date", required=True, type=click.DateTime()) +@click.option( + "--adobe-client-id", + required=True, + help="Client ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-client-secret", + required=True, + help="Client Secret, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-tech-account-id", + required=True, + help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-org-id", + required=True, + help="Organization ID, that you can find in your 
integration section on Adobe Developper Console.", +) +@click.option( + "--adobe-private-key", + required=True, + callback=format_key_if_needed, + help="Content of the private.key file, that you had to provide to create the integration. " + "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", +) +@click.option( + "--adobe-global-company-id", + required=True, + help="Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')", +) +@click.option( + "--adobe-report-suite-id", + required=True, + help="ID of the requested Adobe Report Suite", +) +@click.option( + "--adobe-dimension", + required=True, + multiple=True, + help="To get dimension names, enable the Debugger feature in Adobe Analytics Workspace: " + "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. " + "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", +) +@click.option( + "--adobe-metric", + required=True, + multiple=True, + help="To get metric names, enable the Debugger feature in Adobe Analytics Workspace: " + "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. 
" + "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", +) +@click.option( + "--adobe-start-date", + required=True, + type=click.DateTime(), + help="Start date of the report", +) +@click.option( + "--adobe-end-date", + required=True, + type=click.DateTime(), + help="End date of the report", +) @processor( "adobe_client_id", "adobe_client_secret", "adobe_tech_account_id", "adobe_org_id", - "adobe_private_key_path", + "adobe_private_key", ) def adobe_2_0(**kwargs): return AdobeReader_2_0(**extract_args("adobe_", kwargs)) @@ -75,7 +137,7 @@ def __init__( client_secret, tech_account_id, org_id, - private_key_path, + private_key, global_company_id, report_suite_id, dimension, @@ -84,14 +146,14 @@ def __init__( end_date, ): self.adobe_client = AdobeClient( - client_id, client_secret, tech_account_id, org_id, private_key_path, + client_id, client_secret, tech_account_id, org_id, private_key, ) self.global_company_id = global_company_id self.report_suite_id = report_suite_id self.dimensions = list(dimension) self.metrics = list(metric) self.start_date = start_date - self.end_date = end_date + datetime.timedelta(days=1) + self.end_date = end_date + timedelta(days=1) self.ingestion_tracker = [] self.node_values = {} @@ -147,7 +209,6 @@ def throttle(self): ) time.sleep(sleep_time) - @retry def get_report_page(self, rep_desc, page_nb=0): """ Getting a single report page, and returning it into a raw JSON format. 
@@ -164,6 +225,7 @@ def get_report_page(self, rep_desc, page_nb=0): return response.json() + @retry def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): """ Iterating over report pages, parsing them, and returning a list of iterators, @@ -193,6 +255,7 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): return chain(*all_responses) + @retry def get_node_values(self, breakdown_item_ids): """ Extracting dimension values from a full report response (all pages), diff --git a/requirements.txt b/requirements.txt index 4a25a239..02b7f20b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,4 +61,4 @@ uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 googleads==22.0.0 -jwt==1.0.0 \ No newline at end of file +pyjwt==1.7.1 \ No newline at end of file diff --git a/tests/readers/test_adobe_reader_2_0.py b/tests/readers/test_adobe_reader_2_0.py index 306dcd1e..e953227a 100644 --- a/tests/readers/test_adobe_reader_2_0.py +++ b/tests/readers/test_adobe_reader_2_0.py @@ -29,7 +29,7 @@ class AdobeReaderTest_2_0(TestCase): "client_secret": "", "tech_account_id": "", "org_id": "", - "private_key_path": "", + "private_key": "", "global_company_id": "", "report_suite_id": "XXXXXXXXX", "dimension": [], From aba1fd847cb1b8ba3814dcdf6f6534e6fbef610a Mon Sep 17 00:00:00 2001 From: Vivien MORLET Date: Wed, 27 May 2020 18:48:26 +0200 Subject: [PATCH 38/46] retry to get access token --- nck/clients/adobe_client.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/nck/clients/adobe_client.py b/nck/clients/adobe_client.py index a3c69d92..359fcc94 100644 --- a/nck/clients/adobe_client.py +++ b/nck/clients/adobe_client.py @@ -20,8 +20,7 @@ from datetime import datetime, timedelta import requests import jwt - -from nck.utils.retry import retry +from tenacity import retry, wait_exponential, stop_after_delay IMS_HOST = "ims-na1.adobelogin.com" IMS_EXCHANGE = "https://ims-na1.adobelogin.com/ims/exchange/jwt" @@ -38,10 
+37,7 @@ class AdobeClient: https://github.com/AdobeDocs/analytics-2.0-apis/tree/master/examples/jwt/python """ - @retry - def __init__( - self, client_id, client_secret, tech_account_id, org_id, private_key, - ): + def __init__(self, client_id, client_secret, tech_account_id, org_id, private_key): self.client_id = client_id self.client_secret = client_secret self.tech_account_id = tech_account_id @@ -64,13 +60,13 @@ def __init__( # Creating access_token attribute logging.info("Getting access_token.") - post_body = { - "client_id": self.client_id, - "client_secret": self.client_secret, - "jwt_token": self.jwt_token, - } + self.access_token = self.get_access_token() + + @retry(wait=wait_exponential(multiplier=60, min=60, max=1200), stop=stop_after_delay(3600)) + def get_access_token(self): + post_body = {"client_id": self.client_id, "client_secret": self.client_secret, "jwt_token": self.jwt_token} response = requests.post(IMS_EXCHANGE, data=post_body) - self.access_token = response.json()["access_token"] + return response.json()["access_token"] def build_request_headers(self, global_company_id): """ From e31160688f99d00399e31a1cb4e1956c07df2658 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 28 May 2020 09:22:04 +0200 Subject: [PATCH 39/46] Fix doc typo --- nck/readers/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 9262f2e0..d2bd5878 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -346,7 +346,7 @@ python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-cl |`--adobe-client-secret`|Client Secret, that you can find on Adobe Developper Console| |`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developper Console| |`--adobe-org-id`|Organization ID, that you can find on Adobe Developper Console| -|`--adobe-private-key-path`|Content of the private.key file, that you had to provide to create the integration. 
Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| +|`--adobe-private-key`|Content of the private.key file, that you had to provide to create the integration. Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| |`--adobe-global-company-id`|Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')| |`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| |`--adobe-dimension`|Dimension to include in the report| From 4d39ffc430ee126924417d982e84ad84878713c7 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Thu, 28 May 2020 12:02:55 +0200 Subject: [PATCH 40/46] Updating retry to handle API rate limit --- nck/helpers/adobe_helper_2_0.py | 10 ++++++++++ nck/readers/README.md | 4 ++-- nck/readers/adobe_reader_2_0.py | 14 +++++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/nck/helpers/adobe_helper_2_0.py b/nck/helpers/adobe_helper_2_0.py index d29d8005..621cf519 100644 --- a/nck/helpers/adobe_helper_2_0.py +++ b/nck/helpers/adobe_helper_2_0.py @@ -16,8 +16,18 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +import logging from datetime import datetime +logging.basicConfig(level="INFO") +logger = logging.getLogger() + + +class APIRateLimitError(Exception): + def __init__(self, message): + super().__init__(message) + logging.error(message) + def add_metric_container_to_report_description( rep_desc, dimensions, metrics, breakdown_item_ids diff --git a/nck/readers/README.md b/nck/readers/README.md index d2bd5878..023ecac6 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -328,7 +328,7 @@ python nck/entrypoint.py read_adobe --adobe-username --adobe-passwor Adobe Analytics Reader 2.0 uses the **JWT authentication framework**. 
- Get developper access to Adobe Analytics (documentation can be found [here](https://helpx.adobe.com/enterprise/using/manage-developers.html)) - Create a Service Account integration to Adobe Analytics on [Adobe Developper Console](https://console.adobe.io/) -- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me'). All these parameters will be passed to Adobe Analytics Reader 2.0. +- Use the generated JWT credentials (Client ID, Client Secret, Technical Account ID, Organization ID and private.key file) to retrieve your Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)). All these parameters will be passed to Adobe Analytics Reader 2.0. #### Quickstart @@ -347,7 +347,7 @@ python nck/entrypoint.py read_adobe_2_0 --adobe-client-id --adobe-cl |`--adobe-tech-account-id`|Technical Account ID, that you can find on Adobe Developper Console| |`--adobe-org-id`|Organization ID, that you can find on Adobe Developper Console| |`--adobe-private-key`|Content of the private.key file, that you had to provide to create the integration.
Make sure to enter the parameter in quotes, include headers, and indicate newlines as \n.| -|`--adobe-global-company-id`|Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')| +|`--adobe-global-company-id`|Global Company ID (to be requested to [Discovery API](https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md))| |`--adobe-report-suite-id`|ID of the requested Adobe Report Suite| |`--adobe-dimension`|Dimension to include in the report| |`--adobe-metric`|Metric to include in the report| diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index f8ef2cc2..54145d56 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -31,6 +31,7 @@ from nck.clients.adobe_client import AdobeClient from nck.streams.json_stream import JSONStream from nck.helpers.adobe_helper_2_0 import ( + APIRateLimitError, add_metric_container_to_report_description, get_node_values_from_response, get_item_ids_from_nodes, @@ -84,7 +85,8 @@ def format_key_if_needed(ctx, param, value): @click.option( "--adobe-global-company-id", required=True, - help="Global Company ID (to be requested to 'https://analytics.adobe.io/discovery/me')", + help="Global Company ID, to be requested to Discovery API. " + "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", ) @click.option( "--adobe-report-suite-id", required=True, @@ -209,6 +211,7 @@ def throttle(self): ) time.sleep(sleep_time) + @retry def get_report_page(self, rep_desc, page_nb=0): """ Getting a single report page, and returning it into a raw JSON format.
@@ -221,11 +224,13 @@ def get_report_page(self, rep_desc, page_nb=0): f"https://analytics.adobe.io/api/{self.global_company_id}/reports", headers=self.adobe_client.build_request_headers(self.global_company_id), data=json.dumps(rep_desc), - ) + ).json() - return response.json() + if response.get("message") == "Too many requests": + raise APIRateLimitError("API rate limit was exceeded.") + + return response - @retry def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): """ Iterating over report pages, parsing them, and returning a list of iterators, @@ -255,7 +260,6 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): return chain(*all_responses) - @retry def get_node_values(self, breakdown_item_ids): """ Extracting dimension values from a full report response (all pages), From 83682600d06b155ac14abb1144754dd90e55e1d2 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 2 Jun 2020 10:46:55 +0200 Subject: [PATCH 41/46] Fix Facebook doc --- nck/readers/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nck/readers/README.md b/nck/readers/README.md index 8213fc8d..2dd6c8ba 100644 --- a/nck/readers/README.md +++ b/nck/readers/README.md @@ -33,7 +33,7 @@ python nck/entrypoint.py read_facebook --facebook-access-token -- |`--facebook-app-id`|Facebook App ID. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-app-secret`|Facebook App Secret. *Not mandatory if Facebook Access Token is provided.*| |`--facebook-access-token`|Facebook App Access Token.| -|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. *Supported values: creative (available only for Facebook Object Node requests), ad (default), adset, campaign, account.*| +|`--facebook-object-type`|Nature of the root Facebook Object used to make the request. 
*Supported values: creative (available only for Facebook Object Node requests), ad, adset, campaign, account (default).*| |`--facebook-object-id`|ID of the root Facebook Object used to make the request.| |`--facebook-level`|Granularity of the response. *Supported values: creative (available only for Facebook Object Node requests), ad (default), adset, campaign or account.*| |`--facebook-ad-insights`|*True* (default) if *Facebook Ad Insights* request, *False* if *Facebook Object Node* request.| From c389271eb6fa863ff719763d2754406f2779bebc Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 2 Jun 2020 12:12:34 +0200 Subject: [PATCH 42/46] Removing date restrictions for Facebook Object Node queries --- nck/readers/facebook_reader.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/nck/readers/facebook_reader.py b/nck/readers/facebook_reader.py index 3d6e6c69..b8501d29 100644 --- a/nck/readers/facebook_reader.py +++ b/nck/readers/facebook_reader.py @@ -218,14 +218,6 @@ def __init__( "Wrong query. Facebook Object Node queries do not accept Breakdowns nor Action Breakdowns." ) - if self.level not in ["campaign", "adset", "ad"] and ( - (self.start_date and self.end_date) or self.date_preset - ): - raise ClickException( - "Wrong query. Facebook Object Node queries only accept the time_range\ - and date_preset parameters at the 'campaign', 'adset' or 'ad' levels." - ) - if self.time_increment: raise ClickException( "Wrong query. Facebook Object Node queries do not accept the time_increment parameter." 
From 97d3f6b5fbd7251838a321ccb78599bfd756774c Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 2 Jun 2020 13:58:54 +0200 Subject: [PATCH 43/46] Fix Facebook tests --- tests/readers/test_facebook_reader.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/tests/readers/test_facebook_reader.py b/tests/readers/test_facebook_reader.py index affdfa1e..f2b4a282 100644 --- a/tests/readers/test_facebook_reader.py +++ b/tests/readers/test_facebook_reader.py @@ -76,19 +76,6 @@ class FacebookReaderTest(TestCase): "action_breakdown_for_object_node_query", {"ad_insights": False, "action_breakdown": ["action_type"]}, ), - ( - "time_range_for_object_node_query", - { - "ad_insights": False, - "level": "account", - "start_date": "2020-01-01", - "end_date": "2020-01-01", - }, - ), - ( - "date_preset_for_object_node_query", - {"ad_insights": False, "level": "account", "date_preset": "last_30d"}, - ), ( "time_increment_for_object_node_query", {"ad_insights": False, "time_increment": "1"}, From fc0e6157cfed469ff0a31489733e60cb6da286c4 Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 2 Jun 2020 18:49:18 +0200 Subject: [PATCH 44/46] Modifying adobe_reader_2_0 argument names --- .env | 2 +- nck/readers/adobe_reader_2_0.py | 107 +++++++++----------------------- 2 files changed, 32 insertions(+), 77 deletions(-) diff --git a/.env b/.env index 7c30bc2c..2cf00bdb 100644 --- a/.env +++ b/.env @@ -1,4 +1,4 @@ PROJECT_ID=artefact-docker-containers -DOCKER_IMAGE=nautilus-connectors-kit +DOCKER_IMAGE=nautilus-connectors-kit-dev DOCKER_TAG=v1.1 DOCKER_REGISTRY=eu.gcr.io diff --git a/nck/readers/adobe_reader_2_0.py b/nck/readers/adobe_reader_2_0.py index 54145d56..8aa25ab0 100644 --- a/nck/readers/adobe_reader_2_0.py +++ b/nck/readers/adobe_reader_2_0.py @@ -56,45 +56,41 @@ def format_key_if_needed(ctx, param, value): @click.command(name="read_adobe_2_0") @click.option( - "--adobe-client-id", + "--adobe-2-0-client-id", required=True, help="Client ID, that you can 
find in your integration section on Adobe Developper Console.", ) @click.option( - "--adobe-client-secret", + "--adobe-2-0-client-secret", required=True, help="Client Secret, that you can find in your integration section on Adobe Developper Console.", ) @click.option( - "--adobe-tech-account-id", + "--adobe-2-0-tech-account-id", required=True, help="Technical Account ID, that you can find in your integration section on Adobe Developper Console.", ) @click.option( - "--adobe-org-id", + "--adobe-2-0-org-id", required=True, help="Organization ID, that you can find in your integration section on Adobe Developper Console.", ) @click.option( - "--adobe-private-key", + "--adobe-2-0-private-key", required=True, callback=format_key_if_needed, help="Content of the private.key file, that you had to provide to create the integration. " "Make sure to enter the parameter in quotes, include headers, and indicate newlines as '\\n'.", ) @click.option( - "--adobe-global-company-id", + "--adobe-2-0-global-company-id", required=True, help="Global Company ID, to be requested to Discovery API. 
" "Doc: https://www.adobe.io/apis/experiencecloud/analytics/docs.html#!AdobeDocs/analytics-2.0-apis/master/discovery.md)", ) +@click.option("--adobe-2-0-report-suite-id", required=True, help="ID of the requested Adobe Report Suite") @click.option( - "--adobe-report-suite-id", - required=True, - help="ID of the requested Adobe Report Suite", -) -@click.option( - "--adobe-dimension", + "--adobe-2-0-dimension", required=True, multiple=True, help="To get dimension names, enable the Debugger feature in Adobe Analytics Workspace: " @@ -102,34 +98,24 @@ def format_key_if_needed(ctx, param, value): "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", ) @click.option( - "--adobe-metric", + "--adobe-2-0-metric", required=True, multiple=True, help="To get metric names, enable the Debugger feature in Adobe Analytics Workspace: " "it will allow you to visualize the back-end JSON requests made by Adobe Analytics UI to Reporting API 2.0. " "Doc: https://github.com/AdobeDocs/analytics-2.0-apis/blob/master/reporting-tricks.md", ) -@click.option( - "--adobe-start-date", - required=True, - type=click.DateTime(), - help="Start date of the report", -) -@click.option( - "--adobe-end-date", - required=True, - type=click.DateTime(), - help="End date of the report", -) +@click.option("--adobe-2-0-start-date", required=True, type=click.DateTime(), help="Start date of the report") +@click.option("--adobe-2-0-end-date", required=True, type=click.DateTime(), help="End date of the report") @processor( - "adobe_client_id", - "adobe_client_secret", - "adobe_tech_account_id", - "adobe_org_id", - "adobe_private_key", + "adobe_2_0_client_id", + "adobe_2_0_client_secret", + "adobe_2_0_tech_account_id", + "adobe_2_0_org_id", + "adobe_2_0_private_key", ) def adobe_2_0(**kwargs): - return AdobeReader_2_0(**extract_args("adobe_", kwargs)) + return AdobeReader_2_0(**extract_args("adobe_2_0_", kwargs)) class AdobeReader_2_0(Reader): @@ -147,9 +133,7 @@ def __init__( 
start_date, end_date, ): - self.adobe_client = AdobeClient( - client_id, client_secret, tech_account_id, org_id, private_key, - ) + self.adobe_client = AdobeClient(client_id, client_secret, tech_account_id, org_id, private_key) self.global_company_id = global_company_id self.report_suite_id = report_suite_id self.dimensions = list(dimension) @@ -172,19 +156,14 @@ def build_report_description(self, metrics, breakdown_item_ids=[]): rep_desc = { "rsid": self.report_suite_id, - "globalFilters": [ - {"type": "dateRange", "dateRange": self.build_date_range()} - ], + "globalFilters": [{"type": "dateRange", "dateRange": self.build_date_range()}], "metricContainer": {}, "dimension": f"variables/{self.dimensions[len(breakdown_item_ids)]}", "settings": {"countRepeatInstances": "true", "limit": "5000"}, } rep_desc = add_metric_container_to_report_description( - rep_desc=rep_desc, - dimensions=self.dimensions, - metrics=metrics, - breakdown_item_ids=breakdown_item_ids, + rep_desc=rep_desc, dimensions=self.dimensions, metrics=metrics, breakdown_item_ids=breakdown_item_ids ) return rep_desc @@ -196,19 +175,11 @@ def throttle(self): current_time = time.time() self.ingestion_tracker.append(current_time) - window_ingestion_tracker = [ - t - for t in self.ingestion_tracker - if t >= (current_time - API_WINDOW_DURATION) - ] + window_ingestion_tracker = [t for t in self.ingestion_tracker if t >= (current_time - API_WINDOW_DURATION)] if len(window_ingestion_tracker) >= API_REQUESTS_OVER_WINDOW_LIMIT: - sleep_time = ( - window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time - ) - logging.warning( - f"Throttling activated: sleeping for {sleep_time} seconds..." 
- ) + sleep_time = window_ingestion_tracker[0] + API_WINDOW_DURATION - current_time + logging.warning(f"Throttling activated: sleeping for {sleep_time} seconds...") time.sleep(sleep_time) @retry @@ -254,9 +225,7 @@ def get_parsed_report(self, rep_desc, metrics, parent_dim_parsed={}): if first_response["totalPages"] > 1: for page_nb in range(1, first_response["totalPages"]): next_response = self.get_report_page(rep_desc, page_nb) - all_responses += [ - parse_response(next_response, metrics, parent_dim_parsed) - ] + all_responses += [parse_response(next_response, metrics, parent_dim_parsed)] return chain(*all_responses) @@ -267,17 +236,13 @@ def get_node_values(self, breakdown_item_ids): For instance: {'daterangeday_1200001': 'Jan 1, 2020'} """ - rep_desc = self.build_report_description( - metrics=["visits"], breakdown_item_ids=breakdown_item_ids - ) + rep_desc = self.build_report_description(metrics=["visits"], breakdown_item_ids=breakdown_item_ids) first_response = self.get_report_page(rep_desc) node_values = get_node_values_from_response(first_response) if first_response["totalPages"] > 1: for page_nb in range(1, first_response["totalPages"]): - next_node_values = get_node_values_from_response( - self.get_report_page(rep_desc, page_nb) - ) + next_node_values = get_node_values_from_response(self.get_report_page(rep_desc, page_nb)) node_values.update(next_node_values) return node_values @@ -336,13 +301,9 @@ def read_through_graph(self, graph=None, node=None): # If no remaining node children to explore: get report if len(path_to_node) == len(self.dimensions) - 1: - parent_dim_parsed = { - node.split("_")[0]: self.node_values[node] for node in path_to_node - } + parent_dim_parsed = {node.split("_")[0]: self.node_values[node] for node in path_to_node} breakdown_item_ids = get_item_ids_from_nodes(path_to_node) - rep_desc = self.build_report_description( - self.metrics, breakdown_item_ids - ) + rep_desc = self.build_report_description(self.metrics, breakdown_item_ids) 
data = self.get_parsed_report(rep_desc, self.metrics, parent_dim_parsed) yield from self.result_generator(data) @@ -351,9 +312,7 @@ def read_through_graph(self, graph=None, node=None): visited.append(node) # Update unvisited_childs - unvisited_childs = [ - child_node for child_node in graph[node] if child_node not in visited - ] + unvisited_childs = [child_node for child_node in graph[node] if child_node not in visited] # Read through child node children for child_node in unvisited_childs: @@ -369,10 +328,6 @@ def read_through_graph(self, graph=None, node=None): def read(self): if len(self.dimensions) == 1: - yield JSONStream( - "results_" + self.report_suite_id, self.read_one_dimension() - ) + yield JSONStream("results_" + self.report_suite_id, self.read_one_dimension()) elif len(self.dimensions) > 1: - yield JSONStream( - "results_" + self.report_suite_id, self.read_through_graph() - ) + yield JSONStream("results_" + self.report_suite_id, self.read_through_graph()) From a16842600bb8e0bcb4f0bbeb05cb3b50c4b2947d Mon Sep 17 00:00:00 2001 From: gabrielleberanger Date: Tue, 2 Jun 2020 19:43:51 +0200 Subject: [PATCH 45/46] Adding cryptography to requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4bad88fa..e0bacf59 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,4 +61,5 @@ uritemplate==3.0.0 urllib3==1.25.7 Werkzeug==0.16.0 googleads==22.0.0 -pyjwt==1.7.1 \ No newline at end of file +pyjwt==1.7.1 +cryptography==2.9 \ No newline at end of file From 53b0cab2c5ea9fa52574e42ac756be2de88f12d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20B=C3=A9ranger?= <56034720+gabrielleberanger@users.noreply.github.com> Date: Fri, 5 Jun 2020 16:41:23 +0200 Subject: [PATCH 46/46] Harmonizing General README file --- README.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 79ec33e8..e61b141c 100644 --- 
a/README.md +++ b/README.md @@ -6,24 +6,25 @@ Nautilus connectors kit is a tool which aim is getting raw data from different s ### Readers -- Google DoubleClick Manager (DBM / DV360) -- Google Campaign Manager (CM / DCM) -- Google Search Ads 360 (SA360) +- Adobe Analytics 1.4 +- Adobe Analytics 2.0 +- Amazon S3 +- Facebook Marketing +- Google Ads - Google Analytics -- Google Search Console -- Google Sheets - Google Cloud Storage -- Google Adwords +- Google Campaign Manager +- Google Display & Video 360 +- Google Search Ads 360 - Google Search Console -- Facebook Business Manager -- Amazon S3 +- Google Sheets - Oracle -- SalesForce - MySQL - Radarly -- Adobe Analytics 1.4 -- Adobe Analytics 2.0 -- Yandex +- SalesForce +- Twitter Ads +- Yandex Campaign +- Yandex Statistics ### Writers @@ -98,4 +99,4 @@ It is advised to do the following in a virtual env * https://manikos.github.io/a-tour-on-python-packaging * http://lucumr.pocoo.org/2014/1/27/python-on-wheels/ -* https://pip.readthedocs.io/en/1.4.1/cookbook.html#controlling-setup-requires \ No newline at end of file +* https://pip.readthedocs.io/en/1.4.1/cookbook.html#controlling-setup-requires