Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/usage reporting/add active users #6627

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions sql_generators/glean_usage/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
glean_app_ping_views,
metrics_clients_daily,
metrics_clients_last_seen,
usage_reporting_clients_daily,
usage_reporting_clients_first_seen,
usage_reporting_clients_last_seen,
usage_reporting_active_users_aggregates,
)
from sql_generators.glean_usage.common import get_app_info, list_tables

Expand All @@ -43,6 +47,10 @@
event_error_monitoring.EventErrorMonitoring(),
event_flow_monitoring.EventFlowMonitoring(),
events_stream.EventsStreamTable(),
usage_reporting_clients_daily.UsageReportingClientsDailyTable(),
usage_reporting_clients_first_seen.UsageReportingClientsFirstSeenTable(),
usage_reporting_clients_last_seen.UsageReportingClientsLastSeenTable(),
usage_reporting_active_users_aggregates.UsageReportingActiveUsersAggregatesTable(),
]


Expand Down Expand Up @@ -136,7 +144,7 @@ def get_tables(table_name="baseline_v1"):
not in ConfigLoader.get("generate", "glean_usage", "skip_apps", fallback=[])
]

id_token=get_id_token()
id_token = get_id_token()

# Prepare parameters so that generation of all Glean datasets can be done in parallel

Expand All @@ -151,7 +159,7 @@ def get_tables(table_name="baseline_v1"):
use_cloud_function=use_cloud_function,
app_info=app_info,
parallelism=parallelism,
id_token=id_token
id_token=id_token,
),
baseline_table,
)
Expand All @@ -169,7 +177,7 @@ def get_tables(table_name="baseline_v1"):
output_dir=output_dir,
use_cloud_function=use_cloud_function,
parallelism=parallelism,
id_token=id_token
id_token=id_token,
),
info,
)
Expand Down
17 changes: 14 additions & 3 deletions sql_generators/glean_usage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,17 @@ def table_names_from_baseline(baseline_table, include_project_id=True):
events_view=f"{prefix}.events",
events_stream_table=f"{prefix}_derived.events_stream_v1",
events_stream_view=f"{prefix}.events_stream",
dau_reporting_stable_table=f"{prefix}_stable.dau_reporting_v1",
usage_reporting_stable_table=f"{prefix}_stable.usage_reporting_v1",
usage_reporting_clients_daily_table=f"{prefix}_derived.usage_reporting_clients_daily_v1",
usage_reporting_clients_first_seen_table=f"{prefix}_derived.usage_reporting_clients_first_seen_v1",
usage_reporting_clients_last_seen_table=f"{prefix}_derived.usage_reporting_clients_last_seen_v1",
usage_reporting_active_users_aggregates_table=f"{prefix}_derived.usage_reporting_active_users_aggregates_v1",
usage_reporting_clients_daily_view=f"{prefix}.usage_reporting_clients_daily",
usage_reporting_clients_first_seen_view=f"{prefix}.usage_reporting_clients_first_seen",
usage_reporting_clients_last_seen_view=f"{prefix}.usage_reporting_clients_last_seen",
usage_reporting_active_users_view=f"{prefix}.usage_reporting_active_users",
usage_reporting_active_users_aggregates_view=f"{prefix}.usage_reporting_active_users_aggregates",
)


Expand Down Expand Up @@ -234,7 +245,7 @@ def generate_per_app_id(
use_cloud_function=True,
app_info=[],
parallelism=8,
id_token=None
id_token=None,
):
"""Generate the baseline table query per app_id."""
if not self.per_app_id_enabled:
Expand Down Expand Up @@ -268,7 +279,7 @@ def generate_per_app_id(
derived_dataset=derived_dataset,
app_name=app_name,
has_distribution_id=app_name in APPS_WITH_DISTRIBUTION_ID,
has_profile_group_id= app_name in APPS_WITH_PROFILE_GROUP_ID,
has_profile_group_id=app_name in APPS_WITH_PROFILE_GROUP_ID,
)

render_kwargs.update(self.custom_render_kwargs)
Expand Down Expand Up @@ -364,7 +375,7 @@ def generate_per_app(
output_dir=None,
use_cloud_function=True,
parallelism=8,
id_token=None
id_token=None,
):
"""Generate the baseline table query per app_name."""
if not self.per_app_enabled:
Expand Down
4 changes: 2 additions & 2 deletions sql_generators/glean_usage/templates/cross_channel.view.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ AS
UNION ALL
{% endif -%}
{% if app_name == "fenix" -%}
SELECT
SELECT
"{{ dataset }}" AS normalized_app_id,
* REPLACE(mozfun.norm.fenix_app_info("{{ dataset }}", app_build).channel AS normalized_channel),
{% else -%}
SELECT
SELECT
"{{ dataset }}" AS normalized_app_id,
* REPLACE("{{ channel }}" AS normalized_channel)
{% endif -%}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{ header_yaml }}
friendly_name: Usage Reporting Active Users
description: |-
A daily client aggregation view for the usage_reporting ping. Merges the computations for client first seen
and last seen metrics.

owners:
- gkatre@mozilla.com
labels: {}
bigquery: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:dataops-managed/taar
- workgroup:mozilla-confidential
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
fields:
- mode: NULLABLE
name: submission_date
type: DATE
description: |
Logical date used for processing and partitioning.

- mode: NULLABLE
name: usage_profile_id
type: STRING
description:

# - mode: NULLABLE
# name: first_run_date
# type: DATE
# description: |
# The date of the first run of the application.

- mode: NULLABLE
name: normalized_channel
type: STRING
description: |
The channel the application is being distributed on.

- mode: NULLABLE
name: normalized_os
type: STRING
description: |
The name of the operating system.

- mode: NULLABLE
name: normalized_os_version
type: STRING
description: |
The user-visible version of the operating system (e.g. "1.2.3").
If the version detection fails, this metric gets set to Unknown.

# - mode: NULLABLE
# name: locale
# type: STRING
# description: |
# The locale of the application during initialization (e.g. "es-ES").
# If the locale can't be determined on the system, the value is "und", to indicate "undetermined".

- mode: NULLABLE
name: normalized_country_code
type: STRING
description: |
Country code

# - mode: NULLABLE
# name: app_build
# type: STRING
# description: |
# The build identifier generated by the CI system (e.g. "1234/A").
# If the value was not provided through configuration, this metric gets set to Unknown.

# - mode: NULLABLE
# name: app_display_version
# type: STRING
# description: |
# The user visible version string (e.g. "1.0.3").
# If the value was not provided through configuration, this metric gets set to Unknown.

- mode: NULLABLE
name: distribution_id
type: STRING
description: |
A string containing the distribution identifier. This was used to identify installs
from Mozilla Online, but now also identifies partnership deal distributions.

- mode: NULLABLE
name: is_active
type: BOOLEAN
description: |
A flag field indicating whether the specific client was active.

- mode: NULLABLE
name: first_seen_date
type: DATE
description: |
Logical date of when we observed the client for the first time in our warehouse.

- mode: NULLABLE
name: days_seen_bits
type: INTEGER
description: |
Bit field shows on which of the last 28 days a client sent us the usage_reporting ping.

- mode: NULLABLE
name: days_active_bits
type: INTEGER
description: |
Bit field shows on which of the last 28 days a client fulfilled the active criteria.

- mode: NULLABLE
name: days_created_profile_bits
type: INTEGER
description: |
bit field indicating how many days lapsed since profile creation.

- mode: NULLABLE
name: activity_segment
type: STRING
description: |
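Segment the client falls into based on how many days it was active, as counted from days_active_bits
(infrequent_user, casual_user, regular_user, core_user, or other).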

- mode: NULLABLE
name: is_dau
type: BOOLEAN
description: |
A flag field indicating whether the specific client was active on the submission_date.

- mode: NULLABLE
name: is_wau
type: BOOLEAN
description: |
A flag field indicating whether the specific client was active on any of the last 7 days, up to and including the submission_date.

- mode: NULLABLE
name: is_mau
type: BOOLEAN
description: |
A flag field indicating whether the specific client was active on any of the last 28 days, up to and including the submission_date.

- mode: NULLABLE
name: is_daily_user
type: BOOLEAN
description: |
A flag field indicating whether the specific client sent the usage_reporting ping on the submission_date.

- mode: NULLABLE
name: is_weekly_user
type: BOOLEAN
description: |
A flag field indicating whether the specific client sent the usage_reporting ping on any of the last 7 days, up to and including the submission_date.

- mode: NULLABLE
name: is_monthly_user
type: BOOLEAN
description: |
A flag field indicating whether the specific client sent the usage_reporting ping on any of the last 28 days,
up to and including the submission_date.
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
{{ header }}
-- Per-profile view over the usage_reporting derived tables: selects the daily
-- attributes together with the first-seen and last-seen columns, and derives
-- activity_segment plus the daily / weekly / monthly flags from the bit patterns.
CREATE OR REPLACE VIEW
`{{ project_id }}.{{ usage_reporting_active_users_view }}`
AS
SELECT
submission_date,
usage_profile_id,
-- first_run_date,
normalized_channel,
normalized_os,
normalized_os_version,
-- locale,
normalized_country_code,
-- app_build,
-- app_display_version,
distribution_id,
is_active,
first_seen_date,
days_seen_bits,
days_active_bits,
days_created_profile_bits,
-- Segment by the number of set bits in days_active_bits:
-- 1-6 infrequent_user, 7-13 casual_user, 14-20 regular_user, 21+ core_user.
-- A count of 0 or a NULL bit pattern falls through to 'other'.
CASE
WHEN BIT_COUNT(days_active_bits)
BETWEEN 1
AND 6
THEN 'infrequent_user'
WHEN BIT_COUNT(days_active_bits)
BETWEEN 7
AND 13
THEN 'casual_user'
WHEN BIT_COUNT(days_active_bits)
BETWEEN 14
AND 20
THEN 'regular_user'
WHEN BIT_COUNT(days_active_bits) >= 21
THEN 'core_user'
ELSE 'other'
END AS activity_segment,
-- is_dau / is_wau / is_mau test days_active_bits; is_daily_user / is_weekly_user /
-- is_monthly_user apply the same thresholds to days_seen_bits.
-- IFNULL turns a NULL comparison result into FALSE.
-- NOTE(review): presumably days_since_seen returns 0 when the bit for the
-- submission_date itself is set; confirm against the mozfun.bits28 docs.
IFNULL(mozfun.bits28.days_since_seen(days_active_bits) = 0, FALSE) AS is_dau,
IFNULL(mozfun.bits28.days_since_seen(days_active_bits) < 7, FALSE) AS is_wau,
IFNULL(mozfun.bits28.days_since_seen(days_active_bits) < 28, FALSE) AS is_mau,
IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) = 0, FALSE) AS is_daily_user,
IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) < 7, FALSE) AS is_weekly_user,
IFNULL(mozfun.bits28.days_since_seen(days_seen_bits) < 28, FALSE) AS is_monthly_user

--
-- TODO: uncomment once duration is added to the usage_reporting ping
--
-- -- Bit patterns capturing activity dates relative to the submission date.
-- days_seen_session_start_bits,
-- days_seen_session_end_bits,
--

-- -- TODO: verify if these fields are needed
-- app_version,
-- country,
-- city,
-- locale,
-- os,
-- windows_build_number,
-- scalar_parent_browser_engagement_total_uri_count_normal_and_private_mode_sum,
-- scalar_parent_browser_engagement_total_uri_count_sum,
-- is_default_browser,
-- isp_name,
-- CASE
-- WHEN isp_name = 'BrowserStack'
-- THEN CONCAT('Firefox Desktop', ' ', isp_name)
-- WHEN distribution_id = 'MozillaOnline'
-- THEN CONCAT('Firefox Desktop', ' ', distribution_id)
-- ELSE 'Firefox Desktop'
-- END AS app_name,
-- IF(
-- LOWER(IFNULL(isp_name, '')) <> "browserstack"
-- AND LOWER(IFNULL(distribution_id, '')) <> "mozillaonline",
-- TRUE,
-- FALSE
-- ) AS is_desktop


-- NOTE(review): both joins match on usage_profile_id only. If the last_seen table
-- holds one row per profile per submission_date, this would multiply rows and make
-- the unqualified submission_date ambiguous; confirm whether submission_date
-- belongs in the USING clause.
-- NOTE(review): the table references below are not prefixed with the project id,
-- unlike the view name above; confirm the rendered names are fully qualified.
FROM
`{{ usage_reporting_clients_daily_table }}`
LEFT JOIN
`{{ usage_reporting_clients_first_seen_table }}`
USING (usage_profile_id)
LEFT JOIN
`{{ usage_reporting_clients_last_seen_table }}`
USING (usage_profile_id)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{ header_yaml }}
friendly_name: Usage Reporting Active Users Aggregates
description: |-
A daily aggregate of the usage_reporting ping representing user activity.

owners:
- gkatre@mozilla.com
labels: {}
bigquery: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:dataops-managed/taar
- workgroup:mozilla-confidential
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{{ header }}
-- Pass-through view: exposes every column of the usage reporting active users
-- aggregates table unchanged.
CREATE OR REPLACE VIEW
`{{ project_id }}.{{ usage_reporting_active_users_aggregates_view }}`
AS
SELECT
*
FROM
`{{ project_id }}.{{ usage_reporting_active_users_aggregates_table }}`
Loading