til.duyet.net
  • 🤪Today I Learned
  • Data Engineering
    • ☁️AWS
      • Make an NVMe volume available for use on AWS EC2
      • AWS EMR
        • S3 Dist CP
        • Manage services
        • EMR - Tez
        • Issues
    • ⚒️Tools
    • 💻Shell
      • MacOS's Touch ID on Terminal
      • Using `sed` to find and replace in file
      • Merging contents of multiple .csv files into single .csv file
    • env from ConfigMap or Secrets
    • ☸️Kubernetes, Helm, Kustomize
      • initContainer to download file to pod
      • Kustomize: omission of resources
      • K8S: Services and Labels
      • K8S: PVC stuck in status “Terminating”
      • K8S: Port Forward
      • K8S: Pull an Image from a Private Registry
      • Happy helm
      • Helm: render manifest locally
      • Helm: Public Helm chart repository with GitHub Pages
    • 🔄Apache Airflow
      • Extend official Docker image
      • Generate offline SQL upgrade script
      • Airflow in Docker
      • Mastering Airflow UI
      • Best Practices for Airflow and ETLs
      • Airflow in Docker Compose
      • Useful SQL queries for Apache Airflow
    • 🐳Docker
      • "Distroless" Docker Images
      • Docker cleanup
      • Optimize the Docker Image Size
      • The best Docker base image for Python
  • Database
    • Google BigQuery
      • BigQuery Cancel Running Query
      • BigQuery - Split string and get the first part
      • BigQuery - UNNEST in SELECT
      • Bigquery - Sample queries for audiences based
      • BigQuery cookbook for Google Analytics Exported Data
    • Apache Hive
      • Hive - SHOW schemas/tables/create
    • AWS Redshift / Postgres
      • Amazon Redshift Utilities
      • Postgres - Index Summary
      • Postgres - List tables
      • Redshift - GRANT
      • Redshift - tables and their owners
      • Redshift - Check the table size
    • Presto
      • Aliyun Data Lake Analytics (Presto) - Add partition on non-existing location
      • AWS Athena - Add Partition
  • Programming
    • 🍪Rust
      • cheats.rs
      • Imperative vs Declarative
      • Generate Struct from JSON
    • 🐍Python
      • YAML config file with environment variables
      • date_range_generator
      • get_all_s3_keys
      • Pipenv
    • 👻Golang
      • Functions and Methods in Go?
      • Convert JSON to Go struct
    • 💎Javascript / Typescript
      • Intl.ListFormat
    • ✨FE / React
      • Beautiful icons, images, ..
      • Flexbox
      • Create hook to inject JS script
  • Unix
    • Git - Pretty git branch graphs
    • Checking files in Docker build context
    • Bash get the directory of the current script
    • Vim
    • Find and replace
  • Miscellaneous
    • [Fig] Single machine and distributed system structure
    • Deploying Machine Learning Models at Scale
    • Bypass a Chrome certificate/HSTS error
    • Articles
Powered by GitBook
On this page
  • N-day active users
  • N-day inactive users
  • Frequently active users
  • Highly active users
  • Acquired users
  • Cohorts with filters

Was this helpful?

Edit on Git
  1. Database
  2. Google BigQuery

Bigquery - Sample queries for audiences based

These queries return the number of users in the audience. If you'd like to get the list of user IDs in the audience instead, then remove the outermost COUNT() function, e.g., COUNT(DISTINCT user_id) -

PreviousBigQuery - UNNEST in SELECTNextBigQuery cookbook for Google Analytics Exported Data

Last updated 4 years ago

Was this helpful?

Source:

N-day active users

/**
 * Builds an audience of N-Day Active Users.
 *
 * N-day active users = users who have logged at least one user_engagement
 * event in the last N days.
*/
SELECT
  COUNT(DISTINCT user_id) AS n_day_active_users_count
FROM
  -- PLEASE REPLACE WITH YOUR TABLE NAME.
  `YOUR_TABLE.events_*`
WHERE
  event_name = 'user_engagement'
  -- Pick events in the last N = 20 days.
  AND event_timestamp >
      UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP, INTERVAL 20 DAY))
  -- PLEASE REPLACE WITH YOUR DESIRED DATE RANGE.
  AND _TABLE_SUFFIX BETWEEN '20180521' AND '20240131';

N-day inactive users

/**
 * Builds an audience of N-Day Inactive Users.
 *
 * N-Day inactive users = users in the last M days who have not logged a
 *   user_engagement event in the last N days where M > N.
 */
SELECT
  COUNT(DISTINCT MDaysUsers.user_id) AS n_day_inactive_users_count
FROM
  (
    SELECT
      user_id
    FROM
      /* PLEASE REPLACE WITH YOUR TABLE NAME */
      `YOUR_TABLE.events_*`
    WHERE
      event_name = 'user_engagement'
      /* Has engaged in last M = 7 days */
      AND event_timestamp >
          UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY))
      /* PLEASE REPLACE WITH YOUR DESIRED DATE RANGE */
      AND _TABLE_SUFFIX BETWEEN '20180521' AND '20240131'
  ) AS MDaysUsers
-- EXCEPT ALL is not yet implemented in BigQuery. Use LEFT JOIN in the interim.
LEFT JOIN
  (
    SELECT
      user_id
    FROM
      /* PLEASE REPLACE WITH YOUR TABLE NAME */
      `YOUR_TABLE.events_*`
    WHERE
      event_name = 'user_engagement'
      /* Has engaged in last N = 2 days */
      AND event_timestamp >
          UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 2 DAY))
      /* PLEASE REPLACE WITH YOUR DESIRED DATE RANGE */
      AND _TABLE_SUFFIX BETWEEN '20180521' AND '20240131'
  ) AS NDaysUsers
  ON MDaysUsers.user_id = NDaysUsers.user_id
WHERE
  NDaysUsers.user_id IS NULL;

Frequently active users

/**
 * Builds an audience of Frequently Active Users.
 *
 * Frequently Active Users = users who have logged at least once
 * 'user_engagement' event on N of the last M days where M > N.
 */
SELECT
  COUNT(DISTINCT user_id) AS frequent_active_users_count
FROM
  (
    SELECT
      user_id,
      COUNT(DISTINCT event_date)
    FROM
      -- PLEASE REPLACE WITH YOUR TABLE NAME.
      `YOUR_TABLE.events_*`
    WHERE
      event_name = 'user_engagement'
      -- User engagement in the last M = 10 days.
      AND event_timestamp >
          UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 10 DAY))
      -- PLEASE REPLACE YOUR DESIRED DATE RANGE.  For optimal performance
      -- the _TABLE_SUFFIX range should match the INTERVAL value above.
      AND _TABLE_SUFFIX BETWEEN '20180521' AND '20240131'
    GROUP BY 1
    -- Having engaged in at least N = 4 days.
    HAVING COUNT(event_date) >= 4
  );

Highly active users

/**
 * Builds an audience of Highly Active Users.
 *
 * Highly Active Users = users who have been active for more than N minutes
 * in the last M days where M > N.
*/
SELECT
  COUNT(DISTINCT user_id) AS high_active_users_count
FROM
  (
    SELECT
      user_id,
      event_params.key,
      SUM(event_params.value.int_value)
    FROM
      -- PLEASE REPLACE WITH YOUR TABLE NAME.
      `YOUR_TABLE.events_*` AS T
    CROSS JOIN
      T.event_params
    WHERE
      event_name = 'user_engagement'
      -- User engagement in the last M = 10 days.
      AND event_timestamp >
          UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 10 DAY))
      AND event_params.key = 'engagement_time_msec'
      -- PLEASE REPLACE YOUR DESIRED DATE RANGE.
      AND _TABLE_SUFFIX BETWEEN '20180521' AND '20240131'
    GROUP BY 1, 2
    HAVING
      -- Having engaged for more than N = 0.1 minutes.
      SUM(event_params.value.int_value) > 0.1 * 60 * 1000000
  );

Acquired users

/**
 * Builds an audience of Acquired Users.
 *
 * Acquired Users = users who were acquired via some Source/Medium/Campaign.
 */
SELECT
  COUNT(DISTINCT user_id) AS acquired_users_count
FROM
  -- PLEASE REPLACE WITH YOUR TABLE NAME.
  `YOUR_TABLE.events_*`
WHERE
  traffic_source.source = 'google'
  AND traffic_source.medium = 'cpc'
  AND traffic_source.name = 'VTA-Test-Android'
  -- PLEASE REPLACE YOUR DESIRED DATE RANGE.
  AND _TABLE_SUFFIX BETWEEN '20180521' AND '20240131';

Cohorts with filters

/**
 * Builds an audience composed of users acquired last week
 * through Google campaigns, i.e., cohorts with filters.
 *
 * Cohort is defined as users acquired last week, i.e. between 7 - 14
 * days ago. The cohort filter is for users acquired through a direct
 * campaign.
 */
SELECT
  COUNT(DISTINCT user_id) AS users_acquired_through_google_count
FROM
  -- PLEASE REPLACE WITH YOUR TABLE NAME.
  `YOUR_TABLE.events_*`
WHERE
  event_name = 'first_open'
  -- Cohort: opened app 1-2 weeks ago. One week of cohort, aka. weekly.
  AND event_timestamp >
      UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 14 DAY))
  AND event_timestamp <
      UNIX_MICROS(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY))
  -- Cohort filter: users acquired through 'google' source.
  AND traffic_source.source = 'google'
  -- PLEASE REPLACE YOUR DESIRED DATE RANGE.
  AND _TABLE_SUFFIX BETWEEN '20180501' AND '20240131';
https://support.google.com/firebase/answer/9037342?hl=en