๐Ÿ” GA4 BigQuery Audit Query Generator (view on GitHub)

This tool helps you audit your GA4 BigQuery data using pre-built SQL queries. The export schema is consistent for all GA4 users, but you can refer to this official documentation to understand how the data is structured.

Click โ€œโšก Update Queriesโ€ after entering your project details to generate ready-to-use queries.






๐Ÿ“Š Explore the Query Library

Below you'll find a collection of ready-to-use GA4 BigQuery audit queries โ€” organised by theme. Each one is designed to help you investigate tracking issues, spot unusual patterns, and surface strategic insights. Click a section to expand and copy any query to your clipboard.

๐Ÿค– Bot or Anomaly Detection
Abnormally Short or Long Sessions

Identifies sessions with unusually short (<2s) or long (>30min) durations that may indicate bots or tracking issues.

WITH session_data AS (
  SELECT
    user_pseudo_id,
    CAST((SELECT value.int_value 
          FROM UNNEST(event_params) 
          WHERE key = 'ga_session_id') AS INT64) AS ga_session_id,
    event_timestamp
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
)

SELECT
  CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS session_id,
  user_pseudo_id,
  MIN(event_timestamp) AS session_start,
  MAX(event_timestamp) AS session_end,
  COUNT(*) AS event_count,
  ROUND((MAX(event_timestamp) - MIN(event_timestamp)) / 1000000, 2) AS session_duration_seconds
FROM session_data
GROUP BY user_pseudo_id, ga_session_id
HAVING 
  event_count > 1 AND 
  (session_duration_seconds < 2 OR session_duration_seconds > 1800)
ORDER BY session_duration_seconds
    
Users with 100+ Sessions in One Day

Flags users with over 100 distinct sessions in a single day, a pattern often associated with non-human activity.

SELECT
  user_pseudo_id,
  event_date,
  COUNT(DISTINCT CONCAT(user_pseudo_id, '-', CAST((SELECT value.int_value 
                                                   FROM UNNEST(event_params) 
                                                   WHERE key = 'ga_session_id') AS STRING))) AS session_count
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
GROUP BY user_pseudo_id, event_date
HAVING session_count > 100
ORDER BY session_count DESC
    
Click Loop: Single Event Type Repeated

Detects sessions with only one repeated event name occurring more than 50 times โ€” a typical sign of automation or malfunction.

WITH session_data AS (
  SELECT
    user_pseudo_id,
    event_name,
    CAST((SELECT value.int_value 
          FROM UNNEST(event_params) 
          WHERE key = 'ga_session_id') AS INT64) AS ga_session_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),

session_summary AS (
  SELECT
    user_pseudo_id,
    ga_session_id,
    CONCAT(user_pseudo_id, '-', CAST(ga_session_id AS STRING)) AS session_id,
    COUNT(*) AS total_events,
    COUNT(DISTINCT event_name) AS unique_event_names,
    ARRAY_AGG(event_name ORDER BY event_name LIMIT 1)[OFFSET(0)] AS repeated_event
  FROM session_data
  GROUP BY user_pseudo_id, ga_session_id
),

event_counts AS (
  SELECT
    user_pseudo_id,
    ga_session_id,
    event_name,
    COUNT(*) AS event_name_count
  FROM session_data
  GROUP BY user_pseudo_id, ga_session_id, event_name
)

SELECT
  s.session_id,
  s.user_pseudo_id,
  s.total_events,
  s.unique_event_names,
  s.repeated_event,
  ec.event_name_count AS repeated_event_count
FROM session_summary s
LEFT JOIN event_counts ec
  ON s.user_pseudo_id = ec.user_pseudo_id
  AND s.ga_session_id = ec.ga_session_id
  AND s.repeated_event = ec.event_name
WHERE s.unique_event_names = 1 AND s.total_events > 50
ORDER BY s.total_events DESC
    
๐Ÿ”— Broken Pathways & 404 Analysis
Top Pages Leading to a 404

Finds the most common pages users visited immediately before landing on a 404 error page. Helps identify broken internal links or navigation issues.

WITH page_views AS (
  SELECT
    user_pseudo_id,
    CAST((SELECT value.int_value 
          FROM UNNEST(event_params) 
          WHERE key = 'ga_session_id') AS INT64) AS ga_session_id,
    event_timestamp,
    (SELECT value.string_value 
     FROM UNNEST(event_params) 
     WHERE key = 'page_location') AS page_location
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'page_view'
),

broken_pages AS (
  SELECT
    user_pseudo_id,
    ga_session_id,
    event_timestamp AS broken_time,
    page_location AS broken_page
  FROM page_views
  WHERE page_location LIKE '%/404%'
),

valid_pages AS (
  SELECT
    user_pseudo_id,
    ga_session_id,
    event_timestamp AS valid_time,
    page_location AS previous_page
  FROM page_views
  WHERE page_location NOT LIKE '%/404%'
)

SELECT
  CONCAT(bp.user_pseudo_id, '-', CAST(bp.ga_session_id AS STRING)) AS session_id,
  bp.broken_page,
  vp.previous_page,
  COUNT(*) AS hits
FROM broken_pages bp
JOIN valid_pages vp
  ON bp.user_pseudo_id = vp.user_pseudo_id
  AND bp.ga_session_id = vp.ga_session_id
  AND vp.valid_time < bp.broken_time
WHERE NOT EXISTS (
  SELECT 1
  FROM valid_pages vp2
  WHERE vp2.user_pseudo_id = vp.user_pseudo_id
    AND vp2.ga_session_id = vp.ga_session_id
    AND vp2.valid_time < bp.broken_time
    AND vp2.valid_time > vp.valid_time
)
GROUP BY session_id, bp.broken_page, vp.previous_page
ORDER BY hits DESC
    
Traffic Volume to 404 Pages (Daily)

Tracks the daily volume of page views to 404 pages, allowing you to monitor spikes in broken traffic over time.

SELECT
  PARSE_DATE('%Y%m%d', event_date) AS date,
  COUNT(*) AS hits_404
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`,
     UNNEST(event_params) AS event_params
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND event_name = 'page_view'
  AND event_params.key = 'page_location'
  AND event_params.value.string_value LIKE '%/404%'
GROUP BY date
ORDER BY date
    
Broken Pages by Source / Campaign

Shows which marketing sources and mediums are driving users to 404 pages, helping isolate issues caused by campaign links.

SELECT
  traffic_source.source,
  traffic_source.medium,
  event_params.value.string_value AS broken_page,
  COUNT(*) AS hits
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`,
     UNNEST(event_params) AS event_params
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND event_name = 'page_view'
  AND event_params.key = 'page_location'
  AND event_params.value.string_value LIKE '%/404%'
GROUP BY traffic_source.source, traffic_source.medium, broken_page
ORDER BY hits DESC
    
๐Ÿ“Š Campaign & Attribution Issues
Malformed or Missing UTM Parameters

Identifies sessions where source or medium values are missing or marked as '(not set)', which can indicate tracking gaps or misconfigured campaign links.

SELECT DISTINCT
  traffic_source.source,
  traffic_source.medium,
  traffic_source.name AS campaign
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND (
    traffic_source.medium IS NULL
    OR traffic_source.medium = '(not set)'
    OR traffic_source.source IS NULL
    OR traffic_source.source = '(not set)'
  )
    
Too Many Unique Campaign/Source/Medium

Counts the number of unique campaign names, sources, and mediums in your data. A very high count could suggest tag pollution or inconsistent naming conventions.

SELECT
  COUNT(DISTINCT traffic_source.name) AS unique_campaigns,
  COUNT(DISTINCT traffic_source.source) AS unique_sources,
  COUNT(DISTINCT traffic_source.medium) AS unique_mediums
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    
Sessions with gclid but Not Google as Source

Finds sessions that include a gclid parameter (indicating Google Ads traffic) but don't list Google as the source, suggesting broken auto-tagging or incorrect attribution.

SELECT
  traffic_source.source,
  traffic_source.medium,
  (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'gclid') AS gclid,
  COUNT(*) AS sessions
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'gclid') IS NOT NULL
  AND LOWER(traffic_source.source) NOT LIKE '%google%'
GROUP BY traffic_source.source, traffic_source.medium, gclid
ORDER BY sessions DESC
    
ChatGPT Sessions by Day

Counts daily sessions where traffic_source.source contains 'chatgpt', helping you track AIโ€‘driven traffic trends over time.

WITH sessions AS (
  SELECT
    CAST(
      (SELECT value.int_value
       FROM UNNEST(event_params)
       WHERE key = 'ga_session_id'
       LIMIT 1
      ) AS INT64
    ) AS ga_session_id,
    DATE(TIMESTAMP_MICROS(MIN(event_timestamp))) AS session_date
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE
    LOWER(traffic_source.source) LIKE '%chatgpt%'
    AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY
    ga_session_id
)
SELECT
  session_date,
  COUNT(*) AS sessions_count
FROM
  sessions
GROUP BY
  session_date
ORDER BY
  session_date DESC;
    
๐Ÿ” Consent Mode & GDPR Debugging
Session-Level Consent vs Purchases & Cookieless Events

This query compares the volume of sessions by consent status (ad_storage / analytics_storage) with the number of purchases in each group. It also highlights completely anonymous pings with no user or session identifiersโ€”often resulting from users declining consent or browser limitations.

๐Ÿงผ Custom Event & Parameter Hygiene
Events with Excessive Parameters (High Avg)

Identifies events with a high average number of parameters, which may cause row bloat, higher processing costs, or tracking inefficiencies. Useful for flagging events that may be over-engineered.

SELECT
  event_name,
  ROUND(AVG(param_count), 2) AS avg_params,
  MAX(param_count) AS max_params,
  COUNT(*) AS total_events
FROM (
  SELECT
    event_name,
    ARRAY_LENGTH(event_params) AS param_count
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
)
GROUP BY event_name
HAVING avg_params > 10
ORDER BY avg_params DESC
    
Top 20 Custom Events (Excludes GA Defaults)

Returns the most frequent non-standard GA4 events, excluding built-in ones like page_view or session_start. Helps you review your most active custom tracking points.

SELECT
  event_name,
  COUNT(*) AS event_count
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND event_name NOT IN (
    'page_view', 'session_start', 'user_engagement', 
    'scroll', 'click', 'view_search_results', 
    'purchase', 'add_to_cart', 'begin_checkout'
  )
GROUP BY event_name
ORDER BY event_count DESC
LIMIT 20
    
Missing or Invalid eCommerce Parameters

Audits key eCommerce events like purchase, add_to_cart, etc., for missing or invalid required parameters. It also checks if items arrays are missing entirely from purchase events.

WITH ecommerce_events AS (
  SELECT
    event_name,
    items.item_id,
    items.item_name,
    items.item_brand,
    items.item_variant,
    items.item_category,
    items.price_in_usd,
    items.price,
    items.quantity,
    items.item_revenue_in_usd,
    items.item_revenue,
    items.item_refund_in_usd,
    items.item_refund,
    ecommerce.transaction_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`,
       UNNEST(items) AS items
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name IN (
      'add_to_cart', 'remove_from_cart', 'view_item',
      'begin_checkout', 'add_payment_info',
      'purchase', 'refund'
    )
),

enhanced_ecommerce_audit AS (
  SELECT
    event_name,
    param_key,
    COUNT(*) AS total_events,
    COUNTIF(
      (param_key IN ('item_id', 'item_name', 'item_brand', 'item_variant', 'item_category', 'transaction_id')
        AND param_type = 'STRING' AND (string_value IS NULL OR string_value = ''))
      OR (param_key IN ('price_in_usd', 'price', 'item_revenue_in_usd', 'item_revenue', 'item_refund_in_usd', 'item_refund')
        AND param_type = 'FLOAT64' AND float_value IS NULL)
      OR (param_key = 'quantity' AND param_type = 'INTEGER' AND float_value IS NULL
        AND event_name IN ('purchase', 'add_to_cart', 'remove_from_cart'))
      OR (param_key = 'quantity' AND param_type = 'INTEGER' AND event_name = 'refund'
        AND (float_value IS NULL OR float_value >= 0))
      OR (param_key IN ('item_revenue_in_usd', 'item_revenue')
        AND param_type = 'FLOAT64' AND event_name != 'purchase' AND float_value IS NOT NULL)
      OR (param_key IN ('item_refund_in_usd', 'item_refund')
        AND param_type = 'FLOAT64' AND event_name != 'refund' AND float_value IS NOT NULL)
    ) AS missing_or_invalid_count
  FROM ecommerce_events,
       UNNEST([
         STRUCT('item_id' AS param_key, 'STRING' AS param_type, item_id AS string_value, CAST(NULL AS FLOAT64) AS float_value),
         STRUCT('item_name' AS param_key, 'STRING' AS param_type, item_name AS string_value, CAST(NULL AS FLOAT64) AS float_value),
         STRUCT('item_brand' AS param_key, 'STRING' AS param_type, item_brand AS string_value, CAST(NULL AS FLOAT64) AS float_value),
         STRUCT('item_variant' AS param_key, 'STRING' AS param_type, item_variant AS string_value, CAST(NULL AS FLOAT64) AS float_value),
         STRUCT('item_category' AS param_key, 'STRING' AS param_type, item_category AS string_value, CAST(NULL AS FLOAT64) AS float_value),
         STRUCT('price_in_usd' AS param_key, 'FLOAT64' AS param_type, CAST(NULL AS STRING) AS string_value, price_in_usd AS float_value),
         STRUCT('price' AS param_key, 'FLOAT64' AS param_type, CAST(NULL AS STRING) AS string_value, price AS float_value),
         STRUCT('quantity' AS param_key, 'INTEGER' AS param_type, CAST(NULL AS STRING) AS string_value, CAST(quantity AS FLOAT64) AS float_value),
         STRUCT('item_revenue_in_usd' AS param_key, 'FLOAT64' AS param_type, CAST(NULL AS STRING) AS string_value, item_revenue_in_usd AS float_value),
         STRUCT('item_revenue' AS param_key, 'FLOAT64' AS param_type, CAST(NULL AS STRING) AS string_value, item_revenue AS float_value),
         STRUCT('item_refund_in_usd' AS param_key, 'FLOAT64' AS param_type, CAST(NULL AS STRING) AS string_value, item_refund_in_usd AS float_value),
         STRUCT('item_refund' AS param_key, 'FLOAT64' AS param_type, CAST(NULL AS STRING) AS string_value, item_refund AS float_value),
         STRUCT('transaction_id' AS param_key, 'STRING' AS param_type, transaction_id AS string_value, CAST(NULL AS FLOAT64) AS float_value)
       ]) AS param
  GROUP BY event_name, param_key
),

purchase_events AS (
  SELECT
    event_name,
    ARRAY_LENGTH(items) AS item_count,
    ecommerce.purchase_revenue_in_usd,
    ecommerce.purchase_revenue,
    ecommerce.transaction_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'purchase'
),

purchase_quality_checks AS (
  SELECT
    'purchase' AS event_name,
    param_key,
    COUNT(*) AS total_events,
    COUNTIF(
      (param_key = 'items_missing_or_empty' AND (item_count IS NULL OR item_count = 0))
      OR (param_key = 'purchase_revenue_in_usd' AND purchase_revenue_in_usd IS NULL)
      OR (param_key = 'purchase_revenue' AND purchase_revenue IS NULL)
      OR (param_key = 'transaction_id' AND (transaction_id IS NULL OR transaction_id = ''))
    ) AS missing_or_invalid_count
  FROM purchase_events,
       UNNEST([
         STRUCT('items_missing_or_empty' AS param_key),
         STRUCT('purchase_revenue_in_usd' AS param_key),
         STRUCT('purchase_revenue' AS param_key),
         STRUCT('transaction_id' AS param_key)
       ]) AS param
  GROUP BY event_name, param_key
)

SELECT
  event_name,
  param_key,
  'parameter_check' AS audit_type,
  total_events,
  missing_or_invalid_count,
  ROUND(SAFE_DIVIDE(missing_or_invalid_count, total_events) * 100, 2) AS pct_missing_or_invalid
FROM enhanced_ecommerce_audit

UNION ALL

SELECT
  event_name,
  param_key,
  'purchase_quality_check' AS audit_type,
  total_events,
  missing_or_invalid_count,
  ROUND(SAFE_DIVIDE(missing_or_invalid_count, total_events) * 100, 2)
FROM purchase_quality_checks

ORDER BY event_name, pct_missing_or_invalid DESC
    
Suspicious or Default Parameter Values

Finds parameters with placeholder, null, or otherwise suspicious values like 'undefined' or 'nan'. Useful for identifying tagging errors or poorly populated fields.

SELECT
  event_name,
  param.key AS parameter_name,
  param.value.string_value AS suspicious_value,
  COUNT(*) AS occurrences
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`,
     UNNEST(event_params) AS param
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND LOWER(param.value.string_value) IN ('null', 'undefined', 'nan', '')
GROUP BY event_name, parameter_name, suspicious_value
ORDER BY occurrences DESC
    
Page Views with Suspected PII

Extracts all page_view events where any parameter key is a common PII name (email, phone, etc.) or the value matches an email pattern.

-- PII in event_params
SELECT
  event_timestamp,
  user_pseudo_id,
  (SELECT value.string_value
   FROM UNNEST(event_params)
   WHERE key = 'page_location'
  ) AS page_location,
  ep.key AS param_key,
  ep.value.string_value AS param_value,
  CASE
    WHEN LOWER(ep.key) IN ('email', 'phone', 'name', 'address') THEN 'PII in param_key'
    WHEN REGEXP_CONTAINS(ep.value.string_value, r'[\w\.-]+@[\w\.-]+\.\w+') THEN 'PII in param_value (email)'
  END AS pii_source
FROM
  `{{PROJECT_ID}}.{{DATASET}}.events_*`,
  UNNEST(event_params) AS ep
WHERE
  event_name = 'page_view'
  AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND (
    LOWER(ep.key) IN ('email', 'phone', 'name', 'address')
    OR REGEXP_CONTAINS(ep.value.string_value, r'[\w\.-]+@[\w\.-]+\.\w+')
  )

UNION ALL

-- PII in page_location
SELECT
  event_timestamp,
  user_pseudo_id,
  (SELECT value.string_value
   FROM UNNEST(event_params)
   WHERE key = 'page_location'
  ) AS page_location,
  NULL AS param_key,
  NULL AS param_value,
  'PII in page_location' AS pii_source
FROM
  `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE
  event_name = 'page_view'
  AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
LIMIT 1000;
    
๐Ÿ–ฅ Device & Platform QA
Conversion Rate & Traffic Share by Device

Shows session counts, purchases, conversion rate, and traffic share for each device/platform/browser/OS combination. Use it to compare how well different devices convert and which contribute most to your traffic.

WITH sessions AS (
  SELECT
    user_pseudo_id,
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value FROM UNNEST(event_params)
      WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    platform,
    device.web_info.browser AS browser,
    device.operating_system AS os,
    device.category AS device_type,
    COUNTIF(event_name = 'purchase') AS purchases
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY user_pseudo_id, session_id, platform, browser, os, device_type
)

SELECT
  platform,
  browser,
  os,
  device_type,
  COUNT(*) AS sessions,
  SUM(purchases) AS conversions,
  ROUND(SAFE_DIVIDE(SUM(purchases), COUNT(*)) * 100, 2) AS conversion_rate,
  ROUND(SAFE_DIVIDE(COUNT(*), SUM(COUNT(*)) OVER ()) * 100, 2) AS traffic_share_pct
FROM sessions
GROUP BY platform, browser, os, device_type
ORDER BY traffic_share_pct DESC
    
Unusual Browser & OS Combinations

Flags unusual or suspicious combinations of browsers and operating systems that don't typically appear together. Helpful for bot detection or debugging client-side implementation errors.

WITH event_device_data AS (
  SELECT
    device.web_info.browser AS browser,
    device.operating_system AS operating_system,
    COUNT(*) AS event_count
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY browser, operating_system
),

known_valid_pairs AS (
  SELECT 'Chrome' AS browser, 'Windows' AS operating_system UNION ALL
  SELECT 'Chrome', 'macOS' UNION ALL
  SELECT 'Chrome', 'Android' UNION ALL
  SELECT 'Chrome', 'iOS' UNION ALL
  SELECT 'Chrome', 'Chrome OS' UNION ALL
  SELECT 'Safari', 'iOS' UNION ALL
  SELECT 'Safari', 'macOS' UNION ALL
  SELECT 'Safari', 'Macintosh' UNION ALL
  SELECT 'Safari (in-app)', 'iOS' UNION ALL
  SELECT 'Edge', 'Windows' UNION ALL
  SELECT 'Firefox', 'Windows' UNION ALL
  SELECT 'Firefox', 'macOS' UNION ALL
  SELECT 'Samsung Internet', 'Android' UNION ALL
  SELECT 'Internet Explorer', 'Windows' UNION ALL
  SELECT 'Android Webview', 'Android' UNION ALL
  SELECT 'Android Browser', 'Android'
)

SELECT
  e.browser,
  e.operating_system,
  e.event_count,
  IF(k.browser IS NULL, 'โš ๏ธ Unusual Combination', 'Valid') AS validity
FROM event_device_data e
LEFT JOIN known_valid_pairs k
  ON e.browser = k.browser AND e.operating_system = k.operating_system
ORDER BY validity DESC, e.event_count DESC
    
๐Ÿ›’ E-commerce & Transaction Validation
Duplicate transaction_ids across sessions or days

Identifies transaction IDs that appear in more than one session or on multiple dates โ€” a potential sign of deduplication issues or tracking bugs.

SELECT
  ecommerce.transaction_id AS transaction_id,
  COUNT(*) AS transaction_count,
  COUNT(DISTINCT CONCAT(user_pseudo_id, '-', CAST((
    SELECT value.int_value FROM UNNEST(event_params)
    WHERE key = 'ga_session_id'
  ) AS STRING))) AS unique_sessions,
  COUNT(DISTINCT event_date) AS unique_days
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND event_name = 'purchase'
GROUP BY ecommerce.transaction_id
HAVING transaction_count > 1
ORDER BY transaction_count DESC
    
Compare GA4 Revenue to Backend Source

Compares GA4 revenue values (based on the 'value' parameter in purchase events) against your backend data to identify discrepancies.

SELECT
  ga.transaction_id,
  ga.ga4_revenue,
  backend.revenue AS backend_revenue,
  ga.ga4_revenue - backend.revenue AS variance,
  ROUND(SAFE_DIVIDE(ga.ga4_revenue - backend.revenue, backend.revenue) * 100, 2) AS variance_pct
FROM (
  SELECT
    ecommerce.transaction_id AS transaction_id,
    SUM(ecommerce.purchase_revenue) AS ga4_revenue
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'purchase'
  GROUP BY ecommerce.transaction_id
) ga
JOIN `your_project.your_dataset.backend_transactions` AS backend
  ON ga.transaction_id = backend.transaction_id
ORDER BY ABS(variance_pct) DESC
    
Purchases Without Funnel Events (add_to_cart / begin_checkout)

Flags sessions where a purchase occurred but the user did not trigger an add_to_cart or begin_checkout event โ€” often caused by broken funnel instrumentation or third-party integrations.

WITH all_events_with_sessions AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value FROM UNNEST(event_params)
      WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    user_pseudo_id,
    event_name
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),

session_event_counts AS (
  SELECT
    session_id,
    COUNTIF(event_name = 'add_to_cart') AS add_to_cart_count,
    COUNTIF(event_name = 'begin_checkout') AS begin_checkout_count,
    COUNTIF(event_name = 'purchase') AS purchase_count
  FROM all_events_with_sessions
  GROUP BY session_id
),

purchasing_sessions AS (
  SELECT *
  FROM session_event_counts
  WHERE purchase_count > 0
)

SELECT
  COUNT(*) AS purchasing_sessions,
  COUNTIF(add_to_cart_count = 0) AS sessions_with_no_add_to_cart,
  COUNTIF(begin_checkout_count = 0) AS sessions_with_no_begin_checkout
FROM purchasing_sessions
    
Purchases Without Items

Checks for purchase events where the items array is either missing or empty. These transactions will show up in GA4 totals but wonโ€™t include product-level data.

SELECT
  COUNT(DISTINCT (
    SELECT value.string_value FROM UNNEST(event_params)
    WHERE key = 'transaction_id'
  )) AS transactions_without_items
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND event_name = 'purchase'
  AND (items IS NULL OR ARRAY_LENGTH(items) = 0)
    
๐Ÿ“ˆ Event & Session Quality
Session Event Volume Distribution

Categorises sessions based on the number of events they contain. Useful for identifying sessions that are unusually light or heavy in event activity, which can indicate bots, passive sessions, or broken tracking.

WITH sessions AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value
      FROM UNNEST(event_params)
      WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    COUNT(*) AS total_events
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY session_id, user_pseudo_id
),

bucketed_sessions AS (
  SELECT
    CASE
      WHEN total_events < 50 THEN '< 50'
      WHEN total_events BETWEEN 50 AND 99 THEN '50โ€“99'
      WHEN total_events BETWEEN 100 AND 149 THEN '100โ€“149'
      ELSE '150+'
    END AS event_bracket
  FROM sessions
)

SELECT
  event_bracket,
  COUNT(*) AS session_count
FROM bucketed_sessions
GROUP BY event_bracket
ORDER BY 
  CASE event_bracket
    WHEN '< 50' THEN 1
    WHEN '50โ€“99' THEN 2
    WHEN '100โ€“149' THEN 3
    WHEN '150+' THEN 4
  END
    
Sessions Starting With Each Event

Lists the first event triggered in each session and counts how often each event was the first interaction. Helpful for identifying unexpected session entry points or broken landing page logic.

WITH events_with_session_id AS (
  SELECT
    user_pseudo_id,
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params)
      WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    event_name,
    event_timestamp
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),

first_event_per_session AS (
  SELECT
    session_id,
    event_name AS first_event
  FROM (
    SELECT
      session_id,
      event_name,
      ROW_NUMBER() OVER (PARTITION BY session_id ORDER BY event_timestamp) AS rn
    FROM events_with_session_id
  )
  WHERE rn = 1
)

SELECT
  first_event AS event_name,
  COUNT(DISTINCT session_id) AS sessions_starting_with_event
FROM first_event_per_session
GROUP BY first_event
ORDER BY sessions_starting_with_event DESC
    
Sessions With No Engagement Events

Calculates the percentage of sessions that lack key engagement events like user_engagement, scroll, or click, and only have one or two total events. These may be bounces or sessions from passive users or bots.

WITH sessions AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value FROM UNNEST(event_params)
      WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    ARRAY_AGG(DISTINCT event_name) AS event_list,
    COUNT(DISTINCT event_name) AS unique_events
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY session_id
),

summary AS (
  SELECT
    COUNT(*) AS total_sessions,
    COUNTIF(
      NOT 'user_engagement' IN UNNEST(event_list)
      AND NOT 'scroll' IN UNNEST(event_list)
      AND NOT 'click' IN UNNEST(event_list)
      AND unique_events <= 2
    ) AS non_engaged_sessions
  FROM sessions
)

SELECT
  total_sessions,
  non_engaged_sessions,
  ROUND(SAFE_DIVIDE(non_engaged_sessions, total_sessions) * 100, 2) AS non_engaged_pct
FROM summary
    
๐Ÿ” MVT Test Conversion Analysis
MVT Test Performance by Variant

Analyses sessions with MVT Test events, showing the count of total sessions and sessions with specific conversion events (purchase, generate_lead, sign_up, begin_checkout, add_to_cart) by test code and variant. Helps evaluate the impact of multivariate test variants on user conversions.

WITH MvtEvents AS (
  SELECT
    event_name,
    (SELECT value.string_value 
     FROM UNNEST(event_params) 
     WHERE key = 'eventAction') AS eventAction,
    (SELECT value.string_value 
     FROM UNNEST(event_params) 
     WHERE key = 'eventLabel') AS eventLabel,
    CONCAT(
      user_pseudo_id, 
      '-', 
      CAST(
        (SELECT ep.value.int_value 
         FROM UNNEST(event_params) ep 
         WHERE ep.key = 'ga_session_id') AS STRING)
    ) AS session_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE event_name = 'MVT Test'
    AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),
ConversionSessions AS (
  SELECT
    CONCAT(
      user_pseudo_id, 
      '-', 
      CAST(
        (SELECT ep.value.int_value 
         FROM UNNEST(event_params) ep 
         WHERE ep.key = 'ga_session_id') AS STRING)
    ) AS session_id,
    MAX(CASE WHEN event_name = 'purchase' THEN 1 ELSE 0 END) AS has_purchase,
    MAX(CASE WHEN event_name = 'generate_lead' THEN 1 ELSE 0 END) AS has_generate_lead,
    MAX(CASE WHEN event_name = 'sign_up' THEN 1 ELSE 0 END) AS has_sign_up,
    MAX(CASE WHEN event_name = 'begin_checkout' THEN 1 ELSE 0 END) AS has_begin_checkout,
    MAX(CASE WHEN event_name = 'add_to_cart' THEN 1 ELSE 0 END) AS has_add_to_cart
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE event fรถrรคld

    event_name IN ('purchase', 'generate_lead', 'sign_up', 'begin_checkout', 'add_to_cart')
    AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY session_id
)
SELECT
  m.event_name,
  m.eventAction,
  m.eventLabel,
  COUNT(DISTINCT m.session_id) AS session_count,
  COUNT(DISTINCT CASE WHEN c.has_purchase = 1 THEN m.session_id END) AS sessions_with_purchase,
  COUNT(DISTINCT CASE WHEN c.has_generate_lead = 1 THEN m.session_id END) AS sessions_with_generate_lead,
  COUNT(DISTINCT CASE WHEN c.has_sign_up = 1 THEN m.session_id END) AS sessions_with_sign_up,
  COUNT(DISTINCT CASE WHEN c.has_begin_checkout = 1 THEN m.session_id END) AS sessions_with_begin_checkout,
  COUNT(DISTINCT CASE WHEN c.has_add_to_cart = 1 THEN m.session_id END) AS sessions_with_add_to_cart,
  COUNT(DISTINCT CASE 
    WHEN c.has_purchase = 1 OR c.has_generate_lead = 1 OR c.has_sign_up = 1 
    OR c.has_begin_checkout = 1 OR c.has_add_to_cart = 1 THEN m.session_id 
  END) AS sessions_with_any_conversion
FROM MvtEvents m
LEFT JOIN ConversionSessions c
  ON m.session_id = c.session_id
GROUP BY m.event_name, m.eventAction, m.eventLabel
ORDER BY session_count DESC
    
MVT Test Significance vs Control

Compares each MVT Test variantโ€™s conversion rates to the control variant, calculating statistical significance using a two-proportion Z-test. Identifies which variants perform significantly better or worse than the control for each conversion event (purchase, generate_lead, sign_up, begin_checkout, add_to_cart, and any conversion).

WITH MvtEvents AS (
  SELECT
    event_name,
    (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'eventAction') AS eventAction,
    (SELECT value.string_value FROM UNNEST(event_params) WHERE key = 'eventLabel') AS eventLabel,
    CONCAT(
      user_pseudo_id, 
      '-', 
      CAST(
        (SELECT ep.value.int_value FROM UNNEST(event_params) ep WHERE ep.key = 'ga_session_id') AS STRING)
    ) AS session_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE event_name = 'MVT Test'
    AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),
ConversionSessions AS (
  SELECT
    CONCAT(
      user_pseudo_id, 
      '-', 
      CAST(
        (SELECT ep.value.int_value FROM UNNEST(event_params) ep WHERE ep.key = 'ga_session_id') AS STRING)
    ) AS session_id,
    MAX(CASE WHEN event_name = 'purchase' THEN 1 ELSE 0 END) AS has_purchase,
    MAX(CASE WHEN event_name = 'generate_lead' THEN 1 ELSE 0 END) AS has_generate_lead,
    MAX(CASE WHEN event_name = 'sign_up' THEN 1 ELSE 0 END) AS has_sign_up,
    MAX(CASE WHEN event_name = 'begin_checkout' THEN 1 ELSE 0 END) AS has_begin_checkout,
    MAX(CASE WHEN event_name = 'add_to_cart' THEN 1 ELSE 0 END) AS has_add_to_cart
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE event_name IN ('purchase', 'generate_lead', 'sign_up', 'begin_checkout', 'add_to_cart')
    AND _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY session_id
),
BaseMetrics AS (
  SELECT
    m.event_name,
    m.eventAction,
    m.eventLabel,
    COUNT(DISTINCT m.session_id) AS session_count,
    COUNT(DISTINCT CASE WHEN c.has_purchase = 1 THEN m.session_id END) AS sessions_with_purchase,
    COUNT(DISTINCT CASE WHEN c.has_generate_lead = 1 THEN m.session_id END) AS sessions_with_generate_lead,
    COUNT(DISTINCT CASE WHEN c.has_sign_up = 1 THEN m.session_id END) AS sessions_with_sign_up,
    COUNT(DISTINCT CASE WHEN c.has_begin_checkout = 1 THEN m.session_id END) AS sessions_with_begin_checkout,
    COUNT(DISTINCT CASE WHEN c.has_add_to_cart = 1 THEN m.session_id END) AS sessions_with_add_to_cart,
    COUNT(DISTINCT CASE 
      WHEN c.has_purchase = 1 OR c.has_generate_lead = 1 OR c.has_sign_up = 1 
      OR c.has_begin_checkout = 1 OR c.has_add_to_cart = 1 THEN m.session_id 
    END) AS sessions_with_any_conversion
  FROM MvtEvents m
  LEFT JOIN ConversionSessions c
    ON m.session_id = c.session_id
  GROUP BY m.event_name, m.eventAction, m.eventLabel
),
ControlMetrics AS (
  SELECT
    session_count AS control_session_count,
    sessions_with_purchase AS control_sessions_with_purchase,
    sessions_with_generate_lead AS control_sessions_with_generate_lead,
    sessions_with_sign_up AS control_sessions_with_sign_up,
    sessions_with_begin_checkout AS control_sessions_with_begin_checkout,
    sessions_with_add_to_cart AS control_sessions_with_add_to_cart,
    sessions_with_any_conversion AS control_sessions_with_any_conversion
  FROM BaseMetrics
  WHERE eventLabel LIKE '%control%'
  LIMIT 1
),
SignificanceTests AS (
  SELECT
    b.event_name,
    b.eventAction,
    b.eventLabel,
    b.session_count,
    b.sessions_with_purchase,
    b.sessions_with_generate_lead,
    b.sessions_with_sign_up,
    b.sessions_with_begin_checkout,
    b.sessions_with_add_to_cart,
    b.sessions_with_any_conversion,
    b.sessions_with_purchase / b.session_count AS purchase_conversion_rate,
    c.control_sessions_with_purchase / c.control_session_count AS control_purchase_conversion_rate,
    CASE
      WHEN b.session_count > 0 AND c.control_session_count > 0 THEN
        1 - NORM.DIST(
          ABS(
            (b.sessions_with_purchase / b.session_count - c.control_sessions_with_purchase / c.control_session_count) /
            SQRT(
              (b.sessions_with_purchase / b.session_count * (1 - b.sessions_with_purchase / b.session_count) / b.session_count) +
              (c.control_sessions_with_purchase / c.control_session_count * (1 - c.control_sessions_with_purchase / c.control_session_count) / c.control_session_count)
            )
          ),
          0,
          1,
          TRUE
        ) * 2
      ELSE NULL
    END AS purchase_p_value,
    b.sessions_with_generate_lead / b.session_count AS generate_lead_conversion_rate,
    c.control_sessions_with_generate_lead / c.control_session_count AS control_generate_lead_conversion_rate,
    CASE
      WHEN b.session_count > 0 AND c.control_session_count > 0 THEN
        1 - NORM.DIST(
          ABS(
            (b.sessions_with_generate_lead / b.session_count - c.control_sessions_with_generate_lead / c.control_session_count) /
            SQRT(
              (b.sessions_with_generate_lead / b.session_count * (1 - b.sessions_with_generate_lead / b.session_count) / b.session_count) +
              (c.control_sessions_with_generate_lead / c.control_session_count * (1 - c.control_sessions_with_generate_lead / c.control_session_count) / c.control_session_count)
            )
          ),
          0,
          1,
          TRUE
        ) * 2
      ELSE NULL
    END AS generate_lead_p_value,
    b.sessions_with_sign_up / b.session_count AS sign_up_conversion_rate,
    c.control_sessions_with_sign_up / c.control_session_count AS control_sign_up_conversion_rate,
    CASE
      WHEN b.session_count > 0 AND c.control_session_count > 0 THEN
        1 - NORM.DIST(
          ABS(
            (b.sessions_with_sign_up / b.session_count - c.control_sessions_with_sign_up / c.control_session_count) /
            SQRT(
              (b.sessions_with_sign_up / b.session_count * (1 - b.sessions_with_sign_up / b.session_count) / b.session_count) +
              (c.control_sessions_with_sign_up / c.control_session_count * (1 - c.control_sessions_with_sign_up / c.control_session_count) / c.control_session_count)
            )
          ),
          0,
          1,
          TRUE
        ) * 2
      ELSE NULL
    END AS sign_up_p_value,
    b.sessions_with_begin_checkout / b.session_count AS begin_checkout_conversion_rate,
    c.control_sessions_with_begin_checkout / c.control_session_count AS control_begin_checkout_conversion_rate,
    CASE
      WHEN b.session_count > 0 AND c.control_session_count > 0 THEN
        1 - NORM.DIST(
          ABS(
            (b.sessions_with_begin_checkout / b.session_count - c.control_sessions_with_begin_checkout / c.control_session_count) /
            SQRT(
              (b.sessions_with_begin_checkout / b.session_count * (1 - b.sessions_with_begin_checkout / b.session_count) / b.session_count) +
              (c.control_sessions_with_begin_checkout / c.control_session_count * (1 - c.control_sessions_with_begin_checkout / c.control_session_count) / c.control_session_count)
            )
          ),
          0,
          1,
          TRUE
        ) * 2
      ELSE NULL
    END AS begin_checkout_p_value,
    b.sessions_with_add_to_cart / b.session_count AS add_to_cart_conversion_rate,
    c.control_sessions_with_add_to_cart / c.control_session_count AS control_add_to_cart_conversion_rate,
    CASE
      WHEN b.session_count > 0 AND c.control_session_count > 0 THEN
        1 - NORM.DIST(
          ABS(
            (b.sessions_with_add_to_cart / b.session_count - c.control_sessions_with_add_to_cart / c.control_session_count) /
            SQRT(
              (b.sessions_with_add_to_cart / b.session_count * (1 - b.sessions_with_add_to_cart / b.session_count) / b.session_count) +
              (c.control_sessions_with_add_to_cart / c.control_session_count * (1 - c.control_sessions_with_add_to_cart / c.control_session_count) / c.control_session_count)
            )
          ),
          0,
          1,
          TRUE
        ) * 2
      ELSE NULL
    END AS add_to_cart_p_value,
    b.sessions_with_any_conversion / b.session_count AS any_conversion_rate,
    c.control_sessions_with_any_conversion / c.control_session_count AS control_any_conversion_rate,
    CASE
      WHEN b.session_count > 0 AND c.control_session_count > 0 THEN
        1 - NORM.DIST(
          ABS(
            (b.sessions_with_any_conversion / b.session_count - c.control_sessions_with_any_conversion / c.control_session_count) /
            SQRT(
              (b.sessions_with_any_conversion / b.session_count * (1 - b.sessions_with_any_conversion / b.session_count) / b.session_count) +
              (c.control_sessions_with_any_conversion / c.control_session_count * (1 - c.control_sessions_with_any_conversion / c.control_session_count) / c.control_session_count)
            )
          ),
          0,
          1,
          TRUE
        ) * 2
      ELSE NULL
    END AS any_conversion_p_value
  FROM BaseMetrics b
  CROSS JOIN ControlMetrics c
  WHERE b.eventLabel NOT LIKE '%control%'
)
SELECT
  event_name,
  eventAction,
  eventLabel,
  session_count,
  sessions_with_purchase,
  purchase_conversion_rate,
  control_purchase_conversion_rate,
  purchase_p_value,
  CASE 
    WHEN purchase_p_value < 0.05 AND purchase_conversion_rate > control_purchase_conversion_rate THEN 'Significantly Better'
    WHEN purchase_p_value < 0.05 AND purchase_conversion_rate < control_purchase_conversion_rate THEN 'Significantly Worse'
    ELSE 'Not Significant'
  END AS purchase_significance,
  sessions_with_generate_lead,
  generate_lead_conversion_rate,
  control_generate_lead_conversion_rate,
  generate_lead_p_value,
  CASE 
    WHEN generate_lead_p_value < 0.05 AND generate_lead_conversion_rate > control_generate_lead_conversion_rate THEN 'Significantly Better'
    WHEN generate_lead_p_value < 0.05 AND generate_lead_conversion_rate < control_generate_lead_conversion_rate THEN 'Significantly Worse'
    ELSE 'Not Significant'
  END AS generate_lead_significance,
  sessions_with_sign_up,
  sign_up_conversion_rate,
  control_sign_up_conversion_rate,
  sign_up_p_value,
  CASE 
    WHEN sign_up_p_value < 0.05 AND sign_up_conversion_rate > control_sign_up_conversion_rate THEN 'Significantly Better'
    WHEN sign_up_p_value < 0.05 AND sign_up_conversion_rate < control_sign_up_conversion_rate THEN 'Significantly Worse'
    ELSE 'Not Significant'
  END AS sign_up_significance,
  sessions_with_begin_checkout,
  begin_checkout_conversion_rate,
  control_begin_checkout_conversion_rate,
  begin_checkout_p_value,
  CASE 
    WHEN begin_checkout_p_value < 0.05 AND begin_checkout_conversion_rate > control_begin_checkout_conversion_rate THEN 'Significantly Better'
    WHEN begin_checkout_p_value < 0.05 AND begin_checkout_conversion_rate < control_begin_checkout_conversion_rate THEN 'Significantly Worse'
    ELSE 'Not Significant'
  END AS begin_checkout_significance,
  sessions_with_add_to_cart,
  add_to_cart_conversion_rate,
  control_add_to_cart_conversion_rate,
  add_to_cart_p_value,
  CASE 
    WHEN add_to_cart_p_value < 0.05 AND add_to_cart_conversion_rate > control_add_to_cart_conversion_rate THEN 'Significantly Better'
    WHEN add_to_cart_p_value < 0.05 AND add_to_cart_conversion_rate < control_add_to_cart_conversion_rate THEN 'Significantly Worse'
    ELSE 'Not Significant'
  END AS add_to_cart_significance,
  sessions_with_any_conversion,
  any_conversion_rate,
  control_any_conversion_rate,
  any_conversion_p_value,
  CASE 
    WHEN any_conversion_p_value < 0.05 AND any_conversion_rate > control_any_conversion_rate THEN 'Significantly Better'
    WHEN any_conversion_p_value < 0.05 AND any_conversion_rate < control_any_conversion_rate THEN 'Significantly Worse'
    ELSE 'Not Significant'
  END AS any_conversion_significance
FROM SignificanceTests
ORDER BY session_count DESC
    
โฐ Seasonality Analysis
Monthly Seasonality Multipliers

Aggregates all yearsโ€™ data by calendar month to compute average sessions and transactions per month, then derives seasonality multipliers (and conversionโ€‘rate index) rounded to two decimals.

WITH
  sessions_per_month_year AS (
    SELECT
      FORMAT_TIMESTAMP('%Y-%m', TIMESTAMP_MICROS(event_timestamp)) AS month_year,
      EXTRACT(MONTH FROM TIMESTAMP_MICROS(event_timestamp))       AS month_num,
      FORMAT_TIMESTAMP('%B', TIMESTAMP_MICROS(event_timestamp))   AS month_name,
      COUNT(DISTINCT CONCAT(
        user_pseudo_id, '-',
        CAST((SELECT value.int_value FROM UNNEST(event_params) WHERE key = 'ga_session_id') AS STRING)
      ))                                                          AS sessions_count
    FROM
      `{{PROJECT_ID}}.{{DATASET}}.events_*`
    WHERE
      event_name = 'session_start'
      AND TIMESTAMP_MICROS(event_timestamp) BETWEEN 
        TIMESTAMP_SUB(CURRENT_DATE, INTERVAL 365 DAY) AND CURRENT_DATE
    GROUP BY month_year, month_num, month_name
  ),
  transactions_per_month_year AS (
    SELECT
      FORMAT_TIMESTAMP('%Y-%m', TIMESTAMP_MICROS(event_timestamp)) AS month_year,
      EXTRACT(MONTH FROM TIMESTAMP_MICROS(event_timestamp))       AS month_num,
      FORMAT_TIMESTAMP('%B', TIMESTAMP_MICROS(event_timestamp))   AS month_name,
      COUNT(*)                                                    AS transactions_count
    FROM 
      `{{PROJECT_ID}}.{{DATASET}}.events_*`
    WHERE
      event_name = 'purchase'
      AND TIMESTAMP_MICROS(event_timestamp) BETWEEN 
        TIMESTAMP_SUB(CURRENT_DATE, INTERVAL 365 DAY) AND CURRENT_DATE
    GROUP BY month_year, month_num, month_name
  ),
  combined_month_year AS (
    SELECT
      s.month_year,
      s.month_num,
      s.month_name,
      s.sessions_count,
      COALESCE(t.transactions_count, 0) AS transactions_count
    FROM sessions_per_month_year s
    LEFT JOIN transactions_per_month_year t
      USING(month_year, month_num, month_name)
  ),
  stats AS (
    SELECT
      COUNT(*)                                      AS months_count,
      SAFE_DIVIDE(SUM(sessions_count), COUNT(*))    AS overall_avg_sessions,
      SAFE_DIVIDE(SUM(transactions_count), COUNT(*)) AS overall_avg_transactions,
      SAFE_DIVIDE(
        SUM(transactions_count),
        SUM(sessions_count)
      )                                             AS overall_avg_conv_rate
    FROM combined_month_year
  ),
  agg_by_month AS (
    SELECT
      month_num,
      month_name,
      AVG(sessions_count)     AS avg_sessions_by_month,
      AVG(transactions_count) AS avg_transactions_by_month,
      SAFE_DIVIDE(
        AVG(transactions_count),
        AVG(sessions_count)
      )                         AS avg_conv_rate_by_month
    FROM combined_month_year
    GROUP BY month_num, month_name
  )
SELECT
  month_name                                        AS month,
  ROUND(
    SAFE_DIVIDE(avg_sessions_by_month, stats.overall_avg_sessions),
    2
  )                                                  AS session_seasonality,
  ROUND(
    SAFE_DIVIDE(avg_transactions_by_month, stats.overall_avg_transactions),
    2
  )                                                  AS transaction_seasonality,
  ROUND(
    SAFE_DIVIDE(avg_conv_rate_by_month, stats.overall_avg_conv_rate),
    2
  )                                                  AS conversion_rate_seasonality
FROM agg_by_month
CROSS JOIN stats
ORDER BY month_num;
    
๐Ÿ”ฎ Strategic Insights
Conversion Propensity by Event

Calculates how often sessions that include each event lead to a purchase. This is useful for identifying which events are most correlated with conversions and can help prioritise key interactions in the funnel.

-- Conversion Propensity by Event
WITH session_purchases AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params) WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    MAX(IF(event_name = 'purchase', 1, 0)) AS converted
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY session_id
),

event_in_sessions AS (
  SELECT
    event_name,
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params) WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),

event_conversion_propensity AS (
  SELECT
    eis.event_name,
    COUNTIF(sp.converted = 1) AS conversions,
    COUNT(*) AS total_events,
    SAFE_DIVIDE(COUNTIF(sp.converted = 1), COUNT(*)) AS conversion_propensity
  FROM event_in_sessions eis
  JOIN session_purchases sp ON eis.session_id = sp.session_id
  GROUP BY eis.event_name
)

SELECT *
FROM event_conversion_propensity
ORDER BY conversion_propensity DESC
    
Total & Average Revenue When Event Present

Assigns fractional revenue to each event based on the number of events in a session. Helps reveal which events tend to appear in higher-value sessions and gives a proxy for their revenue contribution.

WITH sessions_with_revenue AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params) WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    SUM(ecommerce.purchase_revenue) AS session_revenue
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'purchase'
  GROUP BY session_id
),

session_event_counts AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params) WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    COUNT(*) AS event_count
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  GROUP BY session_id
),

events_with_context AS (
  SELECT
    event_name,
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params) WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
),

combined AS (
  SELECT
    e.event_name,
    s.session_revenue,
    c.event_count,
    SAFE_DIVIDE(s.session_revenue, c.event_count) AS fractional_revenue
  FROM events_with_context e
  LEFT JOIN sessions_with_revenue s ON e.session_id = s.session_id
  LEFT JOIN session_event_counts c ON e.session_id = c.session_id
  WHERE s.session_revenue IS NOT NULL AND c.event_count > 0
)

SELECT
  event_name,
  COUNT(*) AS event_count,
  ROUND(SUM(fractional_revenue), 2) AS total_revenue_when_present,
  ROUND(SAFE_DIVIDE(SUM(fractional_revenue), COUNT(*)), 2) AS avg_revenue_per_event_instance
FROM combined
GROUP BY event_name
ORDER BY avg_revenue_per_event_instance DESC
    
Event Presence in Purchasing vs Non-Purchasing Sessions

Compares how frequently each event appears in purchasing vs non-purchasing sessions. Useful for spotting events that are common in successful user journeys versus those that arenโ€™t contributing to conversions.

-- Event Appearance in Purchase vs Non-Purchase Sessions
WITH session_event_map AS (
  SELECT
    user_pseudo_id,
    CONCAT(user_pseudo_id, '-', CAST((
      SELECT value.int_value 
      FROM UNNEST(event_params)
      WHERE key = 'ga_session_id'
    ) AS STRING)) AS session_id,
    event_name,
    MAX(IF(event_name = 'purchase', 1, 0)) OVER (
      PARTITION BY user_pseudo_id, CONCAT(user_pseudo_id, '-', CAST((
        SELECT value.int_value 
        FROM UNNEST(event_params)
        WHERE key = 'ga_session_id'
      ) AS STRING))
    ) AS purchase_session
  FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
)

SELECT
  event_name,
  COUNTIF(purchase_session = 1) AS events_in_purchasing_sessions,
  COUNTIF(purchase_session = 0) AS events_in_non_purchasing_sessions,
  ROUND(SAFE_DIVIDE(COUNTIF(purchase_session = 1), COUNT(*)), 4) AS purchase_session_ratio
FROM session_event_map
GROUP BY event_name
ORDER BY purchase_session_ratio DESC
    
Landing Page Performance by Conversion Events (Session-Based)

Shows landing pages by session count and calculates how many sessions resulted in key conversion events (purchase, lead, sign-up, etc.). Each event is counted once per session. The total conversion count is the sum of all sessions that triggered each event type. Also includes the purchase conversion rate (purchase sessions / total sessions).

-- Landing Page Performance by Conversion Events (Session-Based)
WITH base_events AS (
  SELECT
    (SELECT ep.value.string_value FROM UNNEST(event_params) ep WHERE ep.key = 'page_location') AS page_location,
    CONCAT(
      REGEXP_REPLACE(
        REGEXP_EXTRACT(
          (SELECT ep.value.string_value FROM UNNEST(event_params) ep WHERE ep.key = 'page_location'),
          r'https?://[^/]+(/[^?]*)'
        ),
        r'/$',
        ''
      ),
      '/'
    ) AS page_path,
    CONCAT(user_pseudo_id, '-', CAST(
      (SELECT ep.value.int_value FROM UNNEST(event_params) ep WHERE ep.key = 'ga_session_id') AS STRING)) AS full_session_id
  FROM
    `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'session_start'
),

conversion_events AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST(
      (SELECT ep.value.int_value FROM UNNEST(event_params) ep WHERE ep.key = 'ga_session_id') AS STRING)) AS full_session_id,
    event_name
  FROM
    `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name IN ('purchase', 'generate_lead', 'sign_up', 'begin_checkout', 'add_to_cart')
)

SELECT
  base.page_path,
  base.page_location,
  COUNT(DISTINCT base.full_session_id) AS sessions,

  COUNT(DISTINCT CASE WHEN conv.event_name = 'purchase' THEN base.full_session_id END) AS purchases,
  COUNT(DISTINCT CASE WHEN conv.event_name = 'generate_lead' THEN base.full_session_id END) AS leads,
  COUNT(DISTINCT CASE WHEN conv.event_name = 'sign_up' THEN base.full_session_id END) AS sign_ups,
  COUNT(DISTINCT CASE WHEN conv.event_name = 'begin_checkout' THEN base.full_session_id END) AS begin_checkouts,
  COUNT(DISTINCT CASE WHEN conv.event_name = 'add_to_cart' THEN base.full_session_id END) AS add_to_carts,

  (
    COUNT(DISTINCT CASE WHEN conv.event_name = 'purchase' THEN base.full_session_id END) +
    COUNT(DISTINCT CASE WHEN conv.event_name = 'generate_lead' THEN base.full_session_id END) +
    COUNT(DISTINCT CASE WHEN conv.event_name = 'sign_up' THEN base.full_session_id END) +
    COUNT(DISTINCT CASE WHEN conv.event_name = 'begin_checkout' THEN base.full_session_id END) +
    COUNT(DISTINCT CASE WHEN conv.event_name = 'add_to_cart' THEN base.full_session_id END)
  ) AS total_conversions,

  SAFE_DIVIDE(
    COUNT(DISTINCT CASE WHEN conv.event_name = 'purchase' THEN base.full_session_id END),
    COUNT(DISTINCT base.full_session_id)
  ) AS purchase_conversion_rate

FROM
  base_events AS base
LEFT JOIN
  conversion_events AS conv
ON
  base.full_session_id = conv.full_session_id
GROUP BY
  base.page_path,
  base.page_location
Product Detail Page Conversion Rate

Shows the number of sessions in which each product was viewed on a product detail page and how many of those sessions went on to purchase the same product. This helps measure product-level conversion performance from view to purchase.

-- Product Detail Page Conversion Rate
WITH product_views AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST(
      (SELECT value.int_value 
       FROM UNNEST(event_params)
       WHERE key = 'ga_session_id') AS STRING)) AS full_session_id,
    item.item_name AS product_name,
    item.item_id AS product_id
  FROM
    `{{PROJECT_ID}}.{{DATASET}}.events_*`,
    UNNEST(items) AS item
  WHERE
    _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'view_item'
),

product_purchases AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST(
      (SELECT value.int_value 
       FROM UNNEST(event_params)
       WHERE key = 'ga_session_id') AS STRING)) AS full_session_id,
    item.item_name AS product_name,
    item.item_id AS product_id
  FROM
    `{{PROJECT_ID}}.{{DATASET}}.events_*`,
    UNNEST(items) AS item
  WHERE
    _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'purchase'
)

SELECT
  v.product_id,
  v.product_name,
  COUNT(DISTINCT v.full_session_id) AS sessions_viewed,
  COUNT(DISTINCT p.full_session_id) AS sessions_purchased,
  SAFE_DIVIDE(COUNT(DISTINCT p.full_session_id), COUNT(DISTINCT v.full_session_id)) AS conversion_rate
FROM
  product_views v
LEFT JOIN
  product_purchases p
ON
  v.full_session_id = p.full_session_id AND v.product_id = p.product_id
GROUP BY
  v.product_id,
  v.product_name
ORDER BY
  conversion_rate DESC
Exit Rate by Page Path

Calculates the number of sessions that viewed each page path and how many of those sessions ended (exited) on that page. Exit rate is computed as the percentage of sessions that had this page as their final page view. Page paths are derived from page_location by stripping query parameters and enforcing consistent slashes.

-- Exit Rate by Page Path
WITH page_views AS (
  SELECT
    CONCAT(user_pseudo_id, '-', CAST(
      (SELECT value.int_value 
       FROM UNNEST(event_params) 
       WHERE key = 'ga_session_id') AS STRING)) AS full_session_id,
    TIMESTAMP_MICROS(event_timestamp) AS event_time,
    (SELECT value.string_value 
     FROM UNNEST(event_params) 
     WHERE key = 'page_location') AS page_location
  FROM
    `{{PROJECT_ID}}.{{DATASET}}.events_*`
  WHERE
    _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
    AND event_name = 'page_view'
),

cleaned_pages AS (
  SELECT
    full_session_id,
    event_time,
    page_location,
    CONCAT(
      REGEXP_REPLACE(
        REGEXP_EXTRACT(page_location, r'https?://[^/]+(/[^?]*)'),
        r'/$', ''
      ),
      '/'
    ) AS page_path
  FROM page_views
  WHERE page_location IS NOT NULL
),

ranked_pages AS (
  SELECT *,
    ROW_NUMBER() OVER (PARTITION BY full_session_id ORDER BY event_time DESC) AS page_rank
  FROM cleaned_pages
),

exit_pages AS (
  SELECT DISTINCT full_session_id, page_path
  FROM ranked_pages
  WHERE page_rank = 1
)

SELECT
  pv.page_path,
  COUNT(DISTINCT pv.full_session_id) AS sessions_viewed,
  COUNT(DISTINCT ep.full_session_id) AS sessions_exited,
  SAFE_DIVIDE(COUNT(DISTINCT ep.full_session_id), COUNT(DISTINCT pv.full_session_id)) AS exit_rate
FROM
  cleaned_pages pv
LEFT JOIN
  exit_pages ep
ON
  pv.full_session_id = ep.full_session_id AND pv.page_path = ep.page_path
GROUP BY
  pv.page_path
HAVING
  COUNT(DISTINCT pv.full_session_id) > 0
ORDER BY
  sessions_viewed DESC
๐Ÿ‘ค User Identification & Login
Anonymous Key Events Breakdown

Identifies how often key actions (e.g. purchases, sign-ups, form submissions) occur without a recognised user ID. Useful for diagnosing gaps in login or identity stitching.

SELECT
  event_name,
  COUNTIF(user_id IS NULL) AS anonymous_event_count,
  COUNTIF(user_id IS NOT NULL) AS identified_event_count,
  ROUND(SAFE_DIVIDE(COUNTIF(user_id IS NULL), COUNT(*)) * 100, 2) AS percent_anonymous
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
  AND event_name IN ('purchase', 'generate_lead', 'login', 'sign_up', 'form_submit')
GROUP BY event_name
ORDER BY percent_anonymous DESC
    
Missing Identifier Coverage by Event

Evaluates how often each event is missing key user identifiers (`user_id`, `user_pseudo_id`, or both). Helps identify where identity tracking may be incomplete or broken across specific events.

SELECT
  event_name,
  COUNTIF(user_id IS NULL) AS null_user_id,
  COUNTIF(user_id IS NOT NULL) AS valid_user_id,
  COUNTIF(user_pseudo_id IS NULL) AS null_user_pseudo_id,
  COUNTIF(user_pseudo_id IS NOT NULL) AS valid_user_pseudo_id,
  COUNTIF(user_id IS NULL AND user_pseudo_id IS NULL) AS null_both_ids
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
GROUP BY event_name
ORDER BY null_both_ids DESC
    
Sessions with Multiple Logins

Flags sessions where a user triggered the `login` event more than once. May indicate tracking duplication or issues with login/logout behaviour on your site.

SELECT
  CONCAT(user_pseudo_id, '-', CAST((
    SELECT value.int_value
    FROM UNNEST(event_params)
    WHERE key = 'ga_session_id'
  ) AS STRING)) AS session_id,
  user_pseudo_id,
  COUNTIF(event_name = 'login') AS login_count
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
GROUP BY session_id, user_pseudo_id
HAVING login_count > 1
ORDER BY login_count DESC
    
Multiple user_ids for One user_pseudo_id

Detects if a single anonymous user (`user_pseudo_id`) has been linked to multiple `user_id` valuesโ€”common when login IDs change or if multiple users share a device.

SELECT
  user_pseudo_id,
  ARRAY_AGG(DISTINCT user_id IGNORE NULLS) AS user_ids
FROM `{{PROJECT_ID}}.{{DATASET}}.events_*`
WHERE _TABLE_SUFFIX BETWEEN '{{START_DATE}}' AND '{{END_DATE}}'
GROUP BY user_pseudo_id
HAVING ARRAY_LENGTH(user_ids) > 1
    

Contact us

As a specialist analytics and CRO agency, we work closely with clients to make data a driving force behind smarter decisions and stronger performance.
Thank you! Your submission has been received!
Oops! Something went wrong while submitting the form.