BigQuery UDF for A/B-testing

4 min readMar 25, 2023

Automate the A/B-test Calculator in BigQuery via User Defined Function

Create a dataset in your BigQuery project named «udf»,
Create several persistent UDF:

CREATE OR REPLACE FUNCTION udf.rate_of_change(before FLOAT64, after FLOAT64)
AS ( (after - before) / before * 100 );

CREATE OR REPLACE FUNCTION udf.z_score(n_A FLOAT64, conv_A FLOAT64, n_B FLOAT64, conv_B FLOAT64, confidence_level FLOAT64)
AS ( 
    ROUND( (conv_B/n_B - conv_A/n_A)
           / SQRT( ((SQRT((conv_B/n_B) * (1 - (conv_B/n_B))) * SQRT((conv_B/n_B) * (1 - (conv_B/n_B))) ) / n_B)
               + ((SQRT((conv_A/n_A) * (1 - (conv_A/n_A))) * SQRT((conv_A/n_A) * (1 - (conv_A/n_A))) ) / n_A) 
          ) , 2)
);

CREATE OR REPLACE FUNCTION udf.normal_cdf(x FLOAT64, mean FLOAT64, std FLOAT64)
RETURNS FLOAT64
LANGUAGE js
OPTIONS (library=["gs://isb-cgc-bq-library/jstat/dist/jstat.min.js"], description="Returns the value of x in the cdf of the Normal distribution with parameters mean and std\nPARAMETERS: x (the value, FLOAT64), mean ( mean, FLOAT64), and std (standard deviation, FLOAT64)\nOUTPUT: the cdf of the normal distribution\nVERSION: 1.0\nEXAMPLE: This function is used by the udf isb-cgc-bq.functions.mannwhitneyu_v1_0")
AS """
//return jStat.ztest(z, sides); //jStat.ztest(z, sides)
return jStat.normal.cdf( x, mean, std )
""";

CREATE OR REPLACE FUNCTION udf.p_value(n_A FLOAT64, conv_A FLOAT64, n_B FLOAT64, conv_B FLOAT64, confidence_level FLOAT64)
AS (
     (
      SELECT (
              ROUND(
                     (1 - udf.normal_cdf( udf.z_score(n_A, conv_A, n_B, conv_B, confidence_level), 0, 1 ) ) * 2  -- 양측 검정
                   , 4)
             )
     )
);

CREATE OR REPLACE FUNCTION udf.ab_result(n_A FLOAT64, conv_A FLOAT64, n_B FLOAT64, conv_B FLOAT64, confidence_level FLOAT64)
AS (
     (
     SELECT CASE WHEN udf.p_value(n_A, conv_A, n_B, conv_B, confidence_level) >= (1 - confidence_level/100)
                 THEN 'Conversion A : ' || CAST(ROUND(conv_A/n_A * 100, 1) AS STRING) || ' (' || CAST(conv_A AS STRING) || '/' || CAST(n_A AS STRING) ||')\nConversion B : ' || CAST(ROUND(conv_B/n_B * 100, 1) AS STRING)|| ' (' || CAST(conv_B AS STRING) || '/' || CAST(n_B AS STRING) || ')\nThe result is not significant. There is no real difference in performance between A and B or you need to collect more data.\n P value : ' || CAST(udf.p_value(n_A, conv_A, n_B, conv_B, confidence_level) AS STRING)
                 WHEN udf.p_value(n_A, conv_A, n_B, conv_B, confidence_level) <= (1 - confidence_level/100) AND conv_A/n_A >= conv_B/n_B
                 THEN 'Conversion A : ' || CAST(ROUND(conv_A/n_A * 100, 1) AS STRING) || ' (' || CAST(conv_A AS STRING) || '/' || CAST(n_A AS STRING) ||')\nConversion B : ' || CAST(ROUND(conv_B/n_B * 100, 1) AS STRING)|| ' (' || CAST(conv_B AS STRING) || '/' || CAST(n_B AS STRING) || ')\nThe result is significant. Test A converted '  || CAST(ROUND((conv_A/n_A - conv_B/n_B) / (conv_B/n_B) * 100, 1) AS STRING) || '% better than Test B. \n ' || CAST(confidence_level AS STRING) || '% confident that this result is a consequence of the changes and not a result of random chance.\n P value : ' || CAST(udf.p_value(n_A, conv_A, n_B, conv_B, confidence_level) AS STRING)
                 WHEN udf.p_value(n_A, conv_A, n_B, conv_B, confidence_level) <= (1 - confidence_level/100) AND conv_A/n_A <= conv_B/n_B
                 THEN 'Conversion A : ' || CAST(ROUND(conv_A/n_A * 100, 1) AS STRING) || ' (' || CAST(conv_A AS STRING) || '/' || CAST(n_A AS STRING) ||')\nConversion B : ' || CAST(ROUND(conv_B/n_B * 100, 1) AS STRING)|| ' (' || CAST(conv_B AS STRING) || '/' || CAST(n_B AS STRING) || ')\nThe result is significant. Test B converted ' || CAST(ROUND((conv_B/n_B - conv_A/n_A) / (conv_A/n_A) * 100, 1) AS STRING) || '% better than Test A. \n' || CAST(confidence_level AS STRING) || '% confident that this result is a consequence of the changes and not a result of random chance.\n P value : ' || CAST(udf.p_value(n_A, conv_A, n_B, conv_B, confidence_level) AS STRING) 
                 END
        )
);

As a result, we should get the following persistent function:

3. Aggregated data of the results of the experiment can be transferred to the query:

with ab as (
  select 70000 as n_A, 1400 as conv_A, 70000 as n_B, 1560 as conv_B, 95 as confidence_level
)

select *, udf.ab_result(n_A, conv_A, n_B, conv_B, confidence_level) as result
from ab

and receive the result in BigQuery:

which matches the result of the A/B-test Calculator

Now we can automate the calculations of experimental results and use them for visualization in Looker Studio.

More useful UDFs can be found in the community.

If you liked it, subscribe to my BigQuery Insights Telegram channel, where I share interesting analytics solutions in Google BigQuery.

BigQuery UDF for A/B-testing

Written by Aleksandr Osiyuk