Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- """Chi-square Power, effect size and sample size calculations."""
- from scipy.stats import chi2
- from scipy.stats import ncx2
- from scipy.optimize import fsolve
- from math import sqrt
- def effect_size(H_1, H_0=None):
- """Effect size for Chi-square tests.
- Let :math:`(X_1, X_2, \\ldots, X_k)` be a multinomial random variable with
- parameters :math:`p_1, p_2, \\ldots, p_k`. Suppose we wish to test
- .. math:: H_0: p_i = p_{i,0}, \\qquad i = 1, 2, \\ldots, k
- against :math:`H_1`: not all :math:`p`'s given by :math:`H_0`.
- Then the effect size :math:`w` is given by
- .. math:: w = \\sqrt{\\sum_{i=1}^k \\frac{(p_{i,1} - p_{i,0})^2}{p_{i,0}}}
- :param H_1: Alternative hypothesis. Array of probabilities. I.e., array of floats each in the range from 0.0 to 1.0, which sum to 1.0.
- :param H_0: Optional Null hypothesis. Array of probabilities. I.e., array of floats each in the range from 0.0 to 1.0, which sum to 1.0. Must be same length as H_1. Defaults to equiprobable distribution: [1.0/float(len(H_1))]*len(H_1).
- :returns: Effect size
- """
- k = len(H_1)
- # Default H_0 is equiprobable outcomes:
- if H_0 is None:
- H_0 = [1.0/float(k)]*k
- return sqrt(sum([(H_1[i] - H_0[i])**2 / H_0[i] for i in range(k)]))
- def power(w, n, df, alpha=0.05):
- """Power of Chi-square tests.
- .. math:: 1 - \\beta = 1 - F_{\\mathit{df},\\mathit{nc}}(x_{\\mathit{crit}})
- where
- - alpha is the significance level :math:`\\alpha`, i.e., the probability of
- a Type I error (rejecting the Null Hypothesis :math:`H_0` when it is true)
- - :math:`\\beta` is the probability of a Type II error (not rejecting
- the Null Hypothesis :math:`H_0` when it is false)
- - :math:`1 - \\beta` is the power, i.e., the probability of rejecting the
- Null Hypothesis :math:`H_0` when it is false.
- - :math:`F` is the cumulative distribution function (cdf) for the noncentral
- chi-square distribution
- - :math:`\\mathit{nc} = n w^2` is the noncentrality parameter
- - :math:`n` is the sample size
- - :math:`w` is the effect size
- - :math:`x_{\\mathit{crit}}` is the :math:`\\chi^2(\\mathit{df})` critical
- value for the given value of alpha, i.e.,
- :math:`P(\\chi^2(\\mathit{df}) \\leq x_{\\mathit{crit}}) = 1 - \\alpha`
- :param w: Effect size
- :param n: Sample size
- :param df: Degrees of freedom
- :param alpha: Significance level. Defaults to 0.05.
- :returns: Statistical power.
- """
- nc = float(n) * w * w
- # print("[debug] noncentrality paramter is {0:f}".format(nc))
- x = chi2.ppf(1.0 - alpha, df)
- # print("[debug] chi2.cdf(x={0:f},df={1:d})={2:f}".format(x,df,chi2.cdf(x,df)))
- return 1.0 - ncx2.cdf(x, df, nc)
- def sample_size(w, df, alpha=0.05, pwr=0.80):
- """Minimum sample size required to obtain power for Chi-square tests.
- :param w: Effect size.
- :param df: Degrees of freedom.
- :param alpha: Significance level. Defaults to 0.05.
- :param pwr: Statistical power. Defaults to 0.8.
- :returns: Minimum sample size to achieve given statistical power.
- """
- def fn(n):
- return power(w, n, df, alpha) - pwr
- return fsolve(fn, 100)
- def Example():
- """**Example 1** excerpted from [Gunt77]_, p. 83.
- The example has two parts:
- 1. Suppose that we wish to test a six-sided die for unbiasedness so that
- all six :math:`p_{i,0}`'s are :math:`\\frac{1}{6}`. If side six is loaded
- so that actually :math:`p_{6,1} = \\frac{1}{4}` and the other five sides
- retain equal probability of occurrrence, find the power for this
- alternative if the significance level is 0.05 and :math:`n=120`.
- 2. Then find the minimum *n* needed to make the power at the alternative
- be at least 0.90.
- *Solution*:
- 1. The noncentrality parameter is :math:`\\mathit{nc} = \\frac{n}{20} = 6`,
- which implies effect size is :math:`w = \\sqrt{\\frac{1}{20}} = 0.223607`
- and the power is approximately 0.432876.
- 2. To achieve power of 0.90 we need :math:`n \\geq 329.389213`.
- .. rubric:: References
- .. [Gunt77] Guenther, William C. "Power and Sample Size for Approximate
- Chi-Square Tests." *The American Statistician*, Vol. 31, No. 2
- (May, 1977), pp. 83-85. www2.stat.duke.edu/~zo2/dropbox/pdf/2683047.pdf
- """
- H_1 = [0.75/5]*5
- H_1.append(0.25)
- # print("[debug] Sum of probs of H_1 is {0:f}".format(sum(H_1)))
- w = effect_size(H_1)
- # print("[debug] effect size of test is {0:f}".format(w))
- n = 120
- df = 5
- alpha = 0.05
- print("1. The power of the test is {0:f}".format(power(w, n, df, alpha)))
- print("2. The sample size to achieve power of 0.90 is {0:f}".format(sample_size(w, df, alpha, 0.90)[0]))
- # print("[debug] The power when n=329.4 is {0:f}".format(power(w,329.4,df,alpha)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement