-
Notifications
You must be signed in to change notification settings - Fork 87
Subtypes of HypothesisTest can have optional fields :tail and :alpha #100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
ef60cb7
5f82c9c
14f15d1
c3363a1
596444f
15282b5
8007b1e
e63c378
c882e25
c1ac852
36a9e76
e6bc7a6
bda80a7
480e152
96eb8cf
7f66aa8
86626ec
1938ffb
5d38a50
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -68,35 +68,54 @@ function check_alpha(alpha::Float64) | |
| end | ||
| end | ||
|
|
||
| # Utility to get an optional field (e.g., :tail and :alpha) | ||
| getfield(value::HypothesisTest, name::Symbol, default::Any) = | ||
| if name in fieldnames(value) | ||
| Base.getfield(value, name) | ||
| else | ||
| default | ||
| end | ||
|
|
||
| get_tail(test::HypothesisTest) = getfield(test, :tail, default_tail(test)) | ||
| get_alpha(test::HypothesisTest) = getfield(test, :alpha, 0.05) | ||
|
|
||
| # Utility for pretty-printing: Append white space so that length(with_trailing_whitespace(s)) = max(len, length(s)) | ||
|
||
| with_trailing_whitespace(s::String, len::Int) = s * join(repeat([" "], outer=max(len - length(s), 0)), "") | ||
|
||
|
|
||
| # Pretty-print | ||
| function Base.show{T<:HypothesisTest}(io::IO, test::T) | ||
| println(io, testname(test)) | ||
| println(io, repeat("-", length(testname(test)))) | ||
|
|
||
| alpha = get_alpha(test) | ||
| tail = get_tail(test) | ||
| conf_string = string((1 - alpha) * 100) * "%" | ||
|
||
|
|
||
| # population details | ||
| has_ci = applicable(StatsBase.confint, test) | ||
| label_len = has_ci ? length(conf_string) + 21 : 22 # 21 is length of ' confidence interval:', 22 that of 'parameter of interest:' | ||
|
||
| (param_name, param_under_h0, param_estimate) = population_param_of_interest(test) | ||
| println(io, "Population details:") | ||
| println(io, " parameter of interest: $param_name") | ||
| println(io, " value under h_0: $param_under_h0") | ||
| println(io, " point estimate: $param_estimate") | ||
| println(io, " $(with_trailing_whitespace("parameter of interest:", label_len)) $param_name") | ||
| println(io, " $(with_trailing_whitespace("value under h_0", label_len)) $param_under_h0") | ||
| println(io, " $(with_trailing_whitespace("point estimate", label_len)) $param_estimate") | ||
| if has_ci | ||
| println(io, " 95% confidence interval: $(StatsBase.confint(test))") | ||
| println(io, " $conf_string confidence interval: $(StatsBase.confint(test, alpha, tail=tail))") | ||
| end | ||
| println(io) | ||
|
|
||
| # test summary | ||
| p = pvalue(test) | ||
| outcome = if p > 0.05 "fail to reject" else "reject" end | ||
| tail = default_tail(test) | ||
| p = pvalue(test, tail=tail) | ||
| outcome = if p > alpha "fail to reject" else "reject" end | ||
| println(io, "Test summary:") | ||
| println(io, " outcome with 95% confidence: $outcome h_0") | ||
| label_len = length(conf_string) + 25 # 25 is length of 'outcome with confidence:' | ||
| println(io, " outcome with $conf_string confidence: $outcome h_0") | ||
| if tail == :both | ||
| println(io, " two-sided p-value: $p") | ||
| println(io, " $(with_trailing_whitespace("two-sided p-value:", label_len)) $p") | ||
| elseif tail == :left || tail == :right | ||
| println(io, " one-sided p-value: $p") | ||
| println(io, " $(with_trailing_whitespace("one-sided p-value:", label_len)) $p") | ||
| else | ||
| println(io, " p-value: $p") | ||
| println(io, " $(with_trailing_whitespace("p-value:", label_len)) $p") | ||
| end | ||
| println(io) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -207,6 +207,7 @@ end | |
| testname(::ADFTest) = "Augmented Dickey-Fuller unit root test" | ||
| population_param_of_interest(x::ADFTest) = | ||
| ("coefficient on lagged non-differenced variable", 0, x.coef) | ||
| default_tail(test::ADFTest) = :left | ||
|
|
||
| function show_params(io::IO, x::ADFTest, ident) | ||
| println(io, ident, "sample size in regression: ", x.n) | ||
|
|
@@ -215,4 +216,4 @@ function show_params(io::IO, x::ADFTest, ident) | |
| println(io, ident, "Critical values at 1%, 5%, and 10%: ", x.cv') | ||
| end | ||
|
|
||
| pvalue(x::ADFTest) = HypothesisTests.pvalue(Normal(0, 1), adf_pv_aux(x.stat, x.deterministic); tail=:left) | ||
| pvalue(x::ADFTest; tail=default_tail(x)) = HypothesisTests.pvalue(Normal(0, 1), adf_pv_aux(x.stat, x.deterministic); tail=tail) | ||
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,7 +60,10 @@ function show_params(io::IO, x::RayleighTest, ident="") | |
| println(io, ident, "test statistic: $(x.Rbar^2 * x.n)") | ||
| end | ||
|
|
||
| function pvalue(x::RayleighTest) | ||
| function pvalue(x::RayleighTest; tail=default_tail(x)) | ||
| if tail != default_tail(x) # warn that tail is not used here | ||
| warn("tail=$tail has no effect on the computation of the p value") | ||
|
||
| end | ||
| Z = x.Rbar^2 * x.n | ||
| x.n > 1e6 ? exp(-Z) : | ||
| exp(-Z)*(1+(2*Z-Z^2)/(4*x.n)-(24*Z - 132*Z^2 + 76*Z^3 - 9*Z^4)/(288*x.n^2)) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,13 +33,13 @@ pvalue(x::TTest; tail=:both) = pvalue(TDist(x.df), x.t; tail=tail) | |
| default_tail(test::TTest) = :both | ||
|
|
||
| # confidence interval by inversion | ||
| function StatsBase.confint(x::TTest, alpha::Float64=0.05; tail=:both) | ||
| function StatsBase.confint(x::TTest, alpha::Float64=get_alpha(x); tail=get_tail(x)) | ||
| check_alpha(alpha) | ||
|
|
||
| if tail == :left | ||
| (-Inf, StatsBase.confint(x, alpha*2)[2]) | ||
| (-Inf, StatsBase.confint(x, alpha*2, tail=:both)[2]) # tail=:both required as recursive anchor | ||
|
||
| elseif tail == :right | ||
| (StatsBase.confint(x, alpha*2)[1], Inf) | ||
| (StatsBase.confint(x, alpha*2, tail=:both)[1], Inf) | ||
| elseif tail == :both | ||
| q = quantile(TDist(x.df), 1-alpha/2) | ||
| (x.xbar-q*x.stderr, x.xbar+q*x.stderr) | ||
|
|
@@ -58,6 +58,8 @@ immutable OneSampleTTest <: TTest | |
| stderr::Real # empirical standard error | ||
| t::Real # t-statistic | ||
| μ0::Real # mean under h_0 | ||
| tail::Symbol # :left, :right, or :both | ||
| alpha::Real # alpha value | ||
| end | ||
|
|
||
| testname(::OneSampleTTest) = "One sample t-test" | ||
|
|
@@ -70,19 +72,21 @@ function show_params(io::IO, x::OneSampleTTest, ident="") | |
| println(io, ident, "empirical standard error: $(x.stderr)") | ||
| end | ||
|
|
||
| function OneSampleTTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0) | ||
| function OneSampleTTest(xbar::Real, stddev::Real, n::Int, μ0::Real=0; tail::Symbol=:both, alpha::Real=0.05) | ||
| stderr = stddev/sqrt(n) | ||
| t = (xbar-μ0)/stderr | ||
| df = n-1 | ||
| OneSampleTTest(n, xbar, df, stderr, t, μ0) | ||
| OneSampleTTest(n, xbar, df, stderr, t, μ0, tail, alpha) | ||
| end | ||
|
|
||
| OneSampleTTest{T<:Real}(v::AbstractVector{T}, μ0::Real=0) = OneSampleTTest(mean(v), std(v), length(v), μ0) | ||
| OneSampleTTest{T<:Real}(v::AbstractVector{T}, μ0::Real=0; tail::Symbol=:both, alpha::Real=0.05) = | ||
| OneSampleTTest(mean(v), std(v), length(v), μ0, tail=tail, alpha=alpha) | ||
|
|
||
| function OneSampleTTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) | ||
| function OneSampleTTest{T<:Real, S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0; | ||
| tail::Symbol=:both, alpha::Real=0.05) | ||
| check_same_length(x, y) | ||
|
|
||
| OneSampleTTest(x - y, μ0) | ||
| OneSampleTTest(x - y, μ0, tail=tail, alpha=alpha) | ||
| end | ||
|
|
||
|
|
||
|
|
@@ -96,6 +100,8 @@ immutable EqualVarianceTTest <: TwoSampleTTest | |
| stderr::Real # empirical standard error | ||
| t::Real # t-statistic | ||
| μ0::Real # mean difference under h_0 | ||
| tail::Symbol # :left, :right, or :both | ||
| alpha::Real # alpha value | ||
| end | ||
|
|
||
| function show_params(io::IO, x::TwoSampleTTest, ident="") | ||
|
|
@@ -108,14 +114,15 @@ end | |
| testname(::EqualVarianceTTest) = "Two sample t-test (equal variance)" | ||
| population_param_of_interest(x::TwoSampleTTest) = ("Mean difference", x.μ0, x.xbar) # parameter of interest: name, value under h0, point estimate | ||
|
|
||
| function EqualVarianceTTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) | ||
| function EqualVarianceTTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0; | ||
| tail::Symbol=:both, alpha::Real=0.05) | ||
| nx, ny = length(x), length(y) | ||
| xbar = mean(x) - mean(y) | ||
| stddev = sqrt(((nx - 1) * var(x) + (ny - 1) * var(y)) / (nx + ny - 2)) | ||
| stderr = stddev * sqrt(1/nx + 1/ny) | ||
| t = (xbar - μ0) / stderr | ||
| df = nx + ny - 2 | ||
| EqualVarianceTTest(nx, ny, xbar, df, stderr, t, μ0) | ||
| EqualVarianceTTest(nx, ny, xbar, df, stderr, t, μ0, tail, alpha) | ||
| end | ||
|
|
||
|
|
||
|
|
@@ -129,16 +136,19 @@ immutable UnequalVarianceTTest <: TwoSampleTTest | |
| stderr::Real # empirical standard error | ||
| t::Real # t-statistic | ||
| μ0::Real # mean under h_0 | ||
| tail::Symbol # :left, :right, or :both | ||
| alpha::Real # alpha value | ||
| end | ||
|
|
||
| testname(::UnequalVarianceTTest) = "Two sample t-test (unequal variance)" | ||
|
|
||
| function UnequalVarianceTTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0) | ||
| function UnequalVarianceTTest{T<:Real,S<:Real}(x::AbstractVector{T}, y::AbstractVector{S}, μ0::Real=0; | ||
| tail::Symbol=:both, alpha::Real=0.05) | ||
| nx, ny = length(x), length(y) | ||
| xbar = mean(x)-mean(y) | ||
| varx, vary = var(x), var(y) | ||
| stderr = sqrt(varx/nx + vary/ny) | ||
| t = (xbar-μ0)/stderr | ||
| df = (varx / nx + vary / ny)^2 / ((varx / nx)^2 / (nx - 1) + (vary / ny)^2 / (ny - 1)) | ||
| UnequalVarianceTTest(nx, ny, xbar, df, stderr, t, μ0) | ||
| UnequalVarianceTTest(nx, ny, xbar, df, stderr, t, μ0, tail, alpha) | ||
| end | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd rather not use the name
getfield, which is associated with thea.bsyntax and is currently not overloadable. Also, this approach isn't really Julian. Better define fallbacks forget_tailandget_alphawhich return the default (or maybe better, print a warning), and define specific methods for the distributions which support it (and which are very few).Also, remove the "get_" prefix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removing the "get_" prefix, I simply called the functions
tailandalpha. The tail is:undefined, which is the value returned bydefault_tailif that method is not overloaded. For all tests that always use some default tail, thetailmethod returns that tail, just likedefault_taildoes. For t-tests, which can be configured to use another tail, that other tail is returned (or the default, if no other tail is specified).I feel that the method
default_tailis superseded by this implementation of thetailmethod.default_tailcan still be used (and behaves just liketail) but it emits a deprecation warning.