### Try at a simple example of the problem ### Partition (x,y) into 2 parts and test whether the part is 1 ### This works - the distribution of p-values is uniform. Trials=5000 out=array(0, dim=c(Trials)) for(trials in 1:Trials){ n = 20 x = rnorm(n) y = rnorm(n) region=1 ## did it once. Now bootstrap the confidence/p-value. ## B = 1000 for(b in 1:B){ my_sample = sample(c(1:n), n, replace=TRUE) my_x = x[my_sample] my_y = y[my_sample] my_region=0 if(mean(my_x)>0){my_region=1} if(my_region==0){my_region=2} if(my_region==region){out[trials]=out[trials]+1} } } hist(out/B, breaks=c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1)) ### Three Parts - test whether part is 1 ### The bootstrap p-values here are NOT uniform ### This demonstrates the problem of regions Trials=2000 out=array(0, dim=c(Trials)) for(trials in 1:Trials){ n = 20 x = rnorm(n) y = rnorm(n) region=1 ## did it once. Now bootstrap the confidence/p-value. ## B = 1000 for(b in 1:B){ my_sample = sample(c(1:n), n, replace=TRUE) my_x = x[my_sample] my_y = y[my_sample] my_region=0 if(atan2(mean(my_y), mean(my_x))>-pi/2 && atan2(mean(my_y), mean(my_x))< pi/6){my_region=1} if(atan2(mean(my_y), mean(my_x))>pi/6 && atan2(mean(my_y), mean(my_x))< 5*pi/6){my_region=2} if(my_region==0){my_region=3} if(my_region==region){out[trials]=out[trials]+1} } } hist(out/B, breaks=c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1)) qqplot(out/B, 1- sqrt(1 - runif(length(out)))) ### Two parts - but now ask if region agrees with the one given by the data ### Demonstrates the problem of a post-hoc analysis Trials=1000 out=array(0, dim=c(Trials)) for(trials in 1:Trials){ n = 20 x = rnorm(n) y = rnorm(n) region=0 if(mean(x)>0){region=1} if(region==0){region=2} ## did it once. Now bootstrap the confidence/p-value. ## B = 1000 for(b in 1:B){ my_sample = sample(c(1:n), n, replace=TRUE) my_x = x[my_sample] my_y = y[my_sample] my_region=0 if(mean(my_x)>0){my_region=1} if(my_region==0){my_region=2} if(my_region==region){out[trials]=out[trials]+1} } } hist(out/B, breaks=c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1)) ### Post-hoc analysis with 3 parts Trials=1000 out=array(0, dim=c(Trials)) for(trials in 1:Trials){ n = 20 x = rnorm(n) y = rnorm(n) region=0 if(atan2(mean(y), mean(x))>-pi/2 && atan2(mean(y), mean(x))< pi/6){region=1} if(atan2(mean(y), mean(x))>pi/6 && atan2(mean(y), mean(x))< 5*pi/6){region=2} if(region==0){region=3} ## did it once. Now bootstrap the confidence/p-value. ## B = 1000 for(b in 1:B){ my_sample = sample(c(1:n), n, replace=TRUE) my_x = x[my_sample] my_y = y[my_sample] my_region=0 if(atan2(mean(my_y), mean(my_x))>-pi/2 && atan2(mean(my_y), mean(my_x))< pi/6){my_region=1} if(atan2(mean(my_y), mean(my_x))>pi/6 && atan2(mean(my_y), mean(my_x))< 5*pi/6){my_region=2} if(my_region==0){my_region=3} if(my_region==region){out[trials]=out[trials]+1} } } hist(out/B, breaks=c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1)) ### Four Part Trial - mainly for my own benefit. Trials=2000 out=array(0, dim=c(Trials)) for(trials in 1:Trials){ n = 20 x = rnorm(n) y = rnorm(n) region=0 if(mean(x)>0 && mean(y)>0){region=1} if(mean(x)<0 && mean(y)>0){region=2} if(mean(x)<0 && mean(y)<0){region=3} if(region==0){region=4} region=1 ## did it once. Now bootstrap the confidence/p-value. ## B = 1000 for(b in 1:B){ my_sample = sample(c(1:n), n, replace=TRUE) my_x = x[my_sample] my_y = y[my_sample] my_region=0 if(mean(my_x)>0 && mean(my_y)>0){my_region=1} if(mean(my_x)<0 && mean(my_y)>0){my_region=2} if(mean(my_x)<0 && mean(my_y)<0){my_region=3} if(my_region==0){my_region=4} if(my_region==region){out[trials]=out[trials]+1} } } hist(out/B, breaks=c(0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1)) qqplot(out/B, 1- (1 - runif(length(out)))^(1/3))