── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Code
library(mvtnorm)

# Ensure reproducibility
set.seed(130)

# Draw n_obs residuals.
# If normal is true, draw normal.
# If normal is false, draw mean zero log-normal.
# If x is not null, multiply the residual by the provided x.
DrawEps <- function(n_obs, normal, x=NULL) {
  eps_var <- 1
  if (normal) {
    eps <- rnorm(n_obs, 0, sd=sqrt(eps_var))
  } else {
    z <- rnorm(n_obs, 0, sd=sqrt(eps_var))
    return(exp(z) - exp(eps_var / 2))
  }
  if (!is.null(x)) {
    # Induce heteroskedasticity
    eps <- eps * x
  }
  return(eps)
}

DrawX <- function(n_obs) {
  rnorm(n_obs) %>% as.matrix(ncol=1)
}

if (FALSE) {
  # Sanity check
  x <- DrawX(10000)
  mean(DrawEps(10000, TRUE))
  mean(DrawEps(10000, FALSE))
  mean(DrawEps(10000, TRUE, x=x))
  mean(DrawEps(10000, FALSE, x=x))
}

# Return a dataframe with y, x1, and the true epsilon and ey.
# DrawEpsFun() should take a single argument, n_obs, and return a draw
# of n_obs residuals.
DrawData <- function(x_mat, DrawEpsFun, beta_true) {
  n_obs <- nrow(x_mat)
  eps <- DrawEpsFun(n_obs)
  ey <- cbind(rep(1, n_obs), x_mat) %*% beta_true
  y <- ey + eps
  df <- data.frame(y=y, x1=x_mat[,1], eps_true=eps, ey_true=ey)
  return(df)
}

if (FALSE) {
  # Sanity check
  n_obs <- 100
  x_mat <- DrawX(n_obs)
  beta_true <- c("(Intercept)"=1, "x1"=2)
  DrawEpsFun <- \(n_obs) { DrawEps(n_obs, normal=TRUE, x=x_mat[, 1]) }
  df_sim <- DrawData(x_mat, DrawEpsFun, beta_true)
  plot(df_sim$x1, df_sim$eps_true)
}

# Run n_sims simulations with the provided DrawDataFun, which should take
# a single argument, beta_true. The argument `level` specifies the level of the
# test.
RunExperiment <- function(DrawDataFun, level, num_sims, beta_true) {
  #pb <- txtProgressBar(max=num_sims, style=3)  # qmd doesn't like this
  sim_df <- data.frame()
  for (n_sim in 1:num_sims) {
    #setTxtProgressBar(pb, n_sim)
    df_sim <- DrawDataFun(beta_true)
    reg_fit_sim <- lm(y ~ 1 + x1, df_sim)
    ci <- confint(reg_fit_sim, parm="x1", level=level)
    covered <- (beta_true["x1"] > ci[1]) && (beta_true["x1"] < ci[2])
    betahat <- coefficients(reg_fit_sim)["x1"]
    sim_df <- bind_rows(
      sim_df,
      data.frame(sim=n_sim, covered=covered, cil=ci[1], ciu=ci[2], betahat=betahat))
  }
  #close(pb)
  return(sim_df)
}
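A quick note on the mean-zero log-normal draw in `DrawEps()`: if $z \sim \mathcal{N}(0, \sigma^2)$, then the log-normal mean is

$$
\mathbb{E}\left[e^{z}\right] = e^{\sigma^2 / 2},
$$

so $e^{z} - e^{\sigma^2/2}$ has mean zero; with `eps_var` $= \sigma^2 = 1$, this is exactly the quantity returned in the non-normal case.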
`summarise()` has grouped output by 'heteroskedastic', 'normal'. You can
override using the `.groups` argument.
# A tibble: 8 × 7
# Groups:   heteroskedastic, normal [4]
  heteroskedastic normal n_obs covered_lower covered covered_upper ci_width
  <chr>           <chr>  <dbl>         <dbl>   <dbl>         <dbl>    <dbl>
1 hetero          gamma     10         0.798   0.809         0.821    1.70
2 hetero          gamma    500         0.781   0.792         0.804    0.237
3 hetero          normal    10         0.573   0.585         0.596    0.805
4 hetero          normal   500         0.545   0.556         0.568    0.115
5 homo            gamma     10         0.797   0.808         0.820    1.73
6 homo            gamma    500         0.781   0.793         0.804    0.237
7 homo            normal    10         0.789   0.800         0.812    0.977
8 homo            normal   500         0.785   0.797         0.808    0.111
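Here `covered_lower` and `covered_upper` are the Monte Carlo coverage estimate plus and minus two standard errors, computed in the summary code with the nominal level plugged in:

$$
\mathrm{SE}_{\mathrm{MC}} = \sqrt{\frac{\text{level} \times (1 - \text{level})}{n_{\text{sims}}}} = \sqrt{\frac{0.8 \times 0.2}{5000}} \approx 0.0057,
$$

so each coverage estimate carries an uncertainty of roughly ±0.011.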
We see that coverage is close to the nominal 80% level except in the heteroskedastic normal case, where the intervals undercover substantially. As might be expected, higher residual variance widens the confidence intervals, as does a smaller number of observations.
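The undercoverage arises because `confint()` uses the homoskedastic variance formula while the true error variance depends on `x1`. As a minimal sketch of one possible fix, assuming the `sandwich` and `lmtest` packages are available, the interval inside `RunExperiment()` could be replaced with a heteroskedasticity-consistent (sandwich) one; the helper `RobustCI()` below is hypothetical, not part of the solution code above.

```r
library(sandwich)
library(lmtest)

# Hypothetical helper: a robust analogue of the interval computed in
# RunExperiment(). vcovHC() provides a heteroskedasticity-consistent
# ("sandwich") covariance estimate, which coefci() uses in place of the
# homoskedastic formula behind confint().
RobustCI <- function(df_sim, level=0.8) {
  reg_fit_sim <- lm(y ~ 1 + x1, df_sim)
  coefci(reg_fit_sim, parm="x1", level=level,
         vcov.=vcovHC(reg_fit_sim, type="HC3"))
}
```

Swapping this interval in for the `confint()` call would be expected to pull the heteroskedastic normal coverage back toward the nominal 80%, at least at the larger sample size.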
Code
# Even with only 10 observations the betahat looks pretty good
ggplot(sim_df %>% filter(n_obs < 100)) +
  geom_histogram(aes(x=betahat, fill=n_obs, group=n_obs)) +
  facet_grid(normal ~ heteroskedastic)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Source Code
---
title: "STAT151A Homework 5 problem 2 solution"
author: Your name here
number-sections: true
number-depth: 1
format:
  html:
    code-fold: true
    code-tools: true
format-links: false
solutions: true
---

```{r}
library(tidyverse)
library(mvtnorm)

# Ensure reproducibility
set.seed(130)

# Draw n_obs residuals.
# If normal is true, draw normal.
# If normal is false, draw mean zero log-normal.
# If x is not null, multiply the residual by the provided x.
DrawEps <- function(n_obs, normal, x=NULL) {
  eps_var <- 1
  if (normal) {
    eps <- rnorm(n_obs, 0, sd=sqrt(eps_var))
  } else {
    z <- rnorm(n_obs, 0, sd=sqrt(eps_var))
    return(exp(z) - exp(eps_var / 2))
  }
  if (!is.null(x)) {
    # Induce heteroskedasticity
    eps <- eps * x
  }
  return(eps)
}

DrawX <- function(n_obs) {
  rnorm(n_obs) %>% as.matrix(ncol=1)
}

if (FALSE) {
  # Sanity check
  x <- DrawX(10000)
  mean(DrawEps(10000, TRUE))
  mean(DrawEps(10000, FALSE))
  mean(DrawEps(10000, TRUE, x=x))
  mean(DrawEps(10000, FALSE, x=x))
}

# Return a dataframe with y, x1, and the true epsilon and ey.
# DrawEpsFun() should take a single argument, n_obs, and return a draw
# of n_obs residuals.
DrawData <- function(x_mat, DrawEpsFun, beta_true) {
  n_obs <- nrow(x_mat)
  eps <- DrawEpsFun(n_obs)
  ey <- cbind(rep(1, n_obs), x_mat) %*% beta_true
  y <- ey + eps
  df <- data.frame(y=y, x1=x_mat[,1], eps_true=eps, ey_true=ey)
  return(df)
}

if (FALSE) {
  # Sanity check
  n_obs <- 100
  x_mat <- DrawX(n_obs)
  beta_true <- c("(Intercept)"=1, "x1"=2)
  DrawEpsFun <- \(n_obs) { DrawEps(n_obs, normal=TRUE, x=x_mat[, 1]) }
  df_sim <- DrawData(x_mat, DrawEpsFun, beta_true)
  plot(df_sim$x1, df_sim$eps_true)
}

# Run n_sims simulations with the provided DrawDataFun, which should take
# a single argument, beta_true. The argument `level` specifies the level of the
# test.
RunExperiment <- function(DrawDataFun, level, num_sims, beta_true) {
  #pb <- txtProgressBar(max=num_sims, style=3)  # qmd doesn't like this
  sim_df <- data.frame()
  for (n_sim in 1:num_sims) {
    #setTxtProgressBar(pb, n_sim)
    df_sim <- DrawDataFun(beta_true)
    reg_fit_sim <- lm(y ~ 1 + x1, df_sim)
    ci <- confint(reg_fit_sim, parm="x1", level=level)
    covered <- (beta_true["x1"] > ci[1]) && (beta_true["x1"] < ci[2])
    betahat <- coefficients(reg_fit_sim)["x1"]
    sim_df <- bind_rows(
      sim_df,
      data.frame(sim=n_sim, covered=covered, cil=ci[1], ciu=ci[2], betahat=betahat))
  }
  #close(pb)
  return(sim_df)
}
```

```{r}
level <- 0.8
n_sims <- 5000
beta_true <- c("(Intercept)"=1, "x1"=2)

sim_df <- data.frame()   # Simulation results
data_df <- data.frame()  # Collect an example dataset

for (n_obs in c(10, 500)) {
  x_mat <- DrawX(n_obs)
  for (heteroskedastic in c(FALSE, TRUE)) {
    for (normal in c(FALSE, TRUE)) {
      if (heteroskedastic) {
        x_arg <- x_mat[, 1]
      } else {
        x_arg <- NULL
      }
      DrawEpsFun <- \(n_obs) { DrawEps(n_obs, normal=normal, x=x_arg) }
      DrawDataFun <- \(beta_true) { DrawData(x_mat, DrawEpsFun, beta_true) }
      this_sim_df <- RunExperiment(
        DrawDataFun, num_sims=n_sims, level=level, beta_true=beta_true)
      data_df <- bind_rows(
        data_df,
        DrawDataFun(beta_true) %>%
          mutate(heteroskedastic=ifelse(heteroskedastic, "hetero", "homo"),
                 normal=ifelse(normal, "normal", "gamma"),
                 n_obs=n_obs)
      )
      sim_df <- bind_rows(
        sim_df,
        this_sim_df %>%
          mutate(heteroskedastic=ifelse(heteroskedastic, "hetero", "homo"),
                 normal=ifelse(normal, "normal", "gamma"),
                 n_obs=n_obs))
    }
  }
}

if (FALSE) {
  data_df %>%
    group_by(normal, n_obs, heteroskedastic) %>%
    summarize(eps_mean=mean(eps_true), eps_sd=sd(eps_true))
  ggplot(data_df) +
    geom_histogram(aes(x=eps_true)) +
    facet_grid(normal ~ heteroskedastic)
}
```

```{r}
sim_df %>%
  group_by(heteroskedastic, normal, n_obs) %>%
  summarize(cil=mean(cil), ciu=mean(ciu), covered=mean(covered)) %>%
  mutate(ci_width=ciu - cil,
         covered_ci=sqrt(level * (1 - level) / n_sims),
         covered_lower=covered - 2 * covered_ci,
         covered_upper=covered + 2 * covered_ci) %>%
  select(heteroskedastic, normal, n_obs,
         covered_lower, covered, covered_upper, ci_width)
```

We see that coverage is close to the nominal 80% level except in the heteroskedastic normal case, where the intervals undercover substantially. As might be expected, higher residual variance widens the confidence intervals, as does a smaller number of observations.

```{r}
# Even with only 10 observations the betahat looks pretty good
ggplot(sim_df %>% filter(n_obs < 100)) +
  geom_histogram(aes(x=betahat, fill=n_obs, group=n_obs)) +
  facet_grid(normal ~ heteroskedastic)
```