---
title: "Simulation"
output:
  html_document:
    toc: true
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r, include=FALSE}
# packages used
listofpackages <- c("dygraphs")
for (j in listofpackages) {
  # requireNamespace() checks for this one package only; the R documentation
  # advises against installed.packages() for testing whether a package exists
  if (!requireNamespace(j, quietly = TRUE)) {
    install.packages(j)
  }
  library(j, character.only = TRUE)
}
# setting the seed for replication
set.seed(77)
```

## Monte Carlo simulations and bootstrapping

To run this script, we first need to load the data that we prepared in the previous class.

```{r}
# this chunk is dedicated to the loading of the necessary data
data01 <- read.table("data/data01.csv", stringsAsFactors = FALSE,
                     header = TRUE, sep = ",")
```

The first step is to see how the realized cumulative returns performed. Let us consider the period from the beginning of 1950 to the end of 2015.

```{r}
# number of monthly observations from January 1950 to December 2015
TT <- 12 * (2015 - 1950 + 1)

# monthly returns inside the sample window, extracted by column name
ret_window <- subset(data01, date >= 1950 & date < 2016)[["ret_m_c"]]
# fail loudly if the window does not hold exactly TT months, instead of
# letting a shorter vector be recycled into the data frame
stopifnot(length(ret_window) == TT)

# the cumulative return starts at 1 in the first month and is compounded with
# the returns of the following months; cumprod() replaces the explicit loop
# port_c[i] = port_c[i - 1] * (1 + ret_m_c[i])
data02 <- data.frame(
  port_c = cumprod(c(1, 1 + ret_window[-1])),
  ret_m_c = ret_window
)

dygraph(ts(data02$port_c, start = c(1950, 1), end = c(2015, 12),
           frequency = 12))
```

Now we will simulate the series. This lets us see how (im)plausible the assumptions of no serial correlation and of normally distributed returns are.

```{r}
# parameter estimation and choice of the number of replications in the simulation
vol <- sd(data02$ret_m_c)
alpha <- mean(data02$ret_m_c)
nrep <- 10^3
# here I create the containers to be filled with the generated data.
# row 1 of the y_* containers is the starting value 1; row 1 of the x_*
# containers keeps the placeholder alpha and never enters the cumulative product
y_bt <- y_mc <- array(1, c(TT, nrep))
x_bt <- x_mc <- array(alpha, c(TT, nrep))

# now, the loop: one simulated path per replication
# (the draws stay inside the loop, in the same order, so that the random
# number stream under the seed set above is unchanged)
for (i in seq_len(nrep)) {
  u <- rnorm(TT - 1)
  # this (re)samples from the data
  res <- sample(data02$ret_m_c[-1], replace = TRUE)
  x_mc[-1, i] <- alpha + vol * u  # the Monte Carlo way
  x_bt[-1, i] <- res              # the bootstrap way
  # bootstrapped and MC cumulative returns: start at 1 and compound the
  # returns of rows 2..TT (same recursion as y[j] = y[j - 1] * (1 + x[j]))
  y_mc[, i] <- cumprod(c(1, 1 + x_mc[-1, i]))
  y_bt[, i] <- cumprod(c(1, 1 + x_bt[-1, i]))
}

# now we want to construct the series of means and quantiles of the resulting
# collection of drawn series; each statistic is taken across replications
# (i.e. along each row) and stored as a full column of data02 in one step

# quantile across replications for every time period (unnamed numeric vector)
row_quantile <- function(draws, prob) {
  apply(draws, 1, quantile, probs = prob, names = FALSE)
}

# obtaining the means
data02$y_bt_mean <- rowMeans(y_bt)
data02$x_bt_mean <- rowMeans(x_bt)
data02$y_mc_mean <- rowMeans(y_mc)
data02$x_mc_mean <- rowMeans(x_mc)
# and the quantiles
data02$y_bt_q05 <- row_quantile(y_bt, 0.05)
data02$x_bt_q05 <- row_quantile(x_bt, 0.05)
data02$y_mc_q05 <- row_quantile(y_mc, 0.05)
data02$x_mc_q05 <- row_quantile(x_mc, 0.05)
data02$y_bt_q95 <- row_quantile(y_bt, 0.95)
data02$x_bt_q95 <- row_quantile(x_bt, 0.95)
data02$y_mc_q95 <- row_quantile(y_mc, 0.95)
data02$x_mc_q95 <- row_quantile(x_mc, 0.95)
```

Now we can finally see what we have got.
```{r}
# some default plotting
# monthly time axis shared by every base-graphics series below
time_axis <- seq(from = 1950, to = 2016 - 1/12, length.out = TT)
plot(x = time_axis, y = data02$port_c,
     main = "Cumulative return: Monte Carlo", type = "l", col = "blue",
     ylab = "return", xlab = "time", ylim = c(0, 6000))
lines(x = time_axis, y = data02$y_mc_mean, col = "green", lwd = 1.6)
lines(x = time_axis, y = data02$y_mc_q05, col = "grey", lty = 2)
lines(x = time_axis, y = data02$y_mc_q95, col = "grey", lty = 2)

# for those who like to lose some time in fancy plotting
dygraph(
  ts(data02[, c("port_c", "y_bt_mean", "y_bt_q05", "y_bt_q95")],
     start = c(1950, 1), end = c(2015, 12), frequency = 12),
  main = "Cumulative return"
) %>%
  dySeries(c("y_bt_q05", "y_bt_mean", "y_bt_q95"),
           label = "bootstrapped return") %>%
  dySeries("port_c", label = "realised return")

dygraph(
  ts(data02[, c("port_c", "y_mc_mean", "y_mc_q05", "y_mc_q95")],
     start = c(1950, 1), end = c(2015, 12), frequency = 12),
  main = "Cumulative return"
) %>%
  dySeries(c("y_mc_q05", "y_mc_mean", "y_mc_q95"),
           label = "Monte Carlo return") %>%
  dySeries("port_c", label = "realised return")

# and what about the distribution of returns for the next period ??
# a) via bootstrap: row 2 holds the first simulated month of every replication
s1_bt <- x_bt[2, ]
hist(s1_bt, breaks = seq(min(s1_bt), max(s1_bt), length.out = 20 + 1),
     freq = FALSE, main = "histogram of monthly returns")
# normal density with the same mean and sd, for comparison with the histogram
curve(dnorm(x, mean = mean(s1_bt), sd = sd(s1_bt)),
      col = "darkblue", lwd = 2, add = TRUE)
VaR_bt <- quantile(s1_bt, 0.05)

# b) via Monte Carlo simulation
# (this must read the Monte Carlo draws x_mc; reading x_bt here would simply
# repeat the bootstrap histogram and VaR)
s1_mc <- x_mc[2, ]
hist(s1_mc, breaks = seq(min(s1_mc), max(s1_mc), length.out = 20 + 1),
     freq = FALSE, main = "histogram of monthly returns")
curve(dnorm(x, mean = mean(s1_mc), sd = sd(s1_mc)),
      col = "darkblue", lwd = 2, add = TRUE)
VaR_mc <- quantile(s1_mc, 0.05)
```

## CAPM, estimation and resampling methods

First we need to load the necessary data. In this exercise we will use the Fama and French database.
```{r}
ffdata <- read.table("ffdata.csv", header = TRUE, sep = ",",
                     stringsAsFactors = FALSE)
# to get a glance of how the data are structured, don't forget to check the summary statistics
summary(na.omit(ffdata[, c("r_mkt", "RF", "PR15", "PR51", "MOM", "SMB", "HML")]))
cor(na.omit(ffdata[, c("r_mkt", "RF", "PR15", "PR51", "MOM", "SMB", "HML")]))
plot(x = ffdata$exret_mkt, y = ffdata$er_15,
     xlab = "excess return", ylab = "portfolio 15 return")
# this fits a straight line to the points on the plot; the regression has to
# be y on x (er_15 on exret_mkt) so that the intercept and slope handed to
# abline() refer to the axes used above
abline(lm(er_15 ~ exret_mkt, data = ffdata), col = "red")
```

### Estimating CER and CAPM models

For the sake of estimation, we will use the sample from January 2000 to June 2014. We will estimate both these models and verify whether the default regression commands obey the algebra behind them.

```{r}
# we define these variables separately for the reason of convenience
ret <- as.matrix(subset(ffdata, select = er_15, date >= 2000 & date < 2014.5))
mkt <- as.matrix(subset(ffdata, select = exret_mkt, date >= 2000 & date < 2014.5))

### CER ###
cer_mkt <- lm(mkt ~ 1) # as you can see, model specification has specific syntax
cer_ret <- lm(ret ~ 1) # obviously you could just take the sample mean to get the estimate
summary(cer_mkt)

### CAPM ###
capm_ret <- lm(ret ~ mkt) # constant included by default (no need to explicitly add it)!
summary(capm_ret)

# storing the residuals
resid_ret <- residuals(capm_ret)
resid_mkt <- residuals(cer_mkt)

# we can also estimate it by hand using matrix algebra!
# beta = (X'X)^{-1} X'Y, obtained by solving the normal equations
# (X'X) beta = X'Y directly rather than forming the inverse explicitly
Y <- ret
X <- cbind(1, mkt)
beta <- solve(crossprod(X), crossprod(X, Y))
beta

# CAPM in two steps: regress on the demeaned market return (the CER residuals)
capm2_ret <- lm(ret ~ resid_mkt)
summary(capm2_ret) # notice the change in the intercept
```

### Value-at-Risk: MC and bootstrapping

We want to simulate the behavior of the market in the (pseudo) future for the period from July 2014 to June 2016. The assumption is that the market returns follow **CER** and that portfolio returns follow **CAPM**.
That is

$$
\begin{aligned}
r_{mkt, t} &= \alpha_{mkt} + \varepsilon_{mkt, t} \\
r_{pf, t} &= \alpha_{pf} + \beta_{pf} r_{mkt, t} + \varepsilon_{pf, t}
\end{aligned}
$$

```{r}
# first, we create the arrays which will contain the sampled series
nrep <- 10^3
tT <- 12 * 2 # two years
fcst_mc <- fcst_bt <- array(0, c(tT, nrep)) # the containers

# quantities that do not change across replications are computed once
alpha_mkt <- coef(cer_mkt)[[1]]      # CER mean of the market return
alpha_pf <- coef(capm_ret)[[1]]      # CAPM intercept
beta_pf <- coef(capm_ret)[[2]]       # CAPM slope on the market return
# standard error of the residuals of the relevant regression
sigma_ret <- summary(capm_ret)$sigma
sigma_mkt <- summary(cer_mkt)$sigma

# the four draws are made inside the loop, in this order, so that the random
# number stream is the same as when each replication is simulated in turn
for (i in seq_len(nrep)) {
  # generating the residuals
  ### bootstrapping
  res_mkt_bt <- sample(resid_mkt, size = tT, replace = TRUE)
  res_ret_bt <- sample(resid_ret, size = tT, replace = TRUE)
  ### Monte Carlo: normal draws scaled by the residual standard error
  res_ret_mc <- rnorm(n = tT, sd = sigma_ret)
  res_mkt_mc <- rnorm(n = tT, sd = sigma_mkt)

  # generating the series
  ### BT
  mkt_bt <- alpha_mkt + res_mkt_bt
  fcst_bt[, i] <- alpha_pf + beta_pf * mkt_bt + res_ret_bt
  ### MC
  mkt_mc <- alpha_mkt + res_mkt_mc
  fcst_mc[, i] <- alpha_pf + beta_pf * mkt_mc + res_ret_mc
}

# calculating VaR: the 5% quantile across replications for each forecast month
var_bt <- apply(fcst_bt, 1, quantile, probs = 0.05, names = FALSE)
var_mc <- apply(fcst_mc, 1, quantile, probs = 0.05, names = FALSE)
```

Now the simulations are done and we can check the results.
```{r}
# Draws one forecast figure: the estimation-sample series, the mean of the
# simulated forecasts, the simulated 5% quantile path and the series actually
# observed over the forecast window.
#   fcst      matrix of simulated forecasts (rows = months, cols = replications)
#   var_path  Value-at-Risk path, one value per forecast month
#   var_label legend entry used for the Value-at-Risk line
plot_var_forecast <- function(fcst, var_path, var_label) {
  sample_time <- seq(from = 2000, to = 2014 + 5/12, length.out = nrow(ret))
  fcst_time <- seq(from = 2014 + 6/12, to = 2016 + 5/12, length.out = tT)
  observed <- as.matrix(
    subset(ffdata, select = er_15, date >= 2014.5 & date < 2016.5)
  )

  plot(x = sample_time, y = ret, type = "l", xlim = c(2000, 2017),
       ylab = "excess return", xlab = "time")
  lines(x = fcst_time, y = apply(fcst, 1, mean), col = "blue", lwd = 1)
  lines(x = fcst_time, y = var_path, col = "red", lwd = 2)
  lines(x = fcst_time, y = observed, lty = 2)
  legend("bottomleft",
         legend = c("data", "mean forecast", var_label, "actual series"),
         col = c("black", "blue", "red", "black"),
         lwd = c(1, 1, 2, 1),
         lty = c(1, 1, 1, 2))
  invisible(NULL)
}

# Monte Carlo figure first, then the bootstrap one
plot_var_forecast(fcst_mc, var_mc, "Value-at-Risk: MC")
plot_var_forecast(fcst_bt, var_bt, "Value-at-Risk: bootstrap")
```