CAMP: estimation and simulation

Estimation

First, load the necessary data.

##############################################################
dataff <- read.table("data/FF_Data_2016_11.csv", sep = ";", header = T, stringsAsFactors = F)
summary(dataff)
##       X                  X.1           exret_mkt             SMB          
##  Length:1085        Min.   :192607   Min.   :-29.1300   Min.   :-16.7000  
##  Class :character   1st Qu.:194902   1st Qu.: -2.0100   1st Qu.: -1.5550  
##  Mode  :character   Median :197108   Median :  0.9900   Median :  0.0700  
##                     Mean   :197119   Mean   :  0.6499   Mean   :  0.2168  
##                     3rd Qu.:199403   3rd Qu.:  3.6500   3rd Qu.:  1.7750  
##                     Max.   :201609   Max.   : 38.8500   Max.   : 36.7000  
##                     NA's   :2        NA's   :2          NA's   :2         
##       HML                 RF               RMW                CMA         
##  Min.   :-13.2800   Min.   :-0.0600   Min.   :-17.5700   Min.   :-6.8100  
##  1st Qu.: -1.2950   1st Qu.: 0.0300   1st Qu.: -0.8850   1st Qu.:-0.9400  
##  Median :  0.2100   Median : 0.2400   Median :  0.1700   Median : 0.1700  
##  Mean   :  0.3828   Mean   : 0.2785   Mean   :  0.2466   Mean   : 0.3053  
##  3rd Qu.:  1.7350   3rd Qu.: 0.4300   3rd Qu.:  1.2900   3rd Qu.: 1.5350  
##  Max.   : 35.4600   Max.   : 1.3500   Max.   : 12.1900   Max.   : 9.5100  
##  NA's   :2          NA's   :2         NA's   :446        NA's   :446      
##       MOM                PR11               PR12         
##  Min.   :-52.2600   Min.   :-49.4700   Min.   :-35.2400  
##  1st Qu.: -0.8500   1st Qu.: -4.7150   1st Qu.: -3.3500  
##  Median :  0.8500   Median :  0.7300   Median :  0.8400  
##  Mean   :  0.6667   Mean   :  0.8465   Mean   :  0.9757  
##  3rd Qu.:  2.9300   3rd Qu.:  5.7150   3rd Qu.:  5.2800  
##  Max.   : 18.3800   Max.   :147.5000   Max.   :126.0500  
##  NA's   :8          NA's   :2          NA's   :2         
##       PR13              PR14              PR15              PR21         
##  Min.   :-33.930   Min.   :-35.690   Min.   :-33.150   Min.   :-32.7100  
##  1st Qu.: -2.605   1st Qu.: -1.900   1st Qu.: -2.160   1st Qu.: -3.5100  
##  Median :  1.240   Median :  1.470   Median :  1.460   Median :  1.2700  
##  Mean   :  1.289   Mean   :  1.449   Mean   :  1.638   Mean   :  0.9049  
##  3rd Qu.:  4.835   3rd Qu.:  4.500   3rd Qu.:  5.110   3rd Qu.:  5.0550  
##  Max.   : 82.880   Max.   :103.200   Max.   :100.500   Max.   : 76.5000  
##  NA's   :2         NA's   :2         NA's   :2         NA's   :2         
##       PR22              PR23              PR24              PR25        
##  Min.   :-31.690   Min.   :-35.260   Min.   :-30.450   Min.   :-37.390  
##  1st Qu.: -2.460   1st Qu.: -2.020   1st Qu.: -2.085   1st Qu.: -2.010  
##  Median :  1.530   Median :  1.460   Median :  1.430   Median :  1.700  
##  Mean   :  1.189   Mean   :  1.287   Mean   :  1.349   Mean   :  1.514  
##  3rd Qu.:  4.965   3rd Qu.:  4.535   3rd Qu.:  4.705   3rd Qu.:  5.190  
##  Max.   : 78.120   Max.   : 75.510   Max.   : 80.320   Max.   : 89.970  
##  NA's   :2         NA's   :2         NA's   :2         NA's   :2        
##       PR31               PR32              PR33              PR34        
##  Min.   :-29.7900   Min.   :-30.500   Min.   :-31.350   Min.   :-33.290  
##  1st Qu.: -2.9300   1st Qu.: -1.945   1st Qu.: -1.920   1st Qu.: -1.845  
##  Median :  1.3500   Median :  1.450   Median :  1.460   Median :  1.540  
##  Mean   :  0.9834   Mean   :  1.170   Mean   :  1.227   Mean   :  1.281  
##  3rd Qu.:  4.8300   3rd Qu.:  4.615   3rd Qu.:  4.480   3rd Qu.:  4.470  
##  Max.   : 56.9700   Max.   : 39.960   Max.   : 60.890   Max.   : 69.410  
##  NA's   :2          NA's   :2         NA's   :2         NA's   :2        
##       PR35              PR41               PR42              PR43        
##  Min.   :-35.860   Min.   :-30.9900   Min.   :-28.830   Min.   :-29.730  
##  1st Qu.: -2.035   1st Qu.: -2.3450   1st Qu.: -1.885   1st Qu.: -2.045  
##  Median :  1.390   Median :  1.1800   Median :  1.370   Median :  1.520  
##  Mean   :  1.426   Mean   :  0.9955   Mean   :  1.021   Mean   :  1.146  
##  3rd Qu.:  4.940   3rd Qu.:  4.5850   3rd Qu.:  4.120   3rd Qu.:  4.330  
##  Max.   : 77.240   Max.   : 35.2400   Max.   : 53.580   Max.   : 65.880  
##  NA's   :2         NA's   :2          NA's   :2         NA's   :2        
##       PR44              PR45              PR51               PR52         
##  Min.   :-34.070   Min.   :-41.460   Min.   :-29.2800   Min.   :-24.8700  
##  1st Qu.: -1.985   1st Qu.: -2.250   1st Qu.: -1.8150   1st Qu.: -1.7000  
##  Median :  1.490   Median :  1.450   Median :  1.0800   Median :  1.0400  
##  Mean   :  1.226   Mean   :  1.313   Mean   :  0.8951   Mean   :  0.9126  
##  3rd Qu.:  4.180   3rd Qu.:  4.700   3rd Qu.:  3.8900   3rd Qu.:  3.8400  
##  Max.   : 68.890   Max.   : 86.600   Max.   : 32.5600   Max.   : 39.3500  
##  NA's   :2         NA's   :2         NA's   :2          NA's   :2         
##       PR53               PR54               PR55        
##  Min.   :-31.7800   Min.   :-36.7100   Min.   :-45.560  
##  1st Qu.: -1.6850   1st Qu.: -1.8750   1st Qu.: -2.495  
##  Median :  1.1400   Median :  1.1000   Median :  1.380  
##  Mean   :  0.9503   Mean   :  0.9608   Mean   :  1.198  
##  3rd Qu.:  3.7000   3rd Qu.:  3.9650   3rd Qu.:  4.605  
##  Max.   : 50.2600   Max.   : 60.7300   Max.   : 98.040  
##  NA's   :2          NA's   :2          NA's   :2
regdata <- subset(dataff, select = c(X.1, PR15, exret_mkt, RF), X.1 >= 200001 &  X.1 <= 201406)
names(regdata)[1] <- "date"

regdata$er_15 <- regdata$PR15 - regdata$RF # constructing the excess return.

cer_mkt <- lm(exret_mkt ~ 1, data = regdata)
capm_15 <- lm(er_15 ~ exret_mkt, data = regdata)
summary(cer_mkt)
## 
## Call:
## lm(formula = exret_mkt ~ 1, data = regdata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.5445  -2.4170   0.8155   2.9230  11.0355 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   0.3145     0.3506   0.897    0.371
## 
## Residual standard error: 4.624 on 173 degrees of freedom
summary(capm_15)
## 
## Call:
## lm(formula = er_15 ~ exret_mkt, data = regdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.346  -2.291  -0.187   1.844  10.565 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.85044    0.28826    2.95  0.00362 ** 
## exret_mkt    1.14718    0.06237   18.39  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.794 on 172 degrees of freedom
## Multiple R-squared:  0.663,  Adjusted R-squared:  0.661 
## F-statistic: 338.3 on 1 and 172 DF,  p-value: < 2.2e-16

VaR: MC and bootstrapping

Now we need to simulate what would happen in July 2014 under the assumption of the correctness of our model. The model is: \[ \begin{align} r_{er\_mkt, t} =& \mu + \varepsilon_{mkt} \; , \; & \varepsilon_{mkt} \sim n.i.d. (0, \sigma^{2}_{mkt}) \\ r_{er\_pr, t} =& \alpha + \beta r_{er\_mkt} + \varepsilon_{pr} \; , \; & \varepsilon_{pr} \sim n.i.d. (0, \sigma^{2}_{pr}) \\ & cov(\varepsilon_{mkt}, \varepsilon_{pr}) = 0 \end{align}\]

Monte-Carlo:

For the Monte-Carlo simulation we use an additional assumption that \[ \begin{align} \varepsilon_{mkt} \sim \mathcal{N} (0, \sigma^{2}_{mkt}) \\ \varepsilon_{pr} \sim \mathcal{N} (0, \sigma^{2}_{pr}) \end{align}\] Moreover, given that we do not know the “true” values of the parameters, we use the estimates obtained in the previous step.

# to be clear, we first express the estimates of our parameters 
set.seed(77) # for replicability

## market simulation parameters
mu_mc <- cer_mkt$coefficients[1]
sigma_mkt_mc <- summary(cer_mkt)$sigma

## portfolio simulation parameters
alpha_mc <- capm_15$coefficients[1]
beta_mc <- capm_15$coefficients[2]
sigma_pr_mc <- summary(capm_15)$sigma

# now we create the containers (empty arrays) to store the results of MC simulation
nrep <- 10^3
sim_mc <- array(0, c(nrep, 2))
colnames(sim_mc) <- c("mkt_mc", "pr_mc") # specifying the column names 

for (i in 1:nrep){
  # the market equation
  sim_mc[i, 1] <- mu_mc + sigma_mkt_mc * rnorm(1, mean = 0, sd = 1)
  
  # the portfolio equation
  sim_mc[i, 2] <- alpha_mc + beta_mc * sim_mc[i, 1] + sigma_pr_mc * rnorm(1, mean = 0, sd = 1)
  
}

# now we present our results

hist(sim_mc[, 2], main = "histogram of MC simulated returns", xlab = "return * 100")

Bootstrap

With bootstrap, we remove the assumption of error normality. What we sacrifice is the ability to sample directly from the fistribution of errors. Instead, we get a sample from the empirical distribution of the errors.

set.seed(7)

## market simulation parameters
mu_bt <- cer_mkt$coefficients[1]
errors_mkt_bt <- cer_mkt$residuals

## portfolio simulation parameters
alpha_bt <- capm_15$coefficients[1]
beta_bt <- capm_15$coefficients[2]
errors_pr_bt <- capm_15$residuals

# now we create the containers (empty arrays) to store the results of bt simulation
nrep <- 10^3
sim_bt <- array(0, c(nrep, 2))
colnames(sim_bt) <- c("mkt_bt", "pr_bt") # specifying the column names 

for (i in 1:nrep){
  # the market equation
  sim_bt[i, 1] <- mu_bt + sample(errors_mkt_bt, size = 1)
  
  # the portfolio equation
  sim_bt[i, 2] <- alpha_bt + beta_bt * sim_bt[i, 1] + sample(errors_pr_bt, size = 1)
  
}

# now we present our results

hist(sim_bt[, 2], main = "histogram of bt simulated returns", xlab = "return * 100")

CAPM: manual approach

The system to be estimated is the following \[ y_{t} = \alpha + \beta x_{t} + \varepsilon_{t} \; \; \; , \; \varepsilon_{t} \sim i.i.d. (0, 1) \] 1. Estimation

# first, we create the data
y <- matrix(c(1, 0), nrow = 2, ncol = 1)
regressors <- matrix(c(0, -1), nrow = 2, ncol = 1)
x <- cbind(c(1, 1), regressors) # need to add a column of ones, since the intercept is included
y; x
##      [,1]
## [1,]    1
## [2,]    0
##      [,1] [,2]
## [1,]    1    0
## [2,]    1   -1
# OLS estimation
my_coeffs <- solve(t(x) %*% x) %*% t(x) %*% y

rownames(my_coeffs) <- c("alpha", "beta")
colnames(my_coeffs) <- "coefficients"

my_coeffs
##       coefficients
## alpha            1
## beta             1
# or, we can use the standars regression functions
summary(lm(y ~ -1 + x))
## 
## Call:
## lm(formula = y ~ -1 + x)
## 
## Residuals:
## ALL 2 residuals are 0: no residual degrees of freedom!
## 
## Coefficients:
##    Estimate Std. Error t value Pr(>|t|)
## x1        1         NA      NA       NA
## x2        1         NA      NA       NA
## 
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:    NaN 
## F-statistic:   NaN on 2 and 0 DF,  p-value: NA
  1. \(R^{2}\) calculation
# the manual way
my_resids <- y - x %*% my_coeffs

my_r2 <- 1 - var(my_resids)/var(y)

my_r2
##              coefficients
## coefficients            1
# the R way
summary(lm(y ~ -1 + x))$r.squared # can also be seen from the previous point
## [1] 1