First, load the necessary data.
##############################################################
dataff <- read.table("data/FF_Data_2016_11.csv", sep = ";", header = T, stringsAsFactors = F)
summary(dataff)
## X X.1 exret_mkt SMB
## Length:1085 Min. :192607 Min. :-29.1300 Min. :-16.7000
## Class :character 1st Qu.:194902 1st Qu.: -2.0100 1st Qu.: -1.5550
## Mode :character Median :197108 Median : 0.9900 Median : 0.0700
## Mean :197119 Mean : 0.6499 Mean : 0.2168
## 3rd Qu.:199403 3rd Qu.: 3.6500 3rd Qu.: 1.7750
## Max. :201609 Max. : 38.8500 Max. : 36.7000
## NA's :2 NA's :2 NA's :2
## HML RF RMW CMA
## Min. :-13.2800 Min. :-0.0600 Min. :-17.5700 Min. :-6.8100
## 1st Qu.: -1.2950 1st Qu.: 0.0300 1st Qu.: -0.8850 1st Qu.:-0.9400
## Median : 0.2100 Median : 0.2400 Median : 0.1700 Median : 0.1700
## Mean : 0.3828 Mean : 0.2785 Mean : 0.2466 Mean : 0.3053
## 3rd Qu.: 1.7350 3rd Qu.: 0.4300 3rd Qu.: 1.2900 3rd Qu.: 1.5350
## Max. : 35.4600 Max. : 1.3500 Max. : 12.1900 Max. : 9.5100
## NA's :2 NA's :2 NA's :446 NA's :446
## MOM PR11 PR12
## Min. :-52.2600 Min. :-49.4700 Min. :-35.2400
## 1st Qu.: -0.8500 1st Qu.: -4.7150 1st Qu.: -3.3500
## Median : 0.8500 Median : 0.7300 Median : 0.8400
## Mean : 0.6667 Mean : 0.8465 Mean : 0.9757
## 3rd Qu.: 2.9300 3rd Qu.: 5.7150 3rd Qu.: 5.2800
## Max. : 18.3800 Max. :147.5000 Max. :126.0500
## NA's :8 NA's :2 NA's :2
## PR13 PR14 PR15 PR21
## Min. :-33.930 Min. :-35.690 Min. :-33.150 Min. :-32.7100
## 1st Qu.: -2.605 1st Qu.: -1.900 1st Qu.: -2.160 1st Qu.: -3.5100
## Median : 1.240 Median : 1.470 Median : 1.460 Median : 1.2700
## Mean : 1.289 Mean : 1.449 Mean : 1.638 Mean : 0.9049
## 3rd Qu.: 4.835 3rd Qu.: 4.500 3rd Qu.: 5.110 3rd Qu.: 5.0550
## Max. : 82.880 Max. :103.200 Max. :100.500 Max. : 76.5000
## NA's :2 NA's :2 NA's :2 NA's :2
## PR22 PR23 PR24 PR25
## Min. :-31.690 Min. :-35.260 Min. :-30.450 Min. :-37.390
## 1st Qu.: -2.460 1st Qu.: -2.020 1st Qu.: -2.085 1st Qu.: -2.010
## Median : 1.530 Median : 1.460 Median : 1.430 Median : 1.700
## Mean : 1.189 Mean : 1.287 Mean : 1.349 Mean : 1.514
## 3rd Qu.: 4.965 3rd Qu.: 4.535 3rd Qu.: 4.705 3rd Qu.: 5.190
## Max. : 78.120 Max. : 75.510 Max. : 80.320 Max. : 89.970
## NA's :2 NA's :2 NA's :2 NA's :2
## PR31 PR32 PR33 PR34
## Min. :-29.7900 Min. :-30.500 Min. :-31.350 Min. :-33.290
## 1st Qu.: -2.9300 1st Qu.: -1.945 1st Qu.: -1.920 1st Qu.: -1.845
## Median : 1.3500 Median : 1.450 Median : 1.460 Median : 1.540
## Mean : 0.9834 Mean : 1.170 Mean : 1.227 Mean : 1.281
## 3rd Qu.: 4.8300 3rd Qu.: 4.615 3rd Qu.: 4.480 3rd Qu.: 4.470
## Max. : 56.9700 Max. : 39.960 Max. : 60.890 Max. : 69.410
## NA's :2 NA's :2 NA's :2 NA's :2
## PR35 PR41 PR42 PR43
## Min. :-35.860 Min. :-30.9900 Min. :-28.830 Min. :-29.730
## 1st Qu.: -2.035 1st Qu.: -2.3450 1st Qu.: -1.885 1st Qu.: -2.045
## Median : 1.390 Median : 1.1800 Median : 1.370 Median : 1.520
## Mean : 1.426 Mean : 0.9955 Mean : 1.021 Mean : 1.146
## 3rd Qu.: 4.940 3rd Qu.: 4.5850 3rd Qu.: 4.120 3rd Qu.: 4.330
## Max. : 77.240 Max. : 35.2400 Max. : 53.580 Max. : 65.880
## NA's :2 NA's :2 NA's :2 NA's :2
## PR44 PR45 PR51 PR52
## Min. :-34.070 Min. :-41.460 Min. :-29.2800 Min. :-24.8700
## 1st Qu.: -1.985 1st Qu.: -2.250 1st Qu.: -1.8150 1st Qu.: -1.7000
## Median : 1.490 Median : 1.450 Median : 1.0800 Median : 1.0400
## Mean : 1.226 Mean : 1.313 Mean : 0.8951 Mean : 0.9126
## 3rd Qu.: 4.180 3rd Qu.: 4.700 3rd Qu.: 3.8900 3rd Qu.: 3.8400
## Max. : 68.890 Max. : 86.600 Max. : 32.5600 Max. : 39.3500
## NA's :2 NA's :2 NA's :2 NA's :2
## PR53 PR54 PR55
## Min. :-31.7800 Min. :-36.7100 Min. :-45.560
## 1st Qu.: -1.6850 1st Qu.: -1.8750 1st Qu.: -2.495
## Median : 1.1400 Median : 1.1000 Median : 1.380
## Mean : 0.9503 Mean : 0.9608 Mean : 1.198
## 3rd Qu.: 3.7000 3rd Qu.: 3.9650 3rd Qu.: 4.605
## Max. : 50.2600 Max. : 60.7300 Max. : 98.040
## NA's :2 NA's :2 NA's :2
regdata <- subset(dataff, select = c(X.1, PR15, exret_mkt, RF), X.1 >= 200001 & X.1 <= 201406)
names(regdata)[1] <- "date"
regdata$er_15 <- regdata$PR15 - regdata$RF # constructing the excess return.
cer_mkt <- lm(exret_mkt ~ 1, data = regdata)
capm_15 <- lm(er_15 ~ exret_mkt, data = regdata)
summary(cer_mkt)
##
## Call:
## lm(formula = exret_mkt ~ 1, data = regdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.5445 -2.4170 0.8155 2.9230 11.0355
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3145 0.3506 0.897 0.371
##
## Residual standard error: 4.624 on 173 degrees of freedom
summary(capm_15)
##
## Call:
## lm(formula = er_15 ~ exret_mkt, data = regdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.346 -2.291 -0.187 1.844 10.565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.85044 0.28826 2.95 0.00362 **
## exret_mkt 1.14718 0.06237 18.39 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.794 on 172 degrees of freedom
## Multiple R-squared: 0.663, Adjusted R-squared: 0.661
## F-statistic: 338.3 on 1 and 172 DF, p-value: < 2.2e-16
Now we need to simulate what would happen in July 2014 under the assumption of the correctness of our model. The model is: \[ \begin{align} r_{er\_mkt, t} =& \mu + \varepsilon_{mkt} \; , \; & \varepsilon_{mkt} \sim n.i.d. (0, \sigma^{2}_{mkt}) \\ r_{er\_pr, t} =& \alpha + \beta r_{er\_mkt} + \varepsilon_{pr} \; , \; & \varepsilon_{pr} \sim n.i.d. (0, \sigma^{2}_{pr}) \\ & cov(\varepsilon_{mkt}, \varepsilon_{pr}) = 0 \end{align}\]
For the Monte-Carlo simulation we use an additional assumption that \[ \begin{align} \varepsilon_{mkt} \sim \mathcal{N} (0, \sigma^{2}_{mkt}) \\ \varepsilon_{pr} \sim \mathcal{N} (0, \sigma^{2}_{pr}) \end{align}\] Moreover, given that we do not know the “true” values of the parameters, we use the estimates obtained in the previous step.
# to be clear, we first express the estimates of our parameters
set.seed(77) # for replicability
## market simulation parameters
mu_mc <- cer_mkt$coefficients[1]
sigma_mkt_mc <- summary(cer_mkt)$sigma
## portfolio simulation parameters
alpha_mc <- capm_15$coefficients[1]
beta_mc <- capm_15$coefficients[2]
sigma_pr_mc <- summary(capm_15)$sigma
# now we create the containers (empty arrays) to store the results of MC simulation
nrep <- 10^3
sim_mc <- array(0, c(nrep, 2))
colnames(sim_mc) <- c("mkt_mc", "pr_mc") # specifying the column names
for (i in 1:nrep){
# the market equation
sim_mc[i, 1] <- mu_mc + sigma_mkt_mc * rnorm(1, mean = 0, sd = 1)
# the portfolio equation
sim_mc[i, 2] <- alpha_mc + beta_mc * sim_mc[i, 1] + sigma_pr_mc * rnorm(1, mean = 0, sd = 1)
}
# now we present our results
hist(sim_mc[, 2], main = "histogram of MC simulated returns", xlab = "return * 100")
With bootstrap, we remove the assumption of error normality. What we sacrifice is the ability to sample directly from the fistribution of errors. Instead, we get a sample from the empirical distribution of the errors.
set.seed(7)
## market simulation parameters
mu_bt <- cer_mkt$coefficients[1]
errors_mkt_bt <- cer_mkt$residuals
## portfolio simulation parameters
alpha_bt <- capm_15$coefficients[1]
beta_bt <- capm_15$coefficients[2]
errors_pr_bt <- capm_15$residuals
# now we create the containers (empty arrays) to store the results of bt simulation
nrep <- 10^3
sim_bt <- array(0, c(nrep, 2))
colnames(sim_bt) <- c("mkt_bt", "pr_bt") # specifying the column names
for (i in 1:nrep){
# the market equation
sim_bt[i, 1] <- mu_bt + sample(errors_mkt_bt, size = 1)
# the portfolio equation
sim_bt[i, 2] <- alpha_bt + beta_bt * sim_bt[i, 1] + sample(errors_pr_bt, size = 1)
}
# now we present our results
hist(sim_bt[, 2], main = "histogram of bt simulated returns", xlab = "return * 100")
The system to be estimated is the following \[ y_{t} = \alpha + \beta x_{t} + \varepsilon_{t} \; \; \; , \; \varepsilon_{t} \sim i.i.d. (0, 1) \] 1. Estimation
# first, we create the data
y <- matrix(c(1, 0), nrow = 2, ncol = 1)
regressors <- matrix(c(0, -1), nrow = 2, ncol = 1)
x <- cbind(c(1, 1), regressors) # need to add a column of ones, since the intercept is included
y; x
## [,1]
## [1,] 1
## [2,] 0
## [,1] [,2]
## [1,] 1 0
## [2,] 1 -1
# OLS estimation
my_coeffs <- solve(t(x) %*% x) %*% t(x) %*% y
rownames(my_coeffs) <- c("alpha", "beta")
colnames(my_coeffs) <- "coefficients"
my_coeffs
## coefficients
## alpha 1
## beta 1
# or, we can use the standars regression functions
summary(lm(y ~ -1 + x))
##
## Call:
## lm(formula = y ~ -1 + x)
##
## Residuals:
## ALL 2 residuals are 0: no residual degrees of freedom!
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x1 1 NA NA NA
## x2 1 NA NA NA
##
## Residual standard error: NaN on 0 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: NaN
## F-statistic: NaN on 2 and 0 DF, p-value: NA
# the manual way
my_resids <- y - x %*% my_coeffs
my_r2 <- 1 - var(my_resids)/var(y)
my_r2
## coefficients
## coefficients 1
# the R way
summary(lm(y ~ -1 + x))$r.squared # can also be seen from the previous point
## [1] 1