# Make the folder containing this script the working directory.
# rstudioapi only works inside a running RStudio session, and the document
# path is "" for an unsaved file, so both cases are guarded; when they do not
# hold, the working directory is left untouched (alternative: setwd(path)).
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
# clear the environment (removes objects only; loaded packages and options
# are not reset). This also removes the temporary script_path created above.
rm(list = ls())

# load packages
listofpackages <- c("ellipse", "reshape2", "ggplot2", "dygraphs", "dplyr", "forecast", "aod")

# Scan the library once (installed.packages() is slow) and install everything
# that is missing in a single call, instead of re-scanning on every iteration.
missing_packages <- setdiff(listofpackages, installed.packages()[, "Package"])
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach every package; library() stops with an error if one is unavailable.
for (pkg in listofpackages) {
  library(pkg, character.only = TRUE)
}

# Read the semicolon-separated team data; text columns stay character.
# NOTE(review): the path is machine-specific and must exist on the host.
NBAdata <- read.csv(
  file = "C:/Users/favero/Dropbox/exam/SPORTMAN/R/data_L_2020/Teams_overall2020.csv",
  header = TRUE,
  sep = ";",
  stringsAsFactors = FALSE
)
# Quick look at the first rows.
head(NBAdata)
# Storage type of the object (a data frame reports "list").
typeof(NBAdata)

#DATA TRANSFORMATION
# Combine each offensive (X*_OF) column with its defensive (X*_DEF)
# counterpart. F1, F2 and F4 are offence-minus-defence gaps; for the third
# pair both the sum (F3) and the gap (F3_W) are stored.
NBAdata[["F1"]] <- with(NBAdata, X1_OF - X1_DEF)
NBAdata[["F2"]] <- with(NBAdata, X2_OF - X2_DEF)
NBAdata[["F3"]] <- with(NBAdata, X3_OF + X3_DEF)
NBAdata[["F3_W"]] <- with(NBAdata, X3_OF - X3_DEF)
NBAdata[["F4"]] <- with(NBAdata, X4_OF - X4_DEF)



#extract data for the samples of interest: Season == 2007 is the 2006/2007
#season; Season == 2006 is presumably 2005/2006 (confirm against the CSV).

# Columns kept in every season sample.
season_columns <- c("Team", "W", "F1", "F2", "F3", "F3_W", "F4",
                    "X1_OF", "X2_OF", "X3_OF", "X4_OF",
                    "X1_DEF", "X2_DEF", "X3_DEF", "X4_DEF",
                    "AST", "OAST")

# Short team labels used as row names (and later as plot axis labels).
# NOTE(review): assumes the rows of each season appear in exactly this order
# in the CSV - TODO confirm, otherwise labels are attached to the wrong teams.
team_codes <- c("ATL", "BOS", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW", "HOU",
                "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NJN", "NOH", "NYK",
                "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "SEA", "TOR", "UTA", "WAS")

# Return the rows of `data` for one `season`, restricted to `columns` and
# labelled with `labels`. Stops early, with a readable message, when the
# number of rows found does not match the number of labels.
extract_season <- function(data, season, columns = season_columns, labels = team_codes) {
  rows <- which(data$Season == season) # which() drops NA seasons, like subset()
  if (length(rows) != length(labels)) {
    stop("Season ", season, " has ", length(rows), " rows but ",
         length(labels), " team labels were supplied", call. = FALSE)
  }
  out <- data[rows, columns, drop = FALSE]
  rownames(out) <- labels
  out
}

NBA_4F_2007 <- extract_season(NBAdata, 2007)
NBA_4F_2006 <- extract_season(NBAdata, 2006)



#REGRESSION ANALYSIS

# Unrestricted model: wins on the four offensive and four defensive columns,
# fitted separately for each season sample. Passing the data frame through
# `data =` keeps coefficient names short (X1_OF instead of NBA_4F_2007$X1_OF)
# and lets predict() work with new data. Coefficient order is unchanged:
# (Intercept), X1_OF, X2_OF, X3_OF, X4_OF, X1_DEF, X2_DEF, X3_DEF, X4_DEF.
reg_4F_07UNR <- lm(W ~ X1_OF + X2_OF + X3_OF + X4_OF + X1_DEF + X2_DEF + X3_DEF + X4_DEF,
                   data = NBA_4F_2007)
summary(reg_4F_07UNR)

reg_4F_06UNR <- lm(W ~ X1_OF + X2_OF + X3_OF + X4_OF + X1_DEF + X2_DEF + X3_DEF + X4_DEF,
                   data = NBA_4F_2006)
summary(reg_4F_06UNR)

# Same model fitted over a list of samples in one call; element 1 is the 2006
# fit and element 2 the 2007 fit. Each data frame is the unnamed argument, so
# it is matched to lm()'s `data` parameter because `formula` is named.
reg.list <- lapply(list(NBA_4F_2006, NBA_4F_2007), lm,
                   formula = W ~ X1_OF + X2_OF + X3_OF + X4_OF + X1_DEF + X2_DEF + X3_DEF + X4_DEF)
summaries <- lapply(reg.list, summary)
summary(reg.list[[1]])

#HYPOTHESIS TESTING
# Wald tests of linear restrictions L %*% b = H0 on the unrestricted model.
# The objects `reg_4F_UNR` and `NBA_4F` are not created anywhere in this
# script, so the tests use the 2007 sample and its unrestricted fit.
# NOTE(review): 2007 is assumed to be the intended season - confirm.
#
# Coefficient order in reg_4F_07UNR:
#   (Intercept), X1_OF, X2_OF, X3_OF, X4_OF, X1_DEF, X2_DEF, X3_DEF, X4_DEF
# The residual degrees of freedom are read from the model (21 with 30 teams
# and 9 coefficients) instead of being typed in by hand.

# H0: b[X1_OF] + b[X1_DEF] = 0, the restriction implied by F1 = X1_OF - X1_DEF.
wald.test(Sigma = vcov(reg_4F_07UNR), b = coef(reg_4F_07UNR), H0 = c(0),
          df = df.residual(reg_4F_07UNR),
          L = matrix(c(0, 1, 0, 0, 0, 1, 0, 0, 0), nrow = 1),
          verbose = TRUE)

# Restricted model 1: X1_OF and X1_DEF replaced by their difference F1.
reg_4F_R1 <- lm(W ~ F1 + X2_OF + X3_OF + X4_OF + X2_DEF + X3_DEF + X4_DEF,
                data = NBA_4F_2007)
summary(reg_4F_R1)

# Joint H0: b[X1_OF] + b[X1_DEF] = 0  and  b[X3_OF] - b[X3_DEF] = 0
# (the second restriction is the one implied by F3 = X3_OF + X3_DEF).
# The matrix is written row by row, one restriction per row.
wald.test(Sigma = vcov(reg_4F_07UNR), b = coef(reg_4F_07UNR), H0 = c(0, 0),
          df = df.residual(reg_4F_07UNR),
          L = matrix(c(0, 1, 0, 0, 0, 1, 0,  0, 0,
                       0, 0, 0, 1, 0, 0, 0, -1, 0),
                     nrow = 2, byrow = TRUE),
          verbose = TRUE)

# Restricted model 2: additionally X3_OF and X3_DEF replaced by their sum F3.
reg_4F_R2 <- lm(W ~ F1 + X2_OF + F3 + X4_OF + X2_DEF + X4_DEF,
                data = NBA_4F_2007)
summary(reg_4F_R2)

#RESTRICTED MODELS
# Wins regressed on the four combined columns F1..F4, one fit per season.
# `data =` is used instead of NBA_4F_2007$... terms so coefficient names stay
# short and predict() can be used with new data.
reg_4F_07R <- lm(W ~ F1 + F2 + F3 + F4, data = NBA_4F_2007)
summary(reg_4F_07R)

reg_4F_06R <- lm(W ~ F1 + F2 + F3 + F4, data = NBA_4F_2006)
summary(reg_4F_06R)


# Draw actual wins (blue points) against the fitted values of `model`
# (red line), one x position per row of `season_data`, with the row names
# of `season_data` as tick labels.
#   season_data: data frame with a W column and team labels as row names.
#   model:       fitted lm object estimated on `season_data`.
plot_wins_vs_fitted <- function(season_data, model) {
  plot(season_data$W, pch = 20, ylim = c(20, 82),
       xaxt = "n", xlab = "Team", ylab = "Wins", col = "blue")
  lines(fitted(model), col = "red", lwd = 2, type = "l")
  # Plot the axis separately; tick positions follow the number of rows
  # instead of a hard-coded 1:30.
  axis(1, at = seq_len(nrow(season_data)), labels = rownames(season_data))
  invisible(NULL)
}

plot_wins_vs_fitted(NBA_4F_2007, reg_4F_07R)
plot_wins_vs_fitted(NBA_4F_2006, reg_4F_06R)

#TIME SERIES MODELS FOR BOSTON CELTICS
# All available seasons for one team (rows where Team == "Boston Celtics").
NBA_4F_BC <- subset(NBAdata, Team == "Boston Celtics",
                    select = c(Team, Season, W, F1, F2, F3, F3_W, F4,
                               X1_OF, X2_OF, X3_OF, X4_OF,
                               X1_DEF, X2_DEF, X3_DEF, X4_DEF, AST, OAST))

# Unrestricted (eight separate columns) and restricted (F1..F4) models on the
# team's seasons; `data =` replaces the NBA_4F_BC$... terms in the formulas.
reg_4F_BCUNR <- lm(W ~ X1_OF + X2_OF + X3_OF + X4_OF + X1_DEF + X2_DEF + X3_DEF + X4_DEF,
                   data = NBA_4F_BC)
summary(reg_4F_BCUNR)
reg_4F_BCR <- lm(W ~ F1 + F2 + F3 + F4, data = NBA_4F_BC)
summary(reg_4F_BCR)

# Actual wins (blue points) and restricted-model fit (red line) by season.
# NOTE(review): the line is drawn in row order, so this presumes the rows are
# sorted by Season - confirm against the CSV.
plot(x = NBA_4F_BC$Season, y = NBA_4F_BC$W, pch = 20, ylim = c(20, 82),
     xlab = "Season", ylab = "BC Wins", col = "blue")
lines(x = NBA_4F_BC$Season, y = fitted(reg_4F_BCR), col = "red", lwd = 2, type = "l")

#PARTITIONED REGRESSION
# Step 1: purge F1 of the variation explained by F2, F3 and F4.
reg_Step1 <- lm(F1 ~ F2 + F3 + F4, data = NBA_4F_2007)
summary(reg_Step1)
# Step 2: regress wins on the Step-1 residuals. By the Frisch-Waugh-Lovell
# result the slope equals the F1 coefficient of W ~ F1 + F2 + F3 + F4 on the
# same sample; the reported standard errors are not the same, because W has
# not been purged of F2, F3 and F4.
reg_Step2 <- lm(W ~ resid(reg_Step1), data = NBA_4F_2007)
summary(reg_Step2)

#MIS-SPECIFICATION
# Augment the restricted 2007 model with the AST and OAST columns and test
# whether they add explanatory power.
# NOTE(review): this section was labelled "FOULS", but the regressors added
# are AST and OAST (presumably assists / opponent assists) - confirm.
reg_4F_R_MISS <- lm(W ~ F1 + F2 + F3 + F4 + AST + OAST, data = NBA_4F_2007)
summary(reg_4F_R_MISS)

# Joint Wald test of H0: b[AST] = 0 and b[OAST] = 0.
# Coefficient order: (Intercept), F1, F2, F3, F4, AST, OAST.
# The matrix is written row by row, one restriction per row; the residual
# degrees of freedom come from the model (23 with 30 teams, 7 coefficients).
wald.test(Sigma = vcov(reg_4F_R_MISS), b = coef(reg_4F_R_MISS), H0 = c(0, 0),
          df = df.residual(reg_4F_R_MISS),
          L = matrix(c(0, 0, 0, 0, 0, 1, 0,
                       0, 0, 0, 0, 0, 0, 1),
                     nrow = 2, byrow = TRUE),
          verbose = TRUE)




