# Use the folder of this script as the working directory when it is run from
# RStudio. Outside RStudio (Rscript, R CMD BATCH) or for an unsaved document
# there is no document path, so the working directory is left unchanged
# instead of stopping with an error.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
# Clear the global environment so the analysis starts from a clean state.
# NOTE(review): this removes every object in the user's session; running the
# script in a fresh R session is the safer way to get the same effect.
rm(list = ls())

# Load the packages used below, installing any that are missing.
listofpackages <- c("ellipse", "reshape2", "ggplot2", "dygraphs", "dplyr", "forecast", "aod")

for (j in listofpackages) {
  # requireNamespace() checks for a single package directly; scanning the
  # full installed.packages() table on every iteration is slow and is not
  # meant for this kind of check.
  if (!requireNamespace(j, quietly = TRUE)) {
    install.packages(j)
  }
  library(j, character.only = TRUE)
}

# Read the semicolon-separated data file (first row holds the column names);
# text columns are kept as character vectors rather than factors.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
NBAdata <- read.csv(
  "C:/Users/favero/Dropbox/exam/SPORTMAN/R/data_L_2020/Teams_overall2020.csv",
  header = TRUE, stringsAsFactors = FALSE, sep = ";"
)
head(NBAdata)
typeof(NBAdata) # storage type of the loaded object

# DATA TRANSFORMATION

# Factors built from the paired X*_OF / X*_DEF columns. F1 and F4 are plain
# differences; F2, F3 and F3_W are additionally scaled by 0.01. F3 adds the
# two components, whereas F3_W is the differenced variant of the same pair.
NBAdata$F1   <- with(NBAdata, X1_OF - X1_DEF)
NBAdata$F2   <- with(NBAdata, 0.01 * (X2_OF - X2_DEF))
NBAdata$F3   <- with(NBAdata, 0.01 * (X3_OF + X3_DEF))
NBAdata$F3_W <- with(NBAdata, 0.01 * (X3_OF - X3_DEF))
NBAdata$F4   <- with(NBAdata, X4_OF - X4_DEF)

# Additional transformation.
# EFF32 / OEFF32: 3 * (X3P. or O3P.) relative to 2 * (X2P. or O2P.).
# RAT32 / ORAT32: X3PA relative to X2PA, and O3PA relative to O2PA.
# NOTE(review): the O-prefixed columns are presumably the opponents'
# counterparts of the X-prefixed ones - confirm against the data dictionary.
NBAdata$EFF32  <- 3 * NBAdata$X3P. / (2 * NBAdata$X2P.)
NBAdata$RAT32  <- NBAdata$X3PA / NBAdata$X2PA
NBAdata$OEFF32 <- 3 * NBAdata$O3P. / (2 * NBAdata$O2P.)
NBAdata$ORAT32 <- NBAdata$O3PA / NBAdata$O2PA

# Fifth factor: own (EFF32 - RAT32) minus the O-prefixed (OEFF32 - ORAT32).
NBAdata$X5_OF  <- with(NBAdata, EFF32 - RAT32)
NBAdata$X5_DEF <- with(NBAdata, OEFF32 - ORAT32)
NBAdata$F5     <- with(NBAdata, X5_OF - X5_DEF)


# Sample of interest: rows with Season == 2007 (the 2006/2007 season),
# restricted to the columns used in the rest of the analysis.
NBA_4F <- subset(
  NBAdata,
  subset = Season == 2007,
  select = c(
    Team, W,
    F1, F2, F3, F3_W, F4, F5,
    X1_OF, X2_OF, X3_OF, X4_OF, X5_OF,
    X1_DEF, X2_DEF, X3_DEF, X4_DEF, X5_DEF,
    EFF32, RAT32, AST, OAST
  )
)
# Label the rows with team acronyms. The 30 labels are assigned by position,
# so the subset must contain exactly 30 rows.
# NOTE(review): assumes the rows arrive in this team order - confirm.
row.names(NBA_4F) <- c(
  "ATL", "BOS", "CHA", "CHI", "CLE", "DAL", "DEN", "DET", "GSW", "HOU",
  "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NJN", "NO_OK", "NYK",
  "OMA", "PHI", "PHS", "PTB", "SAK", "SAS", "SSU", "TOR", "UTJ", "WAW"
)

## ------------------------------------------------------------------------
#DESCRIPTIVE STATISTICS
## ------------------------------------------------------------------------

# Correlation between the offensive and defensive components, computed on
# the rows that have no missing values.
datacor00 <- na.omit(
  subset(
    NBA_4F,
    select = c(
      X1_OF, X2_OF, X3_OF, X4_OF, X5_OF,
      X1_DEF, X2_DEF, X3_DEF, X4_DEF, X5_DEF
    )
  )
)
summary(datacor00)
cor.datacor <- cor(datacor00, use = "complete.obs")
cor.datacor

# Ellipse plot of the correlations (package ellipse). Variables are ordered
# by their correlation with the first one, and each cell is coloured by
# mapping the correlation range [-1, 1] onto the 11-colour cm palette.
ord <- order(cor.datacor[1, ])
ordered.cor.datacor <- cor.datacor[ord, ord]
plotcorr(
  ordered.cor.datacor,
  col = cm.colors(11)[5 * ordered.cor.datacor + 6]
)

# Correlation heatmap of the full matrix (packages reshape2 and ggplot2).
# Correlations are rounded to two decimals, then reshaped to long format:
# one row per (Var1, Var2) pair with its correlation in `value`.
cormat <- round(cor(datacor00), digits = 2)
head(cormat)
melted_cormat <- melt(cormat)
head(melted_cormat)
ggplot(melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile()
# Keep the lower triangle (diagonal included) of a correlation matrix:
# every entry strictly above the diagonal is replaced by NA.
get_lower_tri <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Keep the upper triangle (diagonal included) of a correlation matrix:
# every entry strictly below the diagonal is replaced by NA.
get_upper_tri <- function(cormat) {
  replace(cormat, lower.tri(cormat), NA)
}
# Heatmap of the upper triangle only, so each pair appears once.
upper_tri <- get_upper_tri(cormat)
upper_tri
# Melt the correlation matrix, dropping the NA cells below the diagonal
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Heatmap: diverging blue-white-red scale centred on 0 and fixed to [-1, 1].
# The scale argument is spelled `limits` in full rather than relying on
# partial matching of `limit`.
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(low = "blue", high = "red", mid = "white",
                       midpoint = 0, limits = c(-1, 1), space = "Lab",
                       name = "Pearson\nCorrelation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                   size = 12, hjust = 1)) +
  coord_fixed() # square tiles

# SORTING DATA
# Wins together with the first offensive and defensive component, then the
# same table ranked from most to fewest wins.
TABLE_1 <- NBA_4F[, c("W", "X1_OF", "X1_DEF"), drop = FALSE]
rank_TABLE_1 <- TABLE_1[order(TABLE_1$W, decreasing = TRUE), ]

# GRAPHICAL ANALYSIS

# Wins (blue) and X3_OF (red) by team, on a common y scale.
plot(NBA_4F$W, ylim = c(20, 82),
     xaxt = "n", xlab = "Team", ylab = "Wins", col = "blue", lwd = 2, type = "l")
lines(NBA_4F$X3_OF, col = "red", lwd = 2, type = "l")
# Draw the x axis separately so the ticks carry the team acronyms; the tick
# positions follow the number of rows instead of a hard-coded 1:30.
axis(1, at = seq_len(nrow(NBA_4F)), labels = rownames(NBA_4F))


# Wins as a share of the 82 games (blue) and F5 (red) by team. The x axis is
# titled "Team" because its ticks are team acronyms, not seasons.
plot(NBA_4F$W / 82, ylim = c(0, 1.2),
     xaxt = "n", xlab = "Team", ylab = "Wins", col = "blue", lwd = 2, type = "l")
lines(NBA_4F$F5, col = "red", lwd = 2, type = "l")
axis(1, at = seq_len(nrow(NBA_4F)), labels = rownames(NBA_4F))

# Horizontal bar chart of wins, one bar per team.
barplot(NBA_4F$W, horiz = TRUE, main = "WINS", col = "blue", cex.names = 0.75,
        las = 1, names.arg = rownames(NBA_4F))
# Scatter of X3_OF against wins. Columns are referenced by name inside aes();
# writing NBA_4F$W there bypasses the data argument and is discouraged by
# ggplot2.
ggplot(data = NBA_4F, aes(W, X3_OF)) + geom_point(colour = "red", size = 3)


## ------------------------------------------------------------------------
# REGRESSION:INTRO
## ------------------------------------------------------------------------
# Scatter of wins against F1, with the least-squares line (red) and a
# reference line with intercept 41 and slope 250 (green, "Wrong").
plot(x = NBA_4F$F1, y = NBA_4F$W, main = "WINS vs F1", ylab = "WINS", col = "blue")
# The model is specified with data = so that coefficients are named after the
# columns and predict() can be used on any data frame with the same columns.
lm1 <- lm(W ~ F1, data = NBA_4F)
summary(lm1)
Wrongfit1 <- 41 + 250 * NBA_4F$F1
# predict() on the full data frame returns one value per row (NA where a
# regressor is missing), so x and y always have the same length.
lines(x = NBA_4F$F1, y = predict(lm1, newdata = NBA_4F), col = "red", type = "l", lwd = 2)
lines(x = NBA_4F$F1, y = Wrongfit1, col = "green", type = "l", lwd = 2)
# Legend mirrors what is drawn: open points for the data, solid lines of
# width 2 for the two fits.
legend("bottomright", legend = c("Actual", "Fitted", "Wrong"),
       col = c("blue", "red", "green"),
       pch = c(1, NA, NA), lty = c(NA, 1, 1), lwd = c(NA, 2, 2))

# Team acronyms as a regular column so ggplot can use them as labels.
NBA_4F$team_acr <- rownames(NBA_4F)

p <- ggplot(NBA_4F, aes(F1, W)) +
  geom_point(colour = "green", size = 3) +
  geom_text(aes(label = team_acr), hjust = 0, vjust = 0, colour = "blue")
# Add regression line
p + geom_smooth(method = lm, colour = "red")

## ------------------------------------------------------------------------
# REGRESSION:MULTIVARIATE
## ------------------------------------------------------------------------

# Four-factor model (Mathletics, p. 193)
datacor4F <- subset(NBA_4F, select = c(W, F1, F2, F3, F4, F5))
datacor4F <- na.omit(datacor4F)
summary(datacor4F)
cor.datacor <- cor(datacor4F, use = "complete.obs")
cor.datacor
# Models are specified with data = so that coefficients are named after the
# columns and predict() can be used on the data frame.
reg_4F <- lm(W ~ F1 + F2 + F3 + F4, data = NBA_4F)
summary(reg_4F)

# Same regression on demeaned factors: the slopes are unchanged and the
# intercept becomes the average number of wins in the sample.
NBA_4F$F1DM <- NBA_4F$F1 - mean(NBA_4F$F1)
NBA_4F$F2DM <- NBA_4F$F2 - mean(NBA_4F$F2)
NBA_4F$F3DM <- NBA_4F$F3 - mean(NBA_4F$F3)
NBA_4F$F4DM <- NBA_4F$F4 - mean(NBA_4F$F4)

reg_4FDM <- lm(W ~ F1DM + F2DM + F3DM + F4DM, data = NBA_4F)
summary(reg_4FDM)

# Actual wins (dots), fitted values of the four-factor model (red line) and
# of the demeaned model (stars), by team. predict() on the full data frame
# yields one value per row, keeping the fits aligned with the team positions.
plot(NBA_4F$W, pch = 20, ylim = c(20, 82),
     xaxt = "n", xlab = "Team", ylab = "Wins", col = "blue")
lines(predict(reg_4F, newdata = NBA_4F), col = "red", lwd = 2, type = "l")
points(predict(reg_4FDM, newdata = NBA_4F), col = "blue", pch = 8)
# Draw the x axis separately so the ticks carry the team acronyms
axis(1, at = seq_len(nrow(NBA_4F)), labels = rownames(NBA_4F))

## ------------------------------------------------------------------------
# HYPOTHESIS TESTING
## ------------------------------------------------------------------------

# Wald tests on the demeaned four-factor regression reg_4FDM.
# Each row of L is one linear restriction on the coefficient vector
# (intercept first, then the four regressors in formula order); H0 holds the
# hypothesised value of each restriction. The denominator degrees of freedom
# come from the fitted model instead of a hard-coded 25, so they stay correct
# if the number of usable observations changes.

# H0: the first and second slope coefficients are equal.
wald.test(vcov(reg_4FDM), coef(reg_4FDM), H0 = c(0), df = df.residual(reg_4FDM),
          L = rbind(c(0, 1, -1, 0, 0)), verbose = TRUE)

# Joint H0: the first and second slope coefficients are equal AND the
# intercept equals 41. The restrictions are written row by row; this is the
# same matrix as filling c(0, 1, 1, 0, -1, 0, 0, 0, 0, 0) column-wise into
# two rows.
wald.test(vcov(reg_4FDM), coef(reg_4FDM), H0 = c(0, 41), df = df.residual(reg_4FDM),
          L = rbind(c(0, 1, -1, 0, 0),
                    c(1, 0, 0, 0, 0)),
          verbose = TRUE)


## ------------------------------------------------------------------------
# ALTERNATIVE MODELS 
## ------------------------------------------------------------------------

# Five-factor model: the four factors plus F5.
# Models are specified with data = so that coefficients are named after the
# columns and predict() can be used on new data.
reg_5F_R <- lm(W ~ F1 + F2 + F3 + F4 + F5, data = NBA_4F)
summary(reg_5F_R)

# Four-factor model with the offensive and defensive components entered
# separately, so each gets its own coefficient.
reg_4F_UNR <- lm(
  W ~ X1_OF + X2_OF + X3_OF + X4_OF + X1_DEF + X2_DEF + X3_DEF + X4_DEF,
  data = NBA_4F
)
summary(reg_4F_UNR)


