
# Clear the global environment.
# NOTE: this removes only the visible objects; attached packages and any
# options() set earlier in the session are left untouched.
rm(list = ls())
## ------------------------------------------------------------------------
# Use the directory that contains this script as the working directory.
# Inside RStudio the path is taken from the active document; when the script
# is started with Rscript it is taken from the `--file=` command-line argument.
# If neither source yields a path (e.g. an unsaved document), the working
# directory is left unchanged instead of aborting the script.
# local() keeps the helper variables out of the freshly cleared environment.
local({
  script.path <- character(0)
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
    script.path <- rstudioapi::getActiveDocumentContext()$path
  } else {
    file.arg <- grep("^--file=", commandArgs(trailingOnly = FALSE),
                     value = TRUE)
    if (length(file.arg) > 0) {
      script.path <- sub("^--file=", "", file.arg[1])
    }
  }
  # An empty path would make setwd() fail, so only switch on a real path.
  if (length(script.path) == 1 && nzchar(script.path)) {
    setwd(dirname(script.path))
  }
})
## ------------------------------------------------------------------------

# load library
library(help="MASS")
library("MASS")


### Data preview
# Cars93 - Data from 93 Cars on Sale in the USA in 1993
data <- Cars93
# open the help page of the data set
help("Cars93")
# column names
colnames(data)
# first six rows
head(data, n = 6L)
# number of rows and columns
dim(data)
# class of every column, stored as a list named after the columns
classes.data <- lapply(X = data, FUN = class)


### Data subsetting
# keep only the identification and price columns
sel.vars <- c(
  "Manufacturer", "Model", "Type",
  "Min.Price", "Price", "Max.Price"
)
data.new <- data[sel.vars]
head(data.new)

# rows whose Manufacturer is "Ford"
data.Manufacturer <- subset(data, Manufacturer == "Ford")
head(data.Manufacturer)
dim(data.Manufacturer)

# rows whose Origin is "non-USA"
data.nonUSA <- subset(data, Origin == "non-USA")
head(data.nonUSA)
dim(data.nonUSA)

# rows with Horsepower above 200
data.200hp <- subset(data, Horsepower > 200)
head(data.200hp)
dim(data.200hp)

# rows that are both of Type "Sporty" and DriveTrain "4WD"
data.4WD <- subset(data, Type == "Sporty" & DriveTrain == "4WD")
head(data.4WD)
dim(data.4WD)


### Data grouping
# number of Model entries per Manufacturer
count.models <- aggregate(
  data["Model"],
  by = data["Manufacturer"],
  FUN = length
)

# mean of Min.Price, Max.Price and Price per Manufacturer
avg.price <- aggregate(
  data[c("Min.Price", "Max.Price", "Price")],
  by = data["Manufacturer"],
  FUN = mean
)

# number of Model entries per combination of Type and DriveTrain
count.models2 <- aggregate(
  data["Model"],
  by = data[c("Type", "DriveTrain")],
  FUN = length
)

