# Variable selection

# Forward selection
fullmodel <- lm(rate ~ . - obs, Highway1)
summary(fullmodel)
basemodel <- lm(rate ~ len, Highway1)
step(basemodel, scope = list(upper = fullmodel, lower = ~1),
     direction = "forward", trace = TRUE)

# Backward elimination
step(fullmodel, direction = "backward", trace = TRUE)

# Stepwise (both directions)
step(basemodel, scope = list(upper = fullmodel, lower = ~1),
     direction = "both", trace = TRUE)

# Partial F-test comparing two nested fitted models
# (mymodel and mymodel4 stand for lm fits from earlier work)
anova(mymodel, mymodel4)

# Shrinkage methods
library(glmnet)
dataF <- na.omit(Highway1)
# Drop the categorical variable (column 13); alternatively, create dummy
# variables and use a grouped lasso
dataF <- dataF[, -13]
dataF <- scale(dataF, center = TRUE, scale = TRUE)
View(dataF)
x <- dataF[, -c(1, 2)]
y <- dataF[, 2]

set.seed(999)
lambda1 <- 10^seq(2, -2, by = -0.1)
# alpha = 0: ridge; alpha = 1: lasso (glmnet's default);
# alpha in (0, 1): elastic net, penalty = (1 - alpha)/2 * ||beta||_2^2 + alpha * ||beta||_1
cv_ridge <- cv.glmnet(x, y, alpha = 0, lambda = lambda1)
str(cv_ridge)
optimal_lambda <- cv_ridge$lambda.min
# optimal_lambda <- cv_ridge$lambda.1se
# lambda.min minimizes the cross-validated out-of-sample loss.
# lambda.1se is the largest lambda within one standard error of lambda.min;
# it is often preferred because the larger penalty hedges against overfitting.
optimal_lambda
plot(cv_ridge)
plot(cv_ridge$glmnet.fit, xvar = "lambda", label = TRUE)
abline(v = log(cv_ridge$lambda.min))
# abline(v = log(cv_ridge$lambda.1se))

ridge_reg <- glmnet(x, y, family = "gaussian", alpha = 0, lambda = optimal_lambda)
ridge_coef <- coef(ridge_reg)
ridge_coef
pred_ridge <- predict(ridge_reg, newx = t(x[1, ]), s = optimal_lambda)
pred_ridge
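
# Lasso counterpart: a minimal sketch reusing x, y, and lambda1 from above.
# As noted in the comment on cv.glmnet, setting alpha = 1 fits the lasso,
# which, unlike ridge, can shrink coefficients exactly to zero and so also
# performs variable selection.
set.seed(999)
cv_lasso <- cv.glmnet(x, y, alpha = 1, lambda = lambda1)
plot(cv_lasso)
lasso_reg <- glmnet(x, y, family = "gaussian", alpha = 1,
                    lambda = cv_lasso$lambda.min)
coef(lasso_reg)  # zero entries are the dropped variables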
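
# Keeping the categorical predictor instead of dropping it: a sketch using
# model.matrix() to expand the factor into dummy columns for glmnet. The
# manual scale() step is not repeated because glmnet standardizes predictors
# internally by default. A grouped lasso (e.g., the gglasso or grpreg
# packages) would keep each factor's dummies together, which plain glmnet
# does not.
dataF2 <- na.omit(Highway1)
x2 <- model.matrix(rate ~ . - obs, data = dataF2)[, -1]  # drop intercept column
y2 <- dataF2$rate
set.seed(999)
cv_lasso2 <- cv.glmnet(x2, y2, alpha = 1)
coef(cv_lasso2, s = "lambda.min")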
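
# Quick check of the ridge fit on the training data: a sketch computing RMSE
# and R-squared from predictions on the (standardized) predictors.
pred_all <- predict(ridge_reg, newx = x, s = optimal_lambda)
sqrt(mean((y - pred_all)^2))                      # RMSE
1 - sum((y - pred_all)^2) / sum((y - mean(y))^2)  # R-squared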