# Im_getting_a_fucking_masters/Dropout NN.R
# MyPenisIsBig1998 8a846d5dc9 Neural Network
# 2024-06-16 02:54:00 +00:00
#
# 212 lines
# 6.8 KiB
# R

library(tidyverse)
library(caret)
library(neuralnet)
#' Min-max rescale a vector to the [0, 1] range.
#'
#' Missing values are ignored when locating the minimum and maximum and are
#' returned as missing in the result, instead of turning the whole vector into
#' `NA`. A vector without any spread (all observed values equal) is mapped to
#' 0 rather than `NaN`, which the 0 / 0 division would otherwise produce.
#'
#' @param x A numeric (or logical) vector.
#' @return A numeric vector the same length as `x`.
normalize <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0) {
    # Nothing to scale: empty input or all values missing.
    return(as.numeric(x))
  }
  lowest <- min(observed)
  span <- max(observed) - lowest
  if (span == 0) {
    # Constant column: every observed value equals `lowest`, so the shifted
    # vector is already 0 (and NA where `x` is NA).
    return(x - lowest)
  }
  (x - lowest) / span
}
# Read the cleaned dropout data set from disk.
dropout_clean <- read.csv(
  "C:/Users/Mark/Desktop/Grad School/PDAT630/dropout_clean.csv"
)

# Alternative preparation kept for reference (categorical columns as factors):
#factor_cols=c("Marital.status","Scholarship.holder","Tuition.fees.up.to.date","Gender","Displaced","Daytime.evening.attendance","Course","Previous.qualification","Mother.s.qualification","Father.s.qualification","Mother.s.occupation","Father.s.occupation","International","Target")
#dropout_clean[factor_cols] <- lapply(dropout_clean[factor_cols], factor)
#dropout_clean_nn=mutate(dropout_clean, Target = factor(ifelse(Target == "Dropout", 0, 1)))

# Recode the outcome as numeric: 0 for "Dropout", 1 for every other value.
dropout_clean_nn <- dropout_clean %>%
  mutate(Target = ifelse(Target == "Dropout", 0, 1))
str(dropout_clean_nn)

# Apply normalize() to every column, outcome included, and rebuild the
# data frame from the resulting list.
nor <- dropout_clean_nn %>%
  lapply(normalize) %>%
  as.data.frame()
## Normal 1 Layer Neural Network
# Fit a network with a single hidden layer of `hid` nodes on an 85/15
# train/test split and print the hold-out accuracy.
# Call set.seed() beforehand if a reproducible split is needed.

# Split by row index so the test set is exactly the rows that were not
# sampled. dplyr::setdiff() on data frames compares rows by value and drops
# duplicates, which can silently shrink the hold-out set.
n_obs <- nrow(nor)
in_train <- seq_len(n_obs) %in% sample.int(n_obs, size = round(0.85 * n_obs))
df.train <- nor[in_train, , drop = FALSE]
df.test <- nor[!in_train, , drop = FALSE]

hid <- 1
nn <- neuralnet(
  Target ~ .,
  data = df.train,
  stepmax = 1e7,
  hidden = hid,
  threshold = .1
)
plot(nn)

# `type` is not an argument of neuralnet's predict method, so it is omitted.
pred <- predict(nn, df.test)

# Pin both factors to the same two levels so a model that predicts only one
# class still produces a valid confusion matrix.
class_levels <- c(0, 1)
pred <- factor(ifelse(pred > .5, 1, 0), levels = class_levels)
actual <- factor(df.test$Target, levels = class_levels)

# confusionMatrix() expects predictions as `data` and truth as `reference`.
print(confusionMatrix(data = pred, reference = actual)$overall[1])
## Normal 2 Layer Neural Network
# Fit a network with two hidden layers of `hid2` nodes each on a fresh 85/15
# train/test split and print the hold-out accuracy.
# Call set.seed() beforehand if a reproducible split is needed.

# Split by row index so the test set is exactly the rows that were not
# sampled. dplyr::setdiff() on data frames compares rows by value and drops
# duplicates, which can silently shrink the hold-out set.
n_obs <- nrow(nor)
in_train <- seq_len(n_obs) %in% sample.int(n_obs, size = round(0.85 * n_obs))
df.train <- nor[in_train, , drop = FALSE]
df.test <- nor[!in_train, , drop = FALSE]

hid2 <- 4
nn2 <- neuralnet(
  Target ~ .,
  data = df.train,
  stepmax = 1e7,
  hidden = c(hid2, hid2),
  threshold = .1
)
plot(nn2)

# `type` is not an argument of neuralnet's predict method, so it is omitted.
pred2 <- predict(nn2, df.test)

# Pin both factors to the same two levels so a model that predicts only one
# class still produces a valid confusion matrix.
class_levels <- c(0, 1)
pred2 <- factor(ifelse(pred2 > .5, 1, 0), levels = class_levels)
actual2 <- factor(df.test$Target, levels = class_levels)

# confusionMatrix() expects predictions as `data` and truth as `reference`.
print(confusionMatrix(data = pred2, reference = actual2)$overall[1])
## Testing The Nodes 1st layer
# For every hidden-layer size from 0 to `max_nodes`, fit `max_runs` networks
# on independent 85/15 splits, record the hold-out accuracy of each run, then
# plot the mean and the standard deviation of accuracy per layer size.
# Call set.seed() beforehand if reproducible results are needed.
max_nodes <- 10
max_runs <- 10
node_counts <- 0:max_nodes
class_levels <- c(0, 1)
n_obs <- nrow(nor)
n_train <- round(0.85 * n_obs)

# One row per run; one "<i> nodes" column is appended per layer size.
accuracy_runs_1 <- data.frame(id = seq_len(max_runs))
total_time <- proc.time()

for (i in node_counts) {
  i_time <- proc.time()
  acc_in <- numeric(max_runs)  # preallocated instead of grown with c()
  for (j in seq_len(max_runs)) {
    # Index-based split: the test set is exactly the unsampled rows.
    # dplyr::setdiff() on data frames compares by value and drops duplicates.
    in_train <- seq_len(n_obs) %in% sample.int(n_obs, size = n_train)
    df.train <- nor[in_train, , drop = FALSE]
    df.test <- nor[!in_train, , drop = FALSE]

    # NOTE(review): threshold is .5 here while the other fits in this script
    # use .1 -- confirm this is intentional.
    nn <- neuralnet(
      Target ~ .,
      data = df.train,
      stepmax = 1e7,
      hidden = i,
      threshold = .5
    )

    # `type` is not an argument of neuralnet's predict method.
    pred <- predict(nn, df.test)
    # Fixed levels keep the confusion matrix valid when only one class is
    # predicted.
    pred <- factor(ifelse(pred > .5, 1, 0), levels = class_levels)
    actual <- factor(df.test$Target, levels = class_levels)
    acc_in[j] <- confusionMatrix(
      data = pred,
      reference = actual
    )$overall[["Accuracy"]]
    print(paste("node", i, "run", j))
  }
  # Elapsed wall-clock seconds for this layer size.
  print(paste(i, "took", proc.time()[["elapsed"]] - i_time[["elapsed"]]))
  accuracy_runs_1[paste(i, "nodes")] <- acc_in
}
print(paste(
  "whole thing took",
  proc.time()[["elapsed"]] - total_time[["elapsed"]]
))

# Per-layer-size summaries over the runs (the `id` column is excluded).
run_cols_1 <- accuracy_runs_1[names(accuracy_runs_1) != "id"]
scores_1 <- data.frame(mean = vapply(run_cols_1, mean, numeric(1)))
sd_1 <- data.frame(std = vapply(run_cols_1, sd, numeric(1)))

accuracy_1 <- data.frame(nodes = node_counts, mean = scores_1$mean)
best_mean_1 <- max(accuracy_1$mean)

ggplot(data = accuracy_1, aes(x = nodes, y = mean)) +
  geom_point() +
  geom_line() +
  labs(title = paste0(
    "Average Accuracy Over ", max_runs,
    " Runs For Different Number Of Nodes. 1 Layer\n"
  )) +
  xlab("Number Of Nodes") +
  ylab("Average Accuracy") +
  # `nodes` is numeric and starts at 0, so use a continuous scale with one
  # tick per layer size (the 0-node model included).
  scale_x_continuous(breaks = node_counts, minor_breaks = NULL) +
  geom_hline(yintercept = best_mean_1, color = "red") +
  annotate(
    "text",
    x = 3,
    y = best_mean_1 + .002,
    label = round(best_mean_1, 2),
    color = "red"
  )

std_1 <- data.frame(nodes = node_counts, std = sd_1$std)

ggplot(data = std_1, aes(x = nodes, y = std)) +
  geom_point() +
  geom_line() +
  labs(title = max_runs)
## Testing The Nodes 2nd layer
# For every size from 1 to `max_nodes`, fit `max_runs` networks with two
# hidden layers of that size on independent 85/15 splits, record the hold-out
# accuracy of each run, then plot the mean and the standard deviation of
# accuracy per layer size.
# Call set.seed() beforehand if reproducible results are needed.
max_nodes <- 10
max_runs <- 10
node_counts <- seq_len(max_nodes)
class_levels <- c(0, 1)
n_obs <- nrow(nor)
n_train <- round(0.85 * n_obs)

# One row per run; one "<i> nodes" column is appended per layer size.
accuracy_runs_2 <- data.frame(id = seq_len(max_runs))
total_time <- proc.time()

for (i in node_counts) {
  i_time <- proc.time()
  acc_in <- numeric(max_runs)  # preallocated instead of grown with c()
  for (j in seq_len(max_runs)) {
    # Index-based split: the test set is exactly the unsampled rows.
    # dplyr::setdiff() on data frames compares by value and drops duplicates.
    in_train <- seq_len(n_obs) %in% sample.int(n_obs, size = n_train)
    df.train <- nor[in_train, , drop = FALSE]
    df.test <- nor[!in_train, , drop = FALSE]

    nn <- neuralnet(
      Target ~ .,
      data = df.train,
      stepmax = 1e7,
      hidden = c(i, i),
      threshold = .1
    )

    # `type` is not an argument of neuralnet's predict method.
    pred <- predict(nn, df.test)
    # Fixed levels keep the confusion matrix valid when only one class is
    # predicted.
    pred <- factor(ifelse(pred > .5, 1, 0), levels = class_levels)
    actual <- factor(df.test$Target, levels = class_levels)
    acc_in[j] <- confusionMatrix(
      data = pred,
      reference = actual
    )$overall[["Accuracy"]]
    print(paste("node", i, "run", j))
  }
  # Elapsed wall-clock seconds for this layer size.
  print(paste(i, "took", proc.time()[["elapsed"]] - i_time[["elapsed"]]))
  accuracy_runs_2[paste(i, "nodes")] <- acc_in
}
print(paste(
  "whole thing took",
  proc.time()[["elapsed"]] - total_time[["elapsed"]]
))

# Per-layer-size summaries over the runs (the `id` column is excluded).
run_cols_2 <- accuracy_runs_2[names(accuracy_runs_2) != "id"]
scores_2 <- data.frame(mean = vapply(run_cols_2, mean, numeric(1)))
sd_2 <- data.frame(std = vapply(run_cols_2, sd, numeric(1)))

accuracy_2 <- data.frame(nodes = node_counts, mean = scores_2$mean)
best_mean_2 <- max(accuracy_2$mean)

ggplot(data = accuracy_2, aes(x = nodes, y = mean)) +
  geom_point() +
  geom_line() +
  labs(title = paste0(
    "Average Accuracy Over ", max_runs,
    " Runs For Different Number Of Nodes. 2 Layers\n"
  )) +
  xlab("Number Of Nodes") +
  ylab("Average Accuracy") +
  # `nodes` is numeric, so use a continuous scale with one tick per size.
  scale_x_continuous(breaks = node_counts, minor_breaks = NULL) +
  geom_hline(yintercept = best_mean_2, color = "red") +
  annotate(
    "text",
    x = 3,
    y = best_mean_2 + .002,
    label = round(best_mean_2, 2),
    color = "red"
  )

std_2 <- data.frame(nodes = node_counts, std = sd_2$std)

ggplot(data = std_2, aes(x = nodes, y = std)) +
  geom_point() +
  geom_line() +
  labs(title = max_runs)
# ## Testing The Nodes 3rd layer
# max_nodes=10
# max_runs=10
# accuracy_runs_3=data.frame(id=c(1:max_runs))
#
# total_time <- proc.time()
#
# for (i in 1:max_nodes){
# i_time=proc.time()
# acc_in=c()
# for (j in 1:max_runs){
# df.train <- sample_frac(nor, size = .85)
# df.test <- setdiff(nor, df.train)
# nn=neuralnet(Target~.,
# data = df.train,
# stepmax=1e7,
# hidden=c(i,i,i),
# threshold = .1)
# pred=predict(nn, df.test, type="response")
# pred=as.factor(ifelse(pred>.5, 1, 0))
# #print(confusionMatrix(as.factor(df.test$Survived), pred)$overall[1])
# acc_in=c(acc_in,confusionMatrix(as.factor(df.test$Target), pred)$overall[1])
# print(paste("node", i, "run", j))
# }
# print(paste(i, "took", proc.time()[3] - i_time)[3])
# accuracy_runs_3[paste(i, "nodes")] = acc_in
# }
# print(paste("whole thing took", proc.time()[3] - total_time)[3])
#
# scores_3=data.frame(sapply(subset(accuracy_runs_3, select = -c(id)), function(x) mean(x)))
# sd_3=data.frame(sapply(subset(accuracy_runs_3, select = -c(id)), function(x) sd(x)))
#
# accuracy_3=data.frame(nodes=c(1:max_nodes))
# accuracy_3["mean"]=scores_3
#
# ggplot(data=accuracy_3, aes(x=nodes, y=mean))+
# geom_point()+
# geom_line()+
# labs(title="Average Accuracy Over 10 Runs For Different Number Of Nodes. 3 Layers
# ")+
# xlab("Number Of Nodes")+
# ylab("Average Accuracy")+
# scale_x_discrete(limits=factor(c(1:max_nodes)))
#
# std_3=data.frame(nodes=c(1:max_nodes))
# std_3["std"]=sd_3
#
# ggplot(data=std_3, aes(x=nodes, y=std))+
# geom_point()+
# geom_line()+
# labs(title=max_runs)