# Dropout NN.R
#
# Neural-network experiments on the cleaned student-dropout data set:
# a single 1-layer fit, a single 2-layer fit, and two sweeps over the number
# of hidden nodes (1 and 2 hidden layers) that average test accuracy over
# repeated random train/test splits.
#
# Provenance: commit 8a846d5dc9a34a524fef53590bb6022c41c1b81b
# ("Neural Network", Sun, 16 Jun 2024 02:54:00 +0000). This file used to hold
# the raw `git format-patch` text (mail header and "+"-prefixed lines), which
# R cannot parse; it now holds the script itself.

library(tidyverse)
library(caret)
library(neuralnet)

# Helpers ----

# Min-max rescale a numeric vector to [0, 1].
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. NA entries are ignored
# when computing the range and stay NA in the result. A constant vector maps
# to 0 instead of the NaN that 0 / 0 would produce.
normalize <- function(x) {
  rng <- range(x, na.rm = TRUE)
  span <- rng[2] - rng[1]
  if (!is.finite(span) || span == 0) {
    return(ifelse(is.na(x), NA_real_, 0))
  }
  (x - rng[1]) / span
}

# Randomly split `data` into training and test rows.
#
# data:       data frame to split.
# train_frac: fraction of rows placed in the training set.
# Returns list(train = ..., test = ...). The split is done on row indices, so
# duplicated rows are neither collapsed nor dropped from the test set (which
# is what happens with a set difference on the rows themselves).
split_train_test <- function(data, train_frac = .85) {
  n <- nrow(data)
  train_rows <- sample.int(n, size = round(train_frac * n))
  # Logical mask instead of negative indexing: data[-integer(0), ] would
  # select nothing when the training set is empty.
  in_train <- seq_len(n) %in% train_rows
  list(
    train = data[in_train, , drop = FALSE],
    test = data[!in_train, , drop = FALSE]
  )
}

# Turn network outputs into 0/1 class labels using a 0.5 cut-off.
#
# model:        fitted neuralnet object.
# newdata:      data frame holding the covariates the model was trained on.
# class_levels: factor levels to force on the result, so a model that only
#               ever predicts one class still yields a two-level factor.
predict_class <- function(model, newdata, class_levels = c(0, 1)) {
  raw <- predict(model, newdata)
  factor(ifelse(raw > .5, 1, 0), levels = class_levels)
}

# Accuracy of `predicted` against the true labels `actual`.
#
# Returns the one-element named vector c(Accuracy = ...) from caret.
# confusionMatrix() expects predictions as `data` and the truth as
# `reference`; both get the same levels so caret does not reject them when a
# class is missing from either side.
accuracy_of <- function(predicted, actual, class_levels = c(0, 1)) {
  truth <- factor(actual, levels = class_levels)
  confusionMatrix(data = predicted, reference = truth)$overall["Accuracy"]
}

# Repeatedly fit networks for each hidden-node count and record test accuracy.
#
# data:        normalised data frame containing the `Target` column.
# node_counts: hidden-node counts to try (each is used for every layer).
# n_layers:    number of hidden layers.
# max_runs:    number of random train/test splits per node count.
# threshold:   stopping threshold passed to neuralnet().
# train_frac:  fraction of rows used for training in each split.
# Returns a data frame with an `id` column (run number) and one
# "<k> nodes" column of accuracies per node count. Progress and timings are
# printed as a side effect.
run_node_sweep <- function(data, node_counts, n_layers, max_runs, threshold,
                           train_frac = .85) {
  sweep_start <- proc.time()[["elapsed"]]
  accuracy_runs <- data.frame(id = seq_len(max_runs))

  for (nodes in node_counts) {
    nodes_start <- proc.time()[["elapsed"]]
    accuracy_runs[paste(nodes, "nodes")] <- vapply(
      seq_len(max_runs),
      function(run) {
        parts <- split_train_test(data, train_frac)
        model <- neuralnet(
          Target ~ .,
          data = parts$train,
          stepmax = 1e7,
          hidden = rep(nodes, n_layers),
          threshold = threshold
        )
        acc <- accuracy_of(predict_class(model, parts$test), parts$test$Target)
        print(paste("node", nodes, "run", run))
        acc[[1]]
      },
      numeric(1)
    )
    print(paste(nodes, "took", proc.time()[["elapsed"]] - nodes_start))
  }

  print(paste("whole thing took", proc.time()[["elapsed"]] - sweep_start))
  accuracy_runs
}

# Apply `stat` (e.g. mean, sd) to every per-node-count column of a sweep
# result, skipping the `id` column. Returns an unnamed numeric vector.
run_stat <- function(accuracy_runs, stat) {
  run_cols <- accuracy_runs[names(accuracy_runs) != "id"]
  unname(vapply(run_cols, stat, numeric(1)))
}

# Line plot of mean accuracy per node count, with the best mean marked in red.
#
# accuracy:    data frame with columns `nodes` and `mean`.
# layer_label: second title line, e.g. "1 Layer".
# max_runs:    number of runs averaged, shown in the title.
plot_mean_accuracy <- function(accuracy, layer_label, max_runs) {
  best <- max(accuracy$mean)
  ggplot(data = accuracy, aes(x = nodes, y = mean)) +
    geom_point() +
    geom_line() +
    labs(title = sprintf(
      "Average Accuracy Over %d Runs For Different Number Of Nodes.\n%s",
      max_runs, layer_label
    )) +
    xlab("Number Of Nodes") +
    ylab("Average Accuracy") +
    # `nodes` is numeric, so use a continuous scale with one tick per tested
    # node count (this also labels the 0-node network of the 1-layer sweep).
    scale_x_continuous(breaks = accuracy$nodes) +
    geom_hline(yintercept = best, color = "red") +
    annotate("text", x = 3, y = best + .002, label = round(best, 2),
             color = "red")
}

# Line plot of the accuracy standard deviation per node count.
#
# std_data: data frame with columns `nodes` and `std`.
# max_runs: number of runs, used as the plot title.
plot_accuracy_sd <- function(std_data, max_runs) {
  ggplot(data = std_data, aes(x = nodes, y = std)) +
    geom_point() +
    geom_line() +
    labs(title = max_runs)
}

# Data ----

# The CSV location can be overridden with the DROPOUT_CSV environment
# variable; the default is the original author's path.
data_path <- Sys.getenv(
  "DROPOUT_CSV",
  unset = "C:/Users/Mark/Desktop/Grad School/PDAT630/dropout_clean.csv"
)
dropout_clean <- read.csv(data_path)

#factor_cols=c("Marital.status","Scholarship.holder","Tuition.fees.up.to.date","Gender","Displaced","Daytime.evening.attendance","Course","Previous.qualification","Mother.s.qualification","Father.s.qualification","Mother.s.occupation","Father.s.occupation","International","Target")
#dropout_clean[factor_cols] <- lapply(dropout_clean[factor_cols], factor)
#dropout_clean_nn=mutate(dropout_clean, Target = factor(ifelse(Target == "Dropout", 0, 1)))

# Binary target: 0 = "Dropout", 1 = anything else.
dropout_clean_nn <- mutate(
  dropout_clean,
  Target = ifelse(Target == "Dropout", 0, 1)
)

str(dropout_clean_nn)

# Every column (including Target) is rescaled to [0, 1].
nor <- as.data.frame(lapply(dropout_clean_nn, normalize))

# Fixed seed so the random splits and initial weights are reproducible;
# change or remove it to get fresh draws.
set.seed(630)

# NOTE(review): neuralnet() is called with its default `linear.output`, so
# the output neuron is presumably linear rather than logistic; confirm this
# is intended for a 0/1 target.

## Normal 1 Layer Neural Network ----
parts <- split_train_test(nor)
df.train <- parts$train
df.test <- parts$test
hid <- 1

nn <- neuralnet(Target ~ ., data = df.train, stepmax = 1e7, hidden = hid,
                threshold = .1)
plot(nn)
pred <- predict_class(nn, df.test)
print(accuracy_of(pred, df.test$Target))


## Normal 2 Layer Neural Network ----
parts <- split_train_test(nor)
df.train <- parts$train
df.test <- parts$test
hid2 <- 4

nn2 <- neuralnet(Target ~ ., data = df.train, stepmax = 1e7,
                 hidden = c(hid2, hid2), threshold = .1)
plot(nn2)
pred2 <- predict_class(nn2, df.test)
print(accuracy_of(pred2, df.test$Target))


## Testing The Nodes 1st layer ----
max_nodes <- 10
max_runs <- 10
node_counts_1 <- 0:max_nodes  # 0 is included, as in the original sweep

# NOTE(review): this sweep stops at threshold .5 while every other fit in the
# script uses .1; kept as written, confirm it is deliberate.
accuracy_runs_1 <- run_node_sweep(nor, node_counts_1, n_layers = 1,
                                  max_runs = max_runs, threshold = .5)

scores_1 <- run_stat(accuracy_runs_1, mean)
sd_1 <- run_stat(accuracy_runs_1, sd)

accuracy_1 <- data.frame(nodes = node_counts_1, mean = scores_1)
plot_mean_accuracy(accuracy_1, "1 Layer", max_runs)

std_1 <- data.frame(nodes = node_counts_1, std = sd_1)
plot_accuracy_sd(std_1, max_runs)


## Testing The Nodes 2nd layer ----
max_nodes <- 10
max_runs <- 10
node_counts_2 <- 1:max_nodes

accuracy_runs_2 <- run_node_sweep(nor, node_counts_2, n_layers = 2,
                                  max_runs = max_runs, threshold = .1)

scores_2 <- run_stat(accuracy_runs_2, mean)
sd_2 <- run_stat(accuracy_runs_2, sd)

accuracy_2 <- data.frame(nodes = node_counts_2, mean = scores_2)
plot_mean_accuracy(accuracy_2, "2 Layers", max_runs)

std_2 <- data.frame(nodes = node_counts_2, std = sd_2)
plot_accuracy_sd(std_2, max_runs)


# ## Testing The Nodes 3rd layer (disabled in the original script) ----
# max_nodes <- 10
# max_runs <- 10
# node_counts_3 <- 1:max_nodes
#
# accuracy_runs_3 <- run_node_sweep(nor, node_counts_3, n_layers = 3,
#                                   max_runs = max_runs, threshold = .1)
#
# scores_3 <- run_stat(accuracy_runs_3, mean)
# sd_3 <- run_stat(accuracy_runs_3, sd)
#
# accuracy_3 <- data.frame(nodes = node_counts_3, mean = scores_3)
# plot_mean_accuracy(accuracy_3, "3 Layers", max_runs)
#
# std_3 <- data.frame(nodes = node_counts_3, std = sd_3)
# plot_accuracy_sd(std_3, max_runs)