# Dropout Data Cleaning
#
# Reads the raw student-dropout CSV, merges sparsely populated category codes
# into one catch-all code per column, keeps a fixed set of columns, and writes
# the result to dropout_clean.csv in the same directory.
#
# Provenance: added as "Dropout Data Cleaning.R" in commit 395788e
# (2024-06-16, "Upload files to "/"").

library(tidyverse)
library(caret)

# NOTE(review): absolute, machine-specific directory. Change `data_dir` when
# running the script anywhere else.
data_dir <- "C:/Users/Mark/Desktop/Grad School/PDAT630"
raw_path <- file.path(data_dir, "dropout.csv")
clean_path <- file.path(data_dir, "dropout_clean.csv")

# Replace every element of `x` that matches one of `rare_codes` with
# `other_code`.
#
# x           Vector of category codes (one column of the data frame).
# rare_codes  Codes that should be merged together.
# other_code  Code written in place of each value found in `rare_codes`.
#
# Returns `x` with the matching elements replaced. Elements that do not match
# (including NA, which `%in%` never matches) are left unchanged.
collapse_rare_codes <- function(x, rare_codes, other_code) {
  replace(x, x %in% rare_codes, other_code)
}

dropout <- read.csv(raw_path)

dropout_clean <- dropout

# The name of the first column is overwritten unconditionally.
# NOTE(review): presumably this works around a mangled first header in the raw
# file (e.g. a byte-order mark) -- confirm against dropout.csv.
colnames(dropout_clean)[1] <- "Marital.status"

## Combining under represented factors into buckets of at least 10
# Frequency of every code, used to decide which codes get merged below.
table(dropout_clean$Marital.status)
table(dropout_clean$Application.mode)
table(dropout_clean$Previous.qualification)
table(dropout_clean$Mother.s.qualification)
table(dropout_clean$Father.s.qualification)
table(dropout_clean$Mother.s.occupation)
table(dropout_clean$Father.s.occupation)

# Each column has its own hand-picked list of codes to merge and its own
# catch-all code. Every expression reads only the column it rewrites, so the
# order of the arguments does not matter.
dropout_clean <- mutate(
  dropout_clean,
  Marital.status = collapse_rare_codes(
    Marital.status,
    c(3, 6),
    7
  ),
  Application.mode = collapse_rare_codes(
    Application.mode,
    c(2, 5, 10, 11, 18),
    19
  ),
  Previous.qualification = collapse_rare_codes(
    Previous.qualification,
    c(4, 5, 8, 10, 11, 13, 17),
    18
  ),
  Mother.s.qualification = collapse_rare_codes(
    Mother.s.qualification,
    c(6:9, 11, 12, 14:18, 20, 21, 24:29),
    35
  ),
  Father.s.qualification = collapse_rare_codes(
    Father.s.qualification,
    c(6:8, 11:13, 15:23, 25, 26, 30:34),
    35
  ),
  Mother.s.occupation = collapse_rare_codes(
    Mother.s.occupation,
    c(11, 14:28, 30, 31),
    47
  ),
  Father.s.occupation = collapse_rare_codes(
    Father.s.occupation,
    c(14:43, 45, 46),
    47
  )
)

# Disabled: conversion of the categorical columns to factors.
#factor_cols=c("Marital.status","Scholarship.holder","Tuition.fees.up.to.date","Gender","Displaced","Daytime.evening.attendance","Application.mode", "Course","Previous.qualification","Mother.s.qualification","Father.s.qualification","Mother.s.occupation","Father.s.occupation","International","Target")
#dropout_clean[factor_cols] <- lapply(dropout_clean[factor_cols], factor)
#sapply(dropout_clean, table)

# Columns written to the cleaned file, in output order.
# NOTE(review): Application.mode is recoded above but is not part of this
# selection, so it never reaches the output -- confirm that this is intended.
kept_columns <- c(
  "Gender",
  "Displaced",
  "Tuition.fees.up.to.date",
  "Scholarship.holder",
  "Marital.status",
  "Daytime.evening.attendance",
  "Course",
  "Previous.qualification",
  "Mother.s.qualification",
  "Father.s.qualification",
  "Mother.s.occupation",
  "Father.s.occupation",
  "Age.at.enrollment",
  "International",
  "Target"
)
dropout_clean <- dropout_clean[kept_columns]

#str(dropout_clean)

write.csv(dropout_clean, clean_path, row.names = FALSE)