diff --git a/Early Dropout Prediction System/EJECUTABLE.py b/Early Dropout Prediction System/EJECUTABLE.py
new file mode 100644
index 0000000000000000000000000000000000000000..b637c3217a2edf291b6892d0c8e086da962600b2
--- /dev/null
+++ b/Early Dropout Prediction System/EJECUTABLE.py	
@@ -0,0 +1,15 @@
"""Run the dropout-prediction pipeline end to end.

Each stage reads the CSV artifacts produced by the previous one, so the
scripts must run in exactly this order.
"""
import subprocess
import sys


def _run(script):
	# Use the interpreter that is running this file instead of whatever
	# "python" resolves to on PATH, and surface non-zero exit codes
	# (os.system silently discarded them).
	result = subprocess.run([sys.executable, script])
	if result.returncode != 0:
		print("Aviso: " + script + " fallo con codigo " + str(result.returncode) + "\n")


print("Prediciendo Dropout...\n")
_run("dropout/jsontocsv.py")
print("Archivos descargados de BD\n")
_run("dropout/get_calificaciones.py")
_run("dropout/get_mallas.py")
_run("dropout/get_estudiantes.py")
_run("dropout/get_pga.py")
_run("dropout/calculosasigmallas.py")
print("Variables de entrada calculadas\n")
_run("dropout/final.py")
print("Datos finales listos\n")
_run("dropout/predict.py")
print("Predicción acabada.")
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/calculosasigmallas.py b/Early Dropout Prediction System/dropout/calculosasigmallas.py
new file mode 100644
index 0000000000000000000000000000000000000000..e570655134fe638994bd4c86ee6811ae3ab90c1c
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/calculosasigmallas.py	
@@ -0,0 +1,142 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+import time
+
def main():
	"""Build the per-student feature tables from asigMallas.csv and estudPGA.csv.

	Writes three CSVs consumed by final.py:
	- dropout5Years.csv: per-student flag for long absence without graduating
	- varHistCredMedia.csv: fraction of credits passed and weighted grade mean
	- credPassYear.csv: years/semesters progress ratios and credits passed

	NOTE(review): every ``df.drop("col", 1)`` below uses the positional axis
	argument that pandas 2.0 removed — this module requires pandas < 2; confirm
	the pinned version before upgrading.
	"""
	
	file = 'asigMallas.csv'
	data = pd.read_csv(file)
	# Keep only the columns needed for the derived features
	data = data.drop("ASIGNATURA_CODIGO",1).drop("GRUPO",1).drop("PERLEC_ID",1).drop("FORMA_APROBACION",1)
	data = data.drop("NOTA1",1).drop("NOTA2",1).drop("NOTA3",1).drop("NOTA4",1).drop("NOTA5",1).drop("NOTA6",1).drop("RESP_ID",1)
	data = data.drop("MALLA_ANIO",1).drop("EJE_FORMACION",1).drop("OPTATIVO",1).drop("ELECTIVO",1)
	data = data.fillna(0)
	# id == 0 rows are the NaN placeholders introduced by fillna above
	data = data[data['id']!=0]
	data['TOTAL_HORAS_MALLA'] = data['TOTAL_HORAS_MALLA'].astype(int)
	data.drop("Unnamed: 0",1,inplace=True)
	# Fall back to the per-cycle hours when a course has no credit value
	# NOTE(review): writing through .values mutates the underlying array and
	# may not propagate on a copy under newer pandas — verify.
	for i in range(len(data)):
		if data['CREDITOS'].values[i] == 0:
			data['CREDITOS'].values[i] = data['TOTAL_HORAS_CICLO'].values[i]
	data.drop("TOTAL_HORAS_CICLO",1,inplace = True)
	data = data.sort_values(['id', 'ANIO'], ascending=[True, True]) 
	
	# Remove every student whose records do not start at the first semester
	malSem = data.groupby(['id','CARRERA'],as_index=False).SEMESTRE_x.min()
	malSem = malSem[malSem['SEMESTRE_x']!=1]
	
	for i in range(len(malSem)):
		data = data[data['id']!=malSem['id'].values[i]]


	# Number of failed courses per student
	# NOTE(review): compares against the string '0'; get_calificaciones.py
	# writes integer flags, so after the CSV round-trip this may never match —
	# confirm the dtype of ESTADO_APROBACION here.
	fail =  data[data.ESTADO_APROBACION == '0']
	# NOTE(review): replacing 0 in a frame already filtered to '0' looks like
	# dead code — verify intent.
	fail['ESTADO_APROBACION'].replace(0,1,inplace = True)
	numFail = fail.groupby(['id','CARRERA'],as_index=False).size().to_frame()
	numFail.columns = ['Count']

	# Dropout heuristic: a gap of >= 4 years between consecutive enrolments
	# NOTE(review): at i == 0 the i-1 index wraps around to the last row —
	# harmless only if that comparison is always False; verify.
	years = np.array((data.ANIO).astype(int))
	ids = np.array(data.id)
	dropout = []
	for i in range(len(years)):
		if ids[i] == ids[i-1]:
			if (years[i]-years[i-1])>=4:
				dropout.append(1)
			else:
				dropout.append(0)
		else:
			dropout.append(0)

	dropout = pd.DataFrame(dropout)
	ids = pd.DataFrame(ids)
	dropid =pd.concat([ids, dropout], axis=1,)
	dropid.columns = ['id','dropout']
	dropout = dropid.groupby(['id'],as_index=False).dropout.sum()

	# Also flag students absent for the last years who have not graduated
	datamax = data.groupby(['id'],as_index=False).ANIO.max()
	datamax.columns = ['id','year']	
	añoActual = int(time.strftime("%Y"))

	fileE = 'estudPGA.csv'
	df = pd.read_csv(fileE)
	dataEstudiante = df.drop("Unnamed: 0",1).drop("FSE",1).drop("ANIO_EGRESO",1).drop("ANIO_INGRESO",1).drop("CARRERA_ID",1).drop("DURACION_ANIOS",1)
	dataEstudiante = dataEstudiante.drop("SEMESTRE_EGRESO",1).drop("SEMESTRE_INGRESO",1)

	stateStud = pd.merge(datamax,dataEstudiante,how='inner',on = 'id')
	stateStud = pd.merge(stateStud,dropout,how='inner',on = 'id')
	stateStud.year.fillna(0,inplace = True)
	stateStud = stateStud[stateStud['year']!=0]
	stateStud = stateStud.drop_duplicates(subset='id', keep='first', inplace=False)
	
	# NOTE(review): likely bug — i indexes stateStud but the assignment goes
	# into dropout at the same *positional* index; after the filters above the
	# two frames' rows need not correspond to the same student. Verify.
	for i in range(len(stateStud)):
		if añoActual >= (stateStud.year.values[i]+3) and stateStud.DROPOUT.values[i] != 1:
			dropout.dropout.values[i] = 1	

	dropout.to_csv("dropout5Years.csv")

	# Weighted mean: later enrolments of the same course weigh less
	data['NUMERO_MATRICULA'].replace(2,0.85,inplace = True)
	data['NUMERO_MATRICULA'].replace(3,0.75,inplace = True)
	data.NOTA_FINAL = data.NUMERO_MATRICULA*data.NOTA_FINAL*data.CREDITOS
	

	# Semesters the student has attended relative to the degree length
	semestres = data.groupby(['id','CARRERA'],as_index=False).SEMESTRE_x.max()
	semTot = data.groupby(['id','CARRERA'],as_index=False).SEMESTRE_y.max()
	semestres =  pd.merge(semestres, semTot, how='outer', on=['id','CARRERA'])
	semestres.SEMESTRE_x = semestres.SEMESTRE_x/semTot.SEMESTRE_y
	semestres.drop("SEMESTRE_y",1,inplace = True)

	# Years since the student started
	datamax = data.groupby(['id','CARRERA'],as_index=False).ANIO.max()
	datamin = data.groupby(['id','CARRERA'],as_index=False).ANIO.min()
	dataMaxMin = pd.merge(datamax, datamin, how='outer', on=['id','CARRERA'])
	dataMaxMin.ANIO_x = (dataMaxMin.ANIO_x).astype(int)
	dataMaxMin.ANIO_y = (dataMaxMin.ANIO_y).astype(int)
	dataMaxMin.ANIO_x = dataMaxMin.ANIO_x - dataMaxMin.ANIO_y + 1 
	dataMaxMin = pd.merge(dataMaxMin, semTot, how='outer', on=['id','CARRERA'])
	# years attended / nominal degree length in years (semesters / 2)
	dataMaxMin.ANIO_x = dataMaxMin.ANIO_x/(dataMaxMin.SEMESTRE_y/2) 
	dataMaxMin = dataMaxMin.drop("ANIO_y",1).drop("SEMESTRE_y",1)


	# Weighted grade mean and fraction of credits passed
	dataS =  data[data.ESTADO_APROBACION == '1']
	credDone = data.groupby(['id','CARRERA'],as_index=False).CREDITOS.sum()
	credSum = dataS.groupby(['id','CARRERA'],as_index=False).CREDITOS.sum()
	gradeSum = dataS.groupby(['id','CARRERA'],as_index=False).NOTA_FINAL.sum()# sum of weighted grades	
	dataMedia = pd.merge(credSum, gradeSum, how='outer', on=['id','CARRERA'])
	dataMedia = pd.merge(credDone, dataMedia, how='outer', on=['id','CARRERA'])
	dataMedia = dataMedia.fillna(0)
	dataMedia.NOTA_FINAL = dataMedia.NOTA_FINAL / dataMedia.CREDITOS_y
	dataMedia.CREDITOS_y =  dataMedia.CREDITOS_y/dataMedia.CREDITOS_x
	dataMedia = dataMedia.fillna(0)

	dataMedia.drop("CREDITOS_x",1,inplace = True)	
	namesTot = ["id","CARRERA", "passDone","gradeMean"]
	dataMedia.columns = namesTot
	dataMedia.to_csv("varHistCredMedia.csv")# mean for every student
	

	# Number of approved credits relative to the curriculum total
	dataPass = pd.merge(dataMaxMin,credSum, how='outer', on=['id','CARRERA'])
	dataPass = dataPass.fillna(0)
	totCred = data.groupby(['id','CARRERA'],as_index=False).TOTAL_HORAS_MALLA.max()
	mallaId =  data.groupby(['id','CARRERA'],as_index=False).MALLA_ID.max()
	dataPass = pd.merge(dataPass,totCred,how = 'outer', on = ['id','CARRERA'])
	dataPass = pd.merge(dataPass,mallaId,how = 'outer', on = ['id','CARRERA'])
	dataPass = pd.merge(dataPass,semestres,how = 'outer', on = ['id','CARRERA'])
	dataPass['CREDITOS'] = dataPass.CREDITOS/dataPass.TOTAL_HORAS_MALLA
	dataPass.drop("TOTAL_HORAS_MALLA",1,inplace = True)
	dataPass.to_csv("credPassYear.csv")# total credits passed

	
	# NOTE(review): the three statements below compute into `data` but nothing
	# is written afterwards — apparently dead code at the end of the pipeline.
	data = pd.merge(data, numFail, how='outer', on=['id'])
	data['Count'] = data['Count'].fillna(0)
	data =  data[data.ESTADO_APROBACION != '0']
+
+
+if __name__ == "__main__":
+   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/final.py b/Early Dropout Prediction System/dropout/final.py
new file mode 100644
index 0000000000000000000000000000000000000000..f447b1921c1108e96af103e2ba072d9d80cdaf1d
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/final.py	
@@ -0,0 +1,69 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+
def main():
	"""Assemble the final feature table (Final.csv) for the predictors.

	Joins the student table, the grade-mean table and the credit-progress
	table, adds a per-curriculum abandonment rate, and normalises the
	DROPOUT label.

	NOTE(review): ``drop("col", 1)`` uses the positional axis argument removed
	in pandas 2.0 — requires pandas < 2.
	"""
	file = 'varHistCredMedia.csv'
	dataMedia = pd.read_csv(file)
	dataMedia = dataMedia.drop("Unnamed: 0",1)

	fileE = 'cambioEstudiantes.csv'
	df = pd.read_csv(fileE)
	dataEstudiante = df.drop("Unnamed: 0",1)

	fileC = 'credPassYear.csv'
	dataCredCurs = pd.read_csv(fileC)
	dataCredCurs = dataCredCurs.drop("Unnamed: 0",1)

	data = pd.merge(dataEstudiante, dataMedia, how='outer', on=['id'])
	data = pd.merge(data, dataCredCurs, how='outer', on=['id','CARRERA'])
	data['gradeMean'].fillna(0,inplace = True)
	data['CREDITOS'].fillna(0,inplace = True)
	# FSE (socio-economic factor): 5 is used as the default for missing values
	data['FSE'].fillna(5,inplace = True)

	dataTot = data.groupby(['CARRERA','MALLA_ID'],as_index=False).id.count()
	dataTot.columns = ['CARRERA','MALLA_ID','TOT']
	
	# Fold in the "years without attending class" dropout flag
	dropout = 'dropout5Years.csv'
	dropout = pd.read_csv(dropout)
	dropout.drop("Unnamed: 0",1,inplace = True)
	data = pd.merge(data, dropout, how='outer', on=['id'])
	
	# Students flagged by the absence heuristic get DROPOUT = 0.
	# NOTE(review): per get_estudiantes.py, DROPOUT == 1 marks graduates, so 0
	# appears to encode "dropped out" — confirm this label convention.
	# NaN comparisons are False, so unflagged rows are untouched. Writing
	# through .values is a chained-assignment hazard on newer pandas — verify.
	for i in range(len(data)):
		if data.dropout.values[i] >= 1 :
			data.DROPOUT.values[i] = 0
	data.drop("dropout",1,inplace=True)
	dataPass = data[data.DROPOUT == 1]
	dataPass = dataPass.groupby(['CARRERA','MALLA_ID'],as_index=False).id.count()
	dataPass.columns = ['CARRERA','MALLA_ID','PASS']
	dataFail =  data[data.DROPOUT == 0]
	dataFail = dataFail.groupby(['CARRERA','MALLA_ID'],as_index=False).id.count()
	dataFail.columns = ['CARRERA','MALLA_ID','FAIL']

	dataRate = pd.merge(dataTot, dataPass, how='outer', on=['CARRERA','MALLA_ID'])
	dataRate = pd.merge(dataRate, dataFail, how='outer', on=['CARRERA','MALLA_ID'])
	
	dataRate.fillna(0,inplace = True)

	
	# Students currently enrolled (unlabelled) are excluded from the rate
	dataRate['rate'] = dataRate.FAIL/(dataRate.FAIL+dataRate.PASS)
	dataRate = dataRate.drop("TOT",1).drop("PASS",1).drop("FAIL",1)
	data = pd.merge(data, dataRate, how='outer', on=['CARRERA','MALLA_ID'])
	data = data.drop_duplicates()
	

	data = data.sort_values(by='id', ascending=True)
	data = data.drop("ANIO_EGRESO",1).drop("CARRERA_ID",1).drop("DURACION_ANIOS",1).drop("NOMBRE",1).drop("NOTA_FINAL",1).drop("SEMESTRE_EGRESO",1)
	data = data.drop("SEMESTRE_INGRESO",1).drop("ANIO_INGRESO",1)
	# The positional rename below assumes the exact column order produced by
	# the merges/drops above — do not reorder the preceding statements.
	columnas = ['FSE','ID','DROPOUT','CARRERA','PASSDONE','GRADEMEAN','YEARSMAT/YEARSDEGREE','CREDITSPASSEDDEGREE','MALLA_ID','SEMESTERMAT/SEMESTERDEG','ABANDONMENTRATE']
	data.columns = columnas
	data.CARRERA = data['CARRERA'].fillna(0)
	data = data[data['CARRERA']!= 0]
	data.to_csv("Final.csv")
+
+
+if __name__ == "__main__":
+   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/get_calificaciones.py b/Early Dropout Prediction System/dropout/get_calificaciones.py
new file mode 100644
index 0000000000000000000000000000000000000000..105e29626898ef6cffd56c87f820a82ed26a9739
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/get_calificaciones.py	
@@ -0,0 +1,30 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+
+
def main():
	"""Normalise the raw grade export.

	Reads calificaciones1.csv, maps the approval labels to a binary flag,
	zero-fills missing grades, and writes the result to cambioCal.csv.
	"""
	df = pd.read_csv('calificaciones1.csv')
	df = df.sort_values(by='id', ascending=True)
	# drop(columns=...) replaces the positional axis argument, which was
	# removed in pandas 2.0
	df = df.drop(columns=["Unnamed: 0", "NOTA7"])

	# Approval label -> binary flag (1 = passed, 0 = failed)
	df['ESTADO_APROBACION'] = df['ESTADO_APROBACION'].replace(
		{"APROBADO": 1, "REPROBADO": 0, "REPROBADO POR FALTAS": 0})

	# A missing partial/final grade means "no grade recorded" -> 0
	grade_cols = ['NOTA1', 'NOTA2', 'NOTA3', 'NOTA4', 'NOTA5', 'NOTA6', 'NOTA_FINAL']
	df[grade_cols] = df[grade_cols].fillna(0)

	df.to_csv("cambioCal.csv")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/get_estudiantes.py b/Early Dropout Prediction System/dropout/get_estudiantes.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fdfe1dd3514bec52a577477f4c2d1cce97f281d
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/get_estudiantes.py	
@@ -0,0 +1,26 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+
+
def main():
	"""Join student records with graduation records.

	Reads estudiantes.csv and graduados.csv, marks graduates with
	DROPOUT = 1 (non-graduates are left NaN), and writes
	cambioEstudiantes.csv.
	"""
	df = pd.read_csv('estudiantes.csv', encoding="ISO-8859-1")
	df = df.sort_values(by='id', ascending=True)
	# drop(columns=...) replaces the positional axis argument removed in pandas 2.0
	df = df.drop(columns=["TIPO", "COLEGIO"])

	df1 = pd.read_csv('graduados.csv')
	df1 = df1.sort_values(by='id', ascending=True)
	df1 = df1.drop(columns=["Unnamed: 0", "SEMESTRE_INGRESO_DESC", "SEMESTRE_EGRESO_DESC"])

	# Graduates are tagged with DROPOUT = 1; after the outer merge every
	# non-graduate keeps a NaN there (resolved later in the pipeline).
	df1['DROPOUT'] = 1
	df = pd.merge(df, df1, how='outer', on=['id'])

	df.to_csv("cambioEstudiantes.csv")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/get_mallas.py b/Early Dropout Prediction System/dropout/get_mallas.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2865eb82d180c6d1eab09347ce381ea9c28360a
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/get_mallas.py	
@@ -0,0 +1,27 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+
+
def main():
	"""Attach curriculum (malla) information to each grade row.

	Writes cambioMallas.csv (cleaned curriculum) and asigMallas.csv, where
	each grade row carries its course semester (SEMESTRE_x) and the
	curriculum's highest semester (SEMESTRE_y).
	"""
	df = pd.read_csv('mallas.csv', encoding="ISO-8859-1")
	df = df.sort_values(by='CARRERA', ascending=True)
	# drop(columns=...) replaces the positional axis argument removed in pandas 2.0
	df = df.drop(columns=["NOMBRE_ASIGNATURA"])

	df.to_csv("cambioMallas.csv")

	df1 = pd.read_csv('cambioCal.csv')

	# Highest semester per curriculum = nominal length of the degree.
	# as_index=False keeps the keys as columns so the merge below is explicit
	# (the original merged against a MultiIndex frame).
	semestresMalla = df.groupby(['CARRERA', 'MALLA_ID'], as_index=False).SEMESTRE.max()

	df = pd.merge(df1, df, how='inner', on=['ASIGNATURA_CODIGO', 'CARRERA'])
	df = df.drop(columns=["Unnamed: 0", "DESCRIPCIONPERIODO"])
	# Adds the degree length; the per-course semester becomes SEMESTRE_x and
	# the degree length SEMESTRE_y
	df = pd.merge(df, semestresMalla, how='outer', on=['CARRERA', 'MALLA_ID'])
	df = df.sort_values(by='id', ascending=True)

	df.to_csv("asigMallas.csv")
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/get_pga.py b/Early Dropout Prediction System/dropout/get_pga.py
new file mode 100644
index 0000000000000000000000000000000000000000..a2d0f27c30cdc2e68e7f6bbd93d0f91f43cd24de
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/get_pga.py	
@@ -0,0 +1,37 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+
+
def main():
	"""Average the per-period PGA per student and attach it to the student table.

	Writes cambioPGA.csv (mean PGA per student/degree) and estudPGA.csv
	(student table joined with the mean PGA).
	"""
	df = pd.read_csv('pga.csv')
	df = df.sort_values(by='id', ascending=True)

	# Mean PGA per (student, degree). The original computed sum / count by
	# hand, which is exactly groupby(...).mean() over the non-null values.
	df1 = df.groupby(['id', 'CARRERA'], as_index=False).PGA.mean()
	df1.columns = ['id', 'CARRERA', 'MEDIA']

	df1.to_csv("cambioPGA.csv")

	# drop(columns=...) replaces the positional axis argument removed in pandas 2.0
	df1 = df1.drop(columns=["CARRERA"])

	df = pd.read_csv('cambioEstudiantes.csv')

	df = pd.merge(df, df1, how='outer', on=['id'])
	df = df.drop(columns=["Unnamed: 0", "NOMBRE"])

	df.to_csv("estudPGA.csv")
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/jsontocsv.py b/Early Dropout Prediction System/dropout/jsontocsv.py
new file mode 100644
index 0000000000000000000000000000000000000000..37f9e5f85dcbf93c9dd125691da1dc9b38d48e76
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/jsontocsv.py	
@@ -0,0 +1,149 @@
+import csv
+import json
+import pandas as pd 
+import numpy as np
+
# --- estudiantes.json -> estudiantes.csv (+ indexed copy estudiantes1.csv) ---
with open('estudiantes.json') as fh:
	est = json.load(fh)

# Assign sequential 1-based student ids
for y, record in enumerate(est):
	record['id'] = y + 1

# Close (and flush) the CSV before pandas reads it back: the original kept
# the handle open, so buffered rows could still be missing from the file at
# read time. newline='' is required by the csv module to avoid blank rows
# on Windows.
with open("estudiantes.csv", "w", newline='') as fh:
	f = csv.writer(fh)
	f.writerow(["COLEGIO", "TIPO", "FSE", "id"])
	for x in est:
		f.writerow([x["COLEGIO"], x["TIPO"], x["FSE"], x["id"]])

df = pd.read_csv('estudiantes.csv', encoding="ISO-8859-1")
df.to_csv("estudiantes1.csv")
+
+
# --- graduados.json -> graduados.csv ---
# NOTE(review): file handles opened with open(...).read() / csv.writer(open(...))
# throughout this script are never closed; later pd.read_csv calls on the same
# files may see unflushed data. Consider `with` blocks.
grad = json.loads(open('graduados.json').read())

df = pd.DataFrame(grad)
df.columns = ['dict']

# Replace null entries with a sentinel record (CARRERA_ID == 0) so that
# from_records keeps one row per student; sentinel rows are filtered out below.
arr = np.array(df['dict']).tolist()
arr = [{'CARRERA_ID': 0, 'NOMBRE': 0, 'DURACION_ANIOS': 0, 'ANIO_INGRESO': 0, 'SEMESTRE_INGRESO': 0, 'SEMESTRE_INGRESO_DESC': 0, 'ANIO_EGRESO': 0, 'SEMESTRE_EGRESO': 115, 'SEMESTRE_EGRESO_DESC': 'SEPTIEMBRE 2016-FEBRERO 2017', 'NOTA_FINAL': 0} if v is None else v for v in arr]

df = pd.DataFrame.from_records(arr)

# Sequential 1-based ids; position 10 is the 'id' column just appended after
# the 10 record keys (assumes that key order — TODO confirm).
df['id'] = 1
arr = np.array(df)
for i in range(len(arr)):
	arr[i][10] = i+1
df = pd.DataFrame(arr, columns=df.columns)
df = df[df.CARRERA_ID !=0]
df.to_csv("graduados.csv")



# --- mallas.json -> mallas.csv / mallas1.csv ---
mallas = json.loads(open('mallas.json').read())


f = csv.writer(open("mallas.csv", "w+"))
# Write CSV Header, If you dont need that, remove this line

f.writerow(['MALLA_ID', 'CARRERA', 'MALLA_ANIO', 'SEMESTRE', 'ASIGNATURA_CODIGO', 'NOMBRE_ASIGNATURA', 'CREDITOS', 'TOTAL_HORAS_CICLO', 'EJE_FORMACION', 'OPTATIVO', 'ELECTIVO','TOTAL_HORAS_MALLA'])

for x in mallas:
    f.writerow([x["MALLA_ID_"],
                x["CARRERA"],
                x["MALLA_ANIO"],
                x["SEMESTRE"],
                x["ASIGNATURA_CODIGO"],
                x["NOMBRE_ASIGNATURA"],
                x["CREDITOS"],
                x["TOTAL_HORAS_CICLO"],
                x["EJE_FORMACION"],
                x["OPTATIVO"],
                x["ELECTIVO"],
                x["TOTAL_HORAS_MALLA"]])

file = 'mallas.csv'
df = pd.read_csv(file,encoding =  "ISO-8859-1")
df = df.drop_duplicates()
# Sequential ids; position 12 is the 'id' column appended after the 12 CSV
# columns above.
df['id'] = 1
arr = np.array(df)
for i in range(len(arr)):
	arr[i][12] = i+1
df = pd.DataFrame(arr, columns=df.columns)
df.to_csv("mallas1.csv")



# --- pga.json -> pga.csv / pga1.csv ---
pga = json.loads(open('pga.json').read())

f = csv.writer(open("pga.csv", "w+"))

# One sub-list per student; tag every period with the student's 1-based id and
# insert a zero placeholder row for students with no PGA records.
for y in range(len(pga)):
	for x in range(len(pga[y])):
		pga[y][x]['id'] = y+1
	if len(pga[y]) == 0:
		pga[y] =  [{"CARRERA":0, "PERLEC_ID":0, "DESCRIPCIONPERIODO":0,"PGA":0,"id":y+1}]



# Write CSV Header, If you dont need that, remove this line

f.writerow(["CARRERA", "PERLEC_ID", "DESCRIPCIONPERIODO","PGA","id"])

for y in pga:
	for x in y:
		f.writerow([x["CARRERA"],
                x["PERLEC_ID"],
                x["DESCRIPCIONPERIODO"],
                x["PGA"],
                x["id"]])


file = 'pga.csv'
df = pd.read_csv(file,index_col='id')
df.to_csv("pga1.csv")

# --- calificaciones.json -> calificaciones.csv / calificaciones1.csv ---
calif = json.loads(open('calificaciones.json').read())


f = csv.writer(open("calificaciones.csv", "w+"))

# One sub-list of grade records per student; tag each with the student id
for y in range(len(calif)):
	for x in range(len(calif[y])):
		calif[y][x]['id'] = y+1

# Write CSV Header, If you dont need that, remove this line

f.writerow(['CARRERA', 'ASIGNATURA_CODIGO', 'NUMERO_MATRICULA', 'GRUPO', 'ANIO', 'PERLEC_ID', 'DESCRIPCIONPERIODO',
 'ESTADO_APROBACION', 'FORMA_APROBACION', 'NOTA1', 'NOTA2', 'NOTA3', 'NOTA4', 'NOTA5', 'NOTA6', 'NOTA7', 'NOTA_FINAL', 'RESP_ID','id'])

for y in calif:
	# NOTE(review): writes a blank row before each student's block; pandas
	# skips blank lines on read, so this appears to be cosmetic — confirm.
	f.writerow("")
	for x in y:
		f.writerow([x["CARRERA"],
    		x["ASIGNATURA_CODIGO"],
    		x["NUMERO_MATRICULA"],
            x["GRUPO"],
            x["ANIO"],
            x["PERLEC_ID"],
            x["DESCRIPCIONPERIODO"],
            x["ESTADO_APROBACION"],
            x["FORMA_APROBACION"],
            x["NOTA1"],
            x["NOTA2"],
            x["NOTA3"],
            x["NOTA4"],
            x["NOTA5"],
            x["NOTA6"],
            x["NOTA7"],
            x["NOTA_FINAL"],
            x["RESP_ID"],
            x["id"]])
file = 'calificaciones.csv'
df = pd.read_csv(file,encoding =  "ISO-8859-1")
df.to_csv("calificaciones1.csv")
\ No newline at end of file
diff --git a/Early Dropout Prediction System/dropout/predict.py b/Early Dropout Prediction System/dropout/predict.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b59a6b4fb8c526c1a36fc377e289550c7c2b4fc
--- /dev/null
+++ b/Early Dropout Prediction System/dropout/predict.py	
@@ -0,0 +1,81 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import RandomForestClassifier
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
def main():
	"""Train a random forest on the labelled students and score the rest.

	Reads Final.csv, where DROPOUT is NaN/"" for still-enrolled students
	(recoded to 2 and used as the prediction set), trains on the labelled
	rows, and writes per-student class + dropout probability to
	ResultsRF.xlsx.
	"""
	data = pd.read_csv('Final.csv')
	# drop(columns=...) replaces the positional axis argument removed in pandas 2.0
	data = data.drop(columns=["Unnamed: 0"])
	# DROPOUT == 2 marks the rows to predict (originally NaN or empty)
	data['DROPOUT'] = data['DROPOUT'].fillna(2)
	data = data.dropna()
	data['DROPOUT'] = data['DROPOUT'].replace("", 2)

	dataPred = data.fillna(0)

	dataTrain = dataPred[dataPred.DROPOUT != 2].copy()
	dataTrain['DROPOUT'] = dataTrain['DROPOUT'].astype(int)
	dataTrain = dataTrain.drop(columns=["ID", "CARRERA"])

	# dataF keeps the identifying columns for the report written at the end
	dataF = dataPred[dataPred.DROPOUT == 2]
	dataTest = dataPred[dataPred.DROPOUT == 2].copy()
	dataTest['DROPOUT'] = dataTest['DROPOUT'].replace(2, "")
	dataTest = dataTest.drop(columns=["ID", "CARRERA"])

	# Feature columns 2,3,4,7 = PASSDONE, GRADEMEAN, YEARSMAT/YEARSDEGREE,
	# SEMESTERMAT/SEMESTERDEG; column 1 is the DROPOUT label
	x_train = dataTrain.values[:, [2, 3, 4, 7]]
	y_train = dataTrain.values[:, 1]
	x_test1 = dataTest.values[:, [2, 3, 4, 7]]

	scaler = preprocessing.StandardScaler()
	x_train = scaler.fit_transform(x_train)
	# BUG FIX: the scaler must only be *fit* on the training data; the
	# original called fit_transform on the test set too, putting the two
	# sets on different scales.
	x_test = scaler.transform(x_test1)

	# Hyper-parameters were tuned offline with GridSearchCV (see the
	# cross-validation scripts). max_features='sqrt' is the value that the
	# removed alias 'auto' meant for classifiers (scikit-learn >= 1.3).
	model = RandomForestClassifier(bootstrap=True, max_depth=13,
	                               max_features='sqrt', min_samples_leaf=7,
	                               min_samples_split=15, n_estimators=400)
	# Fit once and reuse — the original refit the forest for every call
	model.fit(x_train, y_train)
	y_prob = model.predict_proba(x_test)
	y_clas = model.predict(x_test)

	# Report: original row data + predicted class + dropout probability
	out = dataF.reset_index(drop=True).drop(columns=["DROPOUT"])
	out = pd.concat([out, pd.DataFrame(y_clas), pd.DataFrame(y_prob[:, 1])],
	                axis=1, ignore_index=True)
	out.columns = ['FSE', 'ID', 'CARRERA', 'PASSDONE', 'MEDIA',
	               'YEARSMAT/DEGREE', 'CREDITPASSDEGREE', 'MALLA_ID',
	               'SEMESTERMAT/DEG', 'ABANDRATE', 'CLAS', 'PROB']
	out.to_excel("ResultsRF.xlsx")
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/predDecTree.py b/Early Dropout Prediction System/predDecTree.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a9fdb48ce0cc9e2076bd3128c5299cfbc95c53f
--- /dev/null
+++ b/Early Dropout Prediction System/predDecTree.py	
@@ -0,0 +1,167 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.tree import DecisionTreeClassifier
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
def main():
	"""10-fold cross-validate a decision-tree dropout classifier on Final.csv.

	Prints the confusion matrix, accuracy, recall, precision, specificity,
	NPV, F-score and AUC, shows the ROC curve, and writes the last fold's
	(truth, class, probability) triples to ResultsSVC.xlsx.
	"""
	data = pd.read_csv('Final.csv')
	# drop(columns=...) replaces the positional axis argument removed in pandas 2.0
	dataPred = data.drop(columns=["id", "CARRERA", "Unnamed: 0"])

	# DROPOUT == 2 marks unlabelled rows (students still enrolled)
	dataPred['DROPOUT'] = dataPred['DROPOUT'].fillna(2)
	dataPred['DROPOUT'] = dataPred['DROPOUT'].replace("", 2)
	dataPred = dataPred.fillna(0)

	dataTrain = dataPred[dataPred.DROPOUT != 2].copy()
	dataTrain['DROPOUT'] = dataTrain['DROPOUT'].astype(int)

	dataTest = dataPred[dataPred.DROPOUT == 2].copy()
	dataTest['DROPOUT'] = dataTest['DROPOUT'].replace(2, "")

	# Column 1 is the DROPOUT label; 0 and 2..6 are the input features
	x_train = dataTrain.values[:, [0, 2, 3, 4, 5, 6]]
	y_train = dataTrain.values[:, 1]

	x_test = dataTest.values[:, [0, 2, 3, 4, 5, 6]]
	y_test = dataTest.values[:, 1]

	scaler = preprocessing.StandardScaler()
	x_train = scaler.fit_transform(x_train)

	print("Starting cross-validation (" + str(len(x_train)) + ' learners)')

	cfr2 = DecisionTreeClassifier()

	kf = model_selection.KFold(n_splits=10)

	res_ce = []
	# Confusion-matrix counters: A = predicted/actual dropout, S = not
	A_A = A_S = S_A = S_S = 0

	y_pred_list = []
	y_true_list = []

	for traincv, testcv in kf.split(x_train):
		# Fit once per fold — the original refit the (non-deterministic)
		# tree separately for predict_proba and predict, so the two
		# predictions could come from different trees.
		fold_model = cfr2.fit(x_train[traincv], y_train[traincv])
		y_pred = fold_model.predict_proba(x_train[testcv])
		y_pr2 = fold_model.predict(x_train[testcv])
		res_ce.append(np.mean(np.abs(y_pr2 - y_train[testcv])))

		# Fold report; NOTE: overwritten each fold, keeping only the last one
		fold_df = pd.concat([pd.DataFrame(y_train[testcv]),
		                     pd.DataFrame(y_pr2),
		                     pd.DataFrame(y_pred[:, 1])], axis=1)
		print(fold_df)
		fold_df.to_excel("ResultsSVC.xlsx")

		# Accumulate out-of-fold scores for the global ROC/AUC
		for i, v in enumerate(y_pred[:, 1]):
			y_pred_list.append(v)
			y_true_list.append(y_train[testcv][i])

		for pred_c, true_c in zip(y_pr2, y_train[testcv]):
			if pred_c == 1 and true_c == 1:
				A_A += 1
			elif pred_c == 0 and true_c == 1:
				A_S += 1
			elif pred_c == 1 and true_c == 0:
				S_A += 1
			elif pred_c == 0 and true_c == 0:
				S_S += 1

	accuracy = (A_A + S_S) / ((A_A + A_S + S_A + S_S) * 1.0)
	print("Results CE: " + str(1 - np.array(res_ce).mean()) + " / " + str(accuracy))
	# Confusion matrix
	print(str(A_A) + "\t" + str(A_S))
	print(str(S_A) + "\t" + str(S_S))
	TP = A_A
	FP = A_S
	FN = S_A
	TN = S_S

	def _safe_ratio(num, den):
		# Guard empty classes. BUG FIX: the original's specificity except
		# branch assigned a misspelled name ("specificicty"), leaving
		# `specificity` undefined on that path.
		return num / (den * 1.0) if den else 0

	recall = _safe_ratio(TP, TP + FN)
	precision = _safe_ratio(TP, TP + FP)
	specificity = _safe_ratio(TN, TN + FP)
	NPV = _safe_ratio(TN, FN + TN)
	F_score = _safe_ratio(2 * TP, 2 * TP + FP + FN)

	print('Recall: ' + str(recall))
	print('Precision: ' + str(precision))
	print('Specificity: ' + str(specificity))
	print('NVP:' + str(NPV))
	print('F-score: ' + str(F_score))

	# ROC / AUC over all out-of-fold predictions
	y = np.array(y_true_list)
	pred = np.array(y_pred_list)
	fpr, tpr, thresholds = metrics.roc_curve(y, pred)

	AUC = metrics.auc(fpr, tpr)
	RMSEsk = np.sqrt(metrics.mean_squared_error(y, pred))
	MAE = metrics.mean_absolute_error(y, pred)
	print('AUC: ' + str(AUC))

	plt.figure()
	lw = 2
	plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (area = %0.2f)' % metrics.auc(fpr, tpr))
	plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
	plt.xlim([0.0, 1.0])
	plt.ylim([0.0, 1.05])
	plt.xlabel('False Positive Rate')
	plt.ylabel('True Positive Rate')
	plt.title('Receiver operating characteristic example')
	plt.legend(loc="lower right")
	plt.show()

	results = {
		'RMSE': RMSEsk,
		'MAE': MAE,
		'AUC': AUC,
		'F1': F_score,
		'recall': recall,
		'precision': precision,
		'accuracy': accuracy,
	}
	print(results)
+	
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/predGradBoost.py b/Early Dropout Prediction System/predGradBoost.py
new file mode 100644
index 0000000000000000000000000000000000000000..a05fe44d905f3960a0f8a354a26279192ee76b7a
--- /dev/null
+++ b/Early Dropout Prediction System/predGradBoost.py	
@@ -0,0 +1,166 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn.ensemble import GradientBoostingClassifier
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
def main():
	"""10-fold cross-validate a gradient-boosting dropout classifier on Final.csv.

	Prints the confusion matrix, accuracy, recall, precision, specificity,
	NPV, F-score and AUC, shows the ROC curve, and writes the last fold's
	(truth, class, probability) triples to ResultsDT.xlsx (file name kept
	from the original; it predates the switch to gradient boosting).
	"""
	data = pd.read_csv('Final.csv')
	# drop(columns=...) replaces the positional axis argument removed in pandas 2.0
	dataPred = data.drop(columns=["id", "CARRERA", "Unnamed: 0"])

	# DROPOUT == 2 marks unlabelled rows (students still enrolled)
	dataPred['DROPOUT'] = dataPred['DROPOUT'].fillna(2)
	dataPred['DROPOUT'] = dataPred['DROPOUT'].replace("", 2)
	dataPred = dataPred.fillna(0)

	dataTrain = dataPred[dataPred.DROPOUT != 2].copy()
	dataTrain['DROPOUT'] = dataTrain['DROPOUT'].astype(int)

	dataTest = dataPred[dataPred.DROPOUT == 2].copy()
	dataTest['DROPOUT'] = dataTest['DROPOUT'].replace(2, "")

	# Column 1 is the DROPOUT label; 0 and 2..6 are the input features
	x_train = dataTrain.values[:, [0, 2, 3, 4, 5, 6]]
	y_train = dataTrain.values[:, 1]

	x_test = dataTest.values[:, [0, 2, 3, 4, 5, 6]]
	y_test = dataTest.values[:, 1]

	scaler = preprocessing.StandardScaler()
	x_train = scaler.fit_transform(x_train)

	print("Starting cross-validation (" + str(len(x_train)) + ' learners)')

	cfr2 = GradientBoostingClassifier()

	kf = model_selection.KFold(n_splits=10)

	res_ce = []
	# Confusion-matrix counters: A = predicted/actual dropout, S = not
	A_A = A_S = S_A = S_S = 0

	y_pred_list = []
	y_true_list = []

	for traincv, testcv in kf.split(x_train):
		# Fit once per fold — the original refit the model separately for
		# predict_proba and predict
		fold_model = cfr2.fit(x_train[traincv], y_train[traincv])
		y_pred = fold_model.predict_proba(x_train[testcv])
		y_pr2 = fold_model.predict(x_train[testcv])
		res_ce.append(np.mean(np.abs(y_pr2 - y_train[testcv])))

		# Fold report; NOTE: overwritten each fold, keeping only the last one
		fold_df = pd.concat([pd.DataFrame(y_train[testcv]),
		                     pd.DataFrame(y_pr2),
		                     pd.DataFrame(y_pred[:, 1])], axis=1)
		print(fold_df)
		fold_df.to_excel("ResultsDT.xlsx")

		# Accumulate out-of-fold scores for the global ROC/AUC
		for i, v in enumerate(y_pred[:, 1]):
			y_pred_list.append(v)
			y_true_list.append(y_train[testcv][i])

		for pred_c, true_c in zip(y_pr2, y_train[testcv]):
			if pred_c == 1 and true_c == 1:
				A_A += 1
			elif pred_c == 0 and true_c == 1:
				A_S += 1
			elif pred_c == 1 and true_c == 0:
				S_A += 1
			elif pred_c == 0 and true_c == 0:
				S_S += 1

	accuracy = (A_A + S_S) / ((A_A + A_S + S_A + S_S) * 1.0)
	print("Results CE: " + str(1 - np.array(res_ce).mean()) + " / " + str(accuracy))
	# Confusion matrix
	print(str(A_A) + "\t" + str(A_S))
	print(str(S_A) + "\t" + str(S_S))
	TP = A_A
	FP = A_S
	FN = S_A
	TN = S_S

	def _safe_ratio(num, den):
		# Guard empty classes. BUG FIX: the original's specificity except
		# branch assigned a misspelled name ("specificicty"), leaving
		# `specificity` undefined on that path.
		return num / (den * 1.0) if den else 0

	recall = _safe_ratio(TP, TP + FN)
	precision = _safe_ratio(TP, TP + FP)
	specificity = _safe_ratio(TN, TN + FP)
	NPV = _safe_ratio(TN, FN + TN)
	F_score = _safe_ratio(2 * TP, 2 * TP + FP + FN)

	print('Recall: ' + str(recall))
	print('Precision: ' + str(precision))
	print('Specificity: ' + str(specificity))
	print('NVP:' + str(NPV))
	print('F-score: ' + str(F_score))

	# ROC / AUC over all out-of-fold predictions
	y = np.array(y_true_list)
	pred = np.array(y_pred_list)
	fpr, tpr, thresholds = metrics.roc_curve(y, pred)

	AUC = metrics.auc(fpr, tpr)
	RMSEsk = np.sqrt(metrics.mean_squared_error(y, pred))
	MAE = metrics.mean_absolute_error(y, pred)
	print('AUC: ' + str(AUC))

	plt.figure()
	lw = 2
	plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (area = %0.2f)' % metrics.auc(fpr, tpr))
	plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
	plt.xlim([0.0, 1.0])
	plt.ylim([0.0, 1.05])
	plt.xlabel('False Positive Rate')
	plt.ylabel('True Positive Rate')
	plt.title('Receiver operating characteristic example')
	plt.legend(loc="lower right")
	plt.show()

	results = {
		'RMSE': RMSEsk,
		'MAE': MAE,
		'AUC': AUC,
		'F1': F_score,
		'recall': recall,
		'precision': precision,
		'accuracy': accuracy,
	}
	print(results)
+	
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/predLinLog.py b/Early Dropout Prediction System/predLinLog.py
new file mode 100644
index 0000000000000000000000000000000000000000..c32aaa2c24f373cfb2588aa5563d5d30f9446cfe
--- /dev/null
+++ b/Early Dropout Prediction System/predLinLog.py	
@@ -0,0 +1,165 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn import linear_model
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
+def main():
+
+	file = 'Final.csv'
+	data = pd.read_csv(file)
+	dataPred = data.drop("id",1).drop("CARRERA",1).drop("Unnamed: 0",1) 
+	#dataPred['DROPOUT'] = dataPred['DROPOUT'].str.strip()
+
+	
+	dataPred['DROPOUT'].fillna(2,inplace = True)
+	#print(dataPred.stateStudent)	
+	dataPred['DROPOUT'].replace("",2,inplace = True)
+	dataPred = dataPred.fillna(0)
+
+	dataTrain = dataPred[dataPred.DROPOUT != 2]
+	dataTrain.DROPOUT = dataTrain.DROPOUT.astype(int)
+
+	dataTest = dataPred[dataPred.DROPOUT == 2]
+	dataTest['DROPOUT'].replace(2,"",inplace = True)
+	
+	#dataTrain.to_excel("dataTrain.xlsx")
+	#dataTest.to_excel("dataTest.xlsx")
+
+	x_train = dataTrain.values[:,[0,2,3,4,5,6]]
+	y_train = dataTrain.values[:,1]
+
+	x_test = dataTest.values[:,[0,2,3,4,5,6]]
+	y_test = dataTest.values[:,1]
+
+	robust_scaler = preprocessing.StandardScaler()
+	x_train = robust_scaler.fit_transform(x_train)
+	
+	
+	print("Starting cross-validation (" +  str(len(x_train)) + ' learners)')
+
+	#cfr2 = linear_model.LinearRegression()
+	cfr2 = linear_model.LogisticRegression()
+	
+	kf = model_selection.KFold(n_splits=10)
+	cv = kf.split(x_train)
+	
+	results = []
+	res_ce = []
+	A_A = 0
+	A_S = 0
+	S_A = 0
+	S_S = 0
+
+	y_pred_list = list()
+	y_true_list = list()
+	y_true2_list = list()
+
+	for traincv, testcv in cv:
+		y_pred = cfr2.fit(x_train[traincv], y_train[traincv]).predict_proba(x_train[testcv])
+		#results.append(np.sqrt(np.mean((y_pred[:,1] - y_train[testcv])**2)))
+		y_pr2 = cfr2.fit(x_train[traincv], y_train[traincv]).predict(x_train[testcv])
+		res_ce.append(np.mean(np.abs(y_pr2 - y_train[testcv])))
+		y_pr1 = pd.DataFrame(y_train[testcv])
+		x_pr1 = pd.DataFrame(x_train[testcv])
+		y_pred2 = pd.DataFrame(y_pr2)
+		df = pd.DataFrame(y_pred[:,1] )
+		df = pd.concat([y_pr1,y_pred2, df], axis=1,)
+		print(df)
+		df.to_excel("ResultsSVC.xlsx")
+		# Store results for AUC
+		for i, v in enumerate(y_pred[:,1]):
+			y_pred_list.append(v)
+			y_true_list.append(y_train[testcv][i])
+			y_true2_list.append(y_train[testcv][i])
+			# Certificate earners
+		for i, val in enumerate(y_pr2):
+			if y_pr2[i] == 1 and y_train[testcv][i] == 1:
+				A_A += 1
+			if y_pr2[i] == 0 and  y_train[testcv][i] == 1:
+				A_S += 1
+			if y_pr2[i] == 1  and  y_train[testcv][i] == 0:
+				S_A += 1
+			if y_pr2[i] == 0  and  y_train[testcv][i] == 0:
+				S_S += 1
+	#print out the mean of the cross-validated results
+	RMSE = np.array(results).mean()
+	#print("RMSE: " + str( RMSE))
+	accuracy = (A_A+S_S)/((A_A+A_S+S_A+S_S)*1.0)
+	print("Results CE: " + str(1-np.array(res_ce).mean()) + " / " + str(accuracy))
+	# Results about certificate earners
+	print(str(A_A) + "\t" + str(A_S))
+	print(str(S_A) + "\t" + str(S_S))
+	TP = A_A
+	FP = A_S
+	FN = S_A
+	TN = S_S
+	try:
+		recall = TP / ((TP+FN)*1.0);
+	except:
+		recall = 0
+	try:
+		precision = TP / ((TP+FP)*1.0);
+	except:
+		precision = 0
+	try:
+		specificity = TN / ((TN+FP)*1.0)
+	except:
+		specificicty = 0
+	try:
+		NPV = TN / ((FN+TN)*1.0);
+	except:
+		NPV = 0
+	try:
+		F_score = (2*TP)/((2*TP+FP+FN)*1.0)
+	except:
+		F_score = 0
+
+	print('Recall: ' + str(recall))
+	print('Precision: ' + str(precision))
+	print('Specificity: ' + str(specificity))
+	print('NVP:' + str(NPV))
+	print('F-score: ' + str(F_score))
+
+	# Compute AUC
+	y = np.array(y_true_list)
+	pred = np.array(y_pred_list)
+	y_true = np.array(y_true2_list)
+	fpr, tpr, thresholds = metrics.roc_curve(y, pred)
+
+	AUC = metrics.auc(fpr, tpr)
+	RMSEsk = np.sqrt(metrics.mean_squared_error(y_true, pred))
+	MAE = metrics.mean_absolute_error(y_true, pred)
+	print('AUC: ' + str(AUC))
+
+	plt.figure()
+	lw = 2
+	plt.plot(fpr, tpr, color='darkorange',
+             lw=lw, label='ROC curve (area = %0.2f)' % metrics.auc(fpr, tpr))
+	plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
+	plt.xlim([0.0, 1.0])
+	plt.ylim([0.0, 1.05])
+	plt.xlabel('False Positive Rate')
+	plt.ylabel('True Positive Rate')
+	plt.title('Receiver operating characteristic example')
+	plt.legend(loc="lower right")
+	plt.show()
+
+	results = dict()
+	results['RMSE'] = RMSEsk
+	results['MAE'] = MAE
+	results['AUC'] = AUC
+	results['F1'] = F_score
+	results['recall'] = recall
+	results['precision'] = precision
+	results['accuracy'] = accuracy
+
+	print(results)
+	
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/predMLP.py b/Early Dropout Prediction System/predMLP.py
new file mode 100644
index 0000000000000000000000000000000000000000..521a62e7b4f357f637e2de5ff33a4c37fcd7f714
--- /dev/null
+++ b/Early Dropout Prediction System/predMLP.py	
@@ -0,0 +1,166 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn.neural_network import MLPClassifier
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
+def main():
+
+	file = 'Final.csv'
+	data = pd.read_csv(file)
+	dataPred = data.drop("id",1).drop("CARRERA",1).drop("Unnamed: 0",1) 
+	#dataPred['DROPOUT'] = dataPred['DROPOUT'].str.strip()
+
+	
+	dataPred['DROPOUT'].fillna(2,inplace = True)
+	#print(dataPred.stateStudent)	
+	dataPred['DROPOUT'].replace("",2,inplace = True)
+	dataPred = dataPred.fillna(0)
+
+	dataTrain = dataPred[dataPred.DROPOUT != 2]
+	dataTrain.DROPOUT = dataTrain.DROPOUT.astype(int)
+
+	dataTest = dataPred[dataPred.DROPOUT == 2]
+	dataTest['DROPOUT'].replace(2,"",inplace = True)
+	
+	#dataTrain.to_excel("dataTrain.xlsx")
+	#dataTest.to_excel("dataTest.xlsx")
+
+	x_train = dataTrain.values[:,[0,2,3,4,5,6]]
+	y_train = dataTrain.values[:,1]
+
+	x_test = dataTest.values[:,[0,2,3,4,5,6]]
+	y_test = dataTest.values[:,1]
+
+	robust_scaler = preprocessing.StandardScaler()
+	x_train = robust_scaler.fit_transform(x_train)
+	
+	
+	print("Starting cross-validation (" +  str(len(x_train)) + ' learners)')
+
+	#cfr = DecisionTreeRegressor()
+	cfr2 = MLPClassifier()
+	
+	kf = model_selection.KFold(n_splits=10)
+	cv = kf.split(x_train)
+	
+	results = []
+	res_ce = []
+	A_A = 0
+	A_S = 0
+	S_A = 0
+	S_S = 0
+
+	y_pred_list = list()
+	y_true_list = list()
+	y_true2_list = list()
+
+	for traincv, testcv in cv:
+		y_pred = cfr2.fit(x_train[traincv], y_train[traincv]).predict_proba(x_train[testcv])
+		#results.append(np.sqrt(np.mean((y_pred[:,1] - y_train[testcv])**2)))
+		y_pr2 = cfr2.fit(x_train[traincv], y_train[traincv]).predict(x_train[testcv])
+		res_ce.append(np.mean(np.abs(y_pr2 - y_train[testcv])))
+		y_pr1 = pd.DataFrame(y_train[testcv])
+		x_pr1 = pd.DataFrame(x_train[testcv])
+		y_pred2 = pd.DataFrame(y_pr2)
+		df = pd.DataFrame(y_pred[:,1] )
+		df = pd.concat([y_pr1,y_pred2, df], axis=1,)
+		print(df)
+		df.to_excel("ResultsDT.xlsx")
+
+		# Store results for AUC
+		for i, v in enumerate(y_pred[:,1]):
+			y_pred_list.append(v)
+			y_true_list.append(y_train[testcv][i])
+			y_true2_list.append(y_train[testcv][i])
+			# Certificate earners
+		for i, val in enumerate(y_pr2):
+			if y_pr2[i] == 1 and y_train[testcv][i] == 1:
+				A_A += 1
+			if y_pr2[i] == 0 and  y_train[testcv][i] == 1:
+				A_S += 1
+			if y_pr2[i] == 1  and  y_train[testcv][i] == 0:
+				S_A += 1
+			if y_pr2[i] == 0  and  y_train[testcv][i] == 0:
+				S_S += 1
+	#print out the mean of the cross-validated results
+	#RMSE = np.array(results).mean()
+	#print("RMSE: " + str( RMSE))
+	accuracy = (A_A+S_S)/((A_A+A_S+S_A+S_S)*1.0)
+	print("Results CE: " + str(1-np.array(res_ce).mean()) + " / " + str(accuracy))
+	# Results about certificate earners
+	print(str(A_A) + "\t" + str(A_S))
+	print(str(S_A) + "\t" + str(S_S))
+	TP = A_A
+	FP = A_S
+	FN = S_A
+	TN = S_S
+	try:
+		recall = TP / ((TP+FN)*1.0);
+	except:
+		recall = 0
+	try:
+		precision = TP / ((TP+FP)*1.0);
+	except:
+		precision = 0
+	try:
+		specificity = TN / ((TN+FP)*1.0)
+	except:
+		specificicty = 0
+	try:
+		NPV = TN / ((FN+TN)*1.0);
+	except:
+		NPV = 0
+	try:
+		F_score = (2*TP)/((2*TP+FP+FN)*1.0)
+	except:
+		F_score = 0
+
+	print('Recall: ' + str(recall))
+	print('Precision: ' + str(precision))
+	print('Specificity: ' + str(specificity))
+	print('NVP:' + str(NPV))
+	print('F-score: ' + str(F_score))
+
+	# Compute AUC
+	y = np.array(y_true_list)
+	pred = np.array(y_pred_list)
+	y_true = np.array(y_true2_list)
+	fpr, tpr, thresholds = metrics.roc_curve(y, pred)
+
+	AUC = metrics.auc(fpr, tpr)
+	RMSEsk = np.sqrt(metrics.mean_squared_error(y_true, pred))
+	MAE = metrics.mean_absolute_error(y_true, pred)
+	print('AUC: ' + str(AUC))
+
+	plt.figure()
+	lw = 2
+	plt.plot(fpr, tpr, color='darkorange',
+             lw=lw, label='ROC curve (area = %0.2f)' % metrics.auc(fpr, tpr))
+	plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
+	plt.xlim([0.0, 1.0])
+	plt.ylim([0.0, 1.05])
+	plt.xlabel('False Positive Rate')
+	plt.ylabel('True Positive Rate')
+	plt.title('Receiver operating characteristic example')
+	plt.legend(loc="lower right")
+	plt.show()
+
+	results = dict()
+	results['RMSE'] = RMSEsk
+	results['MAE'] = MAE
+	results['AUC'] = AUC
+	results['F1'] = F_score
+	results['recall'] = recall
+	results['precision'] = precision
+	results['accuracy'] = accuracy
+
+	print(results)
+	
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/predRF.py b/Early Dropout Prediction System/predRF.py
new file mode 100644
index 0000000000000000000000000000000000000000..688bfd1c1565117257620d1c868f300141c760c9
--- /dev/null
+++ b/Early Dropout Prediction System/predRF.py	
@@ -0,0 +1,176 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.ensemble import RandomForestClassifier
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
+def main():
+
+	file = 'Final.csv'
+	data = pd.read_csv(file)
+	dataPred = data.drop("ID",1).drop("CARRERA",1).drop("Unnamed: 0",1)
+	#dataPred['DROPOUT'] = dataPred['DROPOUT'].str.strip()
+
+	
+	dataPred['DROPOUT'].fillna(2,inplace = True)
+	#print(dataPred.stateStudent)
+	dataPred = dataPred.dropna()
+	print(len(dataPred))	
+	dataPred['DROPOUT'].replace("",2,inplace = True)
+	dataPred = dataPred.fillna(0)
+
+	dataTrain = dataPred[dataPred.DROPOUT != 2]
+	dataTrain.DROPOUT = dataTrain.DROPOUT.astype(int)
+
+	dataTest = dataPred[dataPred.DROPOUT == 2]
+	dataTest['DROPOUT'].replace(2,"",inplace = True)
+	
+	#dataTrain.to_excel("dataTrain.xlsx")
+	#dataTest.to_excel("dataTest.xlsx")
+
+	x_train = dataTrain.values[:,[2,3,4,5,6,7]]
+	y_train = dataTrain.values[:,1]
+
+	x_test = dataTest.values[:,[2,3,4,5,6,7]]
+	y_test = dataTest.values[:,1]
+
+	robust_scaler = preprocessing.StandardScaler()
+	x_train = robust_scaler.fit_transform(x_train)
+	x_test = robust_scaler.fit_transform(x_test)
+	
+	print("Starting cross-validation (" +  str(len(x_train)) + ' learners)')
+
+	#cfr = RandomForestRegressor(n_estimators = 500)
+	cfr2 = RandomForestClassifier(n_estimators = 500)
+	
+	kf = model_selection.KFold(n_splits=10)
+	cv = kf.split(x_train)
+	
+	results = []
+	res_ce = []
+	A_A = 0
+	A_S = 0
+	S_A = 0
+	S_S = 0
+
+	y_pred_list = list()
+	y_true_list = list()
+	y_true2_list = list()
+
+	for traincv, testcv in cv:
+		#y_pred = cfr.fit(x_train[traincv], y_train[traincv]).predict_proba(x_train[testcv])
+		#results.append(np.sqrt(np.mean((y_pred - y_train[testcv])**2)))
+		y_pred1 = cfr2.fit(x_train[traincv], y_train[traincv])
+		y_pred = y_pred1.predict_proba(x_train[testcv])
+		#results.append(np.sqrt(np.mean((y_pred[:,1] - y_train[testcv])**2)))
+		y_pr2 = cfr2.fit(x_train[traincv], y_train[traincv]).predict(x_train[testcv])
+		res_ce.append(np.mean(np.abs(y_pr2 - y_train[testcv])))
+		y_pr1 = pd.DataFrame(y_train[testcv])
+		x_pr1 = pd.DataFrame(x_train[testcv])
+		y_pred2 = pd.DataFrame(y_pr2)
+		df = pd.DataFrame(y_pred[:,1] )
+		df = pd.concat([y_pr1,y_pred2, df], axis=1,)
+		print(df)
+		#df.to_excel("ResultsRF.xlsx")
+
+		# Store results for AUC
+		for i, v in enumerate(y_pred[:,1]):
+			y_pred_list.append(v)
+			y_true_list.append(y_train[testcv][i])
+			y_true2_list.append(y_train[testcv][i])
+			# Certificate earners
+		for i, val in enumerate(y_pr2):
+			if y_pr2[i] == 1 and y_train[testcv][i] == 1:
+				A_A += 1
+			if y_pr2[i] == 0 and  y_train[testcv][i] == 1:
+				A_S += 1
+			if y_pr2[i] == 1  and  y_train[testcv][i] == 0:
+				S_A += 1
+			if y_pr2[i] == 0  and  y_train[testcv][i] == 0:
+				S_S += 1
+	#print out the mean of the cross-validated results
+	#RMSE = np.array(results).mean()
+	#print("RMSE: " + str( RMSE))
+	accuracy = (A_A+S_S)/((A_A+A_S+S_A+S_S)*1.0)
+	print("Results CE: " + str(1-np.array(res_ce).mean()) + " / " + str(accuracy))
+	# Results about certificate earners
+	print(str(A_A) + "\t" + str(A_S))
+	print(str(S_A) + "\t" + str(S_S))
+	TP = A_A
+	FP = A_S
+	FN = S_A
+	TN = S_S
+	try:
+		recall = TP / ((TP+FN)*1.0);
+	except:
+		recall = 0
+	try:
+		precision = TP / ((TP+FP)*1.0);
+	except:
+		precision = 0
+	try:
+		specificity = TN / ((TN+FP)*1.0)
+	except:
+		specificicty = 0
+	try:
+		NPV = TN / ((FN+TN)*1.0);
+	except:
+		NPV = 0
+	try:
+		F_score = (2*TP)/((2*TP+FP+FN)*1.0)
+	except:
+		F_score = 0
+
+	print('Recall: ' + str(recall))
+	print('Precision: ' + str(precision))
+	print('Specificity: ' + str(specificity))
+	print('NVP:' + str(NPV))
+	print('F-score: ' + str(F_score))
+
+	# Compute AUC
+	y = np.array(y_true_list)
+	pred = np.array(y_pred_list)
+	y_true = np.array(y_true2_list)
+	fpr, tpr, thresholds = metrics.roc_curve(y, pred)
+
+	AUC = metrics.auc(fpr, tpr)
+	RMSEsk = np.sqrt(metrics.mean_squared_error(y_true, pred))
+	MAE = metrics.mean_absolute_error(y_true, pred)
+	print('AUC: ' + str(AUC))
+
+	plt.figure()
+	lw = 2
+	plt.plot(fpr, tpr, color='darkorange',
+             lw=lw, label='ROC curve (area = %0.2f)' % metrics.auc(fpr, tpr))
+	plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
+	plt.xlim([0.0, 1.0])
+	plt.ylim([0.0, 1.05])
+	plt.xlabel('False Positive Rate')
+	plt.ylabel('True Positive Rate')
+	plt.title('Receiver operating characteristic example')
+	plt.legend(loc="lower right")
+	plt.show()
+
+	results = dict()
+	results['RMSE'] = RMSEsk
+	results['MAE'] = MAE
+	results['AUC'] = AUC
+	results['F1'] = F_score
+	results['recall'] = recall
+	results['precision'] = precision
+	results['accuracy'] = accuracy
+
+	print(results)
+
+	pred = y_pred1.predict_proba(x_test)
+	pred = pd.DataFrame(pred)
+	pred.to_excel("RF6.xlsx")
+	
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file
diff --git a/Early Dropout Prediction System/predSVM.py b/Early Dropout Prediction System/predSVM.py
new file mode 100644
index 0000000000000000000000000000000000000000..adab0d7d452c8dd54a7b19f3c9e61698777a832f
--- /dev/null
+++ b/Early Dropout Prediction System/predSVM.py	
@@ -0,0 +1,187 @@
+import numpy as np
+import csv
+import pandas as pd 
+import string
+from sklearn import model_selection
+from sklearn.svm import SVC
+from sklearn.svm import SVR
+from sklearn.model_selection import GridSearchCV
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn import metrics
+
+def main():
+
+	file = 'Final.csv'
+	data = pd.read_csv(file)
+	dataPred = data.drop("id",1).drop("CARRERA",1).drop("Unnamed: 0",1) 
+	#dataPred['DROPOUT'] = dataPred['DROPOUT'].str.strip()
+
+	
+	dataPred['DROPOUT'].fillna(2,inplace = True)
+	#print(dataPred.stateStudent)	
+	dataPred['DROPOUT'].replace("",2,inplace = True)
+	dataPred = dataPred.fillna(0)
+
+	dataTrain = dataPred[dataPred.DROPOUT != 2]
+	dataTrain.DROPOUT = dataTrain.DROPOUT.astype(int)
+
+	dataTest = dataPred[dataPred.DROPOUT == 2]
+	dataTest['DROPOUT'].replace(2,"",inplace = True)
+	
+	#dataTrain.to_excel("dataTrain.xlsx")
+	#dataTest.to_excel("dataTest.xlsx")
+
+	x_train = dataTrain.values[:,[0,2,3,4,5,6]]
+	y_train = dataTrain.values[:,1]
+	print(y_train)
+	x_test = dataTest.values[:,[0,2,3,4,5,6]]
+	y_test = dataTest.values[:,1]
+
+	robust_scaler = preprocessing.StandardScaler()
+	x_train = robust_scaler.fit_transform(x_train)
+	x_test = robust_scaler.fit_transform(x_test)
+	# Get best parameters
+	# Set the parameters by cross-validation
+
+	"""
+	tuned_parameters = [{'kernel': ['rbf','linear','poly'], 'gamma': [1e-2,0.5e-3, 1.5e-1],'C': [1, 2] }]
+	clf = GridSearchCV(SVC(C=1), tuned_parameters, cv=25)
+	clf.fit(x_train, y_train)
+	print("Best parameters set found on development set:")
+	print(clf.best_params_)
+    
+	"""
+	print("Starting cross-validation (" +  str(len(x_train)) + ' learners)')
+
+	#cfr = SVR( epsilon =0.01)
+	cfr2 = SVC(probability = True)
+	
+	kf = model_selection.KFold(n_splits=25)
+	cv = kf.split(x_train)
+	
+	results = []
+	res_ce = []
+	A_A = 0
+	A_S = 0
+	S_A = 0
+	S_S = 0
+
+	y_pred_list = list()
+	y_true_list = list()
+	y_true2_list = list()
+
+	for traincv, testcv in cv:
+		"""
+		y_pred = cfr.fit(x_train[traincv], y_train[traincv]).predict(x_train[testcv])
+		results.append(np.sqrt(np.mean((y_pred - y_train[testcv])**2)))
+		y_pr1 = pd.DataFrame(y_train[testcv])
+		df = pd.DataFrame(y_pred)
+		df = pd.concat([y_pr1, df], axis=1,)		
+		df.to_excel("ResultsSVR.xlsx")
+		"""
+		y_pred = cfr2.fit(x_train[traincv], y_train[traincv]).predict_proba(x_train[testcv])
+		#print(y_pred)
+		#results.append(np.sqrt(np.mean((y_pred[:,1] - y_train[testcv])**2)))
+		y_pr2 = cfr2.fit(x_train[traincv], y_train[traincv]).predict(x_train[testcv])
+		res_ce.append(np.mean(np.abs(y_pr2 - y_train[testcv])))
+		y_pr1 = pd.DataFrame(y_train[testcv])
+		x_pr1 = pd.DataFrame(x_train[testcv])
+		y_pred2 = pd.DataFrame(y_pr2)
+		df = pd.DataFrame(y_pred[:,1] )
+		df = pd.concat([y_pr1,y_pred2, df], axis=1,)
+		print(df)
+		df.to_excel("ResultsSVC.xlsx")
+
+		# Store results for AUC
+		for i, v in enumerate(y_pred[:,1]):
+			y_pred_list.append(v)
+			y_true_list.append(y_train[testcv][i])
+			y_true2_list.append(y_train[testcv][i])
+			# Certificate earners
+		for i, val in enumerate(y_pr2):
+			if y_pr2[i]  == 1 and y_train[testcv][i] == 1:
+				A_A += 1
+			if y_pr2[i] == 0 and  y_train[testcv][i] == 1:
+				A_S += 1
+			if y_pr2[i] == 1  and  y_train[testcv][i] == 0:
+				S_A += 1
+			if y_pr2[i] == 0  and  y_train[testcv][i] == 0:
+				S_S += 1
+	#print out the mean of the cross-validated results
+	RMSE = np.array(results).mean()
+	print("RMSE: " + str( RMSE))
+	accuracy = (A_A+S_S)/((A_A+A_S+S_A+S_S)*1.0)
+	print("Results CE: " + str(1-np.array(res_ce).mean()) + " / " + str(accuracy))
+	# Results about certificate earners
+	print(str(A_A) + "\t" + str(A_S))
+	print(str(S_A) + "\t" + str(S_S))
+	TP = A_A
+	FP = A_S
+	FN = S_A
+	TN = S_S
+	try:
+		recall = TP / ((TP+FN)*1.0);
+	except:
+		recall = 0
+	try:
+		precision = TP / ((TP+FP)*1.0);
+	except:
+		precision = 0
+	try:
+		specificity = TN / ((TN+FP)*1.0)
+	except:
+		specificicty = 0
+	try:
+		NPV = TN / ((FN+TN)*1.0);
+	except:
+		NPV = 0
+	try:
+		F_score = (2*TP)/((2*TP+FP+FN)*1.0)
+	except:
+		F_score = 0
+
+	print('Recall: ' + str(recall))
+	print('Precision: ' + str(precision))
+	print('Specificity: ' + str(specificity))
+	print('NVP:' + str(NPV))
+	print('F-score: ' + str(F_score))
+
+	# Compute AUC
+	y = np.array(y_true_list)
+	pred = np.array(y_pred_list)
+	y_true = np.array(y_true2_list)
+	fpr, tpr, thresholds = metrics.roc_curve(y, pred)
+
+	AUC = metrics.auc(fpr, tpr)
+	RMSEsk = np.sqrt(metrics.mean_squared_error(y_true, pred))
+	MAE = metrics.mean_absolute_error(y_true, pred)
+	print('AUC: ' + str(AUC))
+
+	plt.figure()
+	lw = 2
+	plt.plot(fpr, tpr, color='darkorange',
+             lw=lw, label='ROC curve (area = %0.2f)' % metrics.auc(fpr, tpr))
+	plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
+	plt.xlim([0.0, 1.0])
+	plt.ylim([0.0, 1.05])
+	plt.xlabel('False Positive Rate')
+	plt.ylabel('True Positive Rate')
+	plt.title('Receiver operating characteristic example')
+	plt.legend(loc="lower right")
+	plt.show()
+
+	results = dict()
+	results['RMSE'] = RMSEsk
+	results['MAE'] = MAE
+	results['AUC'] = AUC
+	results['F1'] = F_score
+	results['recall'] = recall
+	results['precision'] = precision
+	results['accuracy'] = accuracy
+
+	print(results)
+	
+
+if __name__ == "__main__":
+	   	main()
\ No newline at end of file