diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..9760c544591a73c89c654f56f39993d8f6c6bbb9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.ipynb_checkpoints +.idea +data +__pycache__ +*.pyc +*.csv \ No newline at end of file diff --git a/CodaLabPackages/.gitignore b/CodaLabPackages/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/CodaLabPackages/CSEDMDC.png b/CodaLabPackages/CSEDMDC.png new file mode 100644 index 0000000000000000000000000000000000000000..f8f56405984bf555d704791a6a4b06ee4cf13c65 Binary files /dev/null and b/CodaLabPackages/CSEDMDC.png differ diff --git a/CodaLabPackages/Track1Package/competition.yaml b/CodaLabPackages/Track1Package/competition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58b4506b399334e85b139f5437e84310172dfac4 --- /dev/null +++ b/CodaLabPackages/Track1Package/competition.yaml @@ -0,0 +1,61 @@ +title: 2nd CSEDM Challenge -- Student Struggling Prediction +description: Tracking student programming traces and predicting their struggles in later problems. 
+image: csedm image.jpeg +has_registration: True +html: + overview: overview.html + evaluation: evaluation.html + terms: terms_and_conditions.html + data: data.html +phases: + 1: + phasenumber: 1 + label: "Practice" + start_date: 2021-06-28 + max_submissions: 100 + scoring_program: program.zip + reference_data: reference1.zip + starting_kit: submission1.zip + 2: + phasenumber: 2 + label: "Cross-Semester" + start_date: 2021-06-28 + max_submissions: 100 + scoring_program: program.zip + reference_data: reference2.zip + starting_kit: submission2.zip + 3: + phasenumber: 3 + label: "Within-Semester" + start_date: 2022-02-07 + max_submissions: 100 + scoring_program: program.zip + reference_data: reference3.zip + starting_kit: submission3.zip +leaderboard: + leaderboards: + RESULTS: &RESULTS + label: Results + rank: 1 + columns: + AUC: + leaderboard: *RESULTS + label: AUC + rank: 1 + numeric_format: 4 + MACRO_F1: + leaderboard: *RESULTS + label: Macro F1 + rank: 2 + numeric_format: 4 + POSITIVE_F1: + leaderboard: *RESULTS + label: Positive F1 + rank: 3 + numeric_format: 4 + ACC: + leaderboard: *RESULTS + label: Accuracy + rank: 4 + numeric_format: 4 + diff --git a/CodaLabPackages/Track1Package/csedm image.jpeg b/CodaLabPackages/Track1Package/csedm image.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e118ecaa874819b4fcdf8fda03182a3a2065b6b0 Binary files /dev/null and b/CodaLabPackages/Track1Package/csedm image.jpeg differ diff --git a/CodaLabPackages/Track1Package/data.html b/CodaLabPackages/Track1Package/data.html new file mode 100644 index 0000000000000000000000000000000000000000..c4464e8dbf6a91c844302d7455a9a0ec41fe8814 --- /dev/null +++ b/CodaLabPackages/Track1Package/data.html @@ -0,0 +1,3 @@ +<p> + This is the data for the competition. It is to be used responsibly. 
"""CodaLab scoring program for Track 1 (student struggling prediction).

Invoked as ``python evaluate.py <input_dir> <output_dir>`` (see the
``metadata`` file of the scoring bundle).  It reads
``<input_dir>/ref/truth.csv`` and ``<input_dir>/res/predictions.csv``,
validates the submission and writes the leaderboard metrics
(MACRO_F1, POSITIVE_F1, ACC, AUC) to ``<output_dir>/scores.txt``.

The code deliberately sticks to constructs that behave the same on
Python 2 and Python 3, as the original script did.
"""
import os
import sys
import time  # retained from the original import block; no longer used

import numpy as np  # retained from the original import block; unused
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score

# Columns that together identify one prediction.
KEY_COLUMNS = ['SubjectID', 'ProblemID']


def validate_submission(true_csv, pred_csv):
    """Raise ValueError if the submission cannot be scored against the truth.

    Explicit exceptions are used instead of ``assert`` so the checks still run
    when Python is started with ``-O``.

    :param true_csv: DataFrame loaded from truth.csv
    :param pred_csv: DataFrame loaded from predictions.csv
    """
    if len(true_csv) != len(pred_csv):
        raise ValueError("Submission with wrong number of entries: Should be "
                         + str(len(true_csv)))
    if not all(col in pred_csv.columns
               for col in ('SubjectID', 'ProblemID', 'Label')):
        raise ValueError(
            "Submission columns should be: SubjectID, ProblemID, Label")
    if set(true_csv['SubjectID']) != set(pred_csv['SubjectID']):
        raise ValueError("Submission SubjectIDs do not match.")
    if set(true_csv['ProblemID']) != set(pred_csv['ProblemID']):
        raise ValueError("Submission ProblemIDs do not match.")
    if set(pred_csv["Label"]) == set([False, True]):
        raise ValueError(
            "Submission should include probabilities rather than binary results.")

    # The two per-column checks above do not guarantee that every
    # (SubjectID, ProblemID) *pair* of the truth has a prediction.  A missing
    # pair would become NaN in the join below and fail inside scikit-learn
    # with an unrelated-looking error, so reject it here with a clear message.
    true_pairs = set(zip(true_csv['SubjectID'], true_csv['ProblemID']))
    pred_pairs = set(zip(pred_csv['SubjectID'], pred_csv['ProblemID']))
    if true_pairs != pred_pairs:
        raise ValueError(
            "Submission (SubjectID, ProblemID) pairs do not match.")


def compute_scores(true_csv, pred_csv):
    """Return ``(macro_f1, positive_f1, accuracy, auc)`` for a valid submission.

    Predicted scores are thresholded at 0.5 for the F1 and accuracy metrics;
    AUC is computed on the raw scores.
    """
    df = true_csv.set_index(KEY_COLUMNS).join(
        pred_csv.set_index(KEY_COLUMNS), rsuffix="ScorePrediction")
    df["LabelPrediction"] = df["LabelScorePrediction"] > 0.5

    # Macro F1 is the mean of the F1 of the negative and the positive class.
    f1_negative = f1_score(1 - df["Label"], 1 - df["LabelPrediction"])
    f1_positive = f1_score(df["Label"], df["LabelPrediction"])
    f1 = (f1_negative + f1_positive) / 2
    acc = accuracy_score(df["Label"], df["LabelPrediction"])
    auc = roc_auc_score(df["Label"], df["LabelScorePrediction"])
    return f1, f1_positive, acc, auc


def main(input_dir, output_dir):
    """Score the submission found under *input_dir* and write scores.txt.

    Exits with a non-zero status (instead of silently producing no scores)
    when the ``res`` or ``ref`` directory is missing.
    """
    submit_dir = os.path.join(input_dir, 'res')
    truth_dir = os.path.join(input_dir, 'ref')

    for required_dir in (submit_dir, truth_dir):
        if not os.path.isdir(required_dir):
            sys.exit("%s doesn't exist" % required_dir)

    true_csv = pd.read_csv(os.path.join(truth_dir, "truth.csv"))
    pred_csv = pd.read_csv(os.path.join(submit_dir, "predictions.csv"))

    validate_submission(true_csv, pred_csv)
    f1, f1_positive, acc, auc = compute_scores(true_csv, pred_csv)

    print("MACRO F1: %f" % f1)
    print("POSITIVE F1: %f" % f1_positive)
    print("ACC: %f" % acc)
    print("AUC: %f" % auc)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # The file is only created once all metrics exist, and the context
    # manager guarantees it is closed even if a write fails.
    output_filename = os.path.join(output_dir, 'scores.txt')
    with open(output_filename, 'wb') as output_file:
        output_file.write(b"MACRO_F1: %f \n" % f1)
        output_file.write(b"POSITIVE_F1: %f \n" % f1_positive)
        output_file.write(b"ACC: %f \n" % acc)
        output_file.write(b"AUC: %f \n" % auc)


if __name__ == '__main__':
    main(sys.argv[1], sys.argv[2])
+ +Once these pieces are assembled they are packaged as program.zip which CodaLab can then use to evaluate the submissions for a competition. \ No newline at end of file diff --git a/CodaLabPackages/Track1Package/submission.zip b/CodaLabPackages/Track1Package/submission.zip new file mode 100644 index 0000000000000000000000000000000000000000..f3a6adfe6868f6d4977baf33b7c662aa58c1e3b9 Binary files /dev/null and b/CodaLabPackages/Track1Package/submission.zip differ diff --git a/CodaLabPackages/Track1Package/submission/scores.txt b/CodaLabPackages/Track1Package/submission/scores.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/CodaLabPackages/Track1Package/terms_and_conditions.html b/CodaLabPackages/Track1Package/terms_and_conditions.html new file mode 100644 index 0000000000000000000000000000000000000000..de518cddc92d8e60b2c90191a4913e743d0f5f04 --- /dev/null +++ b/CodaLabPackages/Track1Package/terms_and_conditions.html @@ -0,0 +1,5 @@ +<H3>Terms and Conditions</H3> + +<p> + This page enumerates the terms and conditions of the competition. +</p> \ No newline at end of file diff --git a/CodaLabPackages/Track2Package/competition.yaml b/CodaLabPackages/Track2Package/competition.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2ffb86bb9c786b0327cca20e322b0b1b3767d5d6 --- /dev/null +++ b/CodaLabPackages/Track2Package/competition.yaml @@ -0,0 +1,29 @@ +title: Example Hello World Competition +description: An example competition where submissions should output "Hello World!" 
+image: csedm image.jpeg +has_registration: True +html: + overview: overview.html + evaluation: evaluation.html + terms: terms_and_conditions.html + data: data.html +phases: + 1: + phasenumber: 1 + label: "First phase" + start_date: 2013-06-30 + max_submissions: 100 + scoring_program: program.zip + reference_data: reference.zip + starting_kit: submission.zip +leaderboard: + leaderboards: + RESULTS: &RESULTS + label: Results + rank: 1 + columns: + correct: + leaderboard: *RESULTS + label: correct + rank: 1 + numeric_format: 1 diff --git a/CodaLabPackages/Track2Package/csedm image.jpeg b/CodaLabPackages/Track2Package/csedm image.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..e118ecaa874819b4fcdf8fda03182a3a2065b6b0 Binary files /dev/null and b/CodaLabPackages/Track2Package/csedm image.jpeg differ diff --git a/CodaLabPackages/Track2Package/data.html b/CodaLabPackages/Track2Package/data.html new file mode 100644 index 0000000000000000000000000000000000000000..c4464e8dbf6a91c844302d7455a9a0ec41fe8814 --- /dev/null +++ b/CodaLabPackages/Track2Package/data.html @@ -0,0 +1,3 @@ +<p> + This is the data for the competition. It is to be used responsibly. +</p> \ No newline at end of file diff --git a/CodaLabPackages/Track2Package/evaluation.html b/CodaLabPackages/Track2Package/evaluation.html new file mode 100644 index 0000000000000000000000000000000000000000..a2705a1a3d04dcdd4cbc574b7d2ec634bd268d0b --- /dev/null +++ b/CodaLabPackages/Track2Package/evaluation.html @@ -0,0 +1,5 @@ +<H3>Evaluation Criteria</H3> + +<p> + This is the page that tells you how competition submissions will be evaluated and scored. 
"""CodaLab scoring program for Track 2 (grade prediction).

Invoked as ``python evaluate.py <input_dir> <output_dir>`` (see the
``metadata`` file of the scoring bundle).  It reads
``<input_dir>/ref/truth.csv`` and ``<input_dir>/res/predictions.csv``,
validates the submission and writes the mean squared error of the predicted
``X-Grade`` to ``<output_dir>/scores.txt``.

The code deliberately sticks to constructs that behave the same on
Python 2 and Python 3, as the original script did.

NOTE(review): this script writes the key ``MSE``, while the Track2
competition.yaml in this package declares a single leaderboard column named
``correct``.  The score key presumably has to match a leaderboard column key
for the value to be displayed -- confirm and align the two.
"""
import os
import sys

import pandas as pd
from sklearn.metrics import mean_squared_error


def validate_submission(true_csv, pred_csv):
    """Raise ValueError if the submission cannot be scored against the truth.

    Explicit exceptions are used instead of ``assert`` so the checks still run
    when Python is started with ``-O``.

    :param true_csv: DataFrame loaded from truth.csv
    :param pred_csv: DataFrame loaded from predictions.csv
    """
    if len(true_csv) != len(pred_csv):
        raise ValueError("Submission with wrong number of entries: Should be "
                         + str(len(true_csv)))
    if 'SubjectID' not in pred_csv.columns or 'X-Grade' not in pred_csv.columns:
        raise ValueError("Submission columns should be: SubjectID, X-Grade")
    if set(true_csv['SubjectID']) != set(pred_csv['SubjectID']):
        raise ValueError("Submission SubjectIDs do not match.")
    if set(pred_csv["X-Grade"]) == set([False, True]):
        raise ValueError(
            "Submission should be a continuous grade prediction, not binary.")


def compute_mse(true_csv, pred_csv):
    """Return the mean squared error between true and predicted X-Grade."""
    df = true_csv.set_index('SubjectID').join(
        pred_csv.set_index('SubjectID'), rsuffix="Prediction")
    return mean_squared_error(df["X-Grade"], df["X-GradePrediction"])


def main(input_dir, output_dir):
    """Score the submission found under *input_dir* and write scores.txt.

    Exits with a non-zero status (instead of silently producing no scores)
    when the ``res`` or ``ref`` directory is missing.
    """
    submit_dir = os.path.join(input_dir, 'res')
    truth_dir = os.path.join(input_dir, 'ref')

    for required_dir in (submit_dir, truth_dir):
        if not os.path.isdir(required_dir):
            sys.exit("%s doesn't exist" % required_dir)

    true_csv = pd.read_csv(os.path.join(truth_dir, "truth.csv"))
    pred_csv = pd.read_csv(os.path.join(submit_dir, "predictions.csv"))

    validate_submission(true_csv, pred_csv)
    mse = compute_mse(true_csv, pred_csv)
    print(mse)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # The file is only created once the score exists, and the context
    # manager guarantees it is closed even if the write fails.
    output_filename = os.path.join(output_dir, 'scores.txt')
    with open(output_filename, 'wb') as output_file:
        output_file.write(b"MSE: %f" % mse)


if __name__ == '__main__':
    main(sys.argv[1], sys.argv[2])
import os
from os import path

import pandas as pd


class PS2:
    """ A class holding constants used to get columns of a PS2 dataset
    """

    # Main event table columns
    Order = 'Order'
    SubjectID = 'SubjectID'
    ToolInstances = 'ToolInstances'
    ServerTimestamp = 'ServerTimestamp'
    ServerTimezone = 'ServerTimezone'
    CourseID = 'CourseID'
    CourseSectionID = 'CourseSectionID'
    AssignmentID = 'AssignmentID'
    ProblemID = 'ProblemID'
    Attempt = 'Attempt'
    CodeStateID = 'CodeStateID'
    EventType = 'EventType'
    Score = 'Score'
    CompileResult = 'CompileResult'
    CompileMessageType = 'CompileMessageType'
    CompileMessageData = 'CompileMessageData'
    EventID = 'EventID'
    ParentEventID = 'ParentEventID'
    SourceLocation = 'SourceLocation'
    Code = 'Code'

    # Metadata table properties
    Version = 'Version'
    IsEventOrderingConsistent = 'IsEventOrderingConsistent'
    EventOrderScope = 'EventOrderScope'
    EventOrderScopeColumns = 'EventOrderScopeColumns'
    CodeStateRepresentation = 'CodeStateRepresentation'


class ProgSnap2Dataset:
    """ Lazy, cached access to the CSV tables of a ProgSnap2 dataset directory.

    The main table, metadata table and code states table are each read from
    disk on first use and cached on the instance; getters return copies so
    callers cannot modify the cached tables by accident.
    """

    MAIN_TABLE_FILE = 'MainTable.csv'
    METADATA_TABLE_FILE = 'DatasetMetadata.csv'
    LINK_TABLE_DIR = 'LinkTables'
    CODE_STATES_DIR = 'CodeStates'
    CODE_STATES_TABLE_FILE = os.path.join(CODE_STATES_DIR, 'CodeStates.csv')

    def __init__(self, directory):
        """
        :param directory: Root directory of the dataset (contains MainTable.csv)
        """
        self.directory = directory
        self.main_table = None
        self.metadata_table = None
        self.code_states_table = None

    def path(self, local_path):
        """ Returns local_path resolved against the dataset directory
        """
        return path.join(self.directory, local_path)

    @staticmethod
    def _as_bool(value):
        """ Interprets a metadata value as a boolean.

        Metadata values come from a CSV column that also holds non-boolean
        values, so a boolean may arrive as the string 'True'/'False'; a plain
        truthiness test would treat the string 'False' as true.
        """
        if isinstance(value, str):
            return value.strip().lower() == 'true'
        return bool(value)

    def _load_metadata_table(self):
        """ Returns the cached metadata table, reading it from disk on first use
        """
        if self.metadata_table is None:
            self.metadata_table = pd.read_csv(self.path(ProgSnap2Dataset.METADATA_TABLE_FILE))
        return self.metadata_table

    def _sort_by_declared_order(self, table):
        """ Sorts table in place according to the event ordering metadata.

        :raises ValueError: if the scope is 'Restricted' but no grouping
            columns are declared
        """
        if not self._as_bool(self.get_metadata_property(PS2.IsEventOrderingConsistent)):
            return
        order_scope = self.get_metadata_property(PS2.EventOrderScope)
        if order_scope == 'Global':
            # If the table is globally ordered, sort it
            table.sort_values(by=[PS2.Order], inplace=True)
        elif order_scope == 'Restricted':
            # If restricted ordered, sort first by grouping columns, then by order
            order_columns = self.get_metadata_property(PS2.EventOrderScopeColumns)
            # A missing value may be None, '' or NaN (empty CSV cell)
            if not isinstance(order_columns, str) or not order_columns.strip():
                raise ValueError('EventOrderScope is restricted but no EventOrderScopeColumns given')
            # Tolerate stray whitespace and a trailing ';' in the column list
            columns = [col.strip() for col in order_columns.split(';') if col.strip()]
            columns.append(PS2.Order)
            # The result is that _within_ these groups, events are ordered
            table.sort_values(by=columns, inplace=True)

    def get_main_table(self):
        """ Returns a Pandas DataFrame with the main event table for this dataset
        """
        if self.main_table is None:
            table = pd.read_csv(self.path(ProgSnap2Dataset.MAIN_TABLE_FILE))
            # Only cache the table once sorting succeeded, so a metadata error
            # does not leave an unsorted table behind for later calls.
            self._sort_by_declared_order(table)
            self.main_table = table
        return self.main_table.copy()

    def set_main_table(self, main_table):
        """ Overwrites the main table loaded from the file with the provided table.
        This table will be used for future operations, including copying the dataset.
        """
        self.main_table = main_table.copy()

    def get_code_states_table(self):
        """ Returns a Pandas DataFrame with the code states table from this dataset
        """
        if self.code_states_table is None:
            self.code_states_table = pd.read_csv(self.path(ProgSnap2Dataset.CODE_STATES_TABLE_FILE))
        return self.code_states_table.copy()

    def get_metadata_property(self, property):
        """ Returns the value of a given metadata property in the metadata table.
        If the property is not listed, a default is returned for the event
        ordering properties and None for everything else.
        :raises Exception: if the property is listed more than once
        """
        metadata = self._load_metadata_table()

        values = metadata[metadata['Property'] == property]['Value']
        if len(values) == 1:
            return values.iloc[0]
        if len(values) > 1:
            raise Exception('Multiple values for property: ' + property)

        # Default return values as of V6
        if property == PS2.IsEventOrderingConsistent:
            return False
        if property == PS2.EventOrderScope:
            return 'None'
        if property == PS2.EventOrderScopeColumns:
            return ''

        return None

    def __link_table_path(self):
        return self.path(ProgSnap2Dataset.LINK_TABLE_DIR)

    def list_link_tables(self):
        """ Returns a list of the link tables in this dataset, which can be loaded with load_link_table.
        The list is empty if the dataset has no link table directory.
        """
        link_dir = self.__link_table_path()
        if not os.path.isdir(link_dir):
            return []
        return [f for f in os.listdir(link_dir)
                if os.path.isfile(os.path.join(link_dir, f)) and f.endswith('.csv')]

    def load_link_table(self, link_table):
        """ Returns a Pandas DataFrame with the link table with the given name
        :param link_table: The link table name or file
        """
        if not link_table.endswith('.csv'):
            link_table += '.csv'
        return pd.read_csv(path.join(self.__link_table_path(), link_table))

    def drop_main_table_column(self, column):
        """ Removes a column from the cached main table (loading it first if needed)
        """
        self.get_main_table()
        self.main_table.drop(column, axis=1, inplace=True)

    def save_subset(self, path, main_table_filterer, copy_link_tables=True):
        """ Writes a filtered copy of this dataset to a new directory.
        :param path: Directory to write the subset to (created if missing)
        :param main_table_filterer: Function taking the main table and returning the rows to keep
        :param copy_link_tables: If True, link tables are copied as well, restricted to
            rows whose ID columns match a row kept in the main table
        """
        os.makedirs(os.path.join(path, ProgSnap2Dataset.CODE_STATES_DIR), exist_ok=True)
        main_table = main_table_filterer(self.get_main_table())
        main_table.to_csv(os.path.join(path, ProgSnap2Dataset.MAIN_TABLE_FILE), index=False)

        # Keep only the code states still referenced by the filtered events
        code_state_ids = main_table[PS2.CodeStateID].unique()
        code_states = self.get_code_states_table()
        code_states = code_states[code_states[PS2.CodeStateID].isin(code_state_ids)]
        code_states.to_csv(os.path.join(path, ProgSnap2Dataset.CODE_STATES_DIR, 'CodeStates.csv'), index=False)

        # Load explicitly: the metadata is not cached yet if the main table
        # was supplied through set_main_table instead of read from disk.
        self._load_metadata_table().to_csv(os.path.join(path, ProgSnap2Dataset.METADATA_TABLE_FILE), index=False)

        if not copy_link_tables:
            return

        os.makedirs(os.path.join(path, ProgSnap2Dataset.LINK_TABLE_DIR), exist_ok=True)

        for link_table_name in self.list_link_tables():
            link_table = self.load_link_table(link_table_name)
            columns = [col for col in link_table.columns if col.endswith('ID') and col in main_table.columns]
            if columns:
                # Distinct ID combinations present in the kept events; rows
                # with a missing ID never match, as with a groupby on them.
                kept_keys = set(main_table[columns].dropna().itertuples(index=False, name=None))
                to_keep = [key in kept_keys
                           for key in link_table[columns].itertuples(index=False, name=None)]
                # An index-aligned boolean Series also works for an empty table,
                # where a plain empty list would be read as a column selection.
                mask = pd.Series(to_keep, index=link_table.index, dtype=bool)
                filtered_link_table = link_table[mask]
            else:
                # Nothing links this table to the main table, so it cannot be
                # filtered; copy it unchanged rather than failing.
                filtered_link_table = link_table
            filtered_link_table.to_csv(os.path.join(path, ProgSnap2Dataset.LINK_TABLE_DIR, link_table_name), index=False)

    @staticmethod
    def __to_one(lst, error):
        """ Returns the single element of lst, or None if lst is empty.
        :raises Exception: (with the given message) if lst has several elements
        """
        if len(lst) == 0:
            return None
        if len(lst) > 1:
            raise Exception(error or 'Should have only one result!')
        return lst.iloc[0]

    @staticmethod
    def _find_code(code_states, code_state_id):
        """ Looks up the code for code_state_id in an already loaded code states table
        """
        if code_state_id is None:
            return None
        code = code_states[code_states[PS2.CodeStateID] == code_state_id][PS2.Code]
        return ProgSnap2Dataset.__to_one(code, 'Multiple code states match that ID.')

    def get_code_for_id(self, code_state_id):
        """ Returns the code stored for the given CodeStateID, or None if there is none
        """
        if code_state_id is None:
            return None
        return ProgSnap2Dataset._find_code(self.get_code_states_table(), code_state_id)

    def get_code_for_event_id(self, row_id):
        """ Returns the code attached to the event with the given EventID, or None if no event matches
        """
        events = self.get_main_table()
        # Compare the EventID *column* with row_id to build the row mask
        code_state_ids = events[events[PS2.EventID] == row_id][PS2.CodeStateID]
        code_state_id = ProgSnap2Dataset.__to_one(code_state_ids, 'Multiple rows match that ID.')
        return self.get_code_for_id(code_state_id)

    def get_subject_ids(self):
        """ Returns the distinct SubjectIDs in the main table
        """
        events = self.get_main_table()
        return events[PS2.SubjectID].unique()

    def get_problem_ids(self):
        """ Returns the distinct ProblemIDs in the main table
        """
        events = self.get_main_table()
        return events[PS2.ProblemID].unique()

    def get_trace(self, subject_id, problem_id):
        """ Returns the list of distinct code states, in main table order, that
        the given subject produced for the given problem
        """
        events = self.get_main_table()
        rows = events[(events[PS2.SubjectID] == subject_id) & (events[PS2.ProblemID] == problem_id)]
        ids = rows[PS2.CodeStateID].unique()
        # Fetch the code states table once instead of copying it per ID
        code_states = self.get_code_states_table()
        return [ProgSnap2Dataset._find_code(code_states, code_state_id) for code_state_id in ids]


if __name__ == '__main__':
    data = ProgSnap2Dataset('data/CodeWorkout/S19')
    # for code in data.get_trace('4d230b683bf9840553ae57f4acc96e81', 32):
    #     print(code)
    #     print('-------')

    data.save_subset('data/test/CopyA', lambda df: df[df[PS2.SubjectID].str.startswith('a')])
+ "TEST_PATH = os.path.join(BASE_PATH, 'Test')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "train_ps2 = ProgSnap2Dataset(os.path.join(TRAIN_PATH, 'Data')) " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Attempts</th>\n", + " <th>CorrectEventually</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " 
</tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Attempts \\\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 439.0 1 1 \n", + "1 04c32d4d95425f73b3a1d6502aed4d48 439.0 3 2 \n", + "2 04c32d4d95425f73b3a1d6502aed4d48 439.0 5 3 \n", + "3 04c32d4d95425f73b3a1d6502aed4d48 439.0 12 1 \n", + "4 04c32d4d95425f73b3a1d6502aed4d48 439.0 13 2 \n", + "\n", + " CorrectEventually Label \n", + "0 True True \n", + "1 True True \n", + "2 True True \n", + "3 True True \n", + "4 True True " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The early dataset will help us to feature extraction,\n", + "# but we're not actually predicting anything here\n", + "# Note: we could still use this for model training if desired.\n", + "early_train = pd.read_csv(os.path.join(TRAIN_PATH, 'early.csv'))\n", + "early_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>41</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>43</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " 
<td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>44</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>46</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>49</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Label\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 494.0 41 False\n", + "1 04c32d4d95425f73b3a1d6502aed4d48 494.0 43 True\n", + "2 04c32d4d95425f73b3a1d6502aed4d48 494.0 44 True\n", + "3 04c32d4d95425f73b3a1d6502aed4d48 494.0 46 True\n", + "4 04c32d4d95425f73b3a1d6502aed4d48 494.0 49 True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The late dataset contains the problems that we're actually predicting for.\n", + "# The training portion of it includes labels.\n", + "late_train = pd.read_csv(os.path.join(TRAIN_PATH, 'late.csv'))\n", + "late_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_base = late_train.copy().drop('Label', axis=1)\n", + "y_train = late_train['Label'].values" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "problem_encoder = OneHotEncoder().fit(X_train_base[PS2.ProblemID].values.reshape(-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., ..., 0., 0., 0.],\n", + " [0., 1., 0., ..., 0., 0., 0.],\n", + " [0., 0., 1., ..., 0., 0., 0.],\n", + " ...,\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 1., 0.],\n", + " [0., 0., 0., ..., 0., 0., 1.]])" + ] + }, 
+ "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "problem_encoder.transform(X_train_base[PS2.ProblemID].values.reshape(-1, 1)).toarray()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def get_code_table(ps2_dataset):\n", + " events_table = ps2_dataset.get_main_table()\n", + " code_states = ps2_dataset.get_code_states_table()\n", + " runs = events_table.merge(code_states, on=PS2.CodeStateID)\n", + " runs = runs[runs[PS2.EventType] == 'Run.Program']\n", + " runs = runs[[PS2.Order, PS2.SubjectID, PS2.ProblemID, 'Code']]\n", + " return runs" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Order</th>\n", + " <th>SubjectID</th>\n", + " <th>ProblemID</th>\n", + " <th>Code</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>119441</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>1</td>\n", + " <td>public int sortaSum(int a, int b)\\r\\n{\\r\\n ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>134115</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>3</td>\n", + " <td>public boolean in1To10(int n, boolean outsideM...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>134117</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>3</td>\n", + " <td>public boolean in1To10(int n, boolean outsideM...</td>\n", + " </tr>\n", + " 
<tr>\n", + " <th>6</th>\n", + " <td>65403</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>5</td>\n", + " <td>public boolean answerCell(boolean isMorning, b...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>65407</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>5</td>\n", + " <td>public boolean answerCell(boolean isMorning, b...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134497</th>\n", + " <td>113478</td>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>70</td>\n", + " <td>public boolean twoTwo(int[] nums)\\r\\n{\\r\\n ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134500</th>\n", + " <td>113483</td>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>70</td>\n", + " <td>public boolean twoTwo(int[] nums)\\r\\n{\\r\\n ...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134502</th>\n", + " <td>108282</td>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>71</td>\n", + " <td>public boolean canBalance(int[] nums)\\r\\n{\\r\\n...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134504</th>\n", + " <td>110202</td>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>112</td>\n", + " <td>public int[] seriesUp(int n)\\r\\n{\\r\\n int[]...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>134506</th>\n", + " <td>81273</td>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>118</td>\n", + " <td>public int[] shiftLeft(int[] nums)\\r\\n{\\r\\n ...</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>46825 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Order SubjectID ProblemID \\\n", + "0 119441 04c32d4d95425f73b3a1d6502aed4d48 1 \n", + "2 134115 04c32d4d95425f73b3a1d6502aed4d48 3 \n", + "4 134117 04c32d4d95425f73b3a1d6502aed4d48 3 \n", + "6 65403 04c32d4d95425f73b3a1d6502aed4d48 5 \n", + "10 65407 
04c32d4d95425f73b3a1d6502aed4d48 5 \n", + "... ... ... ... \n", + "134497 113478 ffb72475a81de0e95b910ffad039f5c2 70 \n", + "134500 113483 ffb72475a81de0e95b910ffad039f5c2 70 \n", + "134502 108282 ffb72475a81de0e95b910ffad039f5c2 71 \n", + "134504 110202 ffb72475a81de0e95b910ffad039f5c2 112 \n", + "134506 81273 ffb72475a81de0e95b910ffad039f5c2 118 \n", + "\n", + " Code \n", + "0 public int sortaSum(int a, int b)\\r\\n{\\r\\n ... \n", + "2 public boolean in1To10(int n, boolean outsideM... \n", + "4 public boolean in1To10(int n, boolean outsideM... \n", + "6 public boolean answerCell(boolean isMorning, b... \n", + "10 public boolean answerCell(boolean isMorning, b... \n", + "... ... \n", + "134497 public boolean twoTwo(int[] nums)\\r\\n{\\r\\n ... \n", + "134500 public boolean twoTwo(int[] nums)\\r\\n{\\r\\n ... \n", + "134502 public boolean canBalance(int[] nums)\\r\\n{\\r\\n... \n", + "134504 public int[] seriesUp(int n)\\r\\n{\\r\\n int[]... \n", + "134506 public int[] shiftLeft(int[] nums)\\r\\n{\\r\\n ... 
\n", + "\n", + "[46825 rows x 4 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code_table_train = get_code_table(train_ps2)\n", + "code_table_train" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'public': 19,\n", + " 'int': 14,\n", + " 'if': 13,\n", + " '10': 0,\n", + " 'return': 21,\n", + " 'else': 7,\n", + " 'boolean': 3,\n", + " 'true': 28,\n", + " 'false': 10,\n", + " 'speed': 23,\n", + " 'string': 25,\n", + " 'day': 6,\n", + " '21': 1,\n", + " 'str': 24,\n", + " 'substring': 26,\n", + " 'length': 15,\n", + " 'num': 17,\n", + " 'small': 22,\n", + " 'big': 2,\n", + " 'goal': 12,\n", + " 'for': 11,\n", + " 'end': 8,\n", + " 'equals': 9,\n", + " 'word': 29,\n", + " 'new': 16,\n", + " 'count': 5,\n", + " 'charat': 4,\n", + " 'nums': 18,\n", + " 'sum': 27,\n", + " 'result': 20}" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We want to find a consistent, common vocabulary across all problems\n", + "# so we first build our vocabulary for all code submissions\n", + "\n", + "from sklearn.feature_extraction.text import TfidfVectorizer\n", + "\n", + "# Note this approach is _very_ naive, since it's using NLP assumptions\n", + "# about tokenizing, among other things, but it is good enough for a demonstration.\n", + "code_vectorizer = TfidfVectorizer(max_features=30)\n", + "code_vectorizer.fit(code_table_train['Code'])\n", + "top_vocab = code_vectorizer.vocabulary_\n", + "top_vocab" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "50" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# We want to create a separate encoder for each problem, since the\n", + "# \"document frequency\" part of TF-IDF should be 
calibrated separately\n", + "# for each problem.\n", + "code_problem_encoders = {}\n", + "def create_encoder(rows):\n", + " code = rows['Code']\n", + " problem_id = rows[PS2.ProblemID].iloc[0]\n", + " code_vectorizer = TfidfVectorizer(vocabulary=top_vocab)\n", + " code_vectorizer.fit(code)\n", + " code_problem_encoders[problem_id] = code_vectorizer\n", + " \n", + "code_table_train.groupby(PS2.ProblemID).apply(create_encoder)\n", + "len(code_problem_encoders)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "public int sortaSum(int a, int b)\r\n", + "{\r\n", + " if (a + b >= 10)\r\n", + " {\r\n", + " if (a + b <= 19)\r\n", + " {\r\n", + " return 20;\r\n", + " }\r\n", + " return a + b;\r\n", + " }\r\n", + " else\r\n", + " {\r\n", + " return a + b;\r\n", + " }\r\n", + "}\r\n", + "\n", + " (0, 21)\t0.5929619322827931\n", + " (0, 19)\t0.19320378937452193\n", + " (0, 14)\t0.5796113681235658\n", + " (0, 13)\t0.39681148818701084\n", + " (0, 7)\t0.2744965584355051\n", + " (0, 0)\t0.20569731847202877\n" + ] + } + ], + "source": [ + "test_code = code_table_train['Code'].iloc[0]\n", + "print(test_code)\n", + "print(code_problem_encoders[1].transform([test_code]))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_instance_features(instance, early_df):\n", + " instance = instance.copy()\n", + " subject_id = instance[PS2.SubjectID]\n", + " early_problems = early_df[early_df[PS2.SubjectID] == subject_id]\n", + " # Extract very naive features about the student\n", + " # (without respect to the problem being predicted)\n", + " # Number of early problems attempted\n", + " instance['ProblemsAttempted'] = early_problems.shape[0]\n", + " # Percentage of early problems gotten correct eventually\n", + " instance['PercCorrectEventually'] = np.mean(early_problems['CorrectEventually'])\n", + " # Median
attempts made on early problems\n", + " instance['MedAttempts'] = np.median(early_problems['Attempts'])\n", + " # Max attempts made on early problems\n", + " instance['MaxAttempts'] = np.max(early_problems['Attempts'])\n", + " # Percentage of problems gotten correct on the first try\n", + " instance['PercCorrectFirstTry'] = np.mean(early_problems['Attempts'] == 1)\n", + " \n", + " instance = instance.drop('SubjectID')\n", + " return instance" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AssignmentID 494\n", + "ProblemID 41\n", + "ProblemsAttempted 30\n", + "PercCorrectEventually 1\n", + "MedAttempts 6.5\n", + "MaxAttempts 45\n", + "PercCorrectFirstTry 0.166667\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "extract_instance_features(X_train_base.iloc[0], early_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_instance_code_features(instance, early_df, code_table):\n", + " subject_id = instance[PS2.SubjectID]\n", + " problem_id = instance[PS2.ProblemID]\n", + " \n", + " # Get all attempts for this problem by this subject\n", + " attempts = code_table[(code_table[PS2.SubjectID] == subject_id) & \\\n", + " (code_table[PS2.ProblemID] == problem_id)]\n", + " # Get the code of the last attempt (we could use others but don't here)\n", + " encoder = code_problem_encoders[problem_id]\n", + " # If for some reason there were no attempts, return 0s\n", + " if (attempts.shape[0] == 0):\n", + " return encoder.transform([\"\"])\n", + " last_attempt = attempts.sort_values('Order')['Code'].iloc[-1]\n", + " code_features = encoder.transform([last_attempt])\n", + " \n", + " return code_features" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": true + }, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 21)\t0.22116095645566827\n", + " (0, 19)\t0.2128142305637896\n", + " (0, 18)\t0.8512569222551584\n", + " (0, 14)\t0.4256284611275792\n" + ] + } + ], + "source": [ + "print(extract_instance_code_features(X_train_base.iloc[0], early_train, code_table_train))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<5x30 sparse matrix of type '<class 'numpy.float64'>'\n", + "\twith 37 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Test how to stack code features across instances\n", + "from scipy.sparse import vstack\n", + "import functools\n", + "\n", + "code_features = X_train_base.iloc[:5].apply(\\\n", + " lambda instance: extract_instance_code_features(\\\n", + " instance, early_train, code_table_train), axis=1)\n", + "\n", + "vstack(code_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_features(X, early_df, code_table, scaler, is_train):\n", + " # First extract performance features for each row\n", + " features = X.apply(lambda instance: extract_instance_features(instance, early_df), axis=1)\n", + " # Then get code features\n", + " code_features = X.apply(lambda instance: extract_instance_code_features(\\\n", + " instance, early_df, code_table), axis=1)\n", + " code_features = vstack(code_features).toarray()\n", + " \n", + " # Then one-hot encode the problem_id and append it\n", + " problem_ids = problem_encoder.transform(features[PS2.ProblemID].values.reshape(-1, 1)).toarray()\n", + " # Then get rid of nominal features\n", + " features.drop([PS2.AssignmentID, PS2.ProblemID], axis=1, inplace=True)\n", + " # Then scale the continuous features, fitting the scaler if this is training\n", + " if is_train:\n", + 
" scaler.fit(features)\n", + " features = scaler.transform(features)\n", + " \n", + " # Return continuous and one-hot features together\n", + " return np.concatenate([features, code_features, problem_ids], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_train = extract_features(X_train_base, early_train, code_table_train, scaler, True)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4201, 55)\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 0.51751812, 0.58371895, 1.76922077, 1.70602676, -0.89569333,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0.42562846,\n", + " 0. , 0. , 0. , 0.85125692, 0.21281423,\n", + " 0. , 0.22116096, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ],\n", + " [ 0.51751812, 0.58371895, 1.76922077, 1.70602676, -0.89569333,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0.26622699, 0. , 0. ,\n", + " 0. , 0. , 0. , 0.22181069, 0.57514592,\n", + " 0.21929281, 0.38891882, 0. , 0.57514592, 0.09585765,\n", + " 0. , 0.09900339, 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 1. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. 
]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(X_train.shape)\n", + "X_train[:2,]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate the Training Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegressionCV\n", + "from sklearn.neural_network import MLPClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "\n", + "# We set an alpha value; otherwise the model overfits with 100% accuracy\n", + "# If we were being rigorous, we would set this using hyperparameter tuning\n", + "model = RandomForestClassifier(ccp_alpha=0.001)\n", + "model.fit(X_train, y_train)\n", + "train_predictions = model.predict(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " False 0.43 0.89 0.58 518\n", + " True 0.98 0.83 0.90 3683\n", + "\n", + " accuracy 0.84 4201\n", + " macro avg 0.70 0.86 0.74 4201\n", + "weighted avg 0.91 0.84 0.86 4201\n", + "\n", + "AUC: 0.86040290513546\n", + "Macro F1: 0.7377652933832709\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import roc_auc_score\n", + "from sklearn.metrics import f1_score\n", + "\n", + "print(classification_report(train_predictions, y_train))\n", + "print('AUC: ' + str(roc_auc_score(train_predictions, y_train)))\n", + "print('Macro F1: ' + str(f1_score(train_predictions, y_train, average='macro')))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<sklearn.metrics._plot.roc_curve.RocCurveDisplay at 
0x1c53d1fd608>" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import plot_roc_curve\n", + "\n", + "plot_roc_curve(model, X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 9 28 31 6 34 17 11 7 45 29 42 53 14 36 27 39 41 40 48 54 46 35 49 38\n", + " 50 43 22 30 52 25 51 13 47 44 10 32 5 37 15 12 8 21 33 0 23 16 18 19\n", + " 20 26 3 24 2 1 4]\n" + ] + }, + { + "data": { + "text/plain": [ + "array([2.20181309e-02, 1.30740725e-01, 1.25642876e-01, 7.78558802e-02,\n", + " 1.71925190e-01, 8.73186425e-03, 0.00000000e+00, 6.09759997e-05,\n", + " 1.57659859e-02, 0.00000000e+00, 7.27993663e-03, 0.00000000e+00,\n", + " 1.51091871e-02, 3.42760977e-03, 9.11457338e-04, 1.22381474e-02,\n", + " 3.29994376e-02, 0.00000000e+00, 3.32649011e-02, 4.05234308e-02,\n", + " 4.69469226e-02, 1.62030641e-02, 2.25987032e-03, 2.81709411e-02,\n", + " 8.25541671e-02, 2.90669657e-03, 4.85810729e-02, 1.03981661e-03,\n", + " 0.00000000e+00, 4.79588211e-04, 2.38210644e-03, 0.00000000e+00,\n", + " 7.65973842e-03, 1.93997924e-02, 0.00000000e+00, 1.35906502e-03,\n", + " 9.82319379e-04, 1.03365796e-02, 1.50237139e-03, 1.17278967e-03,\n", + " 1.19422740e-03, 1.19220858e-03, 7.76112145e-04, 2.21994738e-03,\n", + " 4.11228179e-03, 4.42651252e-04, 1.32946917e-03, 3.96809141e-03,\n", + " 1.30278686e-03, 1.37757687e-03, 2.12040369e-03, 2.94789037e-03,\n", + " 2.45866110e-03, 8.13472736e-04, 1.31158028e-03])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Look at which features were important\n", +
"print(np.argsort(model.feature_importances_))\n", + "model.feature_importances_" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['AssignmentID', 'ProblemID', 'ProblemsAttempted',\n", + " 'PercCorrectEventually', 'MedAttempts', 'MaxAttempts',\n", + " 'PercCorrectFirstTry'],\n", + " dtype='object')" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The first 7 features are from attempts.\n", + "# The top-4 most important are from these.\n", + "extract_instance_features(X_train_base.iloc[0], early_train).index" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['10',\n", + " '21',\n", + " 'big',\n", + " 'boolean',\n", + " 'charat',\n", + " 'count',\n", + " 'day',\n", + " 'else',\n", + " 'end',\n", + " 'equals',\n", + " 'false',\n", + " 'for',\n", + " 'goal',\n", + " 'if',\n", + " 'int',\n", + " 'length',\n", + " 'new',\n", + " 'num',\n", + " 'nums',\n", + " 'public',\n", + " 'result',\n", + " 'return',\n", + " 'small',\n", + " 'speed',\n", + " 'str',\n", + " 'string',\n", + " 'substring',\n", + " 'sum',\n", + " 'true',\n", + " 'word']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The next 30 features come from the vocabulary, and the next 20 are the test problem ID\n", + "sorted(top_vocab)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "num\n", + "public\n", + "goal\n", + "if\n", + "for\n" + ] + } + ], + "source": [ + "# The 5th highest importance feature is #24\n", + "# The variable name num\n", + "print(sorted(top_vocab)[24 - 7])\n", + "# public, perhaps used if declaring a helper method\n", + "print(sorted(top_vocab)[26 - 7])\n", + "# The variable name
goal\n", + "print(sorted(top_vocab)[19 - 7])\n", + "# if\n", + "print(sorted(top_vocab)[20 - 7])\n", + "# for\n", + "print(sorted(top_vocab)[18 - 7])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate the CV Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.7621963648717514\n", + "AUC: 0.7716510786626823\n", + "Macro F1: 0.6024647798356945\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import cross_validate\n", + "\n", + "model = RandomForestClassifier(ccp_alpha=0.001)\n", + "cv_results = cross_validate(model, X_train, y_train, cv=5, scoring=['accuracy', 'f1_macro', 'roc_auc'])\n", + "print(f'Accuracy: {np.mean(cv_results[\"test_accuracy\"])}')\n", + "print(f'AUC: {np.mean(cv_results[\"test_roc_auc\"])}')\n", + "print(f'Macro F1: {np.mean(cv_results[\"test_f1_macro\"])}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the test data" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "early_test = pd.read_csv(os.path.join(TEST_PATH, 'early.csv'))\n", + "late_test = pd.read_csv(os.path.join(TEST_PATH, 'late.csv'))\n", + "\n", + "test_ps2 = ProgSnap2Dataset(os.path.join(TEST_PATH, 'Data'))\n", + "code_table_test = get_code_table(test_ps2)\n", + "\n", + "X_test = extract_features(late_test, early_test, code_table_test, scaler, False)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1511, 55)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "model = 
RandomForestClassifier(ccp_alpha=0.001)\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict_proba(X_test)[:,1]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.54347246, 0.53288555, 0.55693535, ..., 0.49766704, 0.50718469,\n", + " 0.50822475])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>036ad3e516c5bf3a4b3be35b137bcbb8</td>\n", + " <td>494.0</td>\n", + " <td>41</td>\n", + " <td>0.543472</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>036ad3e516c5bf3a4b3be35b137bcbb8</td>\n", + " <td>494.0</td>\n", + " <td>43</td>\n", + " <td>0.532886</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>036ad3e516c5bf3a4b3be35b137bcbb8</td>\n", + " <td>494.0</td>\n", + " <td>44</td>\n", + " <td>0.556935</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>036ad3e516c5bf3a4b3be35b137bcbb8</td>\n", + " <td>494.0</td>\n", + " <td>46</td>\n", + " <td>0.530884</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>036ad3e516c5bf3a4b3be35b137bcbb8</td>\n", + " <td>494.0</td>\n", + " <td>49</td>\n", + " 
<td>0.550823</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1506</th>\n", + " <td>fc5f86251458722c799d1830fa0c2c1f</td>\n", + " <td>494.0</td>\n", + " <td>67</td>\n", + " <td>0.504612</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1507</th>\n", + " <td>fc5f86251458722c799d1830fa0c2c1f</td>\n", + " <td>494.0</td>\n", + " <td>104</td>\n", + " <td>0.501157</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1508</th>\n", + " <td>fc5f86251458722c799d1830fa0c2c1f</td>\n", + " <td>494.0</td>\n", + " <td>106</td>\n", + " <td>0.497667</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1509</th>\n", + " <td>fc5f86251458722c799d1830fa0c2c1f</td>\n", + " <td>494.0</td>\n", + " <td>107</td>\n", + " <td>0.507185</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1510</th>\n", + " <td>fc5f86251458722c799d1830fa0c2c1f</td>\n", + " <td>494.0</td>\n", + " <td>108</td>\n", + " <td>0.508225</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>1511 rows × 4 columns</p>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Label\n", + "0 036ad3e516c5bf3a4b3be35b137bcbb8 494.0 41 0.543472\n", + "1 036ad3e516c5bf3a4b3be35b137bcbb8 494.0 43 0.532886\n", + "2 036ad3e516c5bf3a4b3be35b137bcbb8 494.0 44 0.556935\n", + "3 036ad3e516c5bf3a4b3be35b137bcbb8 494.0 46 0.530884\n", + "4 036ad3e516c5bf3a4b3be35b137bcbb8 494.0 49 0.550823\n", + "... ... ... ... 
...\n", + "1506 fc5f86251458722c799d1830fa0c2c1f 494.0 67 0.504612\n", + "1507 fc5f86251458722c799d1830fa0c2c1f 494.0 104 0.501157\n", + "1508 fc5f86251458722c799d1830fa0c2c1f 494.0 106 0.497667\n", + "1509 fc5f86251458722c799d1830fa0c2c1f 494.0 107 0.507185\n", + "1510 fc5f86251458722c799d1830fa0c2c1f 494.0 108 0.508225\n", + "\n", + "[1511 rows x 4 columns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions_df = late_test.copy()\n", + "predictions_df['Label'] = predictions\n", + "predictions_df" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "# We don't have the test labels - you have to submit to evaluate it" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "# We use res/predictions.csv, since that's where the scoring program expects it\n", + "# but you can change this directory\n", + "path = os.path.join('data', 'Prediction', semester, 'code_RF_task1', 'res')\n", + "os.makedirs(path, exist_ok=True)\n", + "predictions_df.to_csv(os.path.join(path, 'predictions.csv'), index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/compare_semesters.ipynb b/compare_semesters.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..a92d1e62c16461b44320100832cf91caf85c4ae9 --- /dev/null +++ b/compare_semesters.ipynb @@ -0,0 +1,565 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import
pandas as pd\n", + "import numpy as np\n", + "from ProgSnap2 import ProgSnap2Dataset\n", + "from ProgSnap2 import PS2\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "import numpy as np\n", + "import os\n", + "from os import path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "PATH = \"data/CodeWorkout/\"\n", + "\n", + "s19_ps2 = ProgSnap2Dataset(PATH + 'S19')\n", + "f19_ps2 = ProgSnap2Dataset(PATH + 'F19')\n", + "\n", + "s19 = s19_ps2.get_main_table()\n", + "f19 = f19_ps2.get_main_table()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "s19_problems = set(s19[PS2.ProblemID].unique())\n", + "f19_problems = set(f19[PS2.ProblemID].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([439., 487., 492., 494., 502.])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sort(s19[PS2.AssignmentID].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([597, 600, 609, 615, 622, 631], dtype=int64)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sort(f19[PS2.AssignmentID].unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "48" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 48 problems the same\n", + "len(s19_problems.intersection(f19_problems))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{45, 48}" + ] + }, + 
"execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Problems 45 and 48 were dropped (from assignment 5)\n", + "dropped_problems = s19_problems.difference(f19_problems)\n", + "dropped_problems" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{30, 171, 241, 242, 243, 244, 245, 246, 254, 255, 736, 737}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 12 problems were added\n", + "added_problems = f19_problems.difference(s19_problems)\n", + "added_problems" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>73845</th>\n", + " <td>622</td>\n", + " <td>241</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73862</th>\n", + " <td>622</td>\n", + " <td>171</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73880</th>\n", + " <td>622</td>\n", + " <td>30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73900</th>\n", + " <td>622</td>\n", + " <td>244</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73904</th>\n", + " <td>622</td>\n", + " <td>245</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73912</th>\n", + " <td>622</td>\n", + " <td>246</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73922</th>\n", + " <td>622</td>\n", + " <td>254</td>\n", + " </tr>\n", + " <tr>\n", + " 
<th>73927</th>\n", + " <td>622</td>\n", + " <td>255</td>\n", + " </tr>\n", + " <tr>\n", + " <th>73973</th>\n", + " <td>622</td>\n", + " <td>242</td>\n", + " </tr>\n", + " <tr>\n", + " <th>74023</th>\n", + " <td>622</td>\n", + " <td>243</td>\n", + " </tr>\n", + " <tr>\n", + " <th>92628</th>\n", + " <td>631</td>\n", + " <td>736</td>\n", + " </tr>\n", + " <tr>\n", + " <th>92639</th>\n", + " <td>631</td>\n", + " <td>737</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " AssignmentID ProblemID\n", + "73845 622 241\n", + "73862 622 171\n", + "73880 622 30\n", + "73900 622 244\n", + "73904 622 245\n", + "73912 622 246\n", + "73922 622 254\n", + "73927 622 255\n", + "73973 622 242\n", + "74023 622 243\n", + "92628 631 736\n", + "92639 631 737" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 10 Were added to a new assignment and 2 replace the old problems in assignment 5\n", + "f19[f19[PS2.ProblemID].isin(added_problems)][[PS2.AssignmentID, PS2.ProblemID]].drop_duplicates()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def time_stats(df):\n", + " df = df.copy()\n", + " df['TimeInt'] = pd.to_datetime(df[PS2.ServerTimestamp]).apply(lambda x: x.value)\n", + " med_time = df.groupby([PS2.AssignmentID, PS2.ProblemID])['TimeInt'].apply(lambda x: np.median(x))\n", + " # df = df.merge(med_time.to_frame('MedTime'), on=[PS2.AssignmentID, PS2.ProblemID])\n", + " return med_time.reset_index().sort_values('TimeInt')\n", + "\n", + "s19_times = time_stats(s19)\n", + "f19_times = time_stats(f19)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<matplotlib.collections.PathCollection at 0x2102ad0f8c8>" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + 
"text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "y = range(0, len(s19_times.index))\n", + "colors = s19_times[PS2.ProblemID].isin(dropped_problems).apply(lambda x: 'red' if x else 'blue')\n", + "plt.scatter(s19_times['TimeInt'], y, color=colors)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<matplotlib.collections.PathCollection at 0x2102af98708>" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# The added assignment comes between 4 and 5 in spring\n", + "# We could just skip this assignment for F19->S19 prediction...\n", + "y = range(0, len(f19_times.index))\n", + "colors = f19_times[PS2.ProblemID].isin(added_problems).apply(lambda x: 'red' if x else 'blue')\n", + "plt.scatter(f19_times['TimeInt'], y, color=colors)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "S19 Problem 45 == F19 Problem 736\n", + "\n", + "S19 Problem 48 == f19 Problem 737" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "def print_code_samples(df, ps2, problem_id, n_samples):\n", + " code_states = df[(df[PS2.ProblemID] == problem_id) & (df[PS2.EventType] == 'Run.Program') & (df[PS2.Score] == 1)].sample(n_samples)[PS2.CodeStateID]\n", + " solutions = code_states.apply(lambda cs_id: ps2.get_code_for_id(cs_id))\n", + " for sol in solutions:\n", + " print(sol)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "public int sum67(int[] nums)\r\n", 
+ "{\r\n", + " int sum = 0;\r\n", + " int change = 0;\r\n", + " for(int i = 0; i < nums.length ; i++)\r\n", + " {\r\n", + " if(nums[i] == 6)\r\n", + " {\r\n", + " for(int c = i; c < nums.length ; c++)\r\n", + " {\r\n", + " if(nums[c] == 7)\r\n", + " {\r\n", + " change = c + 1;\r\n", + " break;\r\n", + " }\r\n", + " }\r\n", + " }\r\n", + " if(nums[i] == 6)\r\n", + " i = change;\r\n", + " if(i < nums.length)\r\n", + " {\r\n", + " \tsum = sum + nums[i];\r\n", + " }\r\n", + " }\r\n", + " return sum;\r\n", + "}\r\n", + "\n" + ] + } + ], + "source": [ + "print_code_samples(s19, s19_ps2, 45, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "public int[] withoutTen(int[] nums)\r\n", + "{\r\n", + " int[] newArray = new int[nums.length];\r\n", + " for(int val : newArray)\r\n", + " val = 0;\r\n", + " \r\n", + " int newIndex = 0;\r\n", + " for (int i = 0; i < nums.length; i++){\r\n", + " \tif(nums[i] != 10)\r\n", + " newArray[newIndex++] = nums[i]; \r\n", + " }\r\n", + " \r\n", + " return newArray;\r\n", + "}\r\n", + "\n" + ] + } + ], + "source": [ + "print_code_samples(s19, s19_ps2, 48, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "public int sum67(int[] nums)\r\n", + "{\r\n", + " boolean deadZone = false;\r\n", + " if (nums.length > 0)\r\n", + " {\r\n", + " int sum = 0;\r\n", + " for (int i = 0; i < nums.length; i++)\r\n", + " {\r\n", + " if (nums[i] == 6)\r\n", + " {\r\n", + " deadZone = true;\r\n", + " }\r\n", + " \r\n", + " if (deadZone == false)\r\n", + " {\r\n", + " \tsum += nums[i];\r\n", + " }\r\n", + " \r\n", + " if (nums[i] == 7)\r\n", + " {\r\n", + " deadZone = false;\r\n", + " }\r\n", + " }\r\n", + " return sum;\r\n", + " }\r\n", + " else\r\n", + " {\r\n", + " return 0;\r\n", + " }\r\n", + 
"}\r\n", + "\n" + ] + } + ], + "source": [ + "print_code_samples(f19, f19_ps2, 736, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "public int[] withoutTen(int[] nums)\r\n", + "{\r\n", + " int[] r = new int[nums.length];\r\n", + "\tint j = 0;\r\n", + "\tfor(int i = 0; i < nums.length; i++)\r\n", + "\t{\r\n", + "\t\tif(nums[i] != 10)\r\n", + "\t\t{\r\n", + "\t\t\tr[j] = nums[i];\r\n", + "\t\t\tj++;\r\n", + "\t\t}\r\n", + "\t}\r\n", + "\t\r\n", + "\treturn r;\r\n", + "}\r\n", + "\n" + ] + } + ], + "source": [ + "print_code_samples(f19, f19_ps2, 737, 1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/eval_models.bat b/eval_models.bat new file mode 100644 index 0000000000000000000000000000000000000000..f5b8b6d73caeb01eaa17bf844a1f86d913945bc5 --- /dev/null +++ b/eval_models.bat @@ -0,0 +1,4 @@ +python .\CodaLabPackages\Track1Package\program\evaluate.py .\data\Prediction\S19\basic_LR_task1\ .\data\Results\S19\basic_LR_task1 +python .\CodaLabPackages\Track1Package\program\evaluate.py .\data\Prediction\S19\code_RF_task1\ .\data\Results\S19\code_RF_task1 +python .\CodaLabPackages\Track1Package\program\evaluate.py .\data\Prediction\F19\basic_LR_task1\ .\data\Results\F19\basic_LR_task1 +python .\CodaLabPackages\Track2Package\program\evaluate.py .\data\Prediction\S19\basic_LR_task2\ .\data\Results\S19\basic_LR_task2 \ No newline at end of file diff --git a/modelo_v1.ipynb b/modelo_v1.ipynb new file mode 100644 index 
0000000000000000000000000000000000000000..75b361bf11719010e7231bf1eea0e04408d03dc6 --- /dev/null +++ b/modelo_v1.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 12, + "id": "ca052d45", + "metadata": {}, + "outputs": [], + "source": [ + "#importando librerias\n", + "from sklearn import datasets\n", + "from sklearn.model_selection import train_test_split\n", + "import sklearn.linear_model as sk\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "fe82d9b0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 1], [2, 1], [3, 1], [1, 1], [2, 1], [11, 1], [7, 1], [7, 1], [3, 1], [2, 1], [1, 1], [2, 1], [1, 1], [6, 1], [1, 1], [9, 1], [4, 1], [18, 1], [22, 1], [45, 1], [3, 1], [37, 1], [7, 1], [30, 1], [5, 1], [28, 1], [11, 1], [13, 1], [19, 1], [16, 1], [2, 1], [4, 1], [1, 1], [1, 1], [2, 1], [5, 1], [8, 1], [3, 1], [2, 1], [2, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [11, 1], [1, 1], [2, 1], [2, 1], [31, 1], [1, 1], [6, 1], [17, 1], [1, 1], [24, 1], [22, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [11, 1], [5, 1], [2, 1], [7, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [10, 1], [21, 1], [2, 1], [1, 1], [5, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [12, 1], [6, 1], [4, 1], [7, 1], [6, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [6, 1], [3, 1], [1, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [2, 1], [11, 1], [4, 1], [1, 1], [4, 1], [6, 1], [2, 0], [1, 1], [5, 1], [4, 1], [4, 1], [5, 1], [10, 0], [10, 1], [3, 1], [15, 0], [8, 0], [22, 1], [5, 1], [6, 0], [6, 0], [1, 1], [14, 0], [19, 1], [9, 0], [9, 0], [2, 1], [13, 1], [4, 1], [1, 1], [12, 1], [1, 
1], [2, 1], [6, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [2, 0], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [8, 1], [1, 1], [6, 1], [31, 1], [15, 1], [1, 1], [2, 1], [6, 1], [2, 1], [5, 1], [6, 1], [4, 1], [25, 1], [6, 1], [3, 1], [10, 1], [2, 1], [7, 1], [40, 0], [2, 1], [3, 1], [4, 0], [6, 0], [4, 1], [17, 1], [2, 0], [8, 1], [27, 0], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [3, 1], [13, 1], [3, 1], [1, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [1, 1], [19, 0], [1, 1], [2, 1], [3, 1], [1, 0], [7, 1], [5, 1], [6, 0], [7, 1], [3, 0], [2, 1], [5, 1], [18, 1], [1, 0], [10, 1], [4, 1], [3, 1], [4, 1], [1, 1], [1, 1], [14, 1], [2, 1], [1, 1], [3, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [4, 1], [9, 1], [1, 1], [2, 1], [8, 1], [12, 1], [4, 1], [2, 1], [7, 1], [13, 1], [3, 1], [2, 1], [21, 1], [3, 1], [16, 1], [33, 1], [6, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [4, 1], [2, 1], [3, 1], [1, 1], [3, 1], [3, 1], [37, 1], [14, 1], [16, 1], [18, 1], [9, 0], [7, 1], [4, 1], [6, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 0], [1, 1], [5, 1], [5, 1], [1, 1], [16, 1], [12, 1], [3, 1], [1, 1], [3, 1], [5, 1], [1, 1], [5, 1], [6, 1], [9, 1], [15, 1], [1, 1], [8, 0], [3, 1], [3, 1], [14, 0], [1, 1], [1, 1], [1, 0], [6, 0], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [1, 1], [11, 1], [4, 1], [14, 0], [2, 1], [41, 1], [9, 1], [10, 1], [5, 1], [2, 1], [15, 1], [5, 1], [2, 1], [2, 1], [9, 0], [9, 1], [12, 0], [3, 1], [3, 1], [7, 1], [6, 1], [10, 0], [7, 1], [1, 1], [19, 0], [4, 1], [1, 1], [10, 0], [7, 1], [12, 1], [6, 1], [7, 1], [2, 1], 
[1, 1], [2, 1], [3, 1], [1, 1], [22, 1], [32, 1], [3, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [8, 1], [1, 1], [4, 1], [3, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [11, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [10, 0], [8, 1], [3, 1], [4, 1], [16, 1], [11, 1], [17, 0], [17, 0], [13, 0], [4, 1], [11, 1], [5, 1], [3, 0], [18, 0], [8, 1], [11, 0], [1, 0], [4, 0], [9, 0], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [6, 0], [9, 0], [1, 0], [3, 0], [14, 0], [1, 1], [1, 1], [3, 1], [1, 1], [18, 1], [3, 1], [2, 1], [4, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [11, 1], [2, 1], [1, 1], [4, 1], [3, 1], [1, 1], [5, 1], [2, 1], [1, 1], [1, 1], [4, 1], [16, 1], [1, 1], [1, 1], [5, 1], [1, 1], [3, 1], [3, 1], [4, 1], [3, 1], [8, 1], [2, 1], [6, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [11, 1], [8, 1], [1, 1], [13, 1], [3, 1], [2, 1], [3, 1], [4, 1], [1, 1], [2, 1], [7, 1], [9, 1], [5, 1], [1, 1], [17, 1], [19, 1], [4, 1], [21, 1], [2, 1], [3, 1], [1, 1], [12, 1], [18, 0], [4, 1], [48, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [9, 1], [4, 1], [1, 1], [2, 1], [1, 1], [5, 1], [5, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [3, 1], [3, 1], [1, 1], [5, 1], [3, 1], [2, 1], 
[4, 1], [1, 1], [4, 1], [1, 1], [4, 1], [3, 0], [4, 1], [3, 1], [1, 1], [5, 1], [9, 0], [18, 0], [6, 1], [4, 1], [2, 0], [9, 0], [2, 1], [16, 0], [5, 1], [3, 0], [9, 1], [6, 1], [5, 1], [3, 1], [6, 1], [1, 1], [11, 1], [10, 1], [2, 1], [2, 1], [1, 1], [4, 1], [1, 1], [6, 1], [2, 1], [3, 1], [3, 1], [1, 1], [9, 1], [6, 1], [1, 1], [3, 1], [3, 1], [3, 1], [6, 0], [1, 1], [4, 1], [2, 1], [7, 1], [4, 1], [1, 1], [17, 1], [6, 1], [11, 1], [4, 1], [3, 1], [9, 1], [8, 1], [1, 1], [2, 1], [14, 1], [16, 1], [1, 1], [7, 1], [1, 1], [1, 1], [10, 1], [3, 1], [4, 1], [6, 1], [8, 1], [2, 1], [5, 1], [11, 1], [4, 1], [1, 1], [9, 1], [6, 1], [3, 1], [1, 1], [2, 1], [6, 0], [6, 0], [6, 0], [10, 0], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [6, 1], [4, 1], [8, 1], [6, 1], [11, 1], [7, 1], [3, 1], [34, 1], [16, 1], [17, 1], [5, 1], [4, 1], [7, 1], [6, 1], [16, 1], [5, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [6, 1], [2, 1], [1, 1], [1, 1], [6, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [3, 1], [2, 1], [3, 1], [3, 1], [5, 1], [1, 1], [17, 1], [4, 1], [4, 1], [4, 1], [5, 1], [7, 1], [2, 1], [5, 1], [3, 1], [2, 1], [6, 1], [1, 1], [6, 1], [2, 1], [7, 0], [3, 1], [1, 1], [2, 1], [4, 1], [2, 1], [2, 1], [4, 1], [3, 1], [3, 1], [5, 0], [4, 1], [1, 1], [2, 1], [7, 1], [3, 1], [8, 1], [6, 1], [3, 1], [3, 1], [4, 1], [2, 1], [1, 1], [4, 1], [1, 1], [2, 1], [6, 1], [3, 1], [4, 1], [9, 0], [14, 0], [16, 1], [8, 1], [33, 0], [5, 1], [14, 1], [8, 1], [1, 1], [1, 1], [12, 0], [2, 0], [16, 1], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [5, 1], [2, 1], [2, 1], [5, 1], [11, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [5, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [9, 1], 
[3, 1], [8, 1], [4, 1], [6, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [9, 1], [2, 1], [3, 1], [3, 1], [6, 1], [2, 1], [6, 1], [2, 1], [1, 1], [4, 1], [1, 1], [4, 1], [8, 1], [2, 1], [5, 1], [4, 1], [55, 1], [8, 1], [1, 1], [5, 1], [7, 1], [5, 1], [11, 1], [14, 1], [4, 1], [4, 1], [2, 1], [1, 1], [17, 1], [2, 1], [8, 1], [4, 1], [14, 1], [23, 1], [5, 1], [18, 1], [5, 1], [7, 1], [1, 1], [6, 1], [1, 1], [10, 1], [1, 1], [20, 1], [24, 1], [6, 1], [53, 1], [37, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [1, 1], [1, 1], [3, 0], [1, 0], [1, 1], [10, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [3, 1], [17, 1], [4, 1], [3, 1], [5, 1], [9, 1], [12, 1], [1, 1], [13, 1], [6, 1], [8, 1], [3, 1], [5, 1], [18, 1], [4, 1], [5, 1], [6, 1], [3, 1], [2, 1], [1, 1], [2, 1], [25, 1], [2, 1], [2, 1], [2, 1], [1, 1], [8, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [4, 1], [5, 1], [52, 1], [6, 1], [46, 1], [6, 1], [8, 1], [6, 1], [24, 0], [2, 1], [15, 1], [14, 1], [16, 1], [42, 1], [8, 1], [4, 1], [2, 1], [3, 1], [2, 1], [6, 1], [9, 1], [5, 1], [10, 1], [1, 1], [2, 1], [2, 1], [5, 1], [2, 1], [1, 1], [4, 1], [7, 1], [3, 1], [3, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [4, 1], [10, 1], [1, 1], [4, 1], [2, 1], [4, 1], [1, 1], [1, 1], [6, 1], [7, 1], [1, 1], [2, 1], [15, 1], [10, 1], [4, 1], [10, 0], [9, 0], [11, 0], [6, 1], [24, 0], [1, 0], [6, 0], [11, 1], [2, 1], [4, 1], [1, 1], [1, 1], [11, 1], [8, 1], [6, 1], [1, 1], [3, 1], [5, 1], [3, 1], [3, 1], [1, 0], [4, 1], [8, 0], [3, 0], [7, 1], [16, 0], [15, 1], [9, 0], [3, 1], [2, 1], [5, 1], [10, 1], [18, 1], [5, 1], [6, 1], [5, 1], [27, 1], [3, 1], [20, 0], [21, 1], [35, 0], [8, 1], [11, 0], [3, 1], [2, 1], [6, 1], [1, 1], [1, 
1], [8, 1], [1, 1], [9, 1], [2, 1], [8, 0], [2, 1], [1, 1], [2, 1], [4, 1], [1, 1], [4, 1], [6, 1], [6, 1], [1, 1], [2, 1], [10, 0], [1, 1], [3, 1], [3, 1], [1, 1], [1, 1], [3, 1], [5, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [7, 1], [2, 1], [1, 1], [2, 1], [4, 1], [1, 1], [7, 1], [7, 1], [3, 1], [1, 1], [3, 1], [4, 1], [3, 1], [4, 1], [1, 1], [8, 1], [10, 1], [3, 1], [10, 1], [8, 1], [9, 1], [10, 1], [6, 1], [3, 1], [4, 1], [2, 1], [1, 1], [2, 1], [2, 1], [4, 1], [6, 1], [6, 1], [4, 1], [7, 1], [1, 1], [3, 1], [2, 1], [3, 1], [1, 1], [1, 1], [3, 1], [3, 1], [2, 1], [1, 1], [1, 1], [3, 1], [3, 1], [1, 1], [16, 1], [3, 1], [5, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [2, 1], [9, 1], [2, 1], [4, 1], [11, 1], [21, 1], [7, 1], [14, 1], [7, 1], [7, 1], [3, 1], [2, 1], [26, 1], [18, 1], [20, 1], [11, 1], [1, 1], [1, 1], [2, 1], [1, 1], [19, 1], [2, 1], [1, 1], [8, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [5, 1], [4, 1], [1, 1], [12, 1], [11, 1], [2, 1], [6, 1], [12, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [12, 1], [5, 1], [1, 1], [3, 1], [18, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [1, 1], [8, 1], [1, 1], [3, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [5, 1], [1, 1], [1, 1], [7, 1], [4, 1], [8, 1], [4, 1], [2, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [6, 1], [2, 1], [7, 1], [1, 1], [7, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [12, 1], [7, 1], [5, 1], [8, 0], [11, 1], [4, 1], [3, 1], [1, 0], [1, 1], [9, 1], [1, 1], [2, 1], [6, 1], [9, 0], [12, 0], [2, 1], [3, 1], [6, 1], [4, 1], [4, 1], [4, 1], [2, 1], [1, 1], [12, 1], [3, 1], [14, 1], [10, 1], [1, 1], [1, 
1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [12, 1], [1, 1], [2, 1], [14, 1], [2, 1], [3, 1], [3, 1], [3, 1], [4, 1], [1, 1], [2, 1], [3, 1], [1, 1], [11, 1], [4, 1], [1, 1], [2, 1], [9, 1], [9, 1], [8, 1], [5, 1], [10, 1], [9, 0], [2, 1], [1, 1], [8, 1], [3, 1], [2, 1], [5, 1], [2, 1], [8, 1], [4, 1], [2, 1], [4, 1], [11, 1], [7, 1], [1, 1], [8, 1], [6, 1], [17, 1], [6, 1], [4, 0], [5, 1], [8, 1], [9, 0], [5, 1], [8, 1], [37, 0], [13, 0], [4, 0], [10, 0], [5, 0], [11, 0], [3, 1], [3, 1], [1, 1], [6, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [4, 1], [16, 1], [12, 1], [7, 0], [5, 1], [6, 0], [12, 0], [2, 1], [14, 1], [10, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [7, 1], [12, 1], [6, 1], [8, 1], [21, 0], [7, 1], [23, 1], [10, 1], [2, 1], [3, 1], [6, 1], [2, 1], [12, 1], [13, 1], [15, 1], [4, 1], [1, 1], [21, 1], [6, 1], [8, 1], [39, 1], [1, 1], [4, 1], [9, 1], [1, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [3, 1], [1, 1], [2, 1], [3, 1], [6, 0], [1, 1], [3, 0], [2, 1], [1, 1], [9, 1], [3, 1], [1, 1], [4, 1], [3, 0], [1, 1], [2, 1], [3, 1], [3, 1], [1, 1], [2, 1], [5, 1], [1, 1], [1, 1], [8, 1], [6, 1], [1, 1], [1, 1], [1, 1], [10, 1], [1, 1], [7, 1], [4, 1], [9, 1], [6, 1], [5, 1], [2, 1], [2, 1], [9, 1], [8, 1], [11, 1], [1, 1], [2, 1], [12, 1], [1, 1], [3, 1], [2, 1], [3, 1], [5, 1], [4, 1], [4, 1], [1, 1], [2, 1], [3, 1], [15, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [8, 1], [2, 1], [8, 1], [7, 1], [3, 1], [3, 1], [22, 1], [1, 1], [4, 1], [7, 1], [4, 1], [3, 1], [4, 1], [1, 1], [1, 1], [16, 1], [1, 1], [3, 1], [7, 1], [1, 1], [6, 1], [16, 1], [3, 1], [2, 1], [6, 1], [2, 1], [4, 1], [11, 1], [8, 1], [2, 1], [3, 1], [31, 1], [5, 1], [12, 1], [2, 1], [5, 1], [7, 1], [1, 1], [12, 1], [3, 1], [1, 1], [1, 1], 
[7, 1], [13, 1], [4, 1], [6, 1], [21, 1], [3, 1], [1, 1], [6, 1], [2, 1], [3, 1], [2, 1], [3, 1], [2, 1], [4, 1], [3, 1], [2, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [7, 1], [9, 1], [22, 1], [2, 1], [12, 0], [9, 1], [3, 1], [20, 1], [14, 1], [22, 1], [21, 1], [2, 1], [8, 1], [7, 1], [2, 1], [1, 1], [1, 1], [1, 1], [6, 1], [3, 1], [4, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [19, 1], [2, 1], [1, 1], [6, 1], [23, 1], [2, 1], [8, 1], [5, 1], [1, 1], [8, 1], [16, 1], [20, 1], [13, 1], [15, 1], [12, 1], [8, 1], [1, 1], [2, 1], [2, 1], [5, 1], [2, 1], [7, 1], [3, 1], [3, 1], [1, 1], [9, 1], [9, 0], [6, 0], [4, 1], [1, 1], [7, 1], [11, 1], [1, 1], [4, 1], [6, 1], [6, 1], [2, 1], [5, 1], [3, 1], [6, 0], [9, 0], [1, 0], [6, 1], [7, 0], [11, 1], [3, 1], [6, 0], [20, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [3, 1], [3, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [13, 1], [8, 1], [26, 1], [1, 1], [3, 1], [7, 1], [1, 1], [6, 1], [7, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [3, 1], [4, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [11, 1], [7, 1], [3, 1], [8, 1], [13, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [1, 1], [4, 1], [3, 1], [11, 1], [18, 1], [41, 1], [4, 1], [4, 1], [6, 1], [2, 1], [1, 1], [8, 1], [4, 1], [1, 1], [19, 1], [20, 1], [21, 1], [11, 1], [10, 1], [35, 0], [71, 1], [3, 1], [10, 0], [11, 0], [32, 0], [8, 1], [4, 1], [55, 1], [2, 1], [3, 1], [8, 1], [6, 1], [4, 1], [2, 1], [1, 1], [13, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [3, 1], [2, 1], [8, 1], [3, 1], [3, 1], [6, 1], [7, 1], [10, 1], [19, 1], [5, 1], [7, 0], [5, 1], [9, 1], [4, 1], [1, 1], [15, 0], [5, 1], [21, 1], [2, 1], [2, 1], [2, 1], [3, 1], [1, 1], [11, 1], [9, 1], [12, 1], [3, 1], [4, 1], [2, 1], [2, 1], [3, 1], [17, 1], [9, 
1], [9, 1], [49, 1], [5, 1], [10, 1], [93, 1], [19, 1], [10, 1], [14, 1], [28, 1], [51, 1], [39, 1], [11, 1], [3, 1], [13, 1], [19, 1], [21, 1], [3, 1], [2, 1], [2, 1], [1, 1], [23, 1], [1, 1], [4, 1], [2, 1], [7, 1], [3, 1], [3, 1], [10, 1], [3, 1], [2, 1], [3, 1], [2, 1], [6, 1], [2, 1], [11, 1], [6, 1], [13, 1], [4, 1], [2, 1], [2, 1], [5, 1], [6, 1], [13, 1], [2, 1], [9, 1], [21, 1], [14, 0], [3, 1], [1, 0], [6, 1], [2, 1], [2, 1], [2, 1], [4, 0], [3, 0], [4, 0], [2, 1], [2, 1], [11, 1], [1, 1], [10, 1], [2, 1], [1, 1], [2, 1], [4, 1], [9, 1], [1, 1], [4, 1], [1, 1], [4, 1], [10, 1], [7, 1], [1, 1], [5, 1], [10, 1], [25, 1], [7, 1], [5, 1], [8, 1], [4, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [9, 0], [2, 1], [3, 0], [4, 1], [1, 1], [1, 1], [11, 1], [1, 1], [1, 1], [4, 1], [1, 1], [3, 1], [6, 1], [2, 1], [13, 1], [6, 1], [3, 0], [10, 0], [8, 1], [28, 1], [10, 1], [13, 0], [3, 0], [3, 0], [19, 0], [10, 0], [9, 1], [17, 1], [3, 1], [2, 1], [2, 1], [1, 1], [8, 1], [4, 1], [1, 1], [2, 1], [5, 1], [1, 1], [3, 1], [6, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [7, 1], [9, 0], [18, 0], [4, 1], [27, 0], [6, 1], [9, 0], [1, 1], [2, 1], [11, 1], [17, 1], [12, 0], [7, 1], [3, 1], [6, 1], [7, 1], [2, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [5, 1], [3, 1], [4, 1], [6, 1], [1, 1], [6, 1], [1, 1], [5, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [5, 1], [3, 1], [1, 1], [5, 1], [1, 1], [1, 1], [5, 1], [1, 1], [3, 1], [1, 1], [3, 1], [1, 1], [2, 1], [2, 1], [9, 0], [1, 1], [7, 1], [1, 1], [2, 1], [3, 1], [1, 1], [3, 1], [1, 1], [19, 1], [2, 1], [3, 1], [4, 1], [17, 1], [7, 1], [1, 1], [5, 1], [1, 1], [6, 1], [15, 1], [6, 1], [7, 1], [7, 1], [8, 1], [6, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [7, 1], [4, 1], [1, 1], [6, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [22, 1], [6, 1], [1, 1], [5, 1], [1, 1], [9, 1], [1, 1], [1, 1], [2, 1], [3, 1], [2, 1], [13, 1], [4, 1], [3, 1], 
[3, 1], [12, 1], [1, 1], [14, 1], [6, 1], [12, 1], [2, 1], [7, 1], [12, 1], [5, 1], [9, 1], [10, 1], [2, 1], [6, 1], [10, 1], [1, 1], [18, 1], [6, 1], [2, 1], [3, 1], [3, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [7, 1], [14, 1], [2, 1], [14, 1], [19, 1], [28, 1], [2, 1], [6, 1], [21, 1], [5, 1], [7, 1], [8, 1], [2, 1], [43, 1], [35, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [3, 1], [2, 1], [3, 1], [4, 1], [6, 1], [3, 1], [19, 0], [2, 1], [4, 1], [2, 1], [2, 1], [7, 1], [3, 1], [5, 1], [2, 1], [2, 1], [2, 1], [1, 1], [5, 1], [5, 1], [2, 1], [3, 1], [1, 1], [6, 1], [19, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [13, 1], [5, 0], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [3, 1], [6, 1], [3, 1], [5, 1], [7, 0], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [4, 1], [1, 1], [4, 1], [11, 1], [1, 1], [1, 1], [2, 1], [4, 1], [7, 1], [5, 1], [3, 1], [12, 1], [2, 1], [2, 1], [11, 1], [5, 1], [36, 1], [16, 1], [10, 1], [24, 0], [12, 1], [42, 1], [3, 1], [1, 1], [7, 1], [13, 0], [9, 0], [10, 1], [3, 1], [5, 1], [1, 1], [10, 1], [7, 1], [8, 1], [1, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [1, 1], [8, 1], [3, 1], [1, 1], [2, 1], [1, 0], [2, 1], [3, 1], [1, 1], [2, 1], [1, 1], [6, 0], [3, 1], [8, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [5, 1], [1, 1], [10, 1], [2, 1], [5, 1], [4, 1], [6, 1], [12, 1], [16, 0], [5, 1], [11, 0], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [4, 
1], [1, 1], [1, 1], [4, 1], [37, 1], [1, 1], [1, 1], [18, 1], [3, 1], [4, 1], [1, 1], [1, 1], [2, 1], [9, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [40, 1], [5, 1], [1, 1], [1, 1], [2, 1], [1, 1], [5, 1], [14, 1], [15, 1], [14, 1], [3, 1], [4, 1], [1, 0], [4, 1], [5, 1], [11, 1], [2, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [5, 1], [2, 1], [3, 1], [9, 0], [15, 0], [3, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [9, 1], [3, 1], [10, 1], [4, 1], [2, 1], [1, 1], [4, 1], [1, 1], [16, 1], [1, 1], [1, 1], [4, 1], [4, 1], [4, 1], [1, 1], [3, 1], [1, 1], [1, 1], [8, 1], [3, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 0], [1, 1], [3, 0], [7, 1], [8, 0], [10, 0], [1, 1], [5, 0], [2, 0], [2, 1], [5, 1], [1, 1], [4, 1], [2, 1], [1, 1], [9, 1], [7, 1], [8, 1], [3, 1], [4, 1], [7, 1], [1, 1], [4, 1], [6, 1], [3, 1], [1, 1], [6, 1], [11, 1], [3, 1], [1, 1], [2, 1], [2, 1], [15, 1], [2, 1], [4, 1], [1, 1], [1, 1], [5, 1], [11, 1], [2, 1], [5, 1], [18, 1], [3, 1], [3, 1], [2, 1], [1, 1], [10, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 0], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [10, 0], [3, 1], [2, 1], [7, 0], [3, 1], [1, 1], [1, 1], [3, 1], [3, 1], [2, 1], [10, 1], [6, 0], [2, 0], [3, 1], [9, 0], [6, 0], [4, 1], [1, 1], [4, 1], [3, 1], [1, 1], [3, 1], [12, 0], [1, 1], [9, 0], [7, 0], [6, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], 
[6, 1], [1, 1], [3, 1], [3, 1], [9, 1], [8, 0], [12, 0], [8, 0], [3, 1], [13, 0], [9, 1], [7, 0], [3, 0], [10, 1], [6, 0], [5, 0], [1, 0], [1, 0], [1, 0], [2, 0], [1, 0], [3, 0], [1, 0], [1, 1], [2, 1], [2, 1], [1, 1], [7, 1], [5, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [1, 1], [3, 1], [3, 1], [2, 1], [3, 1], [1, 1], [2, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [5, 1], [2, 1], [4, 1], [14, 1], [15, 1], [1, 1], [2, 1], [7, 1], [3, 1], [1, 1], [3, 1], [13, 1], [7, 1], [8, 1], [3, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [4, 1], [7, 1], [3, 1], [2, 1], [1, 1], [3, 1], [4, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [11, 1], [3, 1], [2, 1], [10, 1], [5, 1], [6, 1], [3, 1], [1, 1], [14, 1], [3, 1], [13, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [15, 1], [8, 1], [4, 0], [2, 1], [2, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [6, 1], [6, 1], [12, 1], [5, 1], [5, 0], [10, 1], [1, 1], [13, 0], [6, 0], [4, 0], [5, 0], [2, 1], [6, 0], [9, 1], [7, 1], [10, 1], [1, 1], [12, 1], [8, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [2, 1], [2, 1], [3, 1], [3, 1], [3, 1], [2, 1], [9, 1], [3, 1], [1, 1], [9, 1], [14, 1], [4, 1], [12, 1], [4, 1], [19, 1], [5, 1], [2, 1], [12, 1], [3, 1], [7, 1], [5, 1], [1, 1], [6, 1], [11, 1], [2, 1], [4, 1], [2, 1], [1, 1], [3, 1], [1, 1], [6, 1], [1, 1], [4, 1], [2, 1], [9, 1], [13, 1], [3, 1], [7, 1], [1, 1], [7, 1], [26, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [3, 1], [2, 1], [2, 1], [5, 1], [4, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [4, 1], [2, 1], [4, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [3, 0], [2, 1], [2, 1], [9, 0], [3, 0], [2, 0], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], 
[1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 0], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [4, 1], [7, 1], [3, 1], [1, 1], [5, 1], [10, 1], [6, 1], [16, 1], [2, 1], [3, 1], [5, 1], [7, 1], [8, 1], [4, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [19, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [2, 1], [8, 1], [21, 1], [9, 1], [3, 1], [8, 1], [9, 1], [1, 1], [5, 1], [2, 1], [4, 1], [9, 1], [7, 1], [8, 1], [6, 1], [11, 1], [33, 1], [9, 1], [21, 1], [9, 1], [2, 1], [6, 1], [7, 1], [60, 1], [10, 1], [38, 1], [5, 1], [2, 1], [1, 1], [1, 1], [2, 1], [7, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [9, 1], [2, 1], [1, 1], [1, 1], [7, 0], [1, 1], [5, 0], [4, 1], [1, 1], [7, 1], [1, 1], [4, 1], [9, 0], [13, 1], [14, 1], [6, 0], [12, 0], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [5, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [8, 1], [18, 0], [5, 1], [6, 1], [5, 1], [27, 0], [3, 1], [14, 0], [11, 1], [2, 1], [3, 1], [26, 1], [5, 1], [6, 1], [15, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [11, 1], [1, 1], [2, 1], [25, 1], [17, 1], [2, 1], [1, 1], [8, 1], [8, 1], [9, 1], [14, 1], [10, 1], [3, 1], [1, 1], [8, 1], [14, 1], [34, 1], [21, 0], [13, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [4, 1], [5, 0], [8, 1], [1, 1], [3, 1], [35, 0], [3, 1], [3, 1], [7, 0], [3, 1], [7, 0], 
[2, 1], [1, 1], [2, 0], [2, 1], [2, 1], [4, 0], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [9, 1], [1, 1], [1, 1], [8, 1], [18, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [4, 1], [2, 1], [1, 1], [7, 1], [9, 1], [3, 1], [1, 1], [4, 1], [3, 1], [1, 1], [4, 1], [1, 1], [2, 1], [5, 1], [6, 1], [6, 1], [1, 1], [8, 1], [5, 1], [3, 1], [5, 1], [15, 1], [5, 1], [2, 1], [7, 1], [4, 1], [3, 1], [3, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [6, 1], [5, 1], [1, 0], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [6, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [14, 1], [7, 1], [5, 1], [5, 1], [24, 1], [1, 1], [5, 1], [7, 1], [1, 1], [1, 1], [5, 1], [3, 1], [6, 1], [8, 1], [1, 1], [1, 1], [2, 1], [6, 1], [5, 1], [4, 1], [2, 1], [2, 1], [4, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 0], [3, 1], [10, 1], [36, 1], [3, 1], [8, 0], [3, 1], [9, 0], [8, 0], [1, 0], [5, 0], [6, 1], [14, 0], [7, 1], [1, 1], [8, 0], [18, 0], [6, 1], [12, 1], [14, 0], [21, 0], [5, 1], [2, 1], [1, 1], [2, 1], [1, 1], [4, 1], [6, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [4, 1], [1, 1], [5, 1], [4, 1], [1, 1], [2, 1], [4, 1], [13, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 1], [4, 1], [4, 1], [2, 1], [8, 1], [3, 1], [6, 1], [1, 
1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [5, 0], [6, 0], [2, 1], [4, 1], [15, 1], [8, 1], [10, 1], [5, 1], [1, 1], [3, 1], [3, 1], [4, 1], [5, 0], [6, 1], [7, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [7, 1], [4, 1], [2, 1], [6, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [4, 1], [3, 0], [2, 1], [2, 1], [20, 1], [29, 1], [1, 1], [8, 1], [6, 1], [17, 1], [1, 1], [2, 1], [1, 1], [6, 1], [20, 1], [2, 1], [8, 1], [5, 1], [4, 0], [17, 0], [15, 1], [3, 0], [7, 0], [6, 0], [1, 1], [31, 0], [11, 0], [7, 0], [3, 0], [6, 1], [6, 1], [2, 1], [3, 1], [10, 1], [9, 1], [3, 1], [3, 1], [9, 1], [3, 1], [2, 1], [4, 1], [1, 1], [7, 1], [26, 1], [3, 1], [5, 1], [3, 1], [4, 0], [3, 1], [3, 1], [3, 1], [4, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [1, 1], [1, 1], [3, 1], [32, 1], [1, 1], [2, 1], [21, 1], [51, 1], [9, 1], [7, 1], [8, 1], [18, 1], [13, 1], [10, 1], [1, 1], [5, 1], [6, 1], [9, 0], [3, 1], [2, 1], [3, 0], [16, 0], [3, 1], [8, 1], [6, 1], [4, 0], [7, 0], [14, 0], [8, 1], [9, 1], [12, 1], [6, 0], [22, 0], [18, 1], [6, 1], [3, 1], [10, 1], [10, 1], [14, 1], [35, 1], [12, 1], [4, 1], [58, 1], [2, 1], [4, 1], [11, 1], [30, 1], [11, 1], [2, 1], [13, 1], [5, 1], [30, 1], [5, 1], [1, 1], [6, 1], [7, 1], [3, 1], [11, 1], [12, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [10, 1], [1, 1], [1, 1], [3, 1], [5, 1], [12, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [12, 1], [5, 1], [1, 1], [44, 1], [3, 1], [8, 1], [1, 1], [5, 1], [4, 1], [9, 1], [5, 1], [2, 1], [5, 1], [4, 1], [35, 1], [2, 1], [19, 1], [67, 0], [26, 1], [11, 1], [66, 0], [14, 
1], [24, 1], [11, 1], [2, 1], [11, 1], [15, 1], [14, 1], [10, 1], [2, 1], [5, 1], [6, 1], [2, 1], [4, 1], [20, 1], [1, 1], [5, 1], [25, 1], [10, 1], [1, 1], [8, 1], [4, 1], [12, 1], [5, 1], [3, 1], [23, 1], [2, 1], [7, 1], [11, 0], [4, 1], [5, 1], [5, 1], [1, 1], [19, 0], [1, 1], [10, 1], [12, 1], [3, 1], [2, 1], [2, 1], [1, 1], [8, 1], [1, 1], [9, 1], [5, 1], [5, 1], [2, 1], [9, 1], [3, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [5, 1], [1, 1], [4, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [15, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [6, 1], [3, 1], [4, 1], [2, 1], [1, 1], [1, 0], [1, 1], [10, 1], [2, 1], [10, 0], [4, 1], [5, 1], [7, 0], [5, 1], [4, 1], [8, 1], [3, 1], [3, 1], [2, 1], [1, 1], [4, 1], [3, 1], [1, 1], [1, 1], [3, 1], [2, 1], [7, 1], [5, 1], [8, 1], [1, 1], [39, 1], [16, 1], [2, 1], [3, 1], [16, 1], [13, 1], [1, 1], [6, 1], [2, 1], [2, 1], [5, 1], [3, 1], [3, 1], [11, 1], [9, 1], [1, 1], [1, 1], [3, 1], [4, 1], [2, 1], [12, 1], [4, 1], [4, 1], [2, 1], [6, 1], [2, 1], [1, 1], [1, 1], [3, 1], [2, 1], [2, 1], [11, 1], [2, 1], [21, 1], [16, 1], [6, 1], [2, 1], [2, 1], [1, 1], [8, 1], [1, 1], [1, 1], [2, 0], [12, 0], [19, 0], [1, 1], [11, 0], [8, 0], [3, 1], [3, 1], [3, 1], [15, 1], [5, 1], [5, 1], [4, 1], [2, 1], [6, 1], [13, 0], [2, 0], [2, 1], [8, 1], [4, 0], [6, 0], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [2, 1], [5, 1], [1, 1], [3, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [4, 1], [2, 1], [4, 1], [11, 1], [19, 1], [2, 1], [27, 1], [4, 1], [3, 1], [13, 1], [13, 1], [15, 1], [7, 1], [8, 
0], [9, 1], [1, 1], [3, 1], [1, 1], [6, 1], [4, 1], [4, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [5, 1], [4, 1], [3, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [8, 1], [5, 1], [2, 1], [2, 1], [1, 1], [4, 0], [8, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [7, 0], [5, 1], [6, 1], [4, 1], [1, 1], [2, 1], [9, 1], [3, 1], [12, 1], [5, 1], [4, 1], [3, 1], [1, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [10, 1], [3, 1], [2, 1], [2, 1], [9, 1], [26, 1], [2, 1], [11, 1], [1, 1], [7, 1], [4, 1], [10, 1], [25, 1], [40, 1], [25, 1], [6, 1], [2, 1], [3, 1], [2, 1], [1, 1], [5, 1], [16, 1], [1, 1], [12, 1], [4, 1], [6, 1], [12, 1], [10, 1], [6, 1], [15, 0], [2, 1], [8, 1], [21, 1], [3, 1], [10, 1], [21, 1], [3, 1], [8, 1], [6, 1], [1, 1], [8, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [6, 1], [8, 1], [17, 1], [2, 1], [3, 1], [1, 1], [14, 1], [8, 1], [3, 1], [9, 1], [32, 1], [7, 1], [7, 1], [3, 1], [2, 1], [43, 1], [5, 1], [2, 1], [26, 1], [3, 1], [3, 1], [3, 1], [30, 1], [14, 1], [3, 1], [5, 1], [7, 1], [6, 1], [12, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [2, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [8, 1], [2, 1], [5, 1], [10, 1], [5, 1], [3, 1], [2, 1], [1, 0], [9, 1], [3, 1], [18, 1], [4, 1], [3, 1], [1, 1], [3, 1], [9, 1], [1, 1], [2, 1], [20, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [3, 1], [1, 1], [3, 1], [7, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [6, 1], [6, 1], [8, 1], [6, 1], [2, 1], [7, 1], [2, 1], [3, 1], [5, 1], [3, 1], [5, 1], [1, 1], [6, 1], [7, 1], [3, 1], [20, 1], [12, 1], [16, 1], [11, 1], [9, 1], [4, 1], [8, 1], [26, 1], 
[29, 1], [12, 1], [4, 1], [7, 1], [9, 1], [6, 1], [2, 1], [2, 0], [4, 1], [1, 1], [29, 0], [44, 0], [2, 1], [7, 1], [3, 1], [3, 1], [1, 1], [4, 1], [1, 1], [8, 1], [9, 0], [13, 1], [21, 1], [8, 1], [26, 0], [17, 1], [16, 1], [10, 1], [3, 1], [1, 0], [9, 1], [6, 1], [4, 1], [15, 1], [11, 1], [13, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [7, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [6, 1], [1, 1], [2, 1], [4, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 1], [3, 1], [1, 1], [1, 1], [9, 1], [1, 1], [5, 1], [2, 1], [13, 0], [1, 1], [5, 1], [9, 1], [2, 1], [6, 1], [1, 1], [2, 1], [2, 1], [2, 1], [2, 1], [4, 1], [14, 1], [3, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [10, 1], [1, 1], [1, 1], [2, 1], [1, 0], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [28, 1], [6, 1], [7, 1], [2, 1], [5, 1], [2, 1], [10, 1], [6, 1], [7, 1], [1, 1], [11, 1], [5, 1], [1, 1], [6, 1], [10, 1], [12, 1], [1, 1], [2, 1], [4, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [2, 1], [1, 1], [6, 1], [1, 1], [2, 1], [4, 1], [3, 1], [9, 1], [1, 1], [6, 1], [1, 1], [17, 1], [4, 1], [1, 1], [1, 1], [2, 1], [5, 1], [9, 1], [4, 1], [1, 1], [3, 1], [1, 1], [6, 1], [4, 1], [7, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [5, 1], [3, 1], [2, 1], [6, 1], [2, 1], [4, 1], [5, 1], [3, 1], [2, 1], [1, 1], [6, 1], [5, 1], [8, 1], [6, 0], [3, 1], [1, 1], [4, 1], [2, 1], [2, 1], [3, 0], [2, 1], [9, 1], [2, 1], 
[2, 1], [4, 1], [1, 1], [35, 1], [5, 1], [3, 1], [2, 1], [2, 1], [3, 1], [2, 1], [2, 1], [1, 1], [4, 1], [16, 1], [8, 1], [14, 1], [3, 1], [6, 1], [12, 1], [7, 1], [61, 0], [31, 1], [67, 0], [49, 1], [6, 1], [7, 1], [4, 1], [24, 1], [16, 1], [3, 1], [3, 0], [3, 1], [1, 1], [8, 0], [27, 0], [1, 1], [3, 1], [3, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [2, 1], [3, 1], [9, 0], [1, 1], [15, 1], [1, 1], [3, 1], [12, 0], [4, 0], [3, 1], [3, 1], [7, 0], [10, 1], [1, 1], [3, 1], [1, 1], [3, 1], [7, 1], [9, 1], [5, 1], [2, 1], [14, 1], [17, 0], [3, 1], [15, 1], [18, 1], [11, 0], [24, 1], [10, 1], [13, 1], [6, 1], [8, 1], [14, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [10, 1], [1, 1], [1, 1], [2, 1], [1, 1], [24, 1], [3, 1], [12, 1], [4, 1], [6, 1], [24, 1], [22, 1], [9, 1], [14, 0], [27, 1], [4, 0], [6, 1], [3, 1], [8, 1], [27, 1], [3, 1], [3, 1], [1, 1], [3, 1], [2, 1], [3, 1], [10, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 1], [1, 1], [1, 1], [8, 1], [6, 1], [6, 1], [5, 1], [2, 1], [9, 1], [3, 1], [1, 1], [4, 1], [4, 1], [3, 1], [4, 1], [10, 1], [1, 1], [10, 1], [2, 1], [1, 1], [5, 1], [7, 1], [5, 1], [5, 1], [2, 1], [6, 1], [7, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [6, 1], [1, 1], [4, 1], [1, 1], [1, 1], [6, 1], [5, 1], [3, 1], [9, 1], [1, 1], [1, 1], [2, 1], [5, 1], [3, 1], [4, 1], [7, 1], [7, 1], [3, 1], [3, 1], [6, 1], [8, 1], [1, 0], [8, 0], [3, 1], [8, 1], [2, 1], [7, 1], [12, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 0], [1, 1], [1, 
1], [5, 1], [10, 1], [10, 1], [27, 1], [4, 1], [2, 1], [2, 1], [2, 1], [4, 1], [1, 1], [3, 1], [2, 1], [4, 1], [3, 1], [1, 1], [4, 1], [6, 1], [10, 1], [23, 1], [1, 1], [12, 1], [3, 1], [23, 1], [5, 1], [6, 1], [14, 1], [16, 1], [15, 1], [13, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [3, 1], [5, 1], [3, 1], [2, 1], [1, 1], [7, 1], [6, 1], [1, 1], [7, 1], [48, 1], [1, 1], [9, 1], [3, 1], [3, 1], [3, 1], [2, 1], [10, 1], [3, 1], [6, 1], [1, 1], [6, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [5, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [14, 1], [1, 1], [3, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [5, 1], [2, 1], [1, 1], [3, 1], [1, 1], [6, 1], [4, 1], [1, 1], [11, 1], [12, 1], [6, 1], [3, 1], [2, 1], [3, 0], [1, 1], [7, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [2, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [13, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [12, 0], [14, 0], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 0], [3, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [3, 1], [2, 1], [3, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [3, 1], [3, 1], [1, 1], [13, 1], [1, 1], [7, 1], [10, 1], [6, 1], [2, 1], [2, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1], [4, 1], [2, 1], [6, 1], [2, 1], [4, 1], [4, 1], [15, 1], [6, 1], [3, 1], [5, 0], [13, 1], [5, 1], [22, 0], [1, 1], [7, 1], [2, 1], [3, 1], [3, 1], [9, 1], [5, 1], [4, 0], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [3, 1], [2, 1], [3, 1], [4, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [6, 1], [2, 1], [9, 1], [4, 1], [22, 1], [2, 1], [4, 0], [1, 1], [3, 0], [2, 1], [1, 1], [2, 1], 
[6, 1], [4, 1], [15, 0], [3, 1], [2, 1], [2, 1], [3, 1], [8, 1], [4, 1], [3, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [6, 0], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [12, 1], [1, 1], [10, 1], [3, 1], [9, 1], [13, 0], [6, 1], [8, 1], [5, 1], [1, 1], [7, 1], [4, 1], [1, 1], [3, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [4, 1], [1, 1], [9, 1], [2, 1], [2, 1], [7, 1], [58, 0], [2, 1], [11, 1], [1, 1], [4, 1], [1, 1], [4, 1], [10, 1], [4, 1], [28, 1], [3, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [1, 1], [1, 1], [1, 1], [1, 1], [6, 0], [5, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [4, 1], [4, 1], [5, 1], [17, 1], [1, 1], [3, 1], [9, 1], [5, 1], [1, 1], [7, 1], [4, 1], [4, 1], [5, 1], [16, 1], [30, 1], [9, 1], [7, 1], [40, 1], [9, 1], [6, 1], [4, 1], [34, 1], [16, 1], [6, 1], [16, 1], [14, 1], [21, 1], [26, 1], [7, 1], [19, 0], [18, 1], [9, 1], [23, 0], [24, 0], [23, 1], [1, 1], [4, 1], [6, 1], [1, 1], [3, 1], [1, 1], [11, 0], [10, 0], [3, 0], [2, 0], [12, 0], [1, 0], [7, 0], [1, 0], [1, 0], [2, 0], [3, 0], [1, 0], [7, 0], [1, 0], [1, 0], [2, 1], [1, 1], [1, 1], [1, 1], [9, 1], [3, 1], [3, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [1, 0], [5, 0], [7, 1], [2, 0], [4, 0], [5, 0], [2, 1], [3, 0], [4, 1], [7, 1], [3, 0], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [5, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [3, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1], [8, 1], [19, 0], [2, 1], [34, 0], [4, 1], [12, 0], [3, 1], [2, 1], [11, 1], [8, 0], [2, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], 
[15, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [8, 0], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [3, 1], [2, 1], [1, 1], [3, 1], [1, 1], [2, 1], [2, 1], [2, 1], [2, 1], [11, 1], [9, 0], [16, 1], [5, 1], [17, 1], [23, 1], [7, 1], [24, 1], [9, 1], [7, 1], [2, 1], [6, 1], [21, 1], [5, 1], [21, 1], [2, 1], [2, 1], [4, 1], [2, 1], [1, 1], [4, 1], [3, 1], [1, 1], [2, 1], [9, 1], [1, 1], [1, 1], [6, 1], [1, 1], [2, 1], [5, 0], [8, 0], [6, 1], [2, 1], [7, 0], [3, 0], [5, 1], [12, 0], [7, 0], [4, 0], [2, 0], [4, 0], [1, 1], [7, 0], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [3, 1], [1, 1], [8, 1], [7, 1], [2, 1], [27, 1], [7, 1], [7, 0], [1, 0], [6, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 0], [1, 1], [2, 0], [2, 1], [1, 1], [14, 1], [2, 1], [2, 1], [8, 1], [6, 1], [2, 1], [3, 1], [5, 1], [4, 1], [1, 1], [6, 1], [2, 1], [1, 1], [1, 1], [11, 1], [10, 1], [5, 1], [5, 1], [5, 1], [12, 0], [30, 0], [5, 0], [9, 0], [23, 1], [7, 1], [27, 0], [1, 0], [14, 1], [18, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [3, 1], [1, 1], [5, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [3, 1], [12, 1], [20, 1], [6, 1], [20, 1], [12, 1], [3, 1], [3, 1], [3, 1], [7, 1], [4, 1], [18, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 0], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 
1], [1, 1], [2, 1], [1, 1], [3, 1], [3, 1], [1, 1], [1, 1], [12, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [6, 1], [5, 1], [4, 1], [17, 1], [19, 1], [4, 1], [5, 1], [1, 1], [7, 1], [5, 1], [2, 1], [15, 1], [8, 1], [9, 1], [11, 1], [3, 1], [6, 1], [1, 1], [1, 1], [5, 1], [5, 1], [5, 1], [1, 1], [8, 1], [11, 1], [3, 1], [4, 1], [1, 1], [6, 1], [20, 1], [6, 0], [4, 1], [4, 1], [2, 0], [3, 1], [6, 1], [9, 0], [13, 1], [8, 1], [7, 1], [3, 1], [6, 0], [5, 0], [9, 0], [8, 1], [2, 1], [9, 0], [4, 0], [2, 1], [32, 1], [28, 1], [3, 1], [14, 0], [8, 1], [5, 1], [6, 1], [2, 1], [4, 1], [11, 0], [4, 1], [8, 0], [6, 1], [1, 1], [1, 1], [10, 1], [1, 1], [5, 0], [2, 0], [3, 1], [5, 1], [5, 1], [26, 1], [3, 0], [6, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [10, 1], [1, 1], [2, 1], [1, 1], [10, 1], [27, 1], [1, 1], [10, 1], [2, 1], [4, 1], [1, 1], [2, 1], [1, 1], [5, 1], [4, 1], [4, 1], [5, 1], [3, 1], [7, 1], [5, 1], [1, 1], [5, 1], [5, 1], [6, 0], [8, 1], [10, 1], [19, 0], [1, 1], [11, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [9, 1], [2, 1], [4, 1], [2, 1], [6, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [3, 1], [15, 1], [2, 1], [2, 1], [4, 1], [10, 1], [13, 1], [6, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [1, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [2, 1], [5, 1], [8, 1], [5, 1], [28, 1], [1, 1], [5, 1], [3, 1], [1, 1], [3, 1], [3, 1], [22, 1], 
[1, 1], [19, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [21, 1], [9, 1], [2, 1], [1, 1], [4, 1], [6, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [16, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [3, 1], [1, 1], [7, 1], [1, 1], [1, 1], [4, 1], [4, 0], [4, 1], [2, 1], [15, 1], [10, 0], [2, 1], [3, 1], [1, 1], [4, 1], [2, 1], [4, 1], [1, 1], [2, 1], [6, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [5, 1], [3, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [2, 1], [9, 1], [19, 1], [4, 1], [26, 1], [8, 1], [6, 1], [1, 1], [7, 1], [29, 1], [17, 1], [11, 1], [14, 1], [1, 1], [2, 0], [1, 1], [1, 0], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [6, 1], [2, 1], [3, 1], [11, 1], [2, 1], [3, 1], [6, 1], [7, 1], [7, 1], [2, 1], [4, 1], [4, 1], [6, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [11, 1], [1, 1], [2, 1], [6, 1], [5, 1], [1, 1], [4, 1], [1, 1], [4, 1], [1, 1], [10, 1], [1, 0], [2, 1], [6, 1], [1, 1], [2, 1], [6, 1], [4, 1], [1, 1], [13, 1], [19, 1], [2, 1], [5, 1], [4, 1], [7, 1], [2, 1], [5, 1], [3, 1], [18, 1], [6, 0], [8, 0], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [2, 1], [18, 1], [14, 1], [2, 1], [2, 1], [6, 1], [7, 1], [3, 1], [3, 1], [2, 1], [8, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [7, 1], [1, 1], [2, 1], [2, 1], [3, 1], [3, 1], [1, 1], [4, 1], [3, 1], [2, 1], [5, 
1], [1, 1], [1, 1], [1, 1], [8, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [7, 1], [1, 1], [1, 1], [23, 1], [4, 1], [5, 1], [7, 1], [5, 1], [3, 1], [4, 0], [4, 1], [5, 0], [3, 1], [8, 0], [1, 1], [10, 1], [3, 0], [5, 1], [8, 0], [2, 1], [3, 1], [2, 1], [1, 1], [2, 1], [4, 1], [4, 0], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [2, 1], [2, 1], [1, 0], [1, 1], [1, 1], [2, 0], [2, 0], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 1], [4, 1], [3, 1], [2, 1], [3, 1], [2, 1], [3, 1], [9, 1], [2, 1], [3, 1], [3, 1], [2, 1], [1, 1], [10, 1], [3, 1], [10, 0], [2, 1], [2, 1], [1, 1], [3, 1], [3, 1], [1, 1], [2, 1], [2, 1], [8, 0], [1, 1], [1, 1], [6, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [3, 1], [4, 1], [4, 1], [4, 1], [1, 1], [4, 1], [6, 1], [7, 1], [11, 1], [2, 1], [5, 1], [2, 1], [3, 1], [4, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [16, 1], [3, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [9, 1], [15, 1], [7, 1], [5, 1], [11, 1], [3, 1], [2, 1], [2, 1], [2, 1], [14, 1], [4, 1], [11, 1], [2, 1], [2, 1], [4, 1], [6, 1], [2, 1], [18, 1], [4, 1], [6, 1], [4, 1], [2, 1], [6, 1], [6, 1], [3, 1], [2, 1], [1, 1], [5, 1], [4, 1], [14, 1], [9, 1], [24, 1], [24, 1], [14, 1], [7, 1], [1, 1], [2, 1], [8, 1], [31, 1], [16, 1], [14, 1], [10, 1], [19, 1], [2, 1], [3, 1], [1, 1], [2, 1], [4, 1], [2, 1], [3, 1], [2, 1], [1, 1], [8, 1], [4, 1], [8, 1], [2, 1], [1, 1], [4, 1], [12, 1], [3, 1], [2, 1], [6, 1], [9, 0], [1, 1], [5, 1], [7, 1], [3, 1], [12, 1], [9, 1], [28, 1], [7, 1], [5, 1], [11, 0], [1, 1], [3, 1], [9, 1], [1, 1], [40, 1], [12, 1], [3, 0], [11, 1], [15, 0], [19, 0], [3, 1], [39, 0], [1, 0], [2, 0], [2, 1], [15, 0], [18, 1], [3, 1], [2, 1], [1, 1], [1, 1], [6, 1], [4, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [5, 1], [4, 1], [1, 1], [1, 1], 
[7, 1], [1, 1], [1, 1], [9, 1], [11, 1], [7, 1], [5, 1], [5, 1], [4, 1], [4, 1], [2, 1], [2, 1], [1, 1], [11, 1], [7, 1], [4, 1], [5, 1], [6, 1], [3, 1], [1, 1], [2, 1], [1, 1], [3, 1], [2, 1], [3, 1], [21, 1], [8, 1], [5, 1], [24, 1], [4, 1], [13, 1], [7, 1], [3, 1], [3, 1], [4, 1], [8, 1], [3, 1], [10, 1], [2, 1], [4, 1], [5, 1], [2, 1], [4, 1], [22, 0], [4, 1], [1, 1], [2, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [10, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [5, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [6, 1], [3, 1], [5, 1], [7, 1], [4, 1], [4, 1], [16, 1], [8, 1], [3, 1], [7, 1], [7, 1], [6, 1], [12, 1], [1, 1], [4, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [35, 1], [7, 1], [1, 1], [1, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [14, 1], [4, 1], [3, 1], [5, 1], [4, 1], [4, 1], [3, 1], [2, 1], [4, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [4, 1], [4, 1], [18, 1], [10, 1], [6, 1], [4, 1], [7, 1], [2, 1], [2, 1], [8, 1], [11, 1], [9, 1], [11, 1], [9, 1], [5, 1], [2, 1], [4, 1], [1, 1], [1, 1], [2, 1], [11, 1], [3, 1], [12, 1], [8, 1], [14, 1], [6, 1], [32, 1], [15, 1], [9, 1], [19, 1], [6, 1], [8, 0], [23, 1], [33, 1], [6, 1], [2, 1], [1, 1], [3, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], 
[1, 1], [1, 1], [3, 1], [1, 1], [9, 1], [8, 0], [11, 0], [4, 0], [5, 1], [8, 1], [11, 1], [5, 0], [8, 1], [6, 1], [1, 0], [3, 0], [1, 0], [4, 0], [1, 0], [2, 0], [1, 0], [5, 0], [4, 0], [1, 0], [11, 0], [1, 0], [3, 1], [10, 1], [10, 1], [7, 1], [3, 1], [4, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [4, 1], [3, 1], [7, 1], [9, 1], [1, 1], [8, 1], [3, 1], [1, 1], [5, 1], [2, 1], [8, 1], [1, 1], [3, 1], [3, 1], [3, 1], [2, 1], [1, 1], [2, 1], [1, 1], [9, 1], [5, 0], [9, 1], [6, 1], [7, 0], [7, 1], [1, 0], [10, 0], [6, 0], [3, 1], [21, 0], [1, 1], [4, 0], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [2, 1], [1, 1], [1, 1], [3, 1], [3, 1], [3, 1], [3, 1], [1, 1], [1, 1], [3, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [2, 1], [2, 1], [2, 1], [2, 1], [2, 1], [4, 1], [3, 1], [3, 1], [8, 1], [3, 1], [5, 1], [3, 1], [4, 1], [11, 1], [7, 1], [2, 1], [5, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [9, 1], [6, 1], [2, 1], [5, 1], [16, 1], [2, 1], [3, 1], [8, 1], [4, 1], [2, 1], [5, 1], [1, 1], [9, 1], [17, 1], [11, 1], [7, 1], [5, 1], [32, 0], [8, 0], [4, 0], [6, 0], [2, 1], [2, 0], [2, 1], [1, 0], [1, 0], [7, 0], [13, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [7, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [3, 1], [1, 1], [4, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [12, 1], [5, 1], [2, 1], [14, 1], [10, 1], [14, 1], [5, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [7, 1], [2, 
1], [16, 0], [16, 1], [2, 0], [1, 1], [2, 1], [4, 1], [1, 1], [9, 1], [1, 1], [1, 1], [9, 1], [1, 1], [10, 1], [2, 1], [10, 1], [22, 0], [15, 1], [10, 0], [6, 0], [4, 1], [4, 0], [6, 1], [7, 1], [4, 0], [4, 0], [17, 0], [1, 1], [1, 1], [1, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [7, 1], [1, 1], [3, 1], [28, 1], [2, 1], [2, 1], [17, 1], [1, 1], [1, 1], [4, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1]]\n" + ] + } + ], + "source": [ + "#cargando conjunto de datos\n", + "archivoS19Early=open(\"data/S19/Train/early.csv\")\n", + "\n", + "archivoS19Early.readline()\n", + "\n", + "datos_x=[]\n", + "datos_y=[]\n", + "\n", + "\n", + "for linea in archivoS19Early:\n", + " \n", + " linea=linea.strip(\"\\n\").split(\",\")\n", + " \n", + " subjectID=linea[0]\n", + " assignmentID=linea[1]\n", + " problemID=linea[2]\n", + " attempts=int(linea[3])\n", + " correctEventually=linea[4]\n", + " label=linea[5]\n", + " \n", + " if(label==\"True\"):\n", + " label=1\n", + " else:\n", + " label=0\n", + " \n", + " if(correctEventually==\"True\"):\n", + " correctEventually=1\n", + " else:\n", + " correctEventually=0\n", + " \n", + " fila=[attempts,correctEventually]\n", + " datos_x.append(fila)\n", + " \n", + " datos_y.append(label)\n", + "\n", + "archivoS19Early.close()\n", + "print(datos_x)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "dac41137", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression()" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#entrenando al modelo\n", + "regresionLogistica=sk.LogisticRegression()\n", + "regresionLogistica.fit(datos_x,datos_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "eb19bc1e", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1, 1], [2, 1], [3, 1], [1, 1], [2, 1], [11, 1], [7, 1], [7, 1], [3, 1], [2, 1], [1, 1], [2, 1], [1, 1], [6, 1], [1, 1], [9, 1], [4, 1], [18, 1], [22, 1], [45, 1], [3, 1], [37, 1], [7, 1], [30, 1], [5, 1], [28, 1], [11, 1], [13, 1], [19, 1], [16, 1], [2, 1], [4, 1], [1, 1], [1, 1], [2, 1], [5, 1], [8, 1], [3, 1], [2, 1], [2, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [11, 1], [1, 1], [2, 1], [2, 1], [31, 1], [1, 1], [6, 1], [17, 1], [1, 1], [24, 1], [22, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [11, 1], [5, 1], [2, 1], [7, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [10, 1], [21, 1], [2, 1], [1, 1], [5, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [12, 1], [6, 1], [4, 1], [7, 1], [6, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [6, 1], [3, 1], [1, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [2, 1], [11, 1], [4, 1], [1, 1], [4, 1], [6, 1], [2, 0], [1, 1], [5, 1], [4, 1], [4, 1], [5, 1], [10, 0], [10, 1], [3, 1], [15, 0], [8, 0], [22, 1], [5, 1], [6, 0], [6, 0], [1, 1], [14, 0], [19, 1], [9, 0], [9, 0], [2, 1], [13, 1], [4, 1], [1, 1], [12, 1], [1, 1], [2, 1], [6, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [2, 0], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [8, 1], [1, 1], [6, 1], [31, 1], [15, 1], [1, 1], [2, 1], [6, 1], [2, 1], [5, 1], [6, 1], [4, 1], [25, 1], [6, 1], [3, 1], [10, 1], [2, 1], [7, 1], [40, 0], [2, 1], [3, 1], [4, 0], [6, 0], [4, 1], [17, 1], [2, 0], [8, 1], [27, 0], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], 
[3, 1], [13, 1], [3, 1], [1, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [1, 1], [19, 0], [1, 1], [2, 1], [3, 1], [1, 0], [7, 1], [5, 1], [6, 0], [7, 1], [3, 0], [2, 1], [5, 1], [18, 1], [1, 0], [10, 1], [4, 1], [3, 1], [4, 1], [1, 1], [1, 1], [14, 1], [2, 1], [1, 1], [3, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [4, 1], [9, 1], [1, 1], [2, 1], [8, 1], [12, 1], [4, 1], [2, 1], [7, 1], [13, 1], [3, 1], [2, 1], [21, 1], [3, 1], [16, 1], [33, 1], [6, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [4, 1], [2, 1], [3, 1], [1, 1], [3, 1], [3, 1], [37, 1], [14, 1], [16, 1], [18, 1], [9, 0], [7, 1], [4, 1], [6, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 0], [1, 1], [5, 1], [5, 1], [1, 1], [16, 1], [12, 1], [3, 1], [1, 1], [3, 1], [5, 1], [1, 1], [5, 1], [6, 1], [9, 1], [15, 1], [1, 1], [8, 0], [3, 1], [3, 1], [14, 0], [1, 1], [1, 1], [1, 0], [6, 0], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [1, 1], [11, 1], [4, 1], [14, 0], [2, 1], [41, 1], [9, 1], [10, 1], [5, 1], [2, 1], [15, 1], [5, 1], [2, 1], [2, 1], [9, 0], [9, 1], [12, 0], [3, 1], [3, 1], [7, 1], [6, 1], [10, 0], [7, 1], [1, 1], [19, 0], [4, 1], [1, 1], [10, 0], [7, 1], [12, 1], [6, 1], [7, 1], [2, 1], [1, 1], [2, 1], [3, 1], [1, 1], [22, 1], [32, 1], [3, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [8, 1], [1, 1], [4, 1], [3, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [11, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], 
[1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [10, 0], [8, 1], [3, 1], [4, 1], [16, 1], [11, 1], [17, 0], [17, 0], [13, 0], [4, 1], [11, 1], [5, 1], [3, 0], [18, 0], [8, 1], [11, 0], [1, 0], [4, 0], [9, 0], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [6, 0], [9, 0], [1, 0], [3, 0], [14, 0], [1, 1], [1, 1], [3, 1], [1, 1], [18, 1], [3, 1], [2, 1], [4, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [11, 1], [2, 1], [1, 1], [4, 1], [3, 1], [1, 1], [5, 1], [2, 1], [1, 1], [1, 1], [4, 1], [16, 1], [1, 1], [1, 1], [5, 1], [1, 1], [3, 1], [3, 1], [4, 1], [3, 1], [8, 1], [2, 1], [6, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [11, 1], [8, 1], [1, 1], [13, 1], [3, 1], [2, 1], [3, 1], [4, 1], [1, 1], [2, 1], [7, 1], [9, 1], [5, 1], [1, 1], [17, 1], [19, 1], [4, 1], [21, 1], [2, 1], [3, 1], [1, 1], [12, 1], [18, 0], [4, 1], [48, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [9, 1], [4, 1], [1, 1], [2, 1], [1, 1], [5, 1], [5, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [3, 1], [3, 1], [1, 1], [5, 1], [3, 1], [2, 1], [4, 1], [1, 1], [4, 1], [1, 1], [4, 1], [3, 0], [4, 1], [3, 1], [1, 1], [5, 1], [9, 0], [18, 0], [6, 1], [4, 1], [2, 0], [9, 0], [2, 1], [16, 0], [5, 1], [3, 0], [9, 1], [6, 1], [5, 1], [3, 1], [6, 1], [1, 1], [11, 1], [10, 1], [2, 1], [2, 1], [1, 1], [4, 1], [1, 1], [6, 1], [2, 1], [3, 1], [3, 1], [1, 1], [9, 1], [6, 1], [1, 1], [3, 1], [3, 1], [3, 1], [6, 0], [1, 1], [4, 1], [2, 1], [7, 1], [4, 1], [1, 1], [17, 1], [6, 1], [11, 1], [4, 1], [3, 1], [9, 1], [8, 1], [1, 1], [2, 1], [14, 1], [16, 1], [1, 1], [7, 1], [1, 1], [1, 1], [10, 1], 
[3, 1], [4, 1], [6, 1], [8, 1], [2, 1], [5, 1], [11, 1], [4, 1], [1, 1], [9, 1], [6, 1], [3, 1], [1, 1], [2, 1], [6, 0], [6, 0], [6, 0], [10, 0], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [6, 1], [4, 1], [8, 1], [6, 1], [11, 1], [7, 1], [3, 1], [34, 1], [16, 1], [17, 1], [5, 1], [4, 1], [7, 1], [6, 1], [16, 1], [5, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [6, 1], [2, 1], [1, 1], [1, 1], [6, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [3, 1], [2, 1], [3, 1], [3, 1], [5, 1], [1, 1], [17, 1], [4, 1], [4, 1], [4, 1], [5, 1], [7, 1], [2, 1], [5, 1], [3, 1], [2, 1], [6, 1], [1, 1], [6, 1], [2, 1], [7, 0], [3, 1], [1, 1], [2, 1], [4, 1], [2, 1], [2, 1], [4, 1], [3, 1], [3, 1], [5, 0], [4, 1], [1, 1], [2, 1], [7, 1], [3, 1], [8, 1], [6, 1], [3, 1], [3, 1], [4, 1], [2, 1], [1, 1], [4, 1], [1, 1], [2, 1], [6, 1], [3, 1], [4, 1], [9, 0], [14, 0], [16, 1], [8, 1], [33, 0], [5, 1], [14, 1], [8, 1], [1, 1], [1, 1], [12, 0], [2, 0], [16, 1], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [5, 1], [2, 1], [2, 1], [5, 1], [11, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [5, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [9, 1], [3, 1], [8, 1], [4, 1], [6, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [9, 1], [2, 1], [3, 1], [3, 1], [6, 1], [2, 1], [6, 1], [2, 1], [1, 1], [4, 1], [1, 1], [4, 1], [8, 1], [2, 1], [5, 1], [4, 1], [55, 1], [8, 1], [1, 1], [5, 1], [7, 1], [5, 1], [11, 1], [14, 1], [4, 1], [4, 1], [2, 1], [1, 1], [17, 1], [2, 1], [8, 1], [4, 1], [14, 1], [23, 1], [5, 1], [18, 1], [5, 1], [7, 1], [1, 1], [6, 1], [1, 1], [10, 1], [1, 1], [20, 1], [24, 1], [6, 1], [53, 1], [37, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 
1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [1, 1], [1, 1], [3, 0], [1, 0], [1, 1], [10, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [3, 1], [17, 1], [4, 1], [3, 1], [5, 1], [9, 1], [12, 1], [1, 1], [13, 1], [6, 1], [8, 1], [3, 1], [5, 1], [18, 1], [4, 1], [5, 1], [6, 1], [3, 1], [2, 1], [1, 1], [2, 1], [25, 1], [2, 1], [2, 1], [2, 1], [1, 1], [8, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [4, 1], [5, 1], [52, 1], [6, 1], [46, 1], [6, 1], [8, 1], [6, 1], [24, 0], [2, 1], [15, 1], [14, 1], [16, 1], [42, 1], [8, 1], [4, 1], [2, 1], [3, 1], [2, 1], [6, 1], [9, 1], [5, 1], [10, 1], [1, 1], [2, 1], [2, 1], [5, 1], [2, 1], [1, 1], [4, 1], [7, 1], [3, 1], [3, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [4, 1], [10, 1], [1, 1], [4, 1], [2, 1], [4, 1], [1, 1], [1, 1], [6, 1], [7, 1], [1, 1], [2, 1], [15, 1], [10, 1], [4, 1], [10, 0], [9, 0], [11, 0], [6, 1], [24, 0], [1, 0], [6, 0], [11, 1], [2, 1], [4, 1], [1, 1], [1, 1], [11, 1], [8, 1], [6, 1], [1, 1], [3, 1], [5, 1], [3, 1], [3, 1], [1, 0], [4, 1], [8, 0], [3, 0], [7, 1], [16, 0], [15, 1], [9, 0], [3, 1], [2, 1], [5, 1], [10, 1], [18, 1], [5, 1], [6, 1], [5, 1], [27, 1], [3, 1], [20, 0], [21, 1], [35, 0], [8, 1], [11, 0], [3, 1], [2, 1], [6, 1], [1, 1], [1, 1], [8, 1], [1, 1], [9, 1], [2, 1], [8, 0], [2, 1], [1, 1], [2, 1], [4, 1], [1, 1], [4, 1], [6, 1], [6, 1], [1, 1], [2, 1], [10, 0], [1, 1], [3, 1], [3, 1], [1, 1], [1, 1], [3, 1], [5, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [7, 1], [2, 1], [1, 1], [2, 1], [4, 1], [1, 1], [7, 1], [7, 1], [3, 1], [1, 1], [3, 1], [4, 1], [3, 1], [4, 1], [1, 1], [8, 1], [10, 1], [3, 1], [10, 1], [8, 1], [9, 1], [10, 1], [6, 1], [3, 1], [4, 1], [2, 1], [1, 1], [2, 1], [2, 1], [4, 1], [6, 1], [6, 1], [4, 1], [7, 1], [1, 1], [3, 1], 
[2, 1], [3, 1], [1, 1], [1, 1], [3, 1], [3, 1], [2, 1], [1, 1], [1, 1], [3, 1], [3, 1], [1, 1], [16, 1], [3, 1], [5, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [2, 1], [9, 1], [2, 1], [4, 1], [11, 1], [21, 1], [7, 1], [14, 1], [7, 1], [7, 1], [3, 1], [2, 1], [26, 1], [18, 1], [20, 1], [11, 1], [1, 1], [1, 1], [2, 1], [1, 1], [19, 1], [2, 1], [1, 1], [8, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [5, 1], [4, 1], [1, 1], [12, 1], [11, 1], [2, 1], [6, 1], [12, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [12, 1], [5, 1], [1, 1], [3, 1], [18, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [1, 1], [8, 1], [1, 1], [3, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [5, 1], [1, 1], [1, 1], [7, 1], [4, 1], [8, 1], [4, 1], [2, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [6, 1], [2, 1], [7, 1], [1, 1], [7, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [12, 1], [7, 1], [5, 1], [8, 0], [11, 1], [4, 1], [3, 1], [1, 0], [1, 1], [9, 1], [1, 1], [2, 1], [6, 1], [9, 0], [12, 0], [2, 1], [3, 1], [6, 1], [4, 1], [4, 1], [4, 1], [2, 1], [1, 1], [12, 1], [3, 1], [14, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [12, 1], [1, 1], [2, 1], [14, 1], [2, 1], [3, 1], [3, 1], [3, 1], [4, 1], [1, 1], [2, 1], [3, 1], [1, 1], [11, 1], [4, 1], [1, 1], [2, 1], [9, 1], [9, 1], [8, 1], [5, 1], [10, 1], [9, 0], [2, 1], [1, 1], [8, 1], [3, 1], [2, 1], [5, 1], [2, 1], [8, 1], [4, 1], [2, 1], [4, 1], [11, 1], [7, 1], [1, 1], [8, 
1], [6, 1], [17, 1], [6, 1], [4, 0], [5, 1], [8, 1], [9, 0], [5, 1], [8, 1], [37, 0], [13, 0], [4, 0], [10, 0], [5, 0], [11, 0], [3, 1], [3, 1], [1, 1], [6, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [4, 1], [16, 1], [12, 1], [7, 0], [5, 1], [6, 0], [12, 0], [2, 1], [14, 1], [10, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [7, 1], [12, 1], [6, 1], [8, 1], [21, 0], [7, 1], [23, 1], [10, 1], [2, 1], [3, 1], [6, 1], [2, 1], [12, 1], [13, 1], [15, 1], [4, 1], [1, 1], [21, 1], [6, 1], [8, 1], [39, 1], [1, 1], [4, 1], [9, 1], [1, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [3, 1], [1, 1], [2, 1], [3, 1], [6, 0], [1, 1], [3, 0], [2, 1], [1, 1], [9, 1], [3, 1], [1, 1], [4, 1], [3, 0], [1, 1], [2, 1], [3, 1], [3, 1], [1, 1], [2, 1], [5, 1], [1, 1], [1, 1], [8, 1], [6, 1], [1, 1], [1, 1], [1, 1], [10, 1], [1, 1], [7, 1], [4, 1], [9, 1], [6, 1], [5, 1], [2, 1], [2, 1], [9, 1], [8, 1], [11, 1], [1, 1], [2, 1], [12, 1], [1, 1], [3, 1], [2, 1], [3, 1], [5, 1], [4, 1], [4, 1], [1, 1], [2, 1], [3, 1], [15, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [8, 1], [2, 1], [8, 1], [7, 1], [3, 1], [3, 1], [22, 1], [1, 1], [4, 1], [7, 1], [4, 1], [3, 1], [4, 1], [1, 1], [1, 1], [16, 1], [1, 1], [3, 1], [7, 1], [1, 1], [6, 1], [16, 1], [3, 1], [2, 1], [6, 1], [2, 1], [4, 1], [11, 1], [8, 1], [2, 1], [3, 1], [31, 1], [5, 1], [12, 1], [2, 1], [5, 1], [7, 1], [1, 1], [12, 1], [3, 1], [1, 1], [1, 1], [7, 1], [13, 1], [4, 1], [6, 1], [21, 1], [3, 1], [1, 1], [6, 1], [2, 1], [3, 1], [2, 1], [3, 1], [2, 1], [4, 1], [3, 1], [2, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [7, 1], [9, 1], [22, 1], [2, 1], [12, 0], [9, 1], [3, 1], [20, 1], [14, 1], [22, 1], [21, 1], [2, 1], [8, 1], [7, 1], [2, 1], [1, 1], [1, 1], [1, 1], [6, 1], [3, 1], [4, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [19, 1], [2, 1], [1, 1], [6, 1], [23, 1], [2, 1], [8, 1], [5, 1], [1, 1], [8, 1], [16, 1], [20, 1], [13, 1], 
[15, 1], [12, 1], [8, 1], [1, 1], [2, 1], [2, 1], [5, 1], [2, 1], [7, 1], [3, 1], [3, 1], [1, 1], [9, 1], [9, 0], [6, 0], [4, 1], [1, 1], [7, 1], [11, 1], [1, 1], [4, 1], [6, 1], [6, 1], [2, 1], [5, 1], [3, 1], [6, 0], [9, 0], [1, 0], [6, 1], [7, 0], [11, 1], [3, 1], [6, 0], [20, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [3, 1], [3, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [13, 1], [8, 1], [26, 1], [1, 1], [3, 1], [7, 1], [1, 1], [6, 1], [7, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [3, 1], [4, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [11, 1], [7, 1], [3, 1], [8, 1], [13, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [1, 1], [4, 1], [3, 1], [11, 1], [18, 1], [41, 1], [4, 1], [4, 1], [6, 1], [2, 1], [1, 1], [8, 1], [4, 1], [1, 1], [19, 1], [20, 1], [21, 1], [11, 1], [10, 1], [35, 0], [71, 1], [3, 1], [10, 0], [11, 0], [32, 0], [8, 1], [4, 1], [55, 1], [2, 1], [3, 1], [8, 1], [6, 1], [4, 1], [2, 1], [1, 1], [13, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [3, 1], [2, 1], [8, 1], [3, 1], [3, 1], [6, 1], [7, 1], [10, 1], [19, 1], [5, 1], [7, 0], [5, 1], [9, 1], [4, 1], [1, 1], [15, 0], [5, 1], [21, 1], [2, 1], [2, 1], [2, 1], [3, 1], [1, 1], [11, 1], [9, 1], [12, 1], [3, 1], [4, 1], [2, 1], [2, 1], [3, 1], [17, 1], [9, 1], [9, 1], [49, 1], [5, 1], [10, 1], [93, 1], [19, 1], [10, 1], [14, 1], [28, 1], [51, 1], [39, 1], [11, 1], [3, 1], [13, 1], [19, 1], [21, 1], [3, 1], [2, 1], [2, 1], [1, 1], [23, 1], [1, 1], [4, 1], [2, 1], [7, 1], [3, 1], [3, 1], [10, 1], [3, 1], [2, 1], [3, 1], [2, 1], [6, 1], [2, 1], [11, 1], [6, 1], [13, 1], [4, 1], [2, 1], [2, 1], [5, 1], [6, 1], [13, 1], [2, 1], [9, 1], [21, 1], [14, 0], [3, 1], [1, 0], [6, 1], [2, 1], [2, 1], [2, 1], [4, 0], [3, 0], [4, 0], [2, 1], [2, 1], [11, 1], [1, 1], [10, 1], [2, 1], [1, 1], [2, 1], 
[4, 1], [9, 1], [1, 1], [4, 1], [1, 1], [4, 1], [10, 1], [7, 1], [1, 1], [5, 1], [10, 1], [25, 1], [7, 1], [5, 1], [8, 1], [4, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [9, 0], [2, 1], [3, 0], [4, 1], [1, 1], [1, 1], [11, 1], [1, 1], [1, 1], [4, 1], [1, 1], [3, 1], [6, 1], [2, 1], [13, 1], [6, 1], [3, 0], [10, 0], [8, 1], [28, 1], [10, 1], [13, 0], [3, 0], [3, 0], [19, 0], [10, 0], [9, 1], [17, 1], [3, 1], [2, 1], [2, 1], [1, 1], [8, 1], [4, 1], [1, 1], [2, 1], [5, 1], [1, 1], [3, 1], [6, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [7, 1], [9, 0], [18, 0], [4, 1], [27, 0], [6, 1], [9, 0], [1, 1], [2, 1], [11, 1], [17, 1], [12, 0], [7, 1], [3, 1], [6, 1], [7, 1], [2, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [5, 1], [3, 1], [4, 1], [6, 1], [1, 1], [6, 1], [1, 1], [5, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [5, 1], [3, 1], [1, 1], [5, 1], [1, 1], [1, 1], [5, 1], [1, 1], [3, 1], [1, 1], [3, 1], [1, 1], [2, 1], [2, 1], [9, 0], [1, 1], [7, 1], [1, 1], [2, 1], [3, 1], [1, 1], [3, 1], [1, 1], [19, 1], [2, 1], [3, 1], [4, 1], [17, 1], [7, 1], [1, 1], [5, 1], [1, 1], [6, 1], [15, 1], [6, 1], [7, 1], [7, 1], [8, 1], [6, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [7, 1], [4, 1], [1, 1], [6, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [22, 1], [6, 1], [1, 1], [5, 1], [1, 1], [9, 1], [1, 1], [1, 1], [2, 1], [3, 1], [2, 1], [13, 1], [4, 1], [3, 1], [3, 1], [12, 1], [1, 1], [14, 1], [6, 1], [12, 1], [2, 1], [7, 1], [12, 1], [5, 1], [9, 1], [10, 1], [2, 1], [6, 1], [10, 1], [1, 1], [18, 1], [6, 1], [2, 1], [3, 1], [3, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [7, 1], [14, 1], [2, 1], [14, 1], [19, 1], [28, 1], [2, 1], [6, 1], [21, 1], [5, 1], [7, 1], [8, 1], [2, 1], [43, 1], [35, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [3, 1], [2, 1], [3, 1], [4, 1], [6, 1], [3, 1], [19, 0], [2, 1], 
[4, 1], [2, 1], [2, 1], [7, 1], [3, 1], [5, 1], [2, 1], [2, 1], [2, 1], [1, 1], [5, 1], [5, 1], [2, 1], [3, 1], [1, 1], [6, 1], [19, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [13, 1], [5, 0], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [3, 1], [6, 1], [3, 1], [5, 1], [7, 0], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [4, 1], [1, 1], [4, 1], [11, 1], [1, 1], [1, 1], [2, 1], [4, 1], [7, 1], [5, 1], [3, 1], [12, 1], [2, 1], [2, 1], [11, 1], [5, 1], [36, 1], [16, 1], [10, 1], [24, 0], [12, 1], [42, 1], [3, 1], [1, 1], [7, 1], [13, 0], [9, 0], [10, 1], [3, 1], [5, 1], [1, 1], [10, 1], [7, 1], [8, 1], [1, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [1, 1], [8, 1], [3, 1], [1, 1], [2, 1], [1, 0], [2, 1], [3, 1], [1, 1], [2, 1], [1, 1], [6, 0], [3, 1], [8, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [5, 1], [1, 1], [10, 1], [2, 1], [5, 1], [4, 1], [6, 1], [12, 1], [16, 0], [5, 1], [11, 0], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [4, 1], [1, 1], [1, 1], [4, 1], [37, 1], [1, 1], [1, 1], [18, 1], [3, 1], [4, 1], [1, 1], [1, 1], [2, 1], [9, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [40, 1], [5, 1], [1, 1], [1, 1], [2, 1], [1, 1], [5, 1], [14, 1], [15, 1], [14, 1], [3, 1], [4, 1], [1, 0], [4, 1], [5, 1], [11, 1], [2, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [5, 1], [2, 
1], [3, 1], [9, 0], [15, 0], [3, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [9, 1], [3, 1], [10, 1], [4, 1], [2, 1], [1, 1], [4, 1], [1, 1], [16, 1], [1, 1], [1, 1], [4, 1], [4, 1], [4, 1], [1, 1], [3, 1], [1, 1], [1, 1], [8, 1], [3, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 0], [1, 1], [3, 0], [7, 1], [8, 0], [10, 0], [1, 1], [5, 0], [2, 0], [2, 1], [5, 1], [1, 1], [4, 1], [2, 1], [1, 1], [9, 1], [7, 1], [8, 1], [3, 1], [4, 1], [7, 1], [1, 1], [4, 1], [6, 1], [3, 1], [1, 1], [6, 1], [11, 1], [3, 1], [1, 1], [2, 1], [2, 1], [15, 1], [2, 1], [4, 1], [1, 1], [1, 1], [5, 1], [11, 1], [2, 1], [5, 1], [18, 1], [3, 1], [3, 1], [2, 1], [1, 1], [10, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 0], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [10, 0], [3, 1], [2, 1], [7, 0], [3, 1], [1, 1], [1, 1], [3, 1], [3, 1], [2, 1], [10, 1], [6, 0], [2, 0], [3, 1], [9, 0], [6, 0], [4, 1], [1, 1], [4, 1], [3, 1], [1, 1], [3, 1], [12, 0], [1, 1], [9, 0], [7, 0], [6, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [3, 1], [3, 1], [9, 1], [8, 0], [12, 0], [8, 0], [3, 1], [13, 0], [9, 1], [7, 0], [3, 0], [10, 1], [6, 0], [5, 0], [1, 0], [1, 0], [1, 0], [2, 0], [1, 0], [3, 0], [1, 0], [1, 1], [2, 1], [2, 1], [1, 1], [7, 1], [5, 1], [1, 1], [1, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [1, 1], [1, 1], [4, 1], [4, 1], [1, 1], [1, 1], [3, 1], [3, 1], [2, 1], [3, 1], [1, 1], [2, 1], [1, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [5, 1], [2, 1], [4, 1], [14, 1], [15, 1], [1, 1], [2, 1], [7, 1], 
[3, 1], [1, 1], [3, 1], [13, 1], [7, 1], [8, 1], [3, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [4, 1], [7, 1], [3, 1], [2, 1], [1, 1], [3, 1], [4, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [11, 1], [3, 1], [2, 1], [10, 1], [5, 1], [6, 1], [3, 1], [1, 1], [14, 1], [3, 1], [13, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [15, 1], [8, 1], [4, 0], [2, 1], [2, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [6, 1], [6, 1], [12, 1], [5, 1], [5, 0], [10, 1], [1, 1], [13, 0], [6, 0], [4, 0], [5, 0], [2, 1], [6, 0], [9, 1], [7, 1], [10, 1], [1, 1], [12, 1], [8, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [2, 1], [2, 1], [3, 1], [3, 1], [3, 1], [2, 1], [9, 1], [3, 1], [1, 1], [9, 1], [14, 1], [4, 1], [12, 1], [4, 1], [19, 1], [5, 1], [2, 1], [12, 1], [3, 1], [7, 1], [5, 1], [1, 1], [6, 1], [11, 1], [2, 1], [4, 1], [2, 1], [1, 1], [3, 1], [1, 1], [6, 1], [1, 1], [4, 1], [2, 1], [9, 1], [13, 1], [3, 1], [7, 1], [1, 1], [7, 1], [26, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [3, 1], [2, 1], [2, 1], [5, 1], [4, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [4, 1], [2, 1], [4, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [3, 0], [2, 1], [2, 1], [9, 0], [3, 0], [2, 0], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 0], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [4, 1], [7, 1], [3, 1], [1, 1], [5, 1], [10, 1], [6, 1], [16, 1], [2, 1], [3, 
1], [5, 1], [7, 1], [8, 1], [4, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [19, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [2, 1], [8, 1], [21, 1], [9, 1], [3, 1], [8, 1], [9, 1], [1, 1], [5, 1], [2, 1], [4, 1], [9, 1], [7, 1], [8, 1], [6, 1], [11, 1], [33, 1], [9, 1], [21, 1], [9, 1], [2, 1], [6, 1], [7, 1], [60, 1], [10, 1], [38, 1], [5, 1], [2, 1], [1, 1], [1, 1], [2, 1], [7, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [9, 1], [2, 1], [1, 1], [1, 1], [7, 0], [1, 1], [5, 0], [4, 1], [1, 1], [7, 1], [1, 1], [4, 1], [9, 0], [13, 1], [14, 1], [6, 0], [12, 0], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [5, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [8, 1], [18, 0], [5, 1], [6, 1], [5, 1], [27, 0], [3, 1], [14, 0], [11, 1], [2, 1], [3, 1], [26, 1], [5, 1], [6, 1], [15, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [11, 1], [1, 1], [2, 1], [25, 1], [17, 1], [2, 1], [1, 1], [8, 1], [8, 1], [9, 1], [14, 1], [10, 1], [3, 1], [1, 1], [8, 1], [14, 1], [34, 1], [21, 0], [13, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [4, 1], [5, 0], [8, 1], [1, 1], [3, 1], [35, 0], [3, 1], [3, 1], [7, 0], [3, 1], [7, 0], [2, 1], [1, 1], [2, 0], [2, 1], [2, 1], [4, 0], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [9, 1], [1, 1], [1, 1], [8, 1], [18, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [4, 1], [2, 1], [1, 1], [7, 1], [9, 1], [3, 1], [1, 1], [4, 1], [3, 1], [1, 1], [4, 1], [1, 1], [2, 1], [5, 1], [6, 1], [6, 1], [1, 1], [8, 1], [5, 1], [3, 1], [5, 1], [15, 1], [5, 1], [2, 1], [7, 1], [4, 1], [3, 1], [3, 1], [7, 1], [1, 
1], [1, 1], [1, 1], [1, 1], [6, 1], [5, 1], [1, 0], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [6, 1], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [14, 1], [7, 1], [5, 1], [5, 1], [24, 1], [1, 1], [5, 1], [7, 1], [1, 1], [1, 1], [5, 1], [3, 1], [6, 1], [8, 1], [1, 1], [1, 1], [2, 1], [6, 1], [5, 1], [4, 1], [2, 1], [2, 1], [4, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 0], [3, 1], [10, 1], [36, 1], [3, 1], [8, 0], [3, 1], [9, 0], [8, 0], [1, 0], [5, 0], [6, 1], [14, 0], [7, 1], [1, 1], [8, 0], [18, 0], [6, 1], [12, 1], [14, 0], [21, 0], [5, 1], [2, 1], [1, 1], [2, 1], [1, 1], [4, 1], [6, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [4, 1], [1, 1], [5, 1], [4, 1], [1, 1], [2, 1], [4, 1], [13, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 1], [4, 1], [4, 1], [2, 1], [8, 1], [3, 1], [6, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [5, 0], [6, 0], [2, 1], [4, 1], [15, 1], [8, 1], [10, 1], [5, 1], [1, 1], [3, 1], [3, 1], [4, 1], [5, 0], [6, 1], [7, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [7, 1], [4, 1], [2, 1], [6, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [4, 1], [3, 0], [2, 1], [2, 1], [20, 1], [29, 1], [1, 1], [8, 1], [6, 1], [17, 1], [1, 1], [2, 1], [1, 1], [6, 1], [20, 
1], [2, 1], [8, 1], [5, 1], [4, 0], [17, 0], [15, 1], [3, 0], [7, 0], [6, 0], [1, 1], [31, 0], [11, 0], [7, 0], [3, 0], [6, 1], [6, 1], [2, 1], [3, 1], [10, 1], [9, 1], [3, 1], [3, 1], [9, 1], [3, 1], [2, 1], [4, 1], [1, 1], [7, 1], [26, 1], [3, 1], [5, 1], [3, 1], [4, 0], [3, 1], [3, 1], [3, 1], [4, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [1, 1], [1, 1], [3, 1], [32, 1], [1, 1], [2, 1], [21, 1], [51, 1], [9, 1], [7, 1], [8, 1], [18, 1], [13, 1], [10, 1], [1, 1], [5, 1], [6, 1], [9, 0], [3, 1], [2, 1], [3, 0], [16, 0], [3, 1], [8, 1], [6, 1], [4, 0], [7, 0], [14, 0], [8, 1], [9, 1], [12, 1], [6, 0], [22, 0], [18, 1], [6, 1], [3, 1], [10, 1], [10, 1], [14, 1], [35, 1], [12, 1], [4, 1], [58, 1], [2, 1], [4, 1], [11, 1], [30, 1], [11, 1], [2, 1], [13, 1], [5, 1], [30, 1], [5, 1], [1, 1], [6, 1], [7, 1], [3, 1], [11, 1], [12, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [10, 1], [1, 1], [1, 1], [3, 1], [5, 1], [12, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [12, 1], [5, 1], [1, 1], [44, 1], [3, 1], [8, 1], [1, 1], [5, 1], [4, 1], [9, 1], [5, 1], [2, 1], [5, 1], [4, 1], [35, 1], [2, 1], [19, 1], [67, 0], [26, 1], [11, 1], [66, 0], [14, 1], [24, 1], [11, 1], [2, 1], [11, 1], [15, 1], [14, 1], [10, 1], [2, 1], [5, 1], [6, 1], [2, 1], [4, 1], [20, 1], [1, 1], [5, 1], [25, 1], [10, 1], [1, 1], [8, 1], [4, 1], [12, 1], [5, 1], [3, 1], [23, 1], [2, 1], [7, 1], [11, 0], [4, 1], [5, 1], [5, 1], [1, 1], [19, 0], [1, 1], [10, 1], [12, 1], [3, 1], [2, 1], [2, 1], [1, 1], [8, 1], [1, 1], [9, 1], [5, 1], [5, 1], [2, 1], [9, 1], [3, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 
1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [5, 1], [1, 1], [4, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [15, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [6, 1], [3, 1], [4, 1], [2, 1], [1, 1], [1, 0], [1, 1], [10, 1], [2, 1], [10, 0], [4, 1], [5, 1], [7, 0], [5, 1], [4, 1], [8, 1], [3, 1], [3, 1], [2, 1], [1, 1], [4, 1], [3, 1], [1, 1], [1, 1], [3, 1], [2, 1], [7, 1], [5, 1], [8, 1], [1, 1], [39, 1], [16, 1], [2, 1], [3, 1], [16, 1], [13, 1], [1, 1], [6, 1], [2, 1], [2, 1], [5, 1], [3, 1], [3, 1], [11, 1], [9, 1], [1, 1], [1, 1], [3, 1], [4, 1], [2, 1], [12, 1], [4, 1], [4, 1], [2, 1], [6, 1], [2, 1], [1, 1], [1, 1], [3, 1], [2, 1], [2, 1], [11, 1], [2, 1], [21, 1], [16, 1], [6, 1], [2, 1], [2, 1], [1, 1], [8, 1], [1, 1], [1, 1], [2, 0], [12, 0], [19, 0], [1, 1], [11, 0], [8, 0], [3, 1], [3, 1], [3, 1], [15, 1], [5, 1], [5, 1], [4, 1], [2, 1], [6, 1], [13, 0], [2, 0], [2, 1], [8, 1], [4, 0], [6, 0], [3, 1], [2, 1], [1, 1], [1, 1], [5, 1], [2, 1], [5, 1], [1, 1], [3, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [4, 1], [2, 1], [4, 1], [11, 1], [19, 1], [2, 1], [27, 1], [4, 1], [3, 1], [13, 1], [13, 1], [15, 1], [7, 1], [8, 0], [9, 1], [1, 1], [3, 1], [1, 1], [6, 1], [4, 1], [4, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [5, 1], [4, 1], [3, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [3, 1], [8, 1], [5, 1], [2, 1], [2, 1], [1, 1], [4, 0], [8, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [7, 0], [5, 1], [6, 1], [4, 1], [1, 1], [2, 1], [9, 1], [3, 1], [12, 1], [5, 
1], [4, 1], [3, 1], [1, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [10, 1], [3, 1], [2, 1], [2, 1], [9, 1], [26, 1], [2, 1], [11, 1], [1, 1], [7, 1], [4, 1], [10, 1], [25, 1], [40, 1], [25, 1], [6, 1], [2, 1], [3, 1], [2, 1], [1, 1], [5, 1], [16, 1], [1, 1], [12, 1], [4, 1], [6, 1], [12, 1], [10, 1], [6, 1], [15, 0], [2, 1], [8, 1], [21, 1], [3, 1], [10, 1], [21, 1], [3, 1], [8, 1], [6, 1], [1, 1], [8, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [6, 1], [8, 1], [17, 1], [2, 1], [3, 1], [1, 1], [14, 1], [8, 1], [3, 1], [9, 1], [32, 1], [7, 1], [7, 1], [3, 1], [2, 1], [43, 1], [5, 1], [2, 1], [26, 1], [3, 1], [3, 1], [3, 1], [30, 1], [14, 1], [3, 1], [5, 1], [7, 1], [6, 1], [12, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [2, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [8, 1], [2, 1], [5, 1], [10, 1], [5, 1], [3, 1], [2, 1], [1, 0], [9, 1], [3, 1], [18, 1], [4, 1], [3, 1], [1, 1], [3, 1], [9, 1], [1, 1], [2, 1], [20, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [3, 1], [1, 1], [3, 1], [7, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [6, 1], [6, 1], [8, 1], [6, 1], [2, 1], [7, 1], [2, 1], [3, 1], [5, 1], [3, 1], [5, 1], [1, 1], [6, 1], [7, 1], [3, 1], [20, 1], [12, 1], [16, 1], [11, 1], [9, 1], [4, 1], [8, 1], [26, 1], [29, 1], [12, 1], [4, 1], [7, 1], [9, 1], [6, 1], [2, 1], [2, 0], [4, 1], [1, 1], [29, 0], [44, 0], [2, 1], [7, 1], [3, 1], [3, 1], [1, 1], [4, 1], [1, 1], [8, 1], [9, 0], [13, 1], [21, 1], [8, 1], [26, 0], [17, 1], [16, 1], [10, 1], [3, 1], [1, 0], [9, 1], [6, 1], [4, 1], [15, 1], [11, 1], [13, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [7, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 
1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [6, 1], [1, 1], [2, 1], [4, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 1], [3, 1], [1, 1], [1, 1], [9, 1], [1, 1], [5, 1], [2, 1], [13, 0], [1, 1], [5, 1], [9, 1], [2, 1], [6, 1], [1, 1], [2, 1], [2, 1], [2, 1], [2, 1], [4, 1], [14, 1], [3, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [10, 1], [1, 1], [1, 1], [2, 1], [1, 0], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [28, 1], [6, 1], [7, 1], [2, 1], [5, 1], [2, 1], [10, 1], [6, 1], [7, 1], [1, 1], [11, 1], [5, 1], [1, 1], [6, 1], [10, 1], [12, 1], [1, 1], [2, 1], [4, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [2, 1], [1, 1], [6, 1], [1, 1], [2, 1], [4, 1], [3, 1], [9, 1], [1, 1], [6, 1], [1, 1], [17, 1], [4, 1], [1, 1], [1, 1], [2, 1], [5, 1], [9, 1], [4, 1], [1, 1], [3, 1], [1, 1], [6, 1], [4, 1], [7, 1], [1, 1], [1, 1], [4, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [5, 1], [3, 1], [2, 1], [6, 1], [2, 1], [4, 1], [5, 1], [3, 1], [2, 1], [1, 1], [6, 1], [5, 1], [8, 1], [6, 0], [3, 1], [1, 1], [4, 1], [2, 1], [2, 1], [3, 0], [2, 1], [9, 1], [2, 1], [2, 1], [4, 1], [1, 1], [35, 1], [5, 1], [3, 1], [2, 1], [2, 1], [3, 1], [2, 1], [2, 1], [1, 1], [4, 1], [16, 1], [8, 1], [14, 1], [3, 1], [6, 1], [12, 1], [7, 1], [61, 0], [31, 1], [67, 0], [49, 1], [6, 1], [7, 1], [4, 1], [24, 1], [16, 1], [3, 1], [3, 0], [3, 1], [1, 1], [8, 0], [27, 0], [1, 1], [3, 1], [3, 1], [7, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], 
[2, 1], [3, 1], [9, 0], [1, 1], [15, 1], [1, 1], [3, 1], [12, 0], [4, 0], [3, 1], [3, 1], [7, 0], [10, 1], [1, 1], [3, 1], [1, 1], [3, 1], [7, 1], [9, 1], [5, 1], [2, 1], [14, 1], [17, 0], [3, 1], [15, 1], [18, 1], [11, 0], [24, 1], [10, 1], [13, 1], [6, 1], [8, 1], [14, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [10, 1], [1, 1], [1, 1], [2, 1], [1, 1], [24, 1], [3, 1], [12, 1], [4, 1], [6, 1], [24, 1], [22, 1], [9, 1], [14, 0], [27, 1], [4, 0], [6, 1], [3, 1], [8, 1], [27, 1], [3, 1], [3, 1], [1, 1], [3, 1], [2, 1], [3, 1], [10, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [6, 1], [1, 1], [1, 1], [8, 1], [6, 1], [6, 1], [5, 1], [2, 1], [9, 1], [3, 1], [1, 1], [4, 1], [4, 1], [3, 1], [4, 1], [10, 1], [1, 1], [10, 1], [2, 1], [1, 1], [5, 1], [7, 1], [5, 1], [5, 1], [2, 1], [6, 1], [7, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [6, 1], [1, 1], [4, 1], [1, 1], [1, 1], [6, 1], [5, 1], [3, 1], [9, 1], [1, 1], [1, 1], [2, 1], [5, 1], [3, 1], [4, 1], [7, 1], [7, 1], [3, 1], [3, 1], [6, 1], [8, 1], [1, 0], [8, 0], [3, 1], [8, 1], [2, 1], [7, 1], [12, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 0], [1, 1], [1, 1], [5, 1], [10, 1], [10, 1], [27, 1], [4, 1], [2, 1], [2, 1], [2, 1], [4, 1], [1, 1], [3, 1], [2, 1], [4, 1], [3, 1], [1, 1], [4, 1], [6, 1], [10, 1], [23, 1], [1, 1], [12, 1], [3, 1], [23, 1], [5, 1], [6, 1], [14, 1], [16, 1], [15, 1], [13, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [3, 1], [5, 1], [3, 1], [2, 1], [1, 1], [7, 1], [6, 1], [1, 1], [7, 1], [48, 1], [1, 1], [9, 1], [3, 1], [3, 1], [3, 1], [2, 1], [10, 1], [3, 1], [6, 1], [1, 1], [6, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], 
[1, 1], [3, 1], [5, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [14, 1], [1, 1], [3, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [5, 1], [2, 1], [1, 1], [3, 1], [1, 1], [6, 1], [4, 1], [1, 1], [11, 1], [12, 1], [6, 1], [3, 1], [2, 1], [3, 0], [1, 1], [7, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [2, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [13, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [12, 0], [14, 0], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 0], [3, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [3, 1], [2, 1], [3, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [3, 1], [1, 1], [3, 1], [3, 1], [3, 1], [1, 1], [13, 1], [1, 1], [7, 1], [10, 1], [6, 1], [2, 1], [2, 1], [2, 1], [1, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1], [4, 1], [2, 1], [6, 1], [2, 1], [4, 1], [4, 1], [15, 1], [6, 1], [3, 1], [5, 0], [13, 1], [5, 1], [22, 0], [1, 1], [7, 1], [2, 1], [3, 1], [3, 1], [9, 1], [5, 1], [4, 0], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [3, 1], [2, 1], [3, 1], [4, 1], [1, 1], [3, 1], [1, 1], [1, 1], [4, 1], [6, 1], [2, 1], [9, 1], [4, 1], [22, 1], [2, 1], [4, 0], [1, 1], [3, 0], [2, 1], [1, 1], [2, 1], [6, 1], [4, 1], [15, 0], [3, 1], [2, 1], [2, 1], [3, 1], [8, 1], [4, 1], [3, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [6, 0], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [12, 1], [1, 1], [10, 1], [3, 1], [9, 1], [13, 0], [6, 1], [8, 1], [5, 1], [1, 1], [7, 1], [4, 1], [1, 1], [3, 1], [4, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [4, 1], 
[1, 1], [9, 1], [2, 1], [2, 1], [7, 1], [58, 0], [2, 1], [11, 1], [1, 1], [4, 1], [1, 1], [4, 1], [10, 1], [4, 1], [28, 1], [3, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [1, 1], [1, 1], [1, 1], [1, 1], [6, 0], [5, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [4, 1], [4, 1], [5, 1], [17, 1], [1, 1], [3, 1], [9, 1], [5, 1], [1, 1], [7, 1], [4, 1], [4, 1], [5, 1], [16, 1], [30, 1], [9, 1], [7, 1], [40, 1], [9, 1], [6, 1], [4, 1], [34, 1], [16, 1], [6, 1], [16, 1], [14, 1], [21, 1], [26, 1], [7, 1], [19, 0], [18, 1], [9, 1], [23, 0], [24, 0], [23, 1], [1, 1], [4, 1], [6, 1], [1, 1], [3, 1], [1, 1], [11, 0], [10, 0], [3, 0], [2, 0], [12, 0], [1, 0], [7, 0], [1, 0], [1, 0], [2, 0], [3, 0], [1, 0], [7, 0], [1, 0], [1, 0], [2, 1], [1, 1], [1, 1], [1, 1], [9, 1], [3, 1], [3, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [1, 1], [1, 1], [2, 1], [1, 0], [5, 0], [7, 1], [2, 0], [4, 0], [5, 0], [2, 1], [3, 0], [4, 1], [7, 1], [3, 0], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [5, 1], [2, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [3, 1], [2, 1], [2, 1], [2, 1], [2, 1], [1, 1], [8, 1], [19, 0], [2, 1], [34, 0], [4, 1], [12, 0], [3, 1], [2, 1], [11, 1], [8, 0], [2, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [15, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [8, 0], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [3, 1], [3, 1], [2, 1], [1, 1], [3, 1], [1, 1], [2, 1], [2, 1], [2, 1], [2, 1], [11, 1], [9, 0], [16, 1], [5, 1], [17, 1], [23, 1], [7, 1], [24, 1], [9, 1], [7, 1], [2, 1], [6, 1], 
[21, 1], [5, 1], [21, 1], [2, 1], [2, 1], [4, 1], [2, 1], [1, 1], [4, 1], [3, 1], [1, 1], [2, 1], [9, 1], [1, 1], [1, 1], [6, 1], [1, 1], [2, 1], [5, 0], [8, 0], [6, 1], [2, 1], [7, 0], [3, 0], [5, 1], [12, 0], [7, 0], [4, 0], [2, 0], [4, 0], [1, 1], [7, 0], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [3, 1], [1, 1], [8, 1], [7, 1], [2, 1], [27, 1], [7, 1], [7, 0], [1, 0], [6, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 0], [1, 1], [2, 0], [2, 1], [1, 1], [14, 1], [2, 1], [2, 1], [8, 1], [6, 1], [2, 1], [3, 1], [5, 1], [4, 1], [1, 1], [6, 1], [2, 1], [1, 1], [1, 1], [11, 1], [10, 1], [5, 1], [5, 1], [5, 1], [12, 0], [30, 0], [5, 0], [9, 0], [23, 1], [7, 1], [27, 0], [1, 0], [14, 1], [18, 0], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [5, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [3, 1], [1, 1], [5, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [3, 1], [12, 1], [20, 1], [6, 1], [20, 1], [12, 1], [3, 1], [3, 1], [3, 1], [7, 1], [4, 1], [18, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 0], [6, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [3, 1], [3, 1], [1, 1], [1, 1], [12, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [6, 1], [5, 1], [4, 1], [17, 1], [19, 1], [4, 1], [5, 1], [1, 1], [7, 1], [5, 1], [2, 1], [15, 1], [8, 1], [9, 1], [11, 1], [3, 1], [6, 1], [1, 1], [1, 1], [5, 1], [5, 1], [5, 1], [1, 1], [8, 1], [11, 1], [3, 1], [4, 1], [1, 1], [6, 1], [20, 1], [6, 0], [4, 1], [4, 1], [2, 0], [3, 1], [6, 1], [9, 0], [13, 1], [8, 1], [7, 1], [3, 1], [6, 0], [5, 0], [9, 0], [8, 1], [2, 1], [9, 0], [4, 0], [2, 1], [32, 1], [28, 1], [3, 1], 
[14, 0], [8, 1], [5, 1], [6, 1], [2, 1], [4, 1], [11, 0], [4, 1], [8, 0], [6, 1], [1, 1], [1, 1], [10, 1], [1, 1], [5, 0], [2, 0], [3, 1], [5, 1], [5, 1], [26, 1], [3, 0], [6, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [10, 1], [1, 1], [2, 1], [1, 1], [10, 1], [27, 1], [1, 1], [10, 1], [2, 1], [4, 1], [1, 1], [2, 1], [1, 1], [5, 1], [4, 1], [4, 1], [5, 1], [3, 1], [7, 1], [5, 1], [1, 1], [5, 1], [5, 1], [6, 0], [8, 1], [10, 1], [19, 0], [1, 1], [11, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [9, 1], [2, 1], [4, 1], [2, 1], [6, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [3, 1], [15, 1], [2, 1], [2, 1], [4, 1], [10, 1], [13, 1], [6, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [1, 1], [2, 1], [3, 1], [2, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [2, 1], [5, 1], [8, 1], [5, 1], [28, 1], [1, 1], [5, 1], [3, 1], [1, 1], [3, 1], [3, 1], [22, 1], [1, 1], [19, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [21, 1], [9, 1], [2, 1], [1, 1], [4, 1], [6, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [3, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [16, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [3, 1], [1, 1], [7, 1], [1, 1], [1, 1], [4, 1], [4, 0], [4, 1], [2, 1], [15, 1], [10, 0], [2, 1], [3, 1], [1, 1], [4, 1], [2, 1], [4, 1], [1, 1], [2, 1], 
[6, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [5, 1], [3, 1], [1, 1], [1, 1], [2, 1], [2, 1], [2, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [2, 1], [2, 1], [9, 1], [19, 1], [4, 1], [26, 1], [8, 1], [6, 1], [1, 1], [7, 1], [29, 1], [17, 1], [11, 1], [14, 1], [1, 1], [2, 0], [1, 1], [1, 0], [1, 1], [3, 1], [1, 1], [1, 1], [5, 1], [6, 1], [2, 1], [3, 1], [11, 1], [2, 1], [3, 1], [6, 1], [7, 1], [7, 1], [2, 1], [4, 1], [4, 1], [6, 1], [10, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [11, 1], [1, 1], [2, 1], [6, 1], [5, 1], [1, 1], [4, 1], [1, 1], [4, 1], [1, 1], [10, 1], [1, 0], [2, 1], [6, 1], [1, 1], [2, 1], [6, 1], [4, 1], [1, 1], [13, 1], [19, 1], [2, 1], [5, 1], [4, 1], [7, 1], [2, 1], [5, 1], [3, 1], [18, 1], [6, 0], [8, 0], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [2, 1], [2, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [5, 1], [2, 1], [18, 1], [14, 1], [2, 1], [2, 1], [6, 1], [7, 1], [3, 1], [3, 1], [2, 1], [8, 1], [2, 1], [2, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [7, 1], [1, 1], [2, 1], [2, 1], [3, 1], [3, 1], [1, 1], [4, 1], [3, 1], [2, 1], [5, 1], [1, 1], [1, 1], [1, 1], [8, 1], [11, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [7, 1], [1, 1], [1, 1], [23, 1], [4, 1], [5, 1], [7, 1], [5, 1], [3, 1], [4, 0], [4, 1], [5, 0], [3, 1], [8, 0], [1, 1], [10, 1], [3, 0], [5, 1], [8, 0], [2, 1], [3, 1], [2, 1], [1, 1], [2, 1], [4, 1], [4, 0], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [2, 1], [2, 1], [1, 0], [1, 1], [1, 1], [2, 0], [2, 0], [2, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [3, 1], [4, 1], [3, 1], [2, 1], [3, 1], [2, 
1], [3, 1], [9, 1], [2, 1], [3, 1], [3, 1], [2, 1], [1, 1], [10, 1], [3, 1], [10, 0], [2, 1], [2, 1], [1, 1], [3, 1], [3, 1], [1, 1], [2, 1], [2, 1], [8, 0], [1, 1], [1, 1], [6, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [3, 1], [4, 1], [4, 1], [4, 1], [1, 1], [4, 1], [6, 1], [7, 1], [11, 1], [2, 1], [5, 1], [2, 1], [3, 1], [4, 1], [3, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [16, 1], [3, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [9, 1], [15, 1], [7, 1], [5, 1], [11, 1], [3, 1], [2, 1], [2, 1], [2, 1], [14, 1], [4, 1], [11, 1], [2, 1], [2, 1], [4, 1], [6, 1], [2, 1], [18, 1], [4, 1], [6, 1], [4, 1], [2, 1], [6, 1], [6, 1], [3, 1], [2, 1], [1, 1], [5, 1], [4, 1], [14, 1], [9, 1], [24, 1], [24, 1], [14, 1], [7, 1], [1, 1], [2, 1], [8, 1], [31, 1], [16, 1], [14, 1], [10, 1], [19, 1], [2, 1], [3, 1], [1, 1], [2, 1], [4, 1], [2, 1], [3, 1], [2, 1], [1, 1], [8, 1], [4, 1], [8, 1], [2, 1], [1, 1], [4, 1], [12, 1], [3, 1], [2, 1], [6, 1], [9, 0], [1, 1], [5, 1], [7, 1], [3, 1], [12, 1], [9, 1], [28, 1], [7, 1], [5, 1], [11, 0], [1, 1], [3, 1], [9, 1], [1, 1], [40, 1], [12, 1], [3, 0], [11, 1], [15, 0], [19, 0], [3, 1], [39, 0], [1, 0], [2, 0], [2, 1], [15, 0], [18, 1], [3, 1], [2, 1], [1, 1], [1, 1], [6, 1], [4, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [5, 1], [4, 1], [1, 1], [1, 1], [7, 1], [1, 1], [1, 1], [9, 1], [11, 1], [7, 1], [5, 1], [5, 1], [4, 1], [4, 1], [2, 1], [2, 1], [1, 1], [11, 1], [7, 1], [4, 1], [5, 1], [6, 1], [3, 1], [1, 1], [2, 1], [1, 1], [3, 1], [2, 1], [3, 1], [21, 1], [8, 1], [5, 1], [24, 1], [4, 1], [13, 1], [7, 1], [3, 1], [3, 1], [4, 1], [8, 1], [3, 1], [10, 1], [2, 1], [4, 1], [5, 1], [2, 1], [4, 1], [22, 0], [4, 1], [1, 1], [2, 1], [3, 1], [3, 1], [1, 1], [1, 1], [1, 1], [10, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [1, 1], [1, 1], [1, 0], [1, 
1], [1, 0], [1, 1], [2, 1], [1, 1], [2, 1], [2, 1], [5, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [4, 1], [3, 1], [6, 1], [3, 1], [5, 1], [7, 1], [4, 1], [4, 1], [16, 1], [8, 1], [3, 1], [7, 1], [7, 1], [6, 1], [12, 1], [1, 1], [4, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [8, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [4, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [35, 1], [7, 1], [1, 1], [1, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 1], [14, 1], [4, 1], [3, 1], [5, 1], [4, 1], [4, 1], [3, 1], [2, 1], [4, 1], [2, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [4, 1], [4, 1], [18, 1], [10, 1], [6, 1], [4, 1], [7, 1], [2, 1], [2, 1], [8, 1], [11, 1], [9, 1], [11, 1], [9, 1], [5, 1], [2, 1], [4, 1], [1, 1], [1, 1], [2, 1], [11, 1], [3, 1], [12, 1], [8, 1], [14, 1], [6, 1], [32, 1], [15, 1], [9, 1], [19, 1], [6, 1], [8, 0], [23, 1], [33, 1], [6, 1], [2, 1], [1, 1], [3, 1], [1, 1], [4, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [9, 1], [8, 0], [11, 0], [4, 0], [5, 1], [8, 1], [11, 1], [5, 0], [8, 1], [6, 1], [1, 0], [3, 0], [1, 0], [4, 0], [1, 0], [2, 0], [1, 0], [5, 0], [4, 0], [1, 0], [11, 0], [1, 0], [3, 1], [10, 1], [10, 1], [7, 1], [3, 1], [4, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [2, 1], [4, 1], [3, 1], [7, 1], [9, 1], [1, 1], [8, 1], [3, 1], [1, 1], [5, 1], [2, 1], [8, 1], [1, 1], [3, 1], [3, 1], [3, 1], [2, 1], [1, 1], [2, 1], [1, 1], [9, 1], [5, 0], [9, 1], [6, 1], [7, 0], [7, 
1], [1, 0], [10, 0], [6, 0], [3, 1], [21, 0], [1, 1], [4, 0], [1, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [4, 1], [2, 1], [1, 1], [1, 1], [3, 1], [3, 1], [3, 1], [3, 1], [1, 1], [1, 1], [3, 1], [5, 1], [1, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [3, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [4, 1], [2, 1], [2, 1], [2, 1], [2, 1], [2, 1], [4, 1], [3, 1], [3, 1], [8, 1], [3, 1], [5, 1], [3, 1], [4, 1], [11, 1], [7, 1], [2, 1], [5, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [7, 1], [1, 1], [2, 1], [9, 1], [6, 1], [2, 1], [5, 1], [16, 1], [2, 1], [3, 1], [8, 1], [4, 1], [2, 1], [5, 1], [1, 1], [9, 1], [17, 1], [11, 1], [7, 1], [5, 1], [32, 0], [8, 0], [4, 0], [6, 0], [2, 1], [2, 0], [2, 1], [1, 0], [1, 0], [7, 0], [13, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [7, 1], [3, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [6, 1], [3, 1], [1, 1], [4, 1], [7, 1], [2, 1], [1, 1], [2, 1], [2, 1], [1, 1], [2, 1], [3, 1], [1, 1], [1, 1], [1, 1], [1, 1], [12, 1], [5, 1], [2, 1], [14, 1], [10, 1], [14, 1], [5, 1], [1, 1], [5, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [2, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 0], [7, 1], [2, 1], [16, 0], [16, 1], [2, 0], [1, 1], [2, 1], [4, 1], [1, 1], [9, 1], [1, 1], [1, 1], [9, 1], [1, 1], [10, 1], [2, 1], [10, 1], [22, 0], [15, 1], [10, 0], [6, 0], [4, 1], [4, 0], [6, 1], [7, 1], [4, 0], [4, 0], [17, 0], [1, 1], [1, 1], [1, 1], [1, 1], [9, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [4, 0], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [1, 1], [3, 1], [2, 1], [7, 1], [1, 1], [3, 1], [28, 1], [2, 1], [2, 1], [17, 1], [1, 
1], [1, 1], [4, 1], [1, 1], [3, 1], [2, 1], [1, 1], [1, 1], [1, 1], [3, 1], [1, 1]]\n" + ] + } + ], + "source": [ + "#cargando conjunto de datos\n", + "archivoF19Early=open(\"data/F19/Test/early.csv\")\n", + "\n", + "archivoF19Early.readline()\n", + "\n", + "datos_x_test=[]\n", + "datos_y_test=[]\n", + "\n", + "\n", + "for linea in archivoF19Early:\n", + " \n", + " linea=linea.strip(\"\\n\").split(\",\")\n", + " \n", + " subjectID=linea[0]\n", + " assignmentID=linea[1]\n", + " problemID=linea[2]\n", + " attempts=int(linea[3])\n", + " correctEventually=linea[4]\n", + " label=linea[5]\n", + " \n", + " if(label==\"True\"):\n", + " label=1\n", + " else:\n", + " label=0\n", + " \n", + " if(correctEventually==\"True\"):\n", + " correctEventually=1\n", + " else:\n", + " correctEventually=0\n", + " \n", + " fila=[attempts,correctEventually]\n", + " \n", + " datos_x_test.append(fila)\n", + " \n", + " datos_y_test.append(label)\n", + "\n", + "archivoF19Early.close()\n", + "print(datos_x)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "c8363b0a", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Oculus\\miniconda3\\lib\\site-packages\\sklearn\\base.py:561: FutureWarning: Arrays of bytes/strings is being converted to decimal numbers if dtype='numeric'. This behavior is deprecated in 0.24 and will be removed in 1.1 (renaming of 0.26). 
Please convert your data to numeric values explicitly instead.\n", + " X = check_array(X, **check_params)\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Unable to convert array of bytes/strings into decimal numbers with dtype='numeric'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)\u001b[0m\n\u001b[0;32m 778\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 779\u001b[1;33m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 780\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'True'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_14864/561092016.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m 
\u001b[0mprediction_y\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mregresionLogistica\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdatos_x_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\sklearn\\linear_model\\_base.py\u001b[0m in \u001b[0;36mpredict\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 423\u001b[0m \u001b[0mPredicted\u001b[0m \u001b[1;32mclass\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0mper\u001b[0m \u001b[0msample\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 424\u001b[0m \"\"\"\n\u001b[1;32m--> 425\u001b[1;33m \u001b[0mscores\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdecision_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 426\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 427\u001b[0m \u001b[0mindices\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mscores\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\sklearn\\linear_model\\_base.py\u001b[0m in \u001b[0;36mdecision_function\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 405\u001b[0m \u001b[0mcheck_is_fitted\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 406\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 407\u001b[1;33m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"csr\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreset\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 408\u001b[0m \u001b[0mscores\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msafe_sparse_dot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mT\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdense_output\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mintercept_\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 409\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mscores\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mscores\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m1\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mscores\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\sklearn\\base.py\u001b[0m in \u001b[0;36m_validate_data\u001b[1;34m(self, X, y, reset, validate_separately, **check_params)\u001b[0m\n\u001b[0;32m 559\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Validation should be done on X, y or both.\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 560\u001b[0m 
\u001b[1;32melif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mno_val_X\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mno_val_y\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 561\u001b[1;33m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 562\u001b[0m \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 563\u001b[0m \u001b[1;32melif\u001b[0m \u001b[0mno_val_X\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mno_val_y\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)\u001b[0m\n\u001b[0;32m 779\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 780\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 781\u001b[1;33m raise ValueError(\n\u001b[0m\u001b[0;32m 782\u001b[0m \u001b[1;34m\"Unable to convert array of bytes/strings \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 783\u001b[0m \u001b[1;34m\"into decimal numbers with 
dtype='numeric'\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mValueError\u001b[0m: Unable to convert array of bytes/strings into decimal numbers with dtype='numeric'" + ] + } + ], + "source": [ + "prediction_y=regresionLogistica.predict(datos_x_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "6dbe6f23", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1018 2543]\n", + " [ 0 0]]\n" + ] + } + ], + "source": [ + "#verificando la matriz de confusión\n", + "from sklearn.metrics import confusion_matrix\n", + "matriz=confusion_matrix(datos_y_test,prediction_y)\n", + "print(matriz)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6a9d552f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/naive_model.ipynb b/naive_model.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..f31f66b7ebc9c1abb88020a0aec7be0aa97294d8 --- /dev/null +++ b/naive_model.ipynb @@ -0,0 +1,1056 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from ProgSnap2 import ProgSnap2Dataset\n", + "from ProgSnap2 import PS2\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "import numpy as np\n", + "import os\n", + "from os import path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": 
[ + "semester = 'S19'\n", + "BASE_PATH = os.path.join('data', 'Release', semester)\n", + "TRAIN_PATH = os.path.join(BASE_PATH, 'Train')\n", + "TEST_PATH = os.path.join(BASE_PATH, 'Test')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "train_ps2 = ProgSnap2Dataset(os.path.join(TRAIN_PATH, 'Data')) " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Attempts</th>\n", + " <th>CorrectEventually</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " 
<td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Attempts \\\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 439.0 1 1 \n", + "1 04c32d4d95425f73b3a1d6502aed4d48 439.0 3 2 \n", + "2 04c32d4d95425f73b3a1d6502aed4d48 439.0 5 3 \n", + "3 04c32d4d95425f73b3a1d6502aed4d48 439.0 12 1 \n", + "4 04c32d4d95425f73b3a1d6502aed4d48 439.0 13 2 \n", + "\n", + " CorrectEventually Label \n", + "0 True True \n", + "1 True True \n", + "2 True True \n", + "3 True True \n", + "4 True True " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The early dataset will help us to feature extraction,\n", + "# but we're not actually predicting anything here\n", + "# Note: we could still use this for model training if desired.\n", + "early_train = pd.read_csv(os.path.join(TRAIN_PATH, 'early.csv'))\n", + "early_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>41</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " 
<td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>43</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>44</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>46</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>49</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Label\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 494.0 41 False\n", + "1 04c32d4d95425f73b3a1d6502aed4d48 494.0 43 True\n", + "2 04c32d4d95425f73b3a1d6502aed4d48 494.0 44 True\n", + "3 04c32d4d95425f73b3a1d6502aed4d48 494.0 46 True\n", + "4 04c32d4d95425f73b3a1d6502aed4d48 494.0 49 True" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# The late dataset contains the problems that we're actually predicting for.\n", + "# The training portion of it includes labels.\n", + "late_train = pd.read_csv(os.path.join(TRAIN_PATH, 'late.csv'))\n", + "late_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_base = late_train.copy().drop('Label', axis=1)\n", + "y_train = late_train['Label'].values" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "problem_encoder = OneHotEncoder().fit(X_train_base[PS2.ProblemID].values.reshape(-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., ..., 0., 0., 0.],\n", + " [0., 1., 0., ..., 0., 0., 0.],\n", + " [0., 0., 1., ..., 0., 0., 
0.],\n", + " ...,\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 1., 0.],\n", + " [0., 0., 0., ..., 0., 0., 1.]])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "problem_encoder.transform(X_train_base[PS2.ProblemID].values.reshape(-1, 1)).toarray()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Task 1\n", + "\n", + "In this task, we do per-problem prediction, extracting features from performance on the 30 early problems for a given student to predict performance on each of 20 later problems. Our model should, in effect, learn the relationship between the knowledge practiced in these problems (though our naive example here won't get that far)." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_instance_features(instance, early_df):\n", + " instance = instance.copy()\n", + " subject_id = instance[PS2.SubjectID]\n", + " early_problems = early_df[early_df[PS2.SubjectID] == subject_id]\n", + " # Extract very naive features about the student\n", + " # (without respect to the problem being predicted)\n", + " # Number of early problems attempted\n", + " instance['ProblemsAttempted'] = early_problems.shape[0]\n", + " # Percentage of early problems gotten correct eventually\n", + " instance['PercCorrectEventually'] = np.mean(early_problems['CorrectEventually'])\n", + " # Median attempts made on early problems\n", + " instance['MedAttempts'] = np.median(early_problems['Attempts'])\n", + " # Max attempts made on early problems\n", + " instance['MaxAttempts'] = np.max(early_problems['Attempts'])\n", + " # Percentage of problems gotten correct on the first try\n", + " instance['PercCorrectFirstTry'] = np.mean(early_problems['Attempts'] == 1)\n", + " #instance = instance.drop('AssignmentID')\n", + " #instance = instance.drop('ProblemID')\n", + " instance = instance.drop('SubjectID')\n", + " 
return instance" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "SubjectID 04c32d4d95425f73b3a1d6502aed4d48\n", + "AssignmentID 494.0\n", + "ProblemID 41\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train_base.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "AssignmentID 494.0\n", + "ProblemID 41\n", + "ProblemsAttempted 30\n", + "PercCorrectEventually 1.0\n", + "MedAttempts 6.5\n", + "MaxAttempts 45\n", + "PercCorrectFirstTry 0.166667\n", + "Name: 0, dtype: object" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "extract_instance_features(X_train_base.iloc[0], early_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_features(X, early_df, scaler, is_train):\n", + " # First extract performance features for each row\n", + " features = X.apply(lambda instance: extract_instance_features(instance, early_df), axis=1)\n", + " # Then one-hot encode the problem_id and append it\n", + " problem_ids = problem_encoder.transform(features[PS2.ProblemID].values.reshape(-1, 1)).toarray()\n", + " # Then get rid of nominal features\n", + " features.drop([PS2.AssignmentID, PS2.ProblemID], axis=1, inplace=True)\n", + " # Then scale the continuous features, fitting the scaler if this is training\n", + " if is_train:\n", + " scaler.fit(features)\n", + " features = scaler.transform(features)\n", + " \n", + " # Return continuous and one-hot features together\n", + " return np.concatenate([features, problem_ids], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + 
"from sklearn.preprocessing import StandardScaler\n", + "scaler = StandardScaler()\n", + "X_train = extract_features(X_train_base, early_train, scaler, True)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4201, 25)\n" + ] + }, + { + "data": { + "text/plain": [ + "array([[ 0.51751812, 0.58371895, 1.76922077, 1.70602676, -0.89569333,\n", + " 1. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ,\n", + " 0. , 0. , 0. , 0. , 0. ]])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(X_train.shape)\n", + "X_train[:1,]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate the Training Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4201,)\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegressionCV\n", + "\n", + "model = LogisticRegressionCV()\n", + "model.fit(X_train, y_train) #entrenamos el modelo\n", + "train_predictions = model.predict(X_train) #testeamos el modelo\n", + "print(train_predictions.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'y_train' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_16304/2480459126.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;32mfrom\u001b[0m 
\u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmetrics\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mf1_score\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mclassification_report\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_predictions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'AUC: '\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mroc_auc_score\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_predictions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Macro F1: '\u001b[0m \u001b[1;33m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mf1_score\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtrain_predictions\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maverage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'macro'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mNameError\u001b[0m: name 'y_train' is not defined" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "from sklearn.metrics import roc_auc_score\n", + "from sklearn.metrics import f1_score\n", + "\n", + "print(classification_report(y_train, train_predictions))\n", + "print('AUC: ' + str(roc_auc_score(y_train, train_predictions)))\n", + "print('Macro F1: ' + str(f1_score(y_train, train_predictions, average='macro')))" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import plot_roc_curve\n", + "\n", + "plot_roc_curve(model, X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate the CV Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_validate\n", + "\n", + "model = LogisticRegressionCV()\n", + "cv_results = cross_validate(model, X_train, y_train, cv=10, scoring=['accuracy', 'f1_macro', 'roc_auc'])\n", + "print(f'Accuracy: {np.mean(cv_results[\"test_accuracy\"])}')\n", + "print(f'AUC: {np.mean(cv_results[\"test_roc_auc\"])}')\n", + "print(f'Macro F1: {np.mean(cv_results[\"test_f1_macro\"])}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the test data (S19)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "early_test = pd.read_csv(os.path.join(TEST_PATH, 'early.csv'))\n", + "late_test = pd.read_csv(os.path.join(TEST_PATH, 'late.csv'))\n", + "X_test = extract_features(late_test, early_test, scaler, False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When writing output to be judged, make _sure_ to **output probabilities** for the positive class, so that we can calculate AUC when judging!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegressionCV()\n", + "model.fit(X_train, y_train)\n", + "# Note the use of predict_proba (the [:,1] gets the positive probabilities)\n", + "predictions = model.predict_proba(X_test)[:,1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "predictions_df = late_test.copy()\n", + "predictions_df['Label'] = predictions\n", + "predictions_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We don't have the test labels - you have to submit to evaluate it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We use res/predictions.csv, since that's where the scoring program expects it\n", + "# but you can change this directory\n", + "path = os.path.join('data', 'Prediction', semester, 'basic_LR_task1', 'res')\n", + "os.makedirs(path, exist_ok=True)\n", + "predictions_df.to_csv(os.path.join(path, 'predictions.csv'), index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the test data for the _next_ semester (F19)\n", + "\n", + "Here we see if our model will still be useful next semester. Again, we don't have labels for this test dataset, so we'll have to submit to see how well we did.\n", + "\n", + "Later in the year, there will be _training_ data released for F19, which will be tested on a different track (not cross-semester)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "F19_TEST_PATH = os.path.join('data', 'Release', 'F19', 'Test')" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "early_test = pd.read_csv(os.path.join(F19_TEST_PATH, 'early.csv'))\n", + "late_test = pd.read_csv(os.path.join(F19_TEST_PATH, 'late.csv'))\n", + "X_test = extract_features(late_test, early_test, scaler, False)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2365, 25)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogisticRegressionCV()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict_proba(X_test)[:,1]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.81344772, 0.74948573, 0.87591233, ..., 0.40992631, 0.37947527,\n", + " 0.35654374])" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "predictions_df = late_test.copy()\n", + "predictions_df['Label'] = predictions\n", + "predictions_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We don't have the test labels - you have to submit to evaluate it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "path = os.path.join('data', 'Prediction', 'F19', 'basic_LR_task1', 'res')\n", + 
"os.makedirs(path, exist_ok=True)\n", + "predictions_df.to_csv(os.path.join(path, 'predictions.csv'), index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Task 2\n", + "\n", + "In this task, we are predicting final performance, extracting features from performance on the 30 early problems for a given student to predict the final exam grade." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "train_base = train_ps2.load_link_table('Subject')\n", + "train_base" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_base = late_train.groupby('SubjectID')['Label'].sum().to_frame('X-Grade').reset_index()\n", + "train_base['X-Grade'] = train_base['X-Grade'] / 20\n", + "train_base" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_train = train_base['X-Grade']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The base of our X dataset is just a set of SubjectIDs\n", + "X_train_base = train_base.drop('X-Grade', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "X_train_unscaled = X_train_base.apply(lambda row: extract_instance_features(row, early_train), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "X_train_unscaled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "scaler = StandardScaler()\n", + "X_train = scaler.fit_transform(X_train_unscaled)\n", + "X_train" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_unscaled.join(train_base).drop(PS2.SubjectID, axis=1).corr()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate the Training Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LassoCV\n", + "from sklearn.linear_model import LinearRegression\n", + "\n", + "model = LassoCV()\n", + "model.fit(X_train, y_train)\n", + "train_predictions = model.predict(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.scatter(y_train, train_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_squared_error\n", + "from sklearn.metrics import r2_score\n", + "\n", + "print('Training MSE: ' + str(mean_squared_error(y_train, train_predictions)))\n", + "print('Training R2: ' + str(r2_score(y_train, train_predictions)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate the CV Performance of the Model\n", + "\n", + "The naive model performs quite poorly evaluated by CV on the training data (R2 of 0.083). Can you improve on it?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_validate\n", + "\n", + "model = LassoCV()\n", + "cv_results = cross_validate(model, X_train, y_train, cv=10, scoring=['neg_mean_squared_error', 'r2'])\n", + "print(f'MSE: {-np.mean(cv_results[\"test_neg_mean_squared_error\"])}')\n", + "print(f'R2: {np.mean(cv_results[\"test_r2\"])}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Predict on the test data (S19)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": false + }, + "outputs": [], + "source": [ + "test_ps2 = ProgSnap2Dataset(os.path.join(TEST_PATH, 'Data'))\n", + "early_test = pd.read_csv(os.path.join(TEST_PATH, 'early.csv'))\n", + "late_test = pd.read_csv(os.path.join(TEST_PATH, 'late.csv'))\n", + "\n", + "# The Subject.csv link table is just the SubjectIDs to be predicted\n", + "X_test_base = test_ps2.load_link_table('Subject')\n", + "X_test_unscaled = X_test_base.apply(lambda row: extract_instance_features(row, early_test), axis=1)\n", + "X_test_unscaled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "X_test = scaler.transform(X_test_unscaled)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "When writing output to be judged, make _sure_ to **output the predicted `X-Grade` value** for each student. Task 2 is a regression task, so (unlike Task 1) there are no class probabilities to report." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = LassoCV()\n", + "model.fit(X_train, y_train)\n", + "predictions = model.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "predictions_df = X_test_base.copy()\n", + "predictions_df['X-Grade'] = predictions\n", + "predictions_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We don't have the test labels - you have to submit to evaluate it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# We use res/predictions.csv, since that's where the scoring program expects it\n", + "# but you can change this directory\n", + "path = os.path.join('data', 'Prediction', semester, 'basic_LR_task2', 'res')\n", + "os.makedirs(path, exist_ok=True)\n", + "predictions_df.to_csv(os.path.join(path, 'predictions.csv'), index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/preprocess.ipynb b/preprocess.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0181048a9ef1073efa9efc3d8485a856d4381345 --- /dev/null +++ b/preprocess.ipynb @@ -0,0 +1,2921 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as 
pd\n", + "import numpy as np\n", + "from ProgSnap2 import ProgSnap2Dataset\n", + "from ProgSnap2 import PS2\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "import numpy as np\n", + "import os\n", + "from os import path" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading and Cleaning Data\n", + "We load our data using the ProgSnap2Dataset class. This comes with both a main event table and a LinkTable giving students final exam data." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "semester = 'S19'\n", + "PATH = \"data/CodeWorkout/\" + semester+\"/Train\"+\"/Data\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "data = ProgSnap2Dataset(PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"['Attempt'] not found in axis\"", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_12600/3379611682.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Drop the attempt column, since it's calculated incorrectly\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop_main_table_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Attempt'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;32m~\\Documents\\Ing. 
Juan Vera\\retoIA\\proyectos\\CSEDMDataChallenge\\ProgSnap2.py\u001b[0m in \u001b[0;36mdrop_main_table_column\u001b[1;34m(self, column)\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mdrop_main_table_column\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_main_table\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 132\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmain_table\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 133\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0msave_subset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmain_table_filterer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy_link_tables\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 309\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstacklevel\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 310\u001b[0m )\n\u001b[1;32m--> 311\u001b[1;33m \u001b[1;32mreturn\u001b[0m 
\u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 312\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m 4904\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;36m1.0\u001b[0m \u001b[1;36m0.8\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4905\u001b[0m \"\"\"\n\u001b[1;32m-> 4906\u001b[1;33m return super().drop(\n\u001b[0m\u001b[0;32m 4907\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4908\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[0;32m 4148\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4149\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4150\u001b[1;33m 
\u001b[0mobj\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4151\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4152\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[1;34m(self, labels, axis, level, errors)\u001b[0m\n\u001b[0;32m 4183\u001b[0m \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4184\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4185\u001b[1;33m \u001b[0mnew_axis\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4186\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4187\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mdrop\u001b[1;34m(self, labels, errors)\u001b[0m\n\u001b[0;32m 6015\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0many\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6016\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6017\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"{labels[mask]} not found in axis\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 6018\u001b[0m \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6019\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mKeyError\u001b[0m: \"['Attempt'] not found in axis\"" + ] + } + ], + "source": [ + "# Drop the attempt column, since it's calculated incorrectly\n", + "data.drop_main_table_column('Attempt')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + 
"outputs": [], + "source": [ + "main_table = data.get_main_table()\n", + "student_table = data.load_link_table('Subject')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Order</th>\n", + " <th>SubjectID</th>\n", + " <th>ToolInstances</th>\n", + " <th>ServerTimestamp</th>\n", + " <th>ServerTimezone</th>\n", + " <th>CourseID</th>\n", + " <th>CourseSectionID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>CodeStateID</th>\n", + " <th>IsEventOrderingConsistent</th>\n", + " <th>EventType</th>\n", + " <th>Score</th>\n", + " <th>Compile.Result</th>\n", + " <th>CompileMessageType</th>\n", + " <th>CompileMessageData</th>\n", + " <th>EventID</th>\n", + " <th>ParentEventID</th>\n", + " <th>SourceLocation</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>119441</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>Java 8; CodeWorkout</td>\n", + " <td>2019-02-23T22:44:51</td>\n", + " <td>UTC</td>\n", + " <td>CS 1</td>\n", + " <td>2</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>4531059d41ba170482b4e43d4d94d857c0e45dbb</td>\n", + " <td>True</td>\n", + " <td>Run.Program</td>\n", + " <td>1.0000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1-68976</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>119442</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>Java 8; CodeWorkout</td>\n", + " 
<td>2019-02-23T22:44:51</td>\n", + " <td>UTC</td>\n", + " <td>CS 1</td>\n", + " <td>2</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>4531059d41ba170482b4e43d4d94d857c0e45dbb</td>\n", + " <td>True</td>\n", + " <td>Compile</td>\n", + " <td>NaN</td>\n", + " <td>Success</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>1-68977</td>\n", + " <td>1-68976</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>134115</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>Java 8; CodeWorkout</td>\n", + " <td>2019-02-23T22:49:34</td>\n", + " <td>0</td>\n", + " <td>CS 1</td>\n", + " <td>2</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>69089e4182ecddd4b48c39c86c8ae2edb337b07c</td>\n", + " <td>True</td>\n", + " <td>Run.Program</td>\n", + " <td>0.8125</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>3-67872</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>134116</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>Java 8; CodeWorkout</td>\n", + " <td>2019-02-23T22:49:34</td>\n", + " <td>0</td>\n", + " <td>CS 1</td>\n", + " <td>2</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>69089e4182ecddd4b48c39c86c8ae2edb337b07c</td>\n", + " <td>True</td>\n", + " <td>Compile</td>\n", + " <td>NaN</td>\n", + " <td>Success</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>3-67873</td>\n", + " <td>3-67872</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>134117</td>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>Java 8; CodeWorkout</td>\n", + " <td>2019-02-23T22:50:47</td>\n", + " <td>0</td>\n", + " <td>CS 1</td>\n", + " <td>2</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>d565ccacd2e63b9414077ff2b4888622e37b80c6</td>\n", + " <td>True</td>\n", + " <td>Run.Program</td>\n", + " <td>1.0000</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " 
<td>NaN</td>\n", + " <td>3-67874</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Order SubjectID ToolInstances \\\n", + "0 119441 04c32d4d95425f73b3a1d6502aed4d48 Java 8; CodeWorkout \n", + "1 119442 04c32d4d95425f73b3a1d6502aed4d48 Java 8; CodeWorkout \n", + "2 134115 04c32d4d95425f73b3a1d6502aed4d48 Java 8; CodeWorkout \n", + "3 134116 04c32d4d95425f73b3a1d6502aed4d48 Java 8; CodeWorkout \n", + "4 134117 04c32d4d95425f73b3a1d6502aed4d48 Java 8; CodeWorkout \n", + "\n", + " ServerTimestamp ServerTimezone CourseID CourseSectionID AssignmentID \\\n", + "0 2019-02-23T22:44:51 UTC CS 1 2 439.0 \n", + "1 2019-02-23T22:44:51 UTC CS 1 2 439.0 \n", + "2 2019-02-23T22:49:34 0 CS 1 2 439.0 \n", + "3 2019-02-23T22:49:34 0 CS 1 2 439.0 \n", + "4 2019-02-23T22:50:47 0 CS 1 2 439.0 \n", + "\n", + " ProblemID CodeStateID \\\n", + "0 1 4531059d41ba170482b4e43d4d94d857c0e45dbb \n", + "1 1 4531059d41ba170482b4e43d4d94d857c0e45dbb \n", + "2 3 69089e4182ecddd4b48c39c86c8ae2edb337b07c \n", + "3 3 69089e4182ecddd4b48c39c86c8ae2edb337b07c \n", + "4 3 d565ccacd2e63b9414077ff2b4888622e37b80c6 \n", + "\n", + " IsEventOrderingConsistent EventType Score Compile.Result \\\n", + "0 True Run.Program 1.0000 NaN \n", + "1 True Compile NaN Success \n", + "2 True Run.Program 0.8125 NaN \n", + "3 True Compile NaN Success \n", + "4 True Run.Program 1.0000 NaN \n", + "\n", + " CompileMessageType CompileMessageData EventID ParentEventID SourceLocation \n", + "0 NaN NaN 1-68976 NaN NaN \n", + "1 NaN NaN 1-68977 1-68976 NaN \n", + "2 NaN NaN 3-67872 NaN NaN \n", + "3 NaN NaN 3-67873 3-67872 NaN \n", + "4 NaN NaN 3-67874 NaN NaN " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "main_table.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fall 2019 Preprocessing\n", + "\n", + "There were some differences between F19 and 
S19:\n", + "* In F19 there was an additional assignment (between Assignment 4 and 5), which only ~70% of students completed, likely additional optional practice. We will not use this assignment for prediction, since it is abnormal and not in S19. Since it comes in between the two assignments we are using for prediction, we simply remove it.\n", + "* In F19 the AssignmentIDs were renamed, so we will update their names\n", + "* In F19 2 ProblemIDs were renamed (though the solutions were unchanged), so we will update their names" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n", + "50\n" + ] + } + ], + "source": [ + "print(len(main_table[PS2.AssignmentID].unique()))\n", + "print(len(main_table[PS2.ProblemID].unique()))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# This assignment has no analogue, but we use 500 to put it between the other 2\n", + "NEW_F19_ASSIGNMENT = 500" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "if semester == 'F19':\n", + " assignment_map = {\n", + " 597: 439,\n", + " 600: 487,\n", + " 609: 492,\n", + " 615: 494,\n", + " 622: NEW_F19_ASSIGNMENT,\n", + " 631: 502,\n", + " }\n", + " print(np.mean(main_table[PS2.AssignmentID].isin(assignment_map)))\n", + " main_table[PS2.AssignmentID] = main_table[PS2.AssignmentID].map(assignment_map)\n", + " \n", + " # Two problems were renamed but are equivalent\n", + " problem_map = {problem_id: problem_id for problem_id in main_table[PS2.ProblemID].unique()}\n", + " problem_map[736] = 45\n", + " problem_map[737] = 48\n", + " print(np.mean(main_table[PS2.ProblemID].isin(problem_map)))\n", + " main_table[PS2.ProblemID] = main_table[PS2.ProblemID].map(problem_map, na_action='ignore')\n", + " \n", + " # Overwrite the main table so this is the one 
that's copied\n", + " data.set_main_table(main_table)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5\n", + "50\n" + ] + } + ], + "source": [ + "print(len(main_table[PS2.AssignmentID].unique()))\n", + "print(len(main_table[PS2.ProblemID].unique()))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 1\n", + "2 3\n", + "3 3\n", + "4 3\n", + " ... \n", + "134503 71\n", + "134504 112\n", + "134505 112\n", + "134506 118\n", + "134507 118\n", + "Name: ProblemID, Length: 134508, dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "main_table[PS2.ProblemID]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filtering Students\n", + "\n", + "Here we remove students who did not take the final exam, since we cannot use these for Task 2 (final exam score prediction). While this does somewhat bias the dataset for Task 1, it also ensures a consistent set of training/testing students for both tasks.\n", + "\n", + "We can also see that few students are actually removed this way (381 -> 348 for S19)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "247\n" + ] + }, + { + "data": { + "text/plain": [ + "246" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get the SubjectIDs where the final grade is non-0\n", + "# A 0 grade indicates the student did not take the final\n", + "print(len(student_table.index))\n", + "subject_ids = set(student_table[student_table['X-Grade'] != 0][PS2.SubjectID].unique())\n", + "subject_ids = subject_ids.intersection(set(student_table['SubjectID'].unique()))\n", + "len(subject_ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### EDA\n", + "\n", + "We want to confirm that our selected students have a good and well-distributed number of attempts at all the problems in the dataset, and that most problems were well-attempted. The stats and figures below suggest that this is the case: most problems are attempted by ~300/350 students, and most students complete ~40/50 problems." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 246.000000\n", + "mean 44.077236\n", + "std 8.137475\n", + "min 13.000000\n", + "25% 40.000000\n", + "50% 49.500000\n", + "75% 50.000000\n", + "max 50.000000\n", + "dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# First we check how many problems each student attempted\n", + "main_table_filtered = main_table[main_table[PS2.SubjectID].isin(subject_ids)]\n", + "problems_per_student = main_table_filtered.groupby(by=['SubjectID']).apply(lambda rows: len(rows[PS2.ProblemID].unique()))\n", + "# 75% of students attempted at least 40 problems, so that's good\n", + "problems_per_student.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Only 3 attempted fewer than 10 problems\n", + "sum(problems_per_student < 10)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 50.000000\n", + "mean 216.860000\n", + "std 9.856128\n", + "min 195.000000\n", + "25% 209.000000\n", + "50% 216.500000\n", + "75% 226.000000\n", + "max 233.000000\n", + "dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Next we see how many students attempted each problem\n", + "students_per_problem = main_table_filtered.groupby(by=['AssignmentID', 'ProblemID']).apply(lambda rows: len(rows[PS2.SubjectID].unique()))\n", + "students_per_problem.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, 
+ "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<BarContainer object of 50 artists>" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAANOUlEQVR4nO3df6zd9V3H8eeLgmi2JYIUJICWmWoGxnVJgyTsDzZ01LkIRjElcekfmPoHRJawKOwf1KTJ/nDTmIhJFUITN7CGIWQxuqZuwSVm7KIolII0gFBb2040w38wwNs/7rfhcHsv98c5p/fe930+kuac8znfc87nk6bP8+Vzzz2kqpAk9XLOak9AkjR5xl2SGjLuktSQcZekhoy7JDV07mpPAOCiiy6qLVu2rPY0JGldeeqpp75XVZvnu29NxH3Lli3MzMys9jQkaV1J8u8L3ee2jCQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDW0Jn5Dda37+XNuOWPswDt/tQoz0Wr48B9/6Yyxl37rrlV/Lun9GPcxGH1Ja5Vxl1bIM3qtZe65S1JDxl2SGnJb5ixaaI/evfu1wa0RddIi7nPjuFgYpx3TST7/ctemtWHuG4VvEjrbWsR9IZ4R9+KZtbR0reO+XL4ZSOrCuGteH/+VPzhj7NuPfH4VZrJynulrI/PTMpLUkGfuWvc6n6HvevK2M8b2XXP/KsxE641n7pLUkGfu0jq00Bm9Z/o6zbhLWlPe+c+fPGPsnB/9twXHNT/jrqnq8Kmbjcj/Alj/jLu0AbiNs/EYd62auWf1Z+uMvvOna6TTjLukdc29+PkZdy2Le+jS+mDcJU3Vau3rb/QzeuO+Ti33DHo9nXGvp7lKa5Vxb2a1wmiQtV5slDN64y5pWeZus2yEj06uxzcE4y5pIpa7t+5n7KfLuG9wbqdI75p7hr7Wz87fz6LfCpnkiiTfTHI4yaEkdw7jFyY5kOTF4fKCkcfck+RIkheS3DjNBUiSzrSUr/x9C7irqj4CXAvcnuQq4G7gYFVtBQ4Otxnu2wlcDewA7kuyaRqTlyTNb9Ftmao6Dhwfrr+R5DBwGXATcP1w2D7gW8DvDOMPV9WbwMtJjgDXAP846clL0npyNn8wu6z/WUeSLcDHgO8AlwzhP/0GcPFw2GXAayMPOzqMzX2u3UlmksycOnVqBVOXJC1kyXFP8kHgEeBzVfX99zt0nrE6Y6Bqb1Vtr6rtmzdvXuo0JElLsKRPyyQ5j9mwf6WqvjYMn0hyaVUdT3IpcHIYPwpcMfLwy4Fjk5qwJK0Va/nz70v5tEyA+4HDVfXlkbseB3YN13cBj42M70xyfpIrga3Ak5ObsiRpMUs5c78O+CzwTJKnh7EvAF8E9ie5DXgVuAWgqg4l2Q88x+wnbW6vqrcnPXFJWqvWwhn9Uj4t823m30cHuGGBx+wB9owxL0nSGJb1aRlJ0vpg3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDS0a9yQPJDmZ5NmRsd9N8h9Jnh7+fHrkvnuSHEnyQpIbpzVxSdLClnLm/iCwY57xP6yqbcOf
vwFIchWwE7h6eMx9STZNarKSpKVZNO5V9QTw+hKf7ybg4ap6s6peBo4A14wxP0nSCoyz535Hkn8dtm0uGMYuA14bOeboMHaGJLuTzCSZOXXq1BjTkCTNtdK4/ynwE8A24DjwpWE88xxb8z1BVe2tqu1VtX3z5s0rnIYkaT4rintVnaiqt6vqHeDPeHfr5ShwxcihlwPHxpuiJGm5VhT3JJeO3Pxl4PQnaR4HdiY5P8mVwFbgyfGmKElarnMXOyDJQ8D1wEVJjgL3Atcn2cbslssrwG8CVNWhJPuB54C3gNur6u2pzFyStKBF415Vt84zfP/7HL8H2DPOpCRJ4/E3VCWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ4vGPckDSU4meXZk7MIkB5K8OFxeMHLfPUmOJHkhyY3TmrgkaWFLOXN/ENgxZ+xu4GBVbQUODrdJchWwE7h6eMx9STZNbLaSpCVZNO5V9QTw+pzhm4B9w/V9wM0j4w9X1ZtV9TJwBLhmMlOVJC3VSvfcL6mq4wDD5cXD+GXAayPHHR3GzpBkd5KZJDOnTp1a4TQkSfOZ9A9UM89YzXdgVe2tqu1VtX3z5s0TnoYkbWwrjfuJJJcCDJcnh/GjwBUjx10OHFv59CRJK7HSuD8O7Bqu7wIeGxnfmeT8JFcCW4Enx5uiJGm5zl3sgCQPAdcDFyU5CtwLfBHYn+Q24FXgFoCqOpRkP/Ac8BZwe1W9PaW5S5IWsGjcq+rWBe66YYHj9wB7xpmUJGk8/oaqJDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDZ07zoOTvAK8AbwNvFVV25NcCPwlsAV4Bfi1qvrv8aYpSVqOSZy5f6KqtlXV9uH23cDBqtoKHBxuS5LOomlsy9wE7Buu7wNunsJrSJLex7hxL+AbSZ5KsnsYu6SqjgMMlxfP98Aku5PMJJk5derUmNOQJI0aa88duK6qjiW5GDiQ5PmlPrCq9gJ7AbZv315jzkOSNGKsM/eqOjZcngQeBa4BTiS5FGC4PDnuJCVJy7PiuCf5QJIPnb4OfAp4Fngc2DUctgt4bNxJSpKWZ5xtmUuAR5Ocfp6vVtXfJvkusD/JbcCrwC3jT1OStBwrjntVvQR8dJ7x/wJuGGdSkqTx+BuqktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJakh4y5JDRl3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ1ZNwlqSHjLkkNGXdJasi4S1JDxl2SGjLuktSQcZekhoy7JDVk3CWpIeMuSQ0Zd0lqyLhLUkPGXZIaMu6S1JBxl6SGjLskNWTcJamhqcU9yY4kLyQ5kuTuab2OJOlMU4l7kk3AnwC/AFwF3Jrkqmm8liTpTNM6c78GOFJVL1XV/wEPAzdN6bUkSXOkqib/pMmvAjuq6jeG258Ffraq7hg5Zjewe7j5U8ALE3jpi4DvTeB51hPXvDG45o1jOev+8araPN8d505uPu+Recbe8y5SVXuBvRN90WSmqrZP8jnXOte8MbjmjWNS657WtsxR4IqR25cDx6b0WpKkOaYV9+8CW5NcmeQHgJ3A41N6LUnSHFPZlqmqt5LcAfwdsAl4oKoOTeO15pjoNs864Zo3
Bte8cUxk3VP5gaokaXX5G6qS1JBxl6SGWsR9o3zVQZIHkpxM8uzI2IVJDiR5cbi8YDXnOGlJrkjyzSSHkxxKcucw3nbdSX4wyZNJ/mVY8+8N423XfFqSTUn+OcnXh9ut15zklSTPJHk6ycwwNpE1r/u4b7CvOngQ2DFn7G7gYFVtBQ4Otzt5C7irqj4CXAvcPvz9dl73m8Anq+qjwDZgR5Jr6b3m0+4EDo/c3ghr/kRVbRv5bPtE1rzu484G+qqDqnoCeH3O8E3AvuH6PuDmszmnaauq41X1T8P1N5j9h38Zjddds/53uHne8KdovGaAJJcDvwj8+chw6zUvYCJr7hD3y4DXRm4fHcY2ikuq6jjMhhC4eJXnMzVJtgAfA75D83UP2xNPAyeBA1XVfs3AHwG/DbwzMtZ9zQV8I8lTw1eywITWPK2vHzibFv2qA61/ST4IPAJ8rqq+n8z3195HVb0NbEvyw8CjSX56lac0VUk+A5ysqqeSXL/K0zmbrquqY0kuBg4keX5ST9zhzH2jf9XBiSSXAgyXJ1d5PhOX5Dxmw/6VqvraMNx+3QBV9T/At5j9WUvnNV8H/FKSV5jdWv1kkr+g95qpqmPD5UngUWa3mSey5g5x3+hfdfA4sGu4vgt4bBXnMnGZPUW/HzhcVV8euavtupNsHs7YSfJDwM8Bz9N4zVV1T1VdXlVbmP03/PdV9es0XnOSDyT50OnrwKeAZ5nQmlv8hmqSTzO7X3f6qw72rO6MpiPJQ8D1zH4l6AngXuCvgf3AjwGvArdU1dwfuq5bST4O/APwDO/uxX6B2X33lutO8jPM/iBtE7MnYPur6veT/AhN1zxq2Jb5fFV9pvOak3yY2bN1mN0i/2pV7ZnUmlvEXZL0Xh22ZSRJcxh3SWrIuEtSQ8Zdkhoy7pLUkHGXpIaMuyQ19P8F5dB6UlXNXQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 432x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Next we plot the number of attemptes on each problem (x) / assignment (color)\n", + "from matplotlib.cm import viridis\n", + "\n", + "assignment_ids = list(students_per_problem.keys().map(lambda x: x[0]))\n", + "assignment_ids = [sorted(assignment_ids).index(x) for x in assignment_ids]\n", + "colors = [viridis((float(i)-min(assignment_ids))/(max(assignment_ids)-min(assignment_ids))) for i in assignment_ids]\n", + "\n", + "# There's a slight drop-off by assignment, but overall they're well-attempted\n", + "plt.bar(range(0, len(students_per_problem)), students_per_problem, color=colors)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Creating the Class Label: Identifying Struggling Students\n", + "\n", + "In any student modeling task, our goal is to predict if a student will struggle on the next problem. 
For this dataset, it's not obvious how to define that struggle.\n", + "\n", + "We will define struggle as either:\n", + "1. Never getting a problem correct or \n", + "2. Taking more attempts at a problem than 75% of students before getting it correct.\n", + "\n", + "The code below justifies this decision by showing that most students get the problem correct _eventually_, and most students with more than the 75th percentile of attempts end up with many more attempts than their peers, indicating struggle." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Aggregate data by problem\n", + "\n", + "We first get all scored submissions (`Run.Program` events) and aggregate them by SubjectID and ProblemID, counting the number of attempts until a correct submission." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "runs = main_table_filtered[main_table_filtered[PS2.EventType] == 'Run.Program'].copy()\n", + "runs['TimeInt'] = pd.to_datetime(runs[PS2.ServerTimestamp]).apply(lambda x: x.value)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 10843.000000\n", + "mean 4.178179\n", + "std 5.739292\n", + "min 1.000000\n", + "25% 1.000000\n", + "50% 2.000000\n", + "75% 5.000000\n", + "max 93.000000\n", + "dtype: float64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_attempts(rows):\n", + " scores = rows[PS2.Score]\n", + " # If they scored 1, we return the first time they did so\n", + " if (scores.max() == 1):\n", + " # Argmax returns the first index of the highest score\n", + " # Since the array is 0-indexed, we return +1\n", + " return rows[PS2.Score].argmax() + 1\n", + " return len(rows.index)\n", + " \n", + "\n", + "scores = runs.groupby([PS2.SubjectID, PS2.AssignmentID, 
PS2.ProblemID]).apply(get_attempts)\n", + "scores.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 10843.000000\n", + "mean 4.318454\n", + "std 5.870877\n", + "min 1.000000\n", + "25% 1.000000\n", + "50% 2.000000\n", + "75% 5.000000\n", + "max 93.000000\n", + "dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Confirm that this is different than just the count of runs\n", + "runs.groupby([PS2.SubjectID, PS2.AssignmentID, PS2.ProblemID]).apply(lambda x: len(x.index)).describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(10843, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Attempts</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " 
<td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Attempts\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 439.0 1 1\n", + "1 04c32d4d95425f73b3a1d6502aed4d48 439.0 3 2\n", + "2 04c32d4d95425f73b3a1d6502aed4d48 439.0 5 3\n", + "3 04c32d4d95425f73b3a1d6502aed4d48 439.0 12 1\n", + "4 04c32d4d95425f73b3a1d6502aed4d48 439.0 13 2" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_attempts = scores.to_frame('Attempts').reset_index()\n", + "print(student_attempts.shape)\n", + "student_attempts.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Checking Eventual Success\n", + "\n", + "Most students get each problem correct eventually, suggesting that the number of attempts is a more meaningful indicator of success." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'runs' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_8216/4164252205.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mcorrect_eventually\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mruns\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgroupby\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mPS2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSubjectID\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mPS2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mAssignmentID\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mPS2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mProblemID\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mPS2\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mScore\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[1;31mNameError\u001b[0m: name 'runs' is not defined" + ] + } + ], + "source": [ + "correct_eventually = runs.groupby([PS2.SubjectID, PS2.AssignmentID, PS2.ProblemID])[PS2.Score].apply(lambda x: max(x) == 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9288019920686157" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.mean(correct_eventually)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr 
th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Attempts</th>\n", + " <th>CorrectEventually</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10838</th>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>502.0</td>\n", + " <td>64</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10839</th>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>502.0</td>\n", + " <td>70</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10840</th>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>502.0</td>\n", + " 
<td>71</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10841</th>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>502.0</td>\n", + " <td>112</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10842</th>\n", + " <td>ffb72475a81de0e95b910ffad039f5c2</td>\n", + " <td>502.0</td>\n", + " <td>118</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>10843 rows × 5 columns</p>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Attempts \\\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 439.0 1 1 \n", + "1 04c32d4d95425f73b3a1d6502aed4d48 439.0 3 2 \n", + "2 04c32d4d95425f73b3a1d6502aed4d48 439.0 5 3 \n", + "3 04c32d4d95425f73b3a1d6502aed4d48 439.0 12 1 \n", + "4 04c32d4d95425f73b3a1d6502aed4d48 439.0 13 2 \n", + "... ... ... ... ... \n", + "10838 ffb72475a81de0e95b910ffad039f5c2 502.0 64 2 \n", + "10839 ffb72475a81de0e95b910ffad039f5c2 502.0 70 2 \n", + "10840 ffb72475a81de0e95b910ffad039f5c2 502.0 71 1 \n", + "10841 ffb72475a81de0e95b910ffad039f5c2 502.0 112 1 \n", + "10842 ffb72475a81de0e95b910ffad039f5c2 502.0 118 1 \n", + "\n", + " CorrectEventually \n", + "0 True \n", + "1 True \n", + "2 True \n", + "3 True \n", + "4 True \n", + "... ... 
\n", + "10838 True \n", + "10839 True \n", + "10840 True \n", + "10841 True \n", + "10842 True \n", + "\n", + "[10843 rows x 5 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_scores = student_attempts.merge(correct_eventually.to_frame('CorrectEventually'), on=[PS2.SubjectID, PS2.AssignmentID, PS2.ProblemID])\n", + "student_scores" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True 10071\n", + "False 772\n", + "Name: CorrectEventually, dtype: int64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_scores.CorrectEventually.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Choosing a cutoff for \"struggling\"\n", + "We choose the 75th percentile of attempts as the cutoff for struggling, and visualize this to verify that it meaningfully separates the \"tail\" of more struggling students from the main body. The chart below shows this for all 50 problems, and suggests that this is a reasonable (though by no means objectively correct) cutoff." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ProblemID\n", + "1 3.0\n", + "3 4.0\n", + "5 4.0\n", + "12 2.0\n", + "13 11.0\n", + "dtype: float64" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "problem_attempt_75th = student_scores.groupby(PS2.ProblemID).apply(lambda x: x.Attempts.quantile(0.75))\n", + "problem_attempt_75th.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 1080x720 with 50 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib\n", + "matplotlib.rcParams['figure.figsize'] = [15, 10]\n", + "\n", + "problem_ids = list(student_scores.ProblemID.unique())\n", + "fig, axs = plt.subplots(5, 10)\n", + "for i in range(5):\n", + " for j in range(10):\n", + " problem_id = problem_ids[i * 10 + j]\n", + " attempts = student_scores[student_scores[PS2.ProblemID] == problem_id].Attempts\n", + " p75 = problem_attempt_75th[problem_id] + 1\n", + " axs[i, j].hist(attempts)\n", + " axs[i, j].vlines(p75, 0, 50)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True 8014\n", + "False 2829\n", + "Name: Label, dtype: int64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cutoffs = student_scores['ProblemID'].apply(lambda x: problem_attempt_75th[x])\n", + "student_scores['Label'] = np.logical_and(student_scores['Attempts'] <= cutoffs, student_scores['CorrectEventually'])\n", + "student_scores['Label'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "(array([ 1., 1., 6., 16., 17., 23., 34., 41., 55., 52.]),\n", + " array([0.04347826, 0.13913043, 0.23478261, 0.33043478, 0.42608696,\n", + " 0.52173913, 0.6173913 , 0.71304348, 0.80869565, 0.90434783,\n", + " 1. ]),\n", + " <BarContainer object of 10 artists>)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAD4CAYAAAA0JjXXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAANcElEQVR4nO3dbaykd1nH8d9lFwIKSmvPbjYt9aCpSGMs4IqNqAFqtRTj1gQMqLAhTTbGh2BiIisvNMY35Y0hRg3ZIGGNCjZS7MoqulmsaHjcaimtBVsRa8OmuxSUBxNNy+WLM+ha9nDu3TMz/57p55OczNz3zOxc+eecfPeemXOf6u4AAMv1daMHAIAnIgEGgAEEGAAGEGAAGECAAWCAXct8sksvvbTX19eX+ZQAMMwdd9zxme5eO9dtSw3w+vp6Tp48ucynBIBhqupfN7vNS9AAMIAAA8AAAgwAAwgwAAwgwAAwgAADwAACDAADCDAADCDAADDAUs+EBcDirB86NnqEr+lTN79s9AiPK46AAWAAAQaAAQQYAAYQYAAYQIABYAABBoABBBgABhBgABhAgAFgAAEGgAGcihKApXi8nyozWe7pMh0BA8AAAgwAAwgwAAww6T3gqvpUki8keTTJI929r6ouSfLHSdaTfCrJT3T35xYzJgCslvM5An5xdz+3u/fNtg8lOdHdVyY5MdsGACbYzkvQ+5McmV0/kuTGbU8DAE8QUwPcSf6qqu6oqoOzfXu6+1SSzC53L2JAAFhFU38P+IXd/emq2p3keFV9fOoTzIJ9MEmuuOKKCxgRAFbPpCPg7v707PJ0kncleUGSh6pqb5LMLk9v8tjD3b2vu/etra3NZ2oA2OG2DHBVfUNVPf0r15P8cJK7kxxNcmB2twNJblvUkACwaqa8BL0nybuq6iv3/6Pufk9VfSTJLVV1U5IHkrxicWMCwGrZMsDd/ckkV59j/8NJrl3EUACw6pwJCwAGEGAAGECAAWAAAQaAAQQYAAYQYAAYQIABYAABBoABBBgABhBgABhAgAFgAAEGgAEEGAAGEGAAGECAAWAAAQaAAQQYAAYQYAAYQIABYAABBoABBBgABhBgABhAgAFgAAEGgAEEGAAGEGAAGECAAWAAAQaAAXaNHgBgJ1g/dGz0CKyYyUfAVXVRVf1DVb17tn1JVR2vqvtmlxcvbkwAWC3n8xL065Lce9b2oSQnuvvKJCdm2wDABJMCXFWXJ3lZkrectXt/kiOz60eS3DjXyQBghU09An5Tkl9O8uWz9u3p7lNJMrvcfa4HVtXBqjpZVSfPnDmznVkBYGVsGeCq+tEkp7v7jgt5gu4+3N37unvf2trahfwTALBypnwK+oVJfqyqbkjylCTfWFV/kOShqtrb3aeqam+S04scFABWyZZHwN39K919eXevJ3llkvd2908nOZrkwOxuB5LctrApAWDFbOdEHDcnua6q7kty3WwbAJjgvE7E0d23J7l9dv3hJNfOfyQAWH1ORQkAAwgwAAwgwAAwgAADwAACDAADCDAADCDAADCAAAPAAAIMAAMIMAAMIMAAMIAAA8AAAgwAAwgwAAwgwAAwgAADwAAC
DAAD7Bo9AECSrB86NnoEWCpHwAAwgAADwAACDAADCDAADCDAADCAAAPAAAIMAAMIMAAMIMAAMIAAA8AAAgwAA2wZ4Kp6SlV9uKo+WlX3VNWvz/ZfUlXHq+q+2eXFix8XAFbDlCPg/0ryku6+Oslzk1xfVdckOZTkRHdfmeTEbBsAmGDLAPeGL842nzT76iT7kxyZ7T+S5MZFDAgAq2jSe8BVdVFV3ZnkdJLj3f2hJHu6+1SSzC53b/LYg1V1sqpOnjlzZk5jA8DONinA3f1odz83yeVJXlBV3zn1Cbr7cHfv6+59a2trFzgmAKyW8/oUdHf/e5Lbk1yf5KGq2psks8vT8x4OAFbVlE9Br1XVM2bXn5rkh5J8PMnRJAdmdzuQ5LYFzQgAK2fXhPvsTXKkqi7KRrBv6e53V9UHktxSVTcleSDJKxY4JwCslC0D3N13JXneOfY/nOTaRQwFAKvOmbAAYAABBoABBBgABhBgABhAgAFgAAEGgAEEGAAGEGAAGECAAWAAAQaAAQQYAAYQYAAYQIABYAABBoABBBgABhBgABhAgAFgAAEGgAEEGAAGEGAAGGDX6AGAxVs/dGz0CMBjOAIGgAEEGAAGEGAAGECAAWAAAQaAAQQYAAYQYAAYYMsAV9Uzq+qvq+reqrqnql43239JVR2vqvtmlxcvflwAWA1TjoAfSfJL3f2cJNck+bmquirJoSQnuvvKJCdm2wDABFsGuLtPdfffz65/Icm9SS5Lsj/JkdndjiS5cUEzAsDKOa/3gKtqPcnzknwoyZ7uPpVsRDrJ7k0ec7CqTlbVyTNnzmxzXABYDZMDXFVPS/LOJL/Y3Z+f+rjuPtzd+7p739ra2oXMCAArZ1KAq+pJ2YjvH3b3rbPdD1XV3tnte5OcXsyIALB6pnwKupL8XpJ7u/s3z7rpaJIDs+sHktw2//EAYDVN+XOEL0zy6iQfq6o7Z/vekOTmJLdU1U1JHkjyioVMCAAraMsAd/ffJalNbr52vuMAwBODM2EBwAACDAADCDAADCDAADCAAAPAAAIMAANM+T1gYAvrh46NHgHYYRwBA8AAAgwAAwgwAAzgPWAe97y/CqwiR8AAMIAAA8AAAgwAAwgwAAwgwAAwgAADwAACDAADCDAADCDAADCAAAPAAAIMAAMIMAAMIMAAMIAAA8AAAgwAAwgwAAwgwAAwgAADwABbBriq3lpVp6vq7rP2XVJVx6vqvtnlxYsdEwBWy5Qj4Lcluf4x+w4lOdHdVyY5MdsGACbaMsDd/b4kn33M7v1JjsyuH0ly43zHAoDVdqHvAe/p7lNJMrvcvdkdq+pgVZ2sqpNnzpy5wKcDgNWy8A9hdffh7t7X3fvW1tYW/XQAsCNcaIAfqqq9STK7PD2/kQBg9V1ogI8mOTC7fiDJbfMZBwCeGKb8GtLbk3wgybOr6sGquinJzUmuq6r7klw32wYAJtq11R26+1Wb3HTtnGcBgCcMZ8ICgAEEGAAGEGAAGECAAWAAAQaAAQQYAAYQYAAYQIABYAABBoABtjwTFqtt/dCx0SMAPCE5AgaAAQQYAAYQYAAYQIABYAABBoABBBgABhBgABhAgAFgAAEGgAEEGAAGEGAAGECAAWAAAQaAAQQYAAYQYAAYQIABYAABBoABBBgABtg1eoDtWD90bPQIAHBBtnUEXFXXV9Unqur+qjo0r6EAYNVdcICr6qIkv5PkpUmuSvKqqrpqXoMBwCrbzhHwC5Lc392f7O7/TvKOJPvnMxYArLbtvAd8WZJ/O2v7wSTf+9g7VdXBJAdnm1+sqk8kuTTJZ7bx3Hw1azp/1nS+rOf8WdM5qzfOfU2/ZbMbthPgOse+/qod3YeTHP5/D6w62d37tvHcPIY1nT9rOl/Wc/6s6fwtc0238xL0g0meedb25Uk+vb1xAOCJYTsB/kiSK6vqWVX15CSvTHJ0PmMBwGq74Jegu/uRqvr5JH+Z5KIkb+3ueyY+/PDWd+E8WdP5s6bz
ZT3nz5rO39LWtLq/6m1bAGDBnIoSAAYQYAAYYGEB3uo0lbXht2a331VVz1/ULKtiwpr+1Gwt76qq91fV1SPm3Emmnk61qr6nqh6tqpcvc76daMqaVtWLqurOqrqnqv5m2TPuNBN+9r+pqv6sqj46W9PXjphzp6iqt1bV6aq6e5Pbl9On7p77VzY+lPXPSb41yZOTfDTJVY+5zw1J/iIbv098TZIPLWKWVfmauKbfl+Ti2fWXWtPtr+lZ93tvkj9P8vLRcz+evyZ+nz4jyT8muWK2vXv03I/nr4lr+oYkb5xdX0vy2SRPHj374/UryQ8meX6Suze5fSl9WtQR8JTTVO5P8vu94YNJnlFVexc0zyrYck27+/3d/bnZ5gez8bvZbG7q6VR/Ick7k5xe5nA71JQ1/ckkt3b3A0nS3db1a5uypp3k6VVVSZ6WjQA/stwxd47ufl821mgzS+nTogJ8rtNUXnYB9+H/nO963ZSN/8GxuS3XtKouS/LjSd68xLl2sinfp9+e5OKqur2q7qiq1yxtup1pypr+dpLnZONkSB9L8rru/vJyxltJS+nTov4e8JTTVE46lSX/a/J6VdWLsxHg71/oRDvflDV9U5LXd/ejGwcXbGHKmu5K8t1Jrk3y1CQfqKoPdvc/LXq4HWrKmv5IkjuTvCTJtyU5XlV/292fX/Bsq2opfVpUgKecptKpLM/PpPWqqu9K8pYkL+3uh5c02041ZU33JXnHLL6XJrmhqh7p7j9dyoQ7z9Sf/c9095eSfKmq3pfk6iQCfG5T1vS1SW7ujTcw76+qf0nyHUk+vJwRV85S+rSol6CnnKbyaJLXzD5tdk2S/+juUwuaZxVsuaZVdUWSW5O82tHEJFuuaXc/q7vXu3s9yZ8k+Vnx/Zqm/OzfluQHqmpXVX19Nv6K2r1LnnMnmbKmD2TjFYVU1Z4kz07yyaVOuVqW0qeFHAH3JqeprKqfmd3+5mx8ovSGJPcn+c9s/A+OTUxc019N8s1Jfnd2xPZI+0spm5q4ppyHKWva3fdW1XuS3JXky0ne0t3n/HUQJn+f/kaSt1XVx7Lx8unru9ufKdxEVb09yYuSXFpVDyb5tSRPSpbbJ6eiBIABnAkLAAYQYAAYQIABYAABBoABBBgABhBgABhAgAFggP8BvjVhMXKKbbMAAAAASUVORK5CYII=\n", + "text/plain": [ + "<Figure size 576x288 with 1 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "matplotlib.rcParams['figure.figsize'] = [8, 4]\n", + "\n", + "# The percentage of struggling problems per student is well-distributed\n", + "plt.hist(student_scores.groupby(PS2.SubjectID)['Label'].mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Identifying Late Assignments\n", + "\n", + "Below we confirm that the 5 assignments are well-spaced out, with a consistent ordering accross students.\n", + "\n", + "The latter 2 assignments are what is predicted in Task 1 of the data challenge.\n", + "\n", + "We divide the data by assignment, rather than by problem, since within a given 
assignment students do problems in a variety of orders (see analysis at the end of this document)." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "problem_times = runs.groupby([PS2.AssignmentID, PS2.ProblemID])['TimeInt'].median()\n", + "start_time = min(problem_times)\n", + "problem_times = (problem_times - start_time) / 10**9\n", + "\n", + "problem_successes = runs[runs[PS2.Score] == 1].groupby([PS2.AssignmentID, PS2.ProblemID])['TimeInt'].median()\n", + "problem_successes = (problem_successes - start_time) / 10**9" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>MedTime</th>\n", + " <th>MedSuccess</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>7960.5</td>\n", + " <td>8063.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>8269.5</td>\n", + " <td>8954.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>439.0</td>\n", + " <td>5</td>\n", + " <td>6310.0</td>\n", + " <td>6585.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>3891.5</td>\n", + " <td>7772.5</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>0.0</td>\n", + " <td>1005.5</td>\n", + " </tr>\n", + " </tbody>\n", +
"</table>\n", + "</div>" + ], + "text/plain": [ + " AssignmentID ProblemID MedTime MedSuccess\n", + "0 439.0 1 7960.5 8063.5\n", + "1 439.0 3 8269.5 8954.5\n", + "2 439.0 5 6310.0 6585.5\n", + "3 439.0 12 3891.5 7772.5\n", + "4 439.0 13 0.0 1005.5" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "problem_stats = problem_times.to_frame('MedTime').join(problem_successes.to_frame('MedSuccess')).reset_index()\n", + "problem_stats.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<matplotlib.collections.PathCollection at 0x2e512ddc160>" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAEFCAYAAAA7XTSkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8/fFQqAAAACXBIWXMAAAsTAAALEwEAmpwYAAAUNUlEQVR4nO3dW4ycd3nH8e+zdlzY4Iq42URWnHhpa2gRKgkdRbSpUEkICgHh3ASBlsqqou4NoKAeUGAvKiqtxBXihlZaJWldMZCmHJoIRUBkQISKQ9YcCiHQRMEbrLjxEoNMulJx4qcX8xqvvbPr8Zzew3w/kjXz/md230evLP/8zjP//z8yE0mSNF5TZRcgSdIkMoAlSSqBASxJUgkMYEmSSmAAS5JUgu3jPNnll1+es7Oz4zylJEmlOXz48M8zc6bba2MN4NnZWZaXl8d5SkmSShMRK5u95kfQkiSVoKc74Ig4AvwKeBF4ITNbEbEL+DdgFjgCvCMzfzGaMiVJapaLuQN+Y2Zem5mt4vgu4FBm7gMOFceSJKkHg3wEvR84WDw/CNw2cDWSJE2IXgM4gS9FxOGImC/GrszMYwDF4xXdfjAi5iNiOSKWV1dXB69YkqQG6DWAb8jM1wFvAd4TEW/o9QSZuZSZrcxszcx0/Sa2JEnlardhdhampjqP7fbIT9nTl7Ay85ni8XhEfA64Hng2InZn5rGI2A0cH2GdkiSNRrsN8/OwttY5XlnpHAPMzY3stBe8A46ISyNi55nnwJuBHwIPAgeKtx0AHhhVkZIkjczCwtnwPWNtrTM+Qr3cAV8JfC4izrz/k5n5hYh4FLg/Iu4AngZuH12ZkiSNyNNPX9z4kFwwgDPzKeC1XcafA24aRVGSJI3NNdd0PnbuNj5CroQlSZpst956ceNDYgBLkibbQw9d3PiQGMCSpMlWUg/YAJYkTbZdu7qP2wOWJGlE2m04eXLj+I4dsLg40lMbwJKkybWwAKdObRzfuXOki3CAASxJmmSb9XlPnBj5qQ1gSdLk2qzPO+L+LxjAkqRJVtIcYDCAJUmTrKQ5wGAAS5ImWUlzgMEAliRNMnvAkiSVwB6wJEklsAcsSVIJ7AFLkjRm7TZMbRKD9oAlSRqBdhvm5+HFFze+Nj098nWgwQCWJE2ihQVYW9s4vm0bLC2NfB1oMIAlSZNosx7
v6dNjCV8wgCVJk6ikPYDXM4AlSZOlxD2A1zOAJUmTpcQ9gNczgCVJk6XEPYDXM4AlSZOlxPWf1zOAJUmTpcT1n9czgCVJk6XE9Z/XM4AlSZOlxPWf1zOAJUmTxR6wJEklWFzszPldb8xzgMEAliRNosytj8fAAJYkTZZuC3GcOtUZH6OeAzgitkXEdyPi88Xxroh4OCKeKB4vG12ZkiQNSQ2/hHUn8Pi647uAQ5m5DzhUHEuSVG0V2IgBegzgiNgDvBW4e93wfuBg8fwgcNtQK5MkadgqshED9H4H/DHgA8DpdWNXZuYxgOLxiuGWJknSkFVkIwboIYAj4m3A8cw83M8JImI+IpYjYnl1dbWfXyFJ0nBUZCMG6O0O+Abg7RFxBLgPuDEiPgE8GxG7AYrH491+ODOXMrOVma2ZmZkhlS1JA2i3YXYWpqY6j+122RVpXCqyCAf0EMCZ+cHM3JOZs8A7gS9n5ruBB4EDxdsOAA+MrEpJGpZ2G+bnYWWlM/dzZaVzbAhPhsVFmJ4+d2x6euz9XxhsHvBHgJsj4gng5uJYkqptYQHW1s4dW1sb+xxQlWRuDpaWYO9eiOg8Li2Nvf8LEDnG1T9arVYuLy+P7XyStMHUVPdVjyLg9OmN49IAIuJwZra6veZKWJImS0XmgEoGsKTJUaE5oJIBLGlyVGgOqGQAS5ocFZoDKhnAkiaH/V9ViAEsaTLY/1XFGMCSJoP9X1WMASxpMtj/VcUYwJKa7cy6z5stOmT/VyXZXnYBkjQyZ9Z9Pn/pyTNKWgNYAu+AJTVZt3WfzyhxDWAJvAOW1GSb9X0j4MiRsZYinc87YEnNVaG9X6XzGcCSmuvWWy9uXBojA1hScz300MWNS2NkAEtqrs16wJuNS2NkAEtqLnvAqjADWFJz2QNWhRnAkprLHrAqzACW1Fz2gFVhBrCk5nL/X1WYASypmdz/VxVnAEtqJvf/VcUZwJKayf1/VXEGsKRmcg6wKs4AltRMzgFWxRnAkprJOcCqOANYUjM5B1gVZwBLaibnAKviDGBJzeMcYNWAASypeZwDrBowgCU1j3OAVQMXDOCIeElEfDsivh8Rj0XEh4vxXRHxcEQ8UTxeNvpyJWkT7TbMzsLUVOdPN/Z/VSG93AH/H3BjZr4WuBa4JSJeD9wFHMrMfcCh4liSxq/dhvl5WFmBTHjxxY3vmZ62/6tKuWAAZ8fzxeElxZ8E9gMHi/GDwG2jKFCSLmhhAdbWNo5v2wYRsHcvLC3Z/1WlbO/lTRGxDTgM/D7w8cz8VkRcmZnHADLzWERcscnPzgPzANf48Y+kUdis53v6dOePVEE9fQkrM1/MzGuBPcD1EfGaXk+QmUuZ2crM1szMTJ9lShW1vu84O9s51vi57rNq6KK+BZ2ZvwS+CtwCPBsRuwGKx+PDLk6qtPP7jisrnWNDePwWFzs93vXs+arievkW9ExEvLx4/lLgTcCPgQeBA8XbDgAPjKhGqZq69R3X1jrjGq+5uU6Pd+9ee76qjcjMrd8Q8Ud0vmS1jU5g35+Z/xARvwPcD1wDPA3cnplbTrJrtVq5vLw8lMKl0k1Nde58zxdh31ESABFxODNb3V674JewMvO/gOu6jD8H3DR4eVJNXXNN52PnbuOSdAGuhCX1y/1mJQ3AAJb65X6zkgZgAEv9cr9ZSQMwgKV+OfdU0gAMYKlf9oAlDcAAlvplD1jSAAxgqV/2gCUNwACW+mUPWNIADGCpX/aAJQ3AAJb6ZQ9Y0gAMYKlf9oAlDcAAlvq1a1f3cXvAknpgAEv9aLfh5MmN4zt2uAetpJ4YwFI/Fhbg1KmN4zt3ugetpJ4YwFI/NuvznthyS2xJ+g0DWOqHc4AlDcgAlvrhHGBJAzKApX44B1jSgAxgqR/OAZY0IANY6oc9YEkDMoClftgDljQgA1jqhz1gSQMygKV+2AOWNCADWOqHPWBJAzKApX7YA5Y0IANY6oc9YEkDMoClftgDljQgA1i6GO0
2zM5CZvfX7QFL6tH2sguQaqPdhvl5WFvr/vr0tHsBS+qZd8BSrxYWNg/fvXthacm9gCX1zDtgqVeb9Xcj4MiRsZYiqf4ueAccEVdHxFci4vGIeCwi7izGd0XEwxHxRPF42ejLlUrk3F9JQ9TLR9AvAH+TmX8IvB54T0S8GrgLOJSZ+4BDxbHUXM79lTREFwzgzDyWmd8pnv8KeBy4CtgPHCzedhC4bUQ1StXg3F9JQ3RRX8KKiFngOuBbwJWZeQw6IQ1cscnPzEfEckQsr66uDliuVCLn/koaop4DOCJeBnwGeH9mnuz15zJzKTNbmdmamZnpp0Z1c2Y+6tRU57HdLrui5rMHLGmIegrgiLiETvi2M/OzxfCzEbG7eH03cHw0JWqDM/NRV1Y6C0KsrHSODeHRWlzszPVdz7m/kvrUy7egA7gHeDwzP7rupQeBA8XzA8ADwy9PXXWbj7q21hnX6MzNdeb67t3bmXrk3F9JA4jcbEm9M2+I+DPgEeAHwOli+EN0+sD3A9cATwO3Z+aJrX5Xq9XK5eXlQWvW1FT3pRAj4PTpjeOSpFJExOHMbHV77YILcWTm14HY5OWbBilMfdq1C557buO4vUhJqg2XoqybdhtOdvkO3I4d9iIlqUYM4LpZWIBTpzaO79xpL1KSasQArpvN5pye2LL9LkmqGAO4bpyLKkmNYADXjesRS1IjGMB143rEktQIBnDduB6xJDWCAVwXZ9Z+3mzhFHvAklQrF1yIQxVwZu3n85efPMP1iCWpdrwDroNuaz+f4XrEklRL3gHXwWb93Qg4cmSspUiShsM74Dpw7q8kNY4BXAfO/ZWkxjGA68C5v5LUOAZwHTj3V5IaxwCug127uo/bA5ak2jKAq879fyWpkQzgqnP/X0lqJAO46tz/V5IayQCuOucAS1IjGcBV5xxgSWokA7jqnAMsSY1kAFedc4AlqZEM4KqzByxJjWQAV93iYmfO73rOAZak2jOA6yBz62NJUu0YwFXXbSGOU6c645Kk2jKAq84vYUlSIxnAVedGDJLUSAZwlbkRgyQ1lgFcZW7EIEmNdcEAjoh7I+J4RPxw3diuiHg4Ip4oHi8bbZkTyo0YJKmxerkD/hfglvPG7gIOZeY+4FBxrGFzEQ5JaqwLBnBmfg04/5ZrP3CweH4QuG24ZQlwIwZJarB+e8BXZuYxgOLxiuGVpN9wIwZJaqyRfwkrIuYjYjkilldXV0d9umZxDrAkNVa/AfxsROwGKB6Pb/bGzFzKzFZmtmZmZvo8XRftNszOwtRU57HdHt7vrgp7wJLUWP0G8IPAgeL5AeCB4ZTTo3Yb5udhZaWzLvLKSue4aSG8uAjT0+eOTU87B1iSGqCXaUifAr4BvCoijkbEHcBHgJsj4gng5uJ4fBYWYG3t3LG1teatjzw3B0tLsHcvRHQel5acAyxJDRA5xp11Wq1WLi8vD/6Lpqa67wgUAadPD/77JUkagog4nJmtbq/VcyUse6OSpJqrZwA7P1aSVHP1DGDnx0qSaq6eAez8WElSzdUzgO0BS5Jqrp4BbA9YklRz9Qxge8CSpJqrZwDbA5Yk1Vw9A9gesCSp5uoZwPaAJUk1V88AtgcsSaq5egawPWBJUs3VM4DtAUuSaq6eAWwPWJJUc/UMYHvAkqSaq2cA2wOWJNVcPQPYHrAkqebqGcD2gCVJNVfPALYHLEmquXoGsD1gSVLN1TOA7QFLkmqungFsD1iSVHP1DGB7wJKkmqtnANsDliTVXD0D2B6wJKnm6hnA9oAlSTVXzwC2ByxJqrl6BrA9YElSzdUzgHft6j5uD1iSVBP1C+B2G06e3Di+YwcsLo6/HkmS+lC/AF5YgFOnNo7v3Alzc+OvR5KkPtQvgDfr8544Md46JEkawEABHBG3RMRPIuLJiLhrWEVtyTnAkqQG6DuAI2Ib8HHgLcCrgXdFxKuHVdimnAMsSWqAQe6ArweezMynMvPXwH3A/uGUtQXnAEuSGmC
QAL4K+Nm646PF2DkiYj4iliNieXV1dYDTFZwDLElqgEECOLqM5YaBzKXMbGVma2ZmZoDTFewBS5IaYJAAPgpcve54D/DMYOX0YHERpqfPHZuedg6wJKlWBgngR4F9EfGKiNgBvBN4cDhlbWFuDpaWYO9eiOg8Li05B1iSVCvb+/3BzHwhIt4LfBHYBtybmY8NrbKtzM0ZuJKkWus7gAEy8yHArx9LknSR6rcSliRJDWAAS5JUAgNYkqQSGMCSJJUgMjesnTG6k0WsAitD/JWXAz8f4u+rM6/FWV6Lc3k9zvJanOW1OGuU12JvZnZdhWqsATxsEbGcma2y66gCr8VZXotzeT3O8lqc5bU4q6xr4UfQkiSVwACWJKkEdQ/gpbILqBCvxVlei3N5Pc7yWpzltTirlGtR6x6wJEl1Vfc7YEmSaskAliSpBLUN4Ii4JSJ+EhFPRsRdZddTloi4NyKOR8QPy66lbBFxdUR8JSIej4jHIuLOsmsqS0S8JCK+HRHfL67Fh8uuqWwRsS0ivhsRny+7ljJFxJGI+EFEfC8ilsuup0wR8fKI+HRE/Lj4d+NPxnr+OvaAI2Ib8N/AzcBROnsTvyszf1RqYSWIiDcAzwP/mpmvKbueMkXEbmB3Zn4nInYCh4HbJvTvRQCXZubzEXEJ8HXgzsz8ZsmllSYi/hpoAb+dmW8ru56yRMQRoJWZE78IR0QcBB7JzLuLfe2nM/OX4zp/Xe+ArweezMynMvPXwH3A/pJrKkVmfg04UXYdVZCZxzLzO8XzXwGPA1eVW1U5suP54vCS4k/9/rc9JBGxB3grcHfZtagaIuK3gTcA9wBk5q/HGb5Q3wC+CvjZuuOjTOg/tOouImaB64BvlVxKaYqPXL8HHAcezsyJvRbAx4APAKdLrqMKEvhSRByOiPmyiynR7wKrwD8XrYm7I+LScRZQ1wCOLmMT+797nSsiXgZ8Bnh/Zp4su56yZOaLmXktsAe4PiImskUREW8Djmfm4bJrqYgbMvN1wFuA9xRtrEm0HXgd8E+ZeR3wv8BYv09U1wA+Cly97ngP8ExJtahCin7nZ4B2Zn627HqqoPhY7avALeVWUpobgLcXvc/7gBsj4hPlllSezHymeDwOfI5OS28SHQWOrvtk6NN0Anls6hrAjwL7IuIVReP8ncCDJdekkhVfPLoHeDwzP1p2PWWKiJmIeHnx/KXAm4Afl1pUSTLzg5m5JzNn6fxb8eXMfHfJZZUiIi4tvqBI8XHrm4GJnEGRmf8D/CwiXlUM3QSM9Qub28d5smHJzBci4r3AF4FtwL2Z+VjJZZUiIj4F/DlweUQcBf4+M+8pt6rS3AD8BfCDovcJ8KHMfKi8kkqzGzhYzBiYAu7PzImefiMArgQ+1/m/KtuBT2bmF8otqVTvA9rFjdxTwF+O8+S1nIYkSVLd1fUjaEmSas0AliSpBAawJEklMIAlSSqBASxJUhcXu9lNRLwjIn5UbIDyyQu+329BS5K00cVsdhMR+4D7gRsz8xcRcUWx2MmmvAOWJKmLbpvdRMTvRcQXirW0H4mIPyhe+ivg45n5i+JntwxfMIAlSboYS8D7MvOPgb8F/rEYfyXwyoj4z4j4ZkRccOnXWq6EJUnSuBUbvfwp8O/FamIAv1U8bgf20VmZcA/wSES8ZqstDg1gSZJ6MwX8sthl7HxHgW9m5ingpxHxEzqB/OhWv0ySJF1Asb3pTyPiduhsABMRry1e/g/gjcX45XQ+kn5qq99nAEuS1EWx2c03gFdFxNGIuAOYA+6IiO8DjwH7i7d/EXguIn4EfAX4u8x8bsvf7zQkSZLGzztgSZJKYABLklQCA1iSpBIYwJIklcAAliSpBAawJEklMIAlSSrB/wPnXfQVYcrXOAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 576x288 with 1 Axes>" + ] + }, + "metadata": { + 
"needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "y = range(0, len(problem_stats.index))\n", + "problem_stats.sort_values('MedTime', inplace=True)\n", + "plt.scatter(problem_stats['MedTime'], y, c='red')\n", + "# plt.scatter(problem_stats['MedSuccess'], y, c='blue')" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AssignmentID\n", + "439.0 1.551041e+18\n", + "487.0 1.551933e+18\n", + "492.0 1.553454e+18\n", + "494.0 1.555024e+18\n", + "502.0 1.557064e+18\n", + "dtype: float64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Assignments are, thankfully, already in order\n", + "assignment_stats = runs.groupby(PS2.AssignmentID).apply(lambda x: np.median(x['TimeInt']))\n", + "assignment_stats" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[439.0, 487.0, 492.0, 494.0, 502.0]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "valid_assignments = list(assignment_stats.sort_values().index)\n", + "if (semester == 'F19'):\n", + " valid_assignments.remove(NEW_F19_ASSIGNMENT)\n", + "valid_assignments" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[494.0, 502.0]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "late_assignments = valid_assignments[-2:]\n", + "late_assignments" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[439.0, 487.0, 492.0]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "early_assignments = valid_assignments[:-2]\n", + 
"early_assignments" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Attempts</th>\n", + " <th>CorrectEventually</th>\n", + " <th>Label</th>\n", + " <th>IsLateProblem</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " 
<td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>232</td>\n", + " <td>11</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>233</td>\n", + " <td>7</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>234</td>\n", + " <td>7</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>235</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>236</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>17</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>20</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>21</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>22</td>\n", + " <td>6</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " 
</tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>24</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>15</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>25</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>16</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>28</td>\n", + " <td>4</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>17</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>100</td>\n", + " <td>18</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>18</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>101</td>\n", + " <td>22</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>19</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>487.0</td>\n", + " <td>102</td>\n", + " <td>45</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>20</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>31</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>21</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>32</td>\n", + " <td>37</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>22</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>33</td>\n", + " <td>7</td>\n", + " <td>True</td>\n", + 
" <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>23</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>34</td>\n", + " <td>30</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>24</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>36</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>25</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>37</td>\n", + " <td>28</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>26</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>38</td>\n", + " <td>11</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>27</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>39</td>\n", + " <td>13</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>28</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>40</td>\n", + " <td>19</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>29</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>492.0</td>\n", + " <td>128</td>\n", + " <td>16</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>41</td>\n", + " <td>4</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " 
<td>43</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>44</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>46</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>49</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>35</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>67</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>36</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>104</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>37</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>106</td>\n", + " <td>8</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>38</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>107</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>39</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>108</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>40</th>\n", + " 
<td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>45</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>41</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>48</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>42</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>51</td>\n", + " <td>10</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>43</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>56</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>44</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>57</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>45</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>64</td>\n", + " <td>10</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>46</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>70</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>47</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>71</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>48</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>112</td>\n", + " <td>9</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + 
" <tr>\n", + " <th>49</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>502.0</td>\n", + " <td>118</td>\n", + " <td>5</td>\n", + " <td>True</td>\n", + " <td>False</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Attempts \\\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 439.0 1 1 \n", + "1 04c32d4d95425f73b3a1d6502aed4d48 439.0 3 2 \n", + "2 04c32d4d95425f73b3a1d6502aed4d48 439.0 5 3 \n", + "3 04c32d4d95425f73b3a1d6502aed4d48 439.0 12 1 \n", + "4 04c32d4d95425f73b3a1d6502aed4d48 439.0 13 2 \n", + "5 04c32d4d95425f73b3a1d6502aed4d48 439.0 232 11 \n", + "6 04c32d4d95425f73b3a1d6502aed4d48 439.0 233 7 \n", + "7 04c32d4d95425f73b3a1d6502aed4d48 439.0 234 7 \n", + "8 04c32d4d95425f73b3a1d6502aed4d48 439.0 235 3 \n", + "9 04c32d4d95425f73b3a1d6502aed4d48 439.0 236 2 \n", + "10 04c32d4d95425f73b3a1d6502aed4d48 487.0 17 1 \n", + "11 04c32d4d95425f73b3a1d6502aed4d48 487.0 20 2 \n", + "12 04c32d4d95425f73b3a1d6502aed4d48 487.0 21 1 \n", + "13 04c32d4d95425f73b3a1d6502aed4d48 487.0 22 6 \n", + "14 04c32d4d95425f73b3a1d6502aed4d48 487.0 24 1 \n", + "15 04c32d4d95425f73b3a1d6502aed4d48 487.0 25 9 \n", + "16 04c32d4d95425f73b3a1d6502aed4d48 487.0 28 4 \n", + "17 04c32d4d95425f73b3a1d6502aed4d48 487.0 100 18 \n", + "18 04c32d4d95425f73b3a1d6502aed4d48 487.0 101 22 \n", + "19 04c32d4d95425f73b3a1d6502aed4d48 487.0 102 45 \n", + "20 04c32d4d95425f73b3a1d6502aed4d48 492.0 31 3 \n", + "21 04c32d4d95425f73b3a1d6502aed4d48 492.0 32 37 \n", + "22 04c32d4d95425f73b3a1d6502aed4d48 492.0 33 7 \n", + "23 04c32d4d95425f73b3a1d6502aed4d48 492.0 34 30 \n", + "24 04c32d4d95425f73b3a1d6502aed4d48 492.0 36 5 \n", + "25 04c32d4d95425f73b3a1d6502aed4d48 492.0 37 28 \n", + "26 04c32d4d95425f73b3a1d6502aed4d48 492.0 38 11 \n", + "27 04c32d4d95425f73b3a1d6502aed4d48 492.0 39 13 \n", + "28 04c32d4d95425f73b3a1d6502aed4d48 492.0 40 19 \n", + "29 04c32d4d95425f73b3a1d6502aed4d48 492.0 128 16 
\n", + "30 04c32d4d95425f73b3a1d6502aed4d48 494.0 41 4 \n", + "31 04c32d4d95425f73b3a1d6502aed4d48 494.0 43 2 \n", + "32 04c32d4d95425f73b3a1d6502aed4d48 494.0 44 2 \n", + "33 04c32d4d95425f73b3a1d6502aed4d48 494.0 46 3 \n", + "34 04c32d4d95425f73b3a1d6502aed4d48 494.0 49 5 \n", + "35 04c32d4d95425f73b3a1d6502aed4d48 494.0 67 5 \n", + "36 04c32d4d95425f73b3a1d6502aed4d48 494.0 104 1 \n", + "37 04c32d4d95425f73b3a1d6502aed4d48 494.0 106 8 \n", + "38 04c32d4d95425f73b3a1d6502aed4d48 494.0 107 5 \n", + "39 04c32d4d95425f73b3a1d6502aed4d48 494.0 108 1 \n", + "40 04c32d4d95425f73b3a1d6502aed4d48 502.0 45 9 \n", + "41 04c32d4d95425f73b3a1d6502aed4d48 502.0 48 3 \n", + "42 04c32d4d95425f73b3a1d6502aed4d48 502.0 51 10 \n", + "43 04c32d4d95425f73b3a1d6502aed4d48 502.0 56 5 \n", + "44 04c32d4d95425f73b3a1d6502aed4d48 502.0 57 2 \n", + "45 04c32d4d95425f73b3a1d6502aed4d48 502.0 64 10 \n", + "46 04c32d4d95425f73b3a1d6502aed4d48 502.0 70 9 \n", + "47 04c32d4d95425f73b3a1d6502aed4d48 502.0 71 1 \n", + "48 04c32d4d95425f73b3a1d6502aed4d48 502.0 112 9 \n", + "49 04c32d4d95425f73b3a1d6502aed4d48 502.0 118 5 \n", + "\n", + " CorrectEventually Label IsLateProblem \n", + "0 True True False \n", + "1 True True False \n", + "2 True True False \n", + "3 True True False \n", + "4 True True False \n", + "5 True False False \n", + "6 True False False \n", + "7 True False False \n", + "8 True True False \n", + "9 True True False \n", + "10 True True False \n", + "11 True True False \n", + "12 True True False \n", + "13 True False False \n", + "14 True True False \n", + "15 True False False \n", + "16 True True False \n", + "17 True False False \n", + "18 True False False \n", + "19 True False False \n", + "20 True True False \n", + "21 True False False \n", + "22 True False False \n", + "23 True False False \n", + "24 True True False \n", + "25 True False False \n", + "26 True False False \n", + "27 True False False \n", + "28 True False False \n", + "29 True False False \n", + "30 True 
False True \n", + "31 True True True \n", + "32 True True True \n", + "33 True True True \n", + "34 True False True \n", + "35 True True True \n", + "36 True True True \n", + "37 True False True \n", + "38 True True True \n", + "39 True True True \n", + "40 True False True \n", + "41 True True True \n", + "42 True False True \n", + "43 True False True \n", + "44 True True True \n", + "45 True False True \n", + "46 True False True \n", + "47 True True True \n", + "48 True False True \n", + "49 True False True " + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "student_scores['IsLateProblem'] = student_scores[PS2.AssignmentID].isin(late_assignments)\n", + "student_scores.sort_values([PS2.SubjectID, PS2.AssignmentID, 'IsLateProblem', PS2.ProblemID], inplace=True)\n", + "# Remove attempts not in a valid assignment (for F19)\n", + "student_scores = student_scores[student_scores[PS2.AssignmentID].isin(valid_assignments)]\n", + "student_scores.head(50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train/Test Split\n", + "\n", + "Here we split our data into training/test datasets, as well as early problems (used to extract features input into the model) and late problems (where struggle will be predicted)."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split by SubjectID" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Valid subjects must have completed at least one early and one late problem\n", + "ealry_late_subject_ids = student_scores.groupby(PS2.SubjectID)['IsLateProblem'].apply(lambda x: np.mean(x) > 0 and np.mean(x) < 1)\n", + "# The vast majority of students have\n", + "np.mean(ealry_late_subject_ids)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "# Intersect these subjectIDs with the ones who completed the final exam\n", + "valid_subject_ids = subject_ids.intersection(set(ealry_late_subject_ids.index[ealry_late_subject_ids]))\n", + "valid_subject_ids = list(valid_subject_ids)\n", + "valid_subject_ids.sort()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "246\n", + "246\n" + ] + } + ], + "source": [ + "print(len(subject_ids))\n", + "print(len(valid_subject_ids))" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "train_ids, test_ids = train_test_split(list(valid_subject_ids), test_size=0.25, random_state=1234)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'data\\\\Release\\\\S19\\\\split.csv'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", +
"\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_12600/3474547466.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;31m# If saving, uncomment the top line\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;31m# ids_df.to_csv(os.path.join(path, 'split.csv'), index=False)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 11\u001b[1;33m \u001b[0mcached_df\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'split.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 12\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 13\u001b[0m \u001b[1;32massert\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mids_df\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequals\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcached_df\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 309\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstacklevel\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 310\u001b[0m )\n\u001b[1;32m--> 311\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 312\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 584\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 585\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 586\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 587\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 588\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 480\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 481\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 
482\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 483\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 484\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 809\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 810\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 811\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 812\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 813\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 1038\u001b[0m )\n\u001b[0;32m 
1039\u001b[0m \u001b[1;31m# error: Too many arguments for \"ParserBase\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1040\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmapping\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# type: ignore[call-arg]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1041\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1042\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_failover_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\c_parser_wrapper.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;31m# open handles\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_open_handles\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandles\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\base_parser.py\u001b[0m in 
\u001b[0;36m_open_handles\u001b[1;34m(self, src, kwds)\u001b[0m\n\u001b[0;32m 220\u001b[0m \u001b[0mLet\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mreaders\u001b[0m \u001b[0mopen\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0mafter\u001b[0m \u001b[0mthey\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdone\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtheir\u001b[0m \u001b[0mpotential\u001b[0m \u001b[0mraises\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 221\u001b[0m \"\"\"\n\u001b[1;32m--> 222\u001b[1;33m self.handles = get_handle(\n\u001b[0m\u001b[0;32m 223\u001b[0m \u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 224\u001b[0m \u001b[1;34m\"r\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 700\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;34m\"b\"\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 701\u001b[0m \u001b[1;31m# Encoding\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 702\u001b[1;33m handle = open(\n\u001b[0m\u001b[0;32m 703\u001b[0m \u001b[0mhandle\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 704\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 
'data\\\\Release\\\\S19\\\\split.csv'" + ] + } + ], + "source": [ + "# Check the train/test split dataframe against the last saved run\n", + "ids_df = pd.DataFrame({PS2.SubjectID: valid_subject_ids})\n", + "ids_df['IsTrain'] = ids_df[PS2.SubjectID].isin(train_ids)\n", + "ids_df\n", + "\n", + "path = os.path.join('data', 'Release', semester)\n", + "os.makedirs(path, exist_ok=True)\n", + "\n", + "# If saving, uncomment the top line\n", + "# ids_df.to_csv(os.path.join(path, 'split.csv'), index=False)\n", + "cached_df = pd.read_csv(os.path.join(path, 'split.csv'))\n", + "\n", + "assert(ids_df.equals(cached_df))" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "184\n", + "62\n" + ] + } + ], + "source": [ + "print(len(train_ids))\n", + "print(len(test_ids))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "student_scores_train = student_scores[student_scores[PS2.SubjectID].isin(train_ids)]\n", + "student_scores_test = student_scores[student_scores[PS2.SubjectID].isin(test_ids)]" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10843\n", + "7933\n", + "2910\n" + ] + } + ], + "source": [ + "print(student_scores.shape[0])\n", + "print(student_scores_train.shape[0])\n", + "print(student_scores_test.shape[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 1: Split Into early/late datasets\n", + "In Task 1, we need an early set of problems to use to extract features for the model, and a late set of problems where we're actually predicting student outcomes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "early_train = student_scores_train[student_scores_train['IsLateProblem'] == False].drop(['IsLateProblem'], axis=1)\n", + "early_test = student_scores_test[student_scores_test['IsLateProblem'] == False].drop(['IsLateProblem'], axis=1)\n", + "late_train = student_scores_train[student_scores_train['IsLateProblem']].drop(['IsLateProblem', 'Attempts', 'CorrectEventually'], axis=1)\n", + "late_test = student_scores_test[student_scores_test['IsLateProblem']].drop(['IsLateProblem', 'Attempts', 'CorrectEventually'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4867, 6)\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Attempts</th>\n", + " <th>CorrectEventually</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " 
<td>439.0</td>\n", + " <td>5</td>\n", + " <td>3</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>12</td>\n", + " <td>1</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>439.0</td>\n", + " <td>13</td>\n", + " <td>2</td>\n", + " <td>True</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Attempts \\\n", + "0 04c32d4d95425f73b3a1d6502aed4d48 439.0 1 1 \n", + "1 04c32d4d95425f73b3a1d6502aed4d48 439.0 3 2 \n", + "2 04c32d4d95425f73b3a1d6502aed4d48 439.0 5 3 \n", + "3 04c32d4d95425f73b3a1d6502aed4d48 439.0 12 1 \n", + "4 04c32d4d95425f73b3a1d6502aed4d48 439.0 13 2 \n", + "\n", + " CorrectEventually Label \n", + "0 True True \n", + "1 True True \n", + "2 True True \n", + "3 True True \n", + "4 True True " + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(early_train.shape)\n", + "early_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "print(early_test.shape)\n", + "early_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(3066, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr 
style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>30</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>41</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>31</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>43</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>32</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>44</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>33</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>46</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>34</th>\n", + " <td>04c32d4d95425f73b3a1d6502aed4d48</td>\n", + " <td>494.0</td>\n", + " <td>49</td>\n", + " <td>False</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Label\n", + "30 04c32d4d95425f73b3a1d6502aed4d48 494.0 41 False\n", + "31 04c32d4d95425f73b3a1d6502aed4d48 494.0 43 True\n", + "32 04c32d4d95425f73b3a1d6502aed4d48 494.0 44 True\n", + "33 04c32d4d95425f73b3a1d6502aed4d48 494.0 46 True\n", + "34 04c32d4d95425f73b3a1d6502aed4d48 494.0 49 False" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(late_train.shape)\n", + "late_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1135, 4)\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + 
"\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>SubjectID</th>\n", + " <th>AssignmentID</th>\n", + " <th>ProblemID</th>\n", + " <th>Label</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>250</th>\n", + " <td>08fcb5ead4e963a6f0bbdbc971f4a3ee</td>\n", + " <td>494.0</td>\n", + " <td>41</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>251</th>\n", + " <td>08fcb5ead4e963a6f0bbdbc971f4a3ee</td>\n", + " <td>494.0</td>\n", + " <td>43</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>252</th>\n", + " <td>08fcb5ead4e963a6f0bbdbc971f4a3ee</td>\n", + " <td>494.0</td>\n", + " <td>44</td>\n", + " <td>True</td>\n", + " </tr>\n", + " <tr>\n", + " <th>253</th>\n", + " <td>08fcb5ead4e963a6f0bbdbc971f4a3ee</td>\n", + " <td>494.0</td>\n", + " <td>46</td>\n", + " <td>False</td>\n", + " </tr>\n", + " <tr>\n", + " <th>254</th>\n", + " <td>08fcb5ead4e963a6f0bbdbc971f4a3ee</td>\n", + " <td>494.0</td>\n", + " <td>49</td>\n", + " <td>True</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " SubjectID AssignmentID ProblemID Label\n", + "250 08fcb5ead4e963a6f0bbdbc971f4a3ee 494.0 41 True\n", + "251 08fcb5ead4e963a6f0bbdbc971f4a3ee 494.0 43 True\n", + "252 08fcb5ead4e963a6f0bbdbc971f4a3ee 494.0 44 True\n", + "253 08fcb5ead4e963a6f0bbdbc971f4a3ee 494.0 46 False\n", + "254 08fcb5ead4e963a6f0bbdbc971f4a3ee 494.0 49 True" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(late_test.shape)\n", + "late_test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [], + "source": [ + "base_path = os.path.join('data', 'Release', semester)\n", + "os.makedirs(os.path.join(base_path, 'Train'), exist_ok=True)\n", + 
"os.makedirs(os.path.join(base_path, 'Test'), exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "early_train.to_csv(os.path.join(base_path, 'Train', 'early.csv'), index=False)\n", + "late_train.to_csv(os.path.join(base_path, 'Train', 'late.csv'), index=False)\n", + "early_test.to_csv(os.path.join(base_path, 'Test', 'early.csv'), index=False)\n", + "late_test.drop('Label', axis=1).to_csv(os.path.join(base_path, 'Test', 'late.csv'), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "data.save_subset(os.path.join(base_path, 'Train', 'Data'), lambda df: df[df[PS2.SubjectID].isin(train_ids) & df[PS2.AssignmentID].isin(valid_assignments)])" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "data.save_subset(os.path.join(base_path, 'Test', 'Data'), lambda df: df[df[PS2.SubjectID].isin(test_ids) & df[PS2.AssignmentID].isin(early_assignments)], False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Task 2: Predicting Student Grades\n", + "For Task 2, we are predicting students' Final Exam grades. We just add the appropriate LinkTable _without_ the actual grades, just leaving the SubjectIDs to predict for." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "student_table_dir = os.path.join(base_path, 'Test', 'Data', 'LinkTables')\n", + "os.makedirs(student_table_dir, exist_ok=True)\n", + "student_table_test = student_table[student_table[PS2.SubjectID].isin(test_ids)]\n", + "student_table_test.drop('X-Grade', axis=1).to_csv(os.path.join(student_table_dir, 'Subject.csv'), index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save the solution\n", + "The solution is, of course, not released, but used to judge submissions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sol_path = os.path.join('data', 'Solution', semester, 'task1', 'ref')\n", + "os.makedirs(sol_path, exist_ok=True)\n", + "late_test.to_csv(os.path.join(sol_path, 'truth.csv'), index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sol_path = os.path.join('data', 'Solution', semester, 'task2', 'ref')\n", + "os.makedirs(sol_path, exist_ok=True)\n", + "student_table_test.to_csv(os.path.join(sol_path, 'truth.csv'), index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Unused code for identifying the late problems\n", + "\n", + "The code below was used to investigate whether we could predict the last 3 problems of every assignment using the first 7. However, students appear to do the assignment in a variety of orders, making this difficult. Additionally, this would leak future data (e.g. the first 7 problems on Assignment 2 could be used to predict the last 3 problems on Assignment 1)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_start_end_order(rows):\n", + " return pd.Series({\n", + " 'StartEventOrder': min(rows[PS2.Order])\n", + " # 'EndEventOrder': max(rows[PS2.Order])\n", + " })\n", + "\n", + "start_orders = main_table_filtered.groupby([PS2.SubjectID, PS2.AssignmentID, PS2.ProblemID]).apply(get_start_end_order)\n", + "start_orders" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Unfortunately, the last 3 problems aren't always easy to pick apart\n", + "print(problem_stats[PS2.AssignmentID].unique())\n", + "assignment1 = problem_stats[problem_stats[PS2.AssignmentID] == 439]\n", + "y = range(0, 10)\n", + "plt.scatter(assignment1['MedTime'], y, c='red')\n", + "plt.scatter(assignment1['MedSuccess'], y, c='blue')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "late_problems = problem_stats.groupby(PS2.AssignmentID).apply(lambda rows: list(rows[PS2.ProblemID][rows['MedTime'].argsort()][-3:]))\n", + "late_problems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "late_problem_ids_old = [st for row in late_problems for st in row]\n", + "late_problem_ids_old" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "runs_time = runs.copy()\n", + "runs_time['IsLateProblemInAssignment'] = runs_time[PS2.ProblemID].isin(late_problem_ids_old)\n", + "runs_time.head(15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "first_late_attempts = runs_time[runs_time['IsLateProblemInAssignment']].groupby([PS2.AssignmentID, PS2.SubjectID])['TimeInt'].min()\n", + "first_late_attempts" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runs_late = runs_time.merge(first_late_attempts.to_frame('FirstLateAttempt'), on=[PS2.AssignmentID, PS2.SubjectID], how='left')\n", + "np.mean(runs_late['FirstLateAttempt'].isna())\n", + "# ~2% of student-assignments did not have a late attempt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "runs_late['IsLateAttempt'] = runs_late['TimeInt'] >= runs_late['FirstLateAttempt']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "np.sum(~runs_late['IsLateAttempt'] & runs_late['IsLateProblemInAssignment'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 30% of attempts at early problems occurred after the first attempt at a late problem\n", + "np.sum(runs_late['IsLateAttempt'] & ~runs_late['IsLateProblemInAssignment']) / np.sum(~runs_late['IsLateProblemInAssignment'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(1234)\n", + "subjects_sample = np.random.choice(runs_time[PS2.SubjectID].unique(), 20)\n", + "assignment_sample = runs_time[(runs_time[PS2.AssignmentID] == 439) & runs_time[PS2.SubjectID].isin(subjects_sample)]\n", + "assignment_sample.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "all_subject_ids = assignment_sample[PS2.SubjectID]\n", + "distinct_subject_ids = all_subject_ids.unique()\n", + "subject_indices = [sorted(distinct_subject_ids).index(x) for x in all_subject_ids]\n", + "colors = [viridis(float(i)) for i in assignment_sample['IsLateProblemInAssignment']]\n", + "subject_times_norm = assignment_sample.groupby('SubjectID')['TimeInt'].transform(lambda x: (x - x.mean()) / 
x.std())\n", + "widths = list(assignment_sample[PS2.Score].apply(lambda x: 0.2 if x < 1 else 3))\n", + "plt.scatter(x=subject_times_norm, y=subject_indices, color=colors, linewidths=widths, edgecolors=None)\n", + "plt.xlim([-2, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "is_sorted = lambda a: np.all(a[:-1] <= a[1:])\n", + "\n", + "def is_consistent(rows):\n", + " orders = rows[PS2.Order]\n", + " times = rows['TimeInt']\n", + " return is_sorted(times.values)\n", + "\n", + "consistent = runs.groupby([PS2.SubjectID, PS2.AssignmentID, PS2.ProblemID]).apply(is_consistent)\n", + "consistent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "runs[(runs[PS2.ProblemID]==102) & (runs[PS2.SubjectID]=='71ffa17407d66e134442eebb32d330ec')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "consistent[~consistent]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/verify_split.ipynb b/verify_split.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..4a9dcfb177181f2eda3cf7a3e1e8e173e830f053 --- /dev/null +++ b/verify_split.ipynb @@ -0,0 +1,150 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from ProgSnap2 import ProgSnap2Dataset\n", + "from ProgSnap2 import PS2\n", + 
"import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "from os import path" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "early_assignments = np.array([439, 487, 492])\n", + "late_assignments = np.array([494, 502])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def verify_users(path, is_train, users):\n", + " subdir = 'Train' if is_train else 'Test'\n", + " path = os.path.join(path, subdir)\n", + " ps2 = ProgSnap2Dataset(os.path.join(path, 'Data'))\n", + " early = pd.read_csv(os.path.join(path, 'early.csv'))\n", + " late = pd.read_csv(os.path.join(path, 'late.csv'))\n", + " main_table = ps2.get_main_table()\n", + " student_table = ps2.load_link_table('Subject')\n", + " code_states = ps2.get_code_states_table()\n", + " \n", + " # Verify the early table\n", + " assert early[PS2.SubjectID].isin(users).all()\n", + " assert early[PS2.AssignmentID].isin(early_assignments).all()\n", + " assert 'Label' in early.columns\n", + " assert early['Label'].mean() != 0\n", + " \n", + " # Verify the late table\n", + " assert late[PS2.SubjectID].isin(users).all()\n", + " assert late[PS2.AssignmentID].isin(late_assignments).all()\n", + " assert ('Label' in late.columns) == is_train\n", + " if (is_train):\n", + " assert early['Label'].mean() != 0\n", + " \n", + " # Verify the main table\n", + " assert main_table[PS2.SubjectID].isin(users).all()\n", + " main_table_assignments = early_assignments\n", + " if (is_train):\n", + " main_table_assignments = np.append(main_table_assignments, late_assignments)\n", + " assert main_table[PS2.AssignmentID].isin(main_table_assignments).all()\n", + " \n", + " # Verify student table\n", + " assert student_table[PS2.SubjectID].isin(users).all()\n", + " assert ('X-Grade' in student_table.columns) == is_train\n", + " \n", + " # Verify the code states table\n", + " assert 
set(code_states[PS2.CodeStateID]) == set(main_table[PS2.CodeStateID])\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'data/Release/S19\\\\split.csv'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_10184/2007109656.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0msemester\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'S19'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'F19'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m \u001b[0msplit\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mos\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpath\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjoin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mPATH\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msemester\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'split.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf'Train % for {semester} is: {split[\"IsTrain\"].mean()}'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf'Count for {semester} is: {len(split.index)}'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + 
"\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 309\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mstacklevel\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 310\u001b[0m )\n\u001b[1;32m--> 311\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 312\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 313\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)\u001b[0m\n\u001b[0;32m 584\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 585\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 586\u001b[1;33m \u001b[1;32mreturn\u001b[0m 
\u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 587\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 588\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 480\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 481\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 482\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 483\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 484\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 809\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"has_index_names\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 810\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 811\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 812\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 813\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 1038\u001b[0m )\n\u001b[0;32m 1039\u001b[0m \u001b[1;31m# error: Too many arguments for \"ParserBase\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1040\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmapping\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# type: ignore[call-arg]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1041\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1042\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_failover_to_python\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\c_parser_wrapper.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 49\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 50\u001b[0m \u001b[1;31m# open 
handles\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 51\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_open_handles\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 52\u001b[0m \u001b[1;32massert\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhandles\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 53\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\parsers\\base_parser.py\u001b[0m in \u001b[0;36m_open_handles\u001b[1;34m(self, src, kwds)\u001b[0m\n\u001b[0;32m 220\u001b[0m \u001b[0mLet\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mreaders\u001b[0m \u001b[0mopen\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0mafter\u001b[0m \u001b[0mthey\u001b[0m \u001b[0mare\u001b[0m \u001b[0mdone\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mtheir\u001b[0m \u001b[0mpotential\u001b[0m \u001b[0mraises\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 221\u001b[0m \"\"\"\n\u001b[1;32m--> 222\u001b[1;33m self.handles = get_handle(\n\u001b[0m\u001b[0;32m 223\u001b[0m \u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 224\u001b[0m \u001b[1;34m\"r\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;32m~\\miniconda3\\lib\\site-packages\\pandas\\io\\common.py\u001b[0m in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m 700\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[1;32mand\u001b[0m 
# Root folder holding one sub-directory per semester.
PATH = "data/Release/"

# SubjectIDs of the semester handled in the previous iteration
# (None until the first semester has been processed).
last_semester_users = None

for semester in ('S19', 'F19'):
    split = pd.read_csv(os.path.join(PATH, semester, 'split.csv'))
    print(f'Train % for {semester} is: {split["IsTrain"].mean()}')
    print(f'Count for {semester} is: {len(split.index)}')

    # The semester directory is the same for both splits.
    path = os.path.join(PATH, semester)
    for is_train in (True, False):
        print('Verifying: ' + path)
        # SubjectIDs assigned to the split currently being verified.
        users = split.loc[split['IsTrain'] == is_train, PS2.SubjectID]
        verify_users(path, is_train, users)

    # No subject may appear in two consecutive semesters.
    semester_users = split[PS2.SubjectID]
    if last_semester_users is not None:
        assert set(last_semester_users).isdisjoint(semester_users)
    last_semester_users = semester_users
"text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}