Spaces:
Sleeping
Sleeping
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Script overview: read the student-performance CSV, print a textual summary
# (schema, score statistics, missing-value counts), then draw a 3x2 grid of
# plots and write it to a PNG in the current working directory.

# The three exam-score columns that the summaries and plots below rely on.
SCORE_COLUMNS = ["math score", "reading score", "writing score"]
# Visual divider printed between the console report sections.
SECTION_BREAK = "\n" + "=" * 50 + "\n"

# Load the dataset
df = pd.read_csv("/kaggle/input/students-performance-in-exams/StudentsPerformance.csv")

# --- EDA ---
# Column names, dtypes and non-null counts (info() prints its own report).
print("Dataset Info:")
df.info()
print(SECTION_BREAK)

# count / mean / std / quartiles for the three score columns only.
print("Descriptive Statistics for Scores:")
score_summary = df[SCORE_COLUMNS].describe()
print(score_summary)
print(SECTION_BREAK)

# Number of null cells in every column, as a programmatic confirmation.
print("Missing values per column:")
null_counts = df.isnull().sum()
print(null_counts)
print(SECTION_BREAK)

# --- Visualizations ---
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(18, 18))
fig.suptitle("Exploratory Data Analysis of Student Performance", fontsize=20)

# Score distributions: one histogram with a KDE curve per score column,
# occupying the first three cells of the grid.
histogram_specs = [
    ("math score", axes[0, 0], "skyblue", "Distribution of Math Scores"),
    ("reading score", axes[0, 1], "lightcoral", "Distribution of Reading Scores"),
    ("writing score", axes[1, 0], "lightgreen", "Distribution of Writing Scores"),
]
for column, ax, colour, title in histogram_specs:
    sns.histplot(df[column], kde=True, ax=ax, color=colour)
    ax.set_title(title)

# Math score broken down by gender.
gender_ax = axes[1, 1]
sns.boxplot(x="gender", y="math score", data=df, ax=gender_ax)
gender_ax.set_title("Math Score by Gender")

# Per-student mean of the three scores; stored as a new column on df because
# both remaining box plots use it as their y-axis.
df["average score"] = df[SCORE_COLUMNS].mean(axis=1)

# Average score by parental level of education. The category labels are long,
# so the x tick labels are rotated.
education_ax = axes[2, 0]
sns.boxplot(
    x="parental level of education",
    y="average score",
    data=df,
    ax=education_ax,
)
education_ax.set_title("Average Score by Parental Education Level")
education_ax.tick_params(axis="x", rotation=45)

# Average score by test preparation course.
prep_ax = axes[2, 1]
sns.boxplot(x="test preparation course", y="average score", data=df, ax=prep_ax)
prep_ax.set_title("Average Score by Test Preparation Course")

# Constrain the subplots to a rectangle that stops short of the figure's top
# edge so they do not overlap the suptitle.
plt.tight_layout(rect=[0, 0.03, 1, 0.96])
plt.savefig("students_performance_eda.png")
print("EDA plots saved to students_performance_eda.png")