# Import the NumPy and pandas libraries
import numpy as np
import pandas as pd
# Import the data-visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the student-performance dataset and take a first look at it.
result = pd.read_csv("StudentsPerformance.csv")
result.head()

# Upper-case every column label; the rest of the analysis refers to the
# columns by their upper-case names.
result.columns = [str.upper(label) for label in result.columns]
result.head()

# Structure, per-column missing-value counts, and summary statistics.
result.info()
result.isna().sum()
result.describe()
# Pairwise relationship plots, coloured by gender.
sns.pairplot(result, hue='GENDER', palette='coolwarm')

# Correlation matrix of the numeric columns.
# The frame still contains text columns (gender, lunch, ...). pandas >= 2.0
# raises on them instead of silently dropping them, so select the numeric
# columns explicitly; select_dtypes works on older pandas versions as well.
score_corr = result.select_dtypes(include='number').corr()
score_corr

# Same matrix as an annotated heatmap (computed once, reused here).
sns.heatmap(score_corr, cmap='PuRd', annot=True)
# Per-student mean of the three subject scores, stored as a new column.
result["AVERAGE"] = (
    result["MATH SCORE"]
    .add(result["READING SCORE"])
    .add(result["WRITING SCORE"])
    .div(3)
)
result.head()

# Writing score against reading score with a regression fit per gender.
sns.lmplot(data=result, x='READING SCORE', y='WRITING SCORE', hue='GENDER')
# For each score column: build the gender x race/ethnicity table of mean
# scores (pivot_table's default aggregation), show it, then draw it as an
# annotated heatmap. Each table is built once and reused for both steps.

# Math score.
pvresult = result.pivot_table(
    index='GENDER', columns='RACE/ETHNICITY', values='MATH SCORE'
)
pvresult
sns.heatmap(pvresult, annot=True)

# Reading score.
pvresult = result.pivot_table(
    index='GENDER', columns='RACE/ETHNICITY', values='READING SCORE'
)
pvresult
sns.heatmap(
    pvresult, annot=True, cmap='YlOrRd', linewidths=1, linecolor='white'
)

# Writing score.
pvresult = result.pivot_table(
    index='GENDER', columns='RACE/ETHNICITY', values='WRITING SCORE'
)
pvresult
sns.heatmap(
    pvresult, annot=True, cmap='YlGnBu', linewidths=1, linecolor='black'
)

# Average of the three scores.
pvresult = result.pivot_table(
    index='GENDER', columns='RACE/ETHNICITY', values='AVERAGE'
)
pvresult
sns.heatmap(pvresult, annot=True, cmap='Reds')
# Share of each gender in the dataset, in percent.
(result.GENDER.value_counts() / len(result)) * 100

# Pie chart of the gender split.
# Labels come from the counts' own index: value_counts() orders by frequency
# while unique() orders by first appearance, so pairing the two could attach
# a label to the wrong wedge.
gender_counts = result['GENDER'].value_counts()
labels = gender_counts.index.tolist()
plt.pie(
    gender_counts,
    labels=labels,
    autopct="%1.1f%%",
    shadow=True,
    explode=[0.04] * len(gender_counts),  # one offset per wedge
    startangle=90,
)
plt.title('GENDER DISTRIBUTION', fontsize=15)
plt.show()

# Absolute counts per gender, as numbers and as a bar chart.
result.GENDER.value_counts()
sns.countplot(x='GENDER', data=result, palette='magma')

# Per-gender statistics.
gender = result.groupby("GENDER")
# numeric_only=True: pandas >= 2.0 raises TypeError when asked to average the
# text columns instead of silently dropping them.
gender.mean(numeric_only=True)
gender.describe().transpose()
# Share of each test-preparation status in the dataset, in percent.
(result['TEST PREPARATION COURSE'].value_counts() / len(result)) * 100

# Pie chart of the test-preparation split.
# Labels come from the counts' own index: value_counts() orders by frequency
# while unique() orders by first appearance, so pairing the two could attach
# a label to the wrong wedge.
test = result['TEST PREPARATION COURSE'].value_counts()
labels = test.index.tolist()
plt.pie(
    test,
    labels=labels,
    autopct="%1.1f%%",
    shadow=True,
    explode=[0.04] * len(test),  # one offset per wedge
    startangle=90,
)
plt.title('TEST PREPARATION COURSE', fontsize=15)
plt.show()

# Mean of the numeric columns per test-preparation status.
tpc = result.groupby("TEST PREPARATION COURSE")
# numeric_only=True: pandas >= 2.0 raises TypeError when asked to average the
# text columns instead of silently dropping them.
tpc.mean(numeric_only=True)
# One split violin plot per subject, side by side: score distribution by
# test-preparation status, with the two halves of each violin split by gender.
fig, ax = plt.subplots(1, 3, figsize=(16, 4))
violin_specs = [
    ('MATH SCORE', 'PuRd'),
    ('READING SCORE', 'Purples'),
    ('WRITING SCORE', 'RdPu'),
]
for panel, (score_column, score_palette) in zip(ax, violin_specs):
    sns.violinplot(
        data=result,
        x="TEST PREPARATION COURSE",
        y=score_column,
        hue='GENDER',
        split=True,
        palette=score_palette,
        ax=panel,
    )

# Average score by test-preparation status and gender.
# NOTE(review): no ax= is passed, so this draws on whichever axes is current
# at this point - confirm whether a separate figure is intended.
sns.boxplot(data=result, x="TEST PREPARATION COURSE", y="AVERAGE", hue="GENDER")
# Mean of the numeric columns per parental education level.
p_edu = result.groupby("PARENTAL LEVEL OF EDUCATION")
# numeric_only=True: pandas >= 2.0 raises TypeError when asked to average the
# text columns instead of silently dropping them.
p_edu.mean(numeric_only=True)

# One box plot per subject, stacked vertically: score distribution by
# parental education level.
fig, ax = plt.subplots(3, 1, figsize=(16, 16))
sns.boxplot(
    x='PARENTAL LEVEL OF EDUCATION', y='MATH SCORE',
    data=result, ax=ax[0], palette="magma",
)
sns.boxplot(
    x='PARENTAL LEVEL OF EDUCATION', y='READING SCORE',
    data=result, ax=ax[1], palette="plasma",
)
sns.boxplot(
    x='PARENTAL LEVEL OF EDUCATION', y='WRITING SCORE',
    data=result, ax=ax[2], palette="inferno",
)

# Average score by test-preparation status and gender.
# NOTE(review): no ax= is passed, so this draws on whichever axes is current
# at this point - confirm whether a separate figure is intended.
sns.boxplot(
    x="TEST PREPARATION COURSE", y='AVERAGE',
    data=result, hue='GENDER', palette='inferno',
)
# Let's find the percentage share of each race/ethnicity group.
result["RACE/ETHNICITY"].value_counts().div(len(result)).mul(100)

# Number of students per group, with the top and right spines removed.
sns.countplot(data=result, x='RACE/ETHNICITY', palette='Reds')
sns.despine()

# Distribution of the average score within each group.
sns.boxplot(data=result, x='RACE/ETHNICITY', y='AVERAGE', palette="magma")

# Wider figure for the parental-education counts, broken down by group.
plt.figure(figsize=(16, 5))
sns.countplot(
    data=result,
    x="PARENTAL LEVEL OF EDUCATION",
    hue="RACE/ETHNICITY",
    palette='viridis',
)
# Share of each lunch type in the dataset, in percent.
(result["LUNCH"].value_counts() / len(result)) * 100

# Pie chart of the lunch split.
# Plot the lunch counts themselves: the chart previously drew the
# test-preparation counts under the lunch title and labels.
# Labels come from the counts' own index so each label matches its wedge
# (value_counts() orders by frequency, unique() by first appearance).
lunch = result['LUNCH'].value_counts()
labels = lunch.index.tolist()
plt.pie(
    lunch,
    labels=labels,
    autopct="%1.1f%%",
    shadow=True,
    explode=[0.04] * len(lunch),  # one offset per wedge
    startangle=90,
)
plt.title('LUNCH DISTRIBUTION', fontsize=15)
plt.show()
# One swarm plot per subject, stacked vertically: individual scores by
# race/ethnicity group, coloured by lunch type.
fig, ax = plt.subplots(3, 1, figsize=(16, 16))
sns.swarmplot(
    x="RACE/ETHNICITY", y='MATH SCORE',
    data=result, hue='LUNCH', palette='Purples', ax=ax[0],
)
sns.swarmplot(
    x="RACE/ETHNICITY", y='READING SCORE',
    data=result, hue='LUNCH', palette='Blues', ax=ax[1],
)
sns.swarmplot(
    x="RACE/ETHNICITY", y='WRITING SCORE',
    data=result, hue='LUNCH', palette='Greens', ax=ax[2],
)

# Mean of the numeric columns per lunch type.
# The name p_edu is kept from the original script even though the grouping
# key here is LUNCH.
p_edu = result.groupby("LUNCH")
# numeric_only=True: pandas >= 2.0 raises TypeError when asked to average the
# text columns instead of silently dropping them.
p_edu.mean(numeric_only=True)

# Student counts by race/ethnicity split by lunch type, then by lunch type
# split by gender.
sns.countplot(x="RACE/ETHNICITY", hue="LUNCH", data=result, palette='Oranges')
sns.countplot(x="LUNCH", data=result, hue='GENDER', palette='YlGnBu')