import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
Input the Data
# Read mtcars.csv into `df`.
# NOTE(review): the path is absolute and machine-specific; it only resolves
# where this exact directory layout exists.
df = pd.read_csv(
    filepath_or_buffer="/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/mtcars.csv"
)
# Bare expression: shows the frame when it is the last line of a notebook cell.
df
Bar chart
# Tally how many rows of `df` share each distinct value of the "cyl" column.
Num_cylinders = df["cyl"].value_counts()
# Bare expression: shows the counts when it ends a notebook cell.
Num_cylinders
# One bar per distinct "cyl" value; the Series index supplies the x positions
# and the counts supply the bar heights. Three colours are listed, one per bar.
plt.bar(
    Num_cylinders.index,
    Num_cylinders,
    color=['black', 'blue', 'orange'],
)
plt.xlabel("Num. of Cylinder")
plt.title("Count of cars by Cylinder")
plt.ylabel("Count")
Histogram
Input the iris data set
# Read iris.csv into `df2`.
# NOTE(review): the path is absolute and machine-specific; it only resolves
# where this exact directory layout exists.
df2 = pd.read_csv(
    filepath_or_buffer="/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/iris.csv"
)
# Bare expression: shows the frame when it is the last line of a notebook cell.
df2
# Two histograms of the same "Sepal.Length" column, side by side in a 1x2
# grid, differing only in bin count (50 on the left, 20 on the right).
ax_50_bins = plt.subplot(1, 2, 1)
ax_50_bins.hist(df2["Sepal.Length"], bins=50)
ax_50_bins.set_xlabel("Sepal Length")
ax_50_bins.set_ylabel("Count")
ax_50_bins.set_title("Histogram of Sepal length with 50 bins")

ax_20_bins = plt.subplot(1, 2, 2)
ax_20_bins.hist(df2["Sepal.Length"], bins=20)
ax_20_bins.set_xlabel("Sepal Length")
ax_20_bins.set_ylabel("Count")
ax_20_bins.set_title("Histogram of Sepal length with 20 bins")

# Stretch the subplot area beyond the default figure bounds horizontally
# (left=-0.5, right=1.5) so the two panels render wider.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
Conditional Histogram
# Grouped histogram of "Sepal.Length", one colour per value of "Species",
# all drawn over the same 20 bins.
n_bins = 20
labels = ['setosa','versicolor','virginica']
colors = ['orange', 'blue', 'black']

# One sub-frame per species, in the same order as `labels` / `colors`.
tmpdf1, tmpdf2, tmpdf3 = (
    df2[df2["Species"] == species_name] for species_name in labels
)
x_multi = [subset["Sepal.Length"] for subset in (tmpdf1, tmpdf2, tmpdf3)]

plt.hist(x_multi, bins=n_bins, histtype='bar', color=colors, label=labels)
plt.legend(prop={'size': 10})
plt.xlabel("Sepal Length")
plt.ylabel("Count")
Input the CS361 student score (Homeworks + project + exams) data set
# Read Data_finalAna.csv into `df3`.
# NOTE(review): the path is absolute and machine-specific; it only resolves
# where this exact directory layout exists.
df3 = pd.read_csv("/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/Data_finalAna.csv")
# Keep every row but only the columns at positions 3 through 9 (seven columns).
df3 = df3.iloc[:, 3:10]
# Bare expression: shows the frame when it is the last line of a notebook cell.
df3
# Narrow to the two columns used by the participation analysis that follows.
df_4 = df3[["Total_HWPRJExam", "ParticipationScore"]]
df_4
# Grouped histogram of "Total_HWPRJExam", split by "ParticipationScore":
# rows equal to 1 are labelled 'Full', rows equal to 0 are labelled 'NotFull'.
n_bins = 10
labels = ['Full','NotFull']
colors = ['orange', 'blue']

# tmpdf1 -> ParticipationScore == 1, tmpdf2 -> ParticipationScore == 0.
tmpdf1, tmpdf2 = (
    df_4[df_4["ParticipationScore"] == score_flag] for score_flag in (1, 0)
)
x_multi = [group["Total_HWPRJExam"] for group in (tmpdf1, tmpdf2)]

plt.hist(x_multi, bins=n_bins, histtype='bar', color=colors, label=labels)
plt.legend(prop={'size': 10})
plt.xlabel("Score")
plt.ylabel("Count")
# Write the current figure to a PNG in the working directory.
plt.savefig('Score_FullOrNot.png')
Calculate the mean and standard deviation of the scores of the students who have full participation and of those who don't.
# Mean and standard deviation of "Total_HWPRJExam" for each group.
# ddof=0 makes .std() divide by N (population form) rather than N - 1.

# Group held in tmpdf1.
m1 = tmpdf1.loc[:, "Total_HWPRJExam"].mean()
m1
print("Mean:", m1)
s1 = tmpdf1.loc[:, "Total_HWPRJExam"].std(ddof=0)
print("Standard deviation:", s1)

# Group held in tmpdf2.
m2 = tmpdf2.loc[:, "Total_HWPRJExam"].mean()
m2
print("Mean:", m2)
s2 = tmpdf2.loc[:, "Total_HWPRJExam"].std(ddof=0)
print("Standard deviation:", s2)
Calculate the median score of the students who have full participation and that of students who don't.
# Median of "Total_HWPRJExam" for each group; the bare names display the
# value when they end a notebook cell.

# Group held in tmpdf1.
md1 = tmpdf1.loc[:, "Total_HWPRJExam"].median()
md1

# Group held in tmpdf2.
md2 = tmpdf2.loc[:, "Total_HWPRJExam"].median()
md2