import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
# Read the death data CSV into a DataFrame.
death_csv_path = "/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/DeathData.csv"
df = pd.read_csv(filepath_or_buffer=death_csv_path)
# Bare expression: in a notebook cell this displays the loaded table.
df
Draw box plots of the vehicle death data
# Single box plot of the VEHICLE column over every row of the data set.
data = df["VEHICLE"]
fig1, ax1 = plt.subplots()
ax1.boxplot(data, labels=["Vehicle Death"])
ax1.set_title('Basic Box Plot of Vehicle death')

# Split the rows by Region code; the list order is the left-to-right
# order of the boxes in the second figure.
region_codes = ["e", "n", "s", "w"]
d1, d2, d3, d4 = [df[df["Region"] == code] for code in region_codes]
data = [region_rows["VEHICLE"] for region_rows in (d1, d2, d3, d4)]

# One box per region, side by side on a shared axis.
fig2, ax2 = plt.subplots()
ax2.boxplot(data, labels=region_codes)
ax2.set_title('Vehicle death by region')
plt.show()
Plot time series data: stock prices
# Read the stock price CSV; this rebinds df, replacing any earlier table.
stock_csv_path = "/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/Stock_sht.csv"
df = pd.read_csv(filepath_or_buffer=stock_csv_path)
# Bare expression: in a notebook cell this displays the loaded table.
df
# Line plot of the raw AMZN and DUK price columns on one wide figure.
plt.figure(figsize=(12, 5))

# (column, line color, line style, legend label) for each plotted series.
series_styles = [
    ("AMZN", "mediumorchid", "--", 'AMAZON'),
    ("DUK", "cyan", "-", 'DUK'),
]
for column, line_color, line_style, legend_label in series_styles:
    plt.plot(df[column], color=line_color, linestyle=line_style, label=legend_label)

plt.title("Daily stock prices 1/13/2007-2/28/2007: AMZN and DUK")
plt.ylabel("Price in USD")
# Label only row positions 0 and 38 with dates.
# NOTE(review): 38 is presumably the last row index of the data - confirm.
plt.xticks([0, 38], ["Jan 13 2007", "Feb. 28 2007"])
plt.legend()
Standardize the data for comparison
# Build a standardized copy of the stock table: each price column is shifted
# by its mean and divided by its standard deviation, so the two tickers can
# be compared on a common scale.
stock_data_standardized = df.copy()
for ticker in ("AMZN", "DUK"):
    prices = df[ticker]
    # ddof=0 selects the population standard deviation (divide by N).
    stock_data_standardized[ticker] = (prices - prices.mean()) / prices.std(ddof=0)

# Line plot of the standardized series, styled like the raw-price plot.
plt.figure(figsize=(12,5))
plt.plot(stock_data_standardized.AMZN, color="mediumorchid", linestyle="--", label='AMZN')
plt.plot(stock_data_standardized.DUK, color="cyan", linestyle="-", label='DUK')
plt.legend()
# Label only row positions 0 and 38 with dates.
# NOTE(review): 38 is presumably the last row index of the data - confirm.
plt.xticks([0, 38], ["Jan 13 2007", "Feb. 28 2007"])
# Standardized values are dimensionless, so the axis label carries no
# currency unit.
plt.ylabel("Standardized price (z-score)")
plt.title("Standardized Daily stock prices 1/13/2007-2/28/2007: AMZN and DUK")
Scatter plots of the raw and the standardized stock prices
# Left panel: raw AMZN price against raw DUK price.
plt.subplot(1, 2, 1)
plt.scatter(df.AMZN, df.DUK, color="magenta")
plt.title("Scatter plot of AMZN vs DUK stock prices 1/13/2007-2/28/2007")
plt.xlabel("AMZN stock price (USD)")
plt.ylabel("DUK stock price (USD)")

# Right panel: the same pairing after standardization.
plt.subplot(1, 2, 2)
plt.scatter(stock_data_standardized.AMZN, stock_data_standardized.DUK, color="blue")
plt.title("Scatter plot of standardized AMZN vs DUK stock prices 1/13/2007-2/28/2007")
# Standardized values are dimensionless, so these axes are labelled as
# z-scores rather than USD prices.
plt.xlabel("Standardized AMZN stock price (z-score)")
plt.ylabel("Standardized DUK stock price (z-score)")

# Stretch the subplot area beyond the nominal figure bounds (left < 0,
# right > 1) so the two panels are drawn wider.
# NOTE(review): presumably relies on the notebook inline backend's tight
# bounding box to keep the stretched axes visible - confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)
Print the pairwise correlations between the columns of the stock data
# Pairwise correlation matrix of the columns of df, written to stdout.
correlation_matrix = df.corr()
print(correlation_matrix)