In [ ]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
import scipy.stats as stats
# Read the death-statistics CSV into the notebook-wide frame `df`.
# NOTE(review): this is an absolute path on one specific machine; the cell
# will not run elsewhere until it is pointed at a local copy of the file.
death_csv = "/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/DeathData.csv"
df = pd.read_csv(death_csv)
# Bare last expression so the notebook renders the frame.
df

Draw box plots of the vehicle death data

In [30]:
# One box summarising the VEHICLE column over every row of df.
fig1, ax1 = plt.subplots()
data = df["VEHICLE"]
ax1.set(title='Basic Box Plot of Vehicle death')
# Last expression of the cell: the dict of artists returned by boxplot is
# what the notebook echoes as this cell's output.
ax1.boxplot(data, labels=["Vehicle Death"])
Out[30]:
{'whiskers': [<matplotlib.lines.Line2D at 0x1a2578f890>,
  <matplotlib.lines.Line2D at 0x1a2578fe50>],
 'caps': [<matplotlib.lines.Line2D at 0x1a25795390>,
  <matplotlib.lines.Line2D at 0x1a25795890>],
 'boxes': [<matplotlib.lines.Line2D at 0x1a25784d10>],
 'medians': [<matplotlib.lines.Line2D at 0x1a25795dd0>],
 'fliers': [<matplotlib.lines.Line2D at 0x1a2579f310>],
 'means': []}
In [31]:
# Split df into one sub-frame per Region code; the d1..d4 names are kept so
# they remain available to any other cell.
region_codes = ["e", "n", "s", "w"]
d1, d2, d3, d4 = (df[df["Region"] == code] for code in region_codes)

# One VEHICLE series per region, in the same order as the tick labels.
data = [subset["VEHICLE"] for subset in (d1, d2, d3, d4)]

fig2, ax2 = plt.subplots()
ax2.set(title='Vehicle death by region')
ax2.boxplot(data, labels=region_codes)

plt.show()

Plot time series data: stock prices

In [32]:
# Read the stock-price CSV. This rebinds `df`, so from here on `df` no
# longer refers to the death data loaded earlier in the notebook.
# NOTE(review): this is an absolute path on one specific machine.
stock_csv = "/Users/hongyeliu/Desktop/CS361JupyterNotebook/Lecture1/Stock_sht.csv"
df = pd.read_csv(stock_csv)
# Bare last expression so the notebook renders the frame.
df
Out[32]:
Date AMZN DUK KO Day
0 1/3/07 38.700001 34.971017 17.874906 1
1 1/4/07 38.900002 35.044103 17.882263 2
2 1/5/07 38.369999 34.240172 17.757161 3
3 1/8/07 37.500000 34.294985 17.871225 6
4 1/9/07 37.779999 34.130544 17.885944 7
5 1/10/07 37.150002 33.984374 17.911700 8
6 1/11/07 37.400002 34.075731 17.933777 9
7 1/12/07 38.200001 33.911290 17.863866 10
8 1/16/07 38.660000 34.020917 17.845469 14
9 1/17/07 37.880001 33.966104 17.882263 15
10 1/18/07 36.980000 34.130544 17.790276 16
11 1/19/07 37.020000 34.240172 17.757161 17
12 1/22/07 36.950001 34.057458 17.672533 20
13 1/23/07 36.430000 34.112272 17.705649 21
14 1/24/07 37.259998 34.258442 17.709329 22
15 1/25/07 37.080002 34.569051 17.639418 23
16 1/26/07 36.849998 34.861392 17.598945 24
17 1/29/07 37.430000 35.062373 17.554790 27
18 1/30/07 37.049999 35.573966 17.591586 28
19 1/31/07 37.669998 35.975932 17.617342 29
20 2/1/07 38.700001 36.268270 17.698289 30
21 2/2/07 37.389999 36.524068 17.749803 31
22 2/5/07 37.160000 37.072201 17.624701 34
23 2/6/07 38.270000 36.651965 17.679892 35
24 2/7/07 38.980000 36.524068 17.672533 36
25 2/8/07 39.099998 36.834677 17.609983 37
26 2/9/07 38.720001 36.834677 17.573187 38
27 2/12/07 38.849998 36.907761 17.632059 41
28 2/13/07 39.310001 37.254913 17.738764 42
29 2/14/07 40.139999 37.033379 17.617342 43
30 2/15/07 40.060001 36.922611 17.606302 44
31 2/16/07 40.330002 36.848766 17.613662 45
32 2/20/07 41.509998 36.830303 17.580546 49
33 2/21/07 41.259998 36.830303 17.503278 50
34 2/22/07 41.000000 36.590307 17.403931 51
35 2/23/07 40.779999 36.756459 17.389213 52
36 2/26/07 40.880001 36.941071 17.558470 55
37 2/27/07 38.830002 36.221081 17.069099 56
38 2/28/07 39.139999 36.331849 17.175804 57
In [38]:
# Line plot of the AMZN and DUK price columns against row position.
plt.figure(figsize=(12,5))
plt.plot(df["AMZN"], color="mediumorchid", linestyle="--", label='AMAZON')
plt.plot(df["DUK"], color="cyan", linestyle="-", label='DUK')
plt.legend()
# Only the first and last rows (positions 0 and 38) get a tick. The first row
# of the data is dated 1/3/07, so the start label is Jan 3, not Jan 13.
plt.xticks([0, 38], ["Jan 3 2007", "Feb. 28 2007"])
plt.ylabel("Price in USD")
plt.title("Daily stock prices 1/3/2007-2/28/2007: AMZN and DUK")
Out[38]:
Text(0.5, 1.0, 'Daily stock prices 1/13/2007-2/28/2007: AMZN and DUK')

Standardize the data (subtract the mean, divide by the standard deviation) so the two stocks can be compared on the same scale

In [40]:
# Z-score the AMZN and DUK columns on a copy of df; every other column is
# carried over unchanged.
stock_data_standardized = df.copy()
for ticker in ("AMZN", "DUK"):
    prices = df[ticker]
    # ddof=0: divide by the population standard deviation.
    stock_data_standardized[ticker] = (prices - prices.mean()) / prices.std(ddof=0)
In [41]:
# Line plot of the standardized AMZN and DUK columns against row position.
plt.figure(figsize=(12,5))
plt.plot(stock_data_standardized.AMZN, color="mediumorchid", linestyle="--", label='AMZN')
plt.plot(stock_data_standardized.DUK, color="cyan", linestyle="-", label='DUK')
plt.legend()
# Only the first and last rows (positions 0 and 38) get a tick. The first row
# of the data is dated 1/3/07, so the start label is Jan 3, not Jan 13.
plt.xticks([0, 38], ["Jan 3 2007", "Feb. 28 2007"])
# Standardized values are (price - mean) / std, so they carry no USD unit.
plt.ylabel("Standardized price (z-score)")
plt.title("Standardized Daily stock prices 1/3/2007-2/28/2007: AMZN and DUK")
Out[41]:
Text(0.5, 1.0, 'Standardized Daily stock prices 1/13/2007-2/28/2007: AMZN and DUK')

Scatter plots of the raw (left) and standardized (right) stock prices

In [43]:
# Left panel: raw AMZN vs DUK prices.
plt.subplot(1, 2, 1)
plt.scatter(df.AMZN, df.DUK, color="magenta")
# The first row of the data is dated 1/3/07, hence 1/3/2007 in the titles.
plt.title("Scatter plot of AMZN vs DUK stock prices 1/3/2007-2/28/2007")
plt.xlabel("AMZN stock price (USD)")
plt.ylabel("DUK stock price (USD)")

# Right panel: the same pairs after standardization. These values are
# unitless z-scores, so the axes must not be labelled as USD prices.
plt.subplot(1, 2, 2)
plt.scatter(stock_data_standardized.AMZN, stock_data_standardized.DUK, color="blue")
plt.title("Scatter plot of standardized AMZN vs DUK stock prices 1/3/2007-2/28/2007")
plt.xlabel("Standardized AMZN price (z-score)")
plt.ylabel("Standardized DUK price (z-score)")

# Stretch the subplot area to twice the figure width (left=-0.5 .. right=1.5)
# so the two panels are not squeezed together.
# NOTE(review): this presumably relies on the inline backend cropping to the
# drawn content; with other backends the panels may be cut off. Confirm.
plt.subplots_adjust(bottom=0, top=1, left=-0.5, right=1.5)

Print the correlations

In [47]:
# Pairwise correlations (pandas default method) between the numeric columns.
# The string Date column is excluded explicitly: older pandas dropped
# non-numeric columns silently inside corr(), but pandas 2.x raises on them.
# select_dtypes gives the same table on both.
print(df.select_dtypes(include="number").corr())
          AMZN       DUK        KO       Day
AMZN  1.000000  0.680083 -0.421396  0.663338
DUK   0.680083  1.000000 -0.580787  0.845473
KO   -0.421396 -0.580787  1.000000 -0.835988
Day   0.663338  0.845473 -0.835988  1.000000
In [ ]: