The following graphs are the codes for the figures included in Chapter 5 of the e-book Statistics with Python.
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
#fig511
st=pd.Timestamp(2024, 4,20)
et=pd.Timestamp(2024, 5, 30)
da1=yf.download('GOOGL', st, et)["Close"]
da2=yf.download('MSFT', st, et)["Close"]
da1=da1.iloc[:,0].pct_change()[1:]*100
da2=da2.iloc[:,0].pct_change()[1:]*100
da=pd.DataFrame([da1, da2], index=['data1', 'data2']).T
da.index=range(len(da1))
mu1, sd1, n1=np.mean(da1), np.std(da1, ddof=1), len(da1)
mu2, sd2, n2=np.mean(da2), np.std(da2, ddof=1), len(da2)
s_p=np.sqrt(((n1-1)*sd1**2+(n2-1)*sd2**2)/(n1+n2-2))
se=s_p*np.sqrt((1/n1+1/n2))
df=n1+n2-2
mu=mu1-mu2
testStatic=((mu1-mu2)-0)/se
x=np.linspace(-3, 3, 500)
p=stats.t.pdf(x, df)
l=stats.t.ppf(0.025, df)
u=stats.t.ppf(0.975, df)
idx=np.where((x>l)& (x<u))[0]
idx2=np.where(x>testStatic)[0]
f, ax=plt.subplots(1,2, figsize=(8, 3), sharey=True)
ax[0].plot(x, p, color="g", label=f"t({df})")
ax[0].fill_between(x[idx], stats.t.pdf(x[idx], df), color="brown", alpha=0.3, label="Conf. Int.")
ax[0].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic")
ax[0].legend(loc='upper right', prop={'size':8})
ax[0].set_xlabel("x")
ax[0].set_ylabel("pdf")
ax[0].set_title("(a) Confidence Interval")
ax[1].plot(x, p, color="g", label=f"t({df})")
ax[1].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic")
ax[1].fill_between(x[idx2], stats.t.pdf(x[idx2], df), color="b", alpha=0.2, label=r"$\frac{p-value}{2}$")
ax[1].legend(loc='upper right', prop={'size':8})
ax[1].set_xlabel("x")
ax[1].set_title("(b) p_value")
plt.show()
#fig 521
np.random.seed(0)
x=np.linspace(0, 10, 100)
y1=x+np.random.rand(100)*5
y2=-x+np.random.rand(100)*5
y3=np.random.rand(100)*10
plt.figure(figsize=(9, 4))
col=["blue","red","green"]
lab=["a) direct","b) inverse", "c) no"]
yT=[y1, y2, y3]
for i in range(3):
plt.subplot(1,3,i+1)
plt.scatter(x, yT[i], s=15, color=col[i])
plt.title(f"{lab[i]} proportion", fontsize=15)
plt.xticks([])
plt.yticks([])
plt.xlabel("x")
if i==0:
plt.ylabel("y")
plt.show()
#fig523
st=pd.Timestamp(2023,1,1)
et=pd.Timestamp(2024, 5, 30)
vix=yf.download('^vix', st, et)["Close"].dropna()
ue=yf.download('EUR=X', st, et)["Close"].dropna()
data=pd.concat([ue, vix.shift(periods=-1)], join="inner", axis=1)
data.index=range(len(data))
data.columns=['ue', 'vix']
data=data.dropna()
scaler=StandardScaler().fit(data)
da=scaler.transform(data)
plt.figure(figsize=(4,3))
sns.scatterplot(x=da[:,0], y=da[:,1], s=30)
plt.xlabel("ue")
plt.ylabel("vix")
plt.show()
#fig 531
x=np.linspace(-3, 3,1000)
pdf=stats.norm.pdf(x)
cdf=stats.norm.cdf(x)
q=np.linspace(0, 1, 1000)
ppf=stats.norm.ppf(q)
plt.figure(figsize=(7,3))
plt.subplots_adjust(wspace=0.3)
plt.subplot(1,2,1)
plt.plot(x, pdf, color="blue", label="PDF")
plt.plot(x, cdf, color="red", label="CDF")
plt.xlabel("x (a)")
plt.ylabel("Prob.", rotation="horizontal", labelpad=10)
plt.legend(loc="best")
plt.title("(a)", loc="left")
plt.subplot(1,2,2)
plt.plot(q, ppf, color="green", label="Inverse CDF")
plt.xlabel("Prob.(q)")
plt.ylabel("I(q)", rotation="horizontal")
plt.legend(loc="best")
plt.title("(b)", loc="left")
plt.show()
#fig532
np.random.seed(1)
x=np.random.rand(1000)
x2=stats.norm.rvs(size=1000, random_state=3)
plt.figure(figsize=(6,3))
ax1=plt.subplot(1,2,1)
f1=stats.probplot(x, plot=plt, rvalue=True)
ax1.set_title("(a) Q-Q plot: random")
ax2=plt.subplot(1,2,2)
f2=stats.probplot(x2, plot=plt, rvalue=True)
ax2.set_ylabel("")
ax2.set_title("(b) Q-Q plot: normal random")
plt.show()
#fig 533, fig 532 데이터 적용
fig, (ax1, ax2)= plt.subplots(nrows=1, ncols=2, figsize=(7, 3))
ax1.hist(x, bins=30, rwidth=0.8, color="b")
ax1.set_xlabel("x (a) random", loc="right", size="12")
ax1.set_ylabel("Frequency", size="12")
ax2.hist(x2, bins=30, rwidth=0.8, color="g")
ax2.set_xlabel("x (b)normal random", loc="right", size="12")
plt.show()
#fig534
st=pd.Timestamp(2024,1,1)
et=pd.Timestamp(2024, 5, 30)
nd=yf.download("^IXIC",st, et)["Close"]
ex=yf.download("EUR=X", st, et)["Close"]
nd1=(nd-nd.mean())/nd.std()
ex1=(ex-ex.mean())/ex.std()
plt.figure(figsize=(7, 3))
ax1 = plt.subplot(121)
res = stats.probplot(np.ravel(nd1.values), plot=plt)
ax1.set_title("Q-Q plot: nd")
ax2 = plt.subplot(122)
res = stats.probplot(np.ravel(ex1.values), plot=plt)
ax2.set_title("Q-Q plot: ex")
ax2.set_ylabel("")
plt.show()
#fig536
st=pd.Timestamp(2024,1,1)
et=pd.Timestamp(2024, 5, 30)
s=yf.download("^SPX",st, et)["Close"].dropna()
ex=yf.download("EUR=X", st, et)["Close"].dropna()
s1=(s-s.mean())/s.std()
ex1=(ex-ex.mean())/ex.std()
plt.figure(figsize=(7, 3))
ax1 = plt.subplot(121)
res = stats.probplot(s1.iloc[:,0], plot=plt, rvalue=True)
ax1.set_title("Q-Q plot: S&P")
ax2 = plt.subplot(122)
res = stats.probplot(ex1.iloc[:,0], plot=plt, rvalue=True)
ax2.set_title("Q-Q plot: USD/EUR")
ax2.set_ylabel("")
plt.show()
#fig 537
np.random.seed(3)
N=100
da=np.sort(np.random.randn(N))
ecdf=[i/N for i in range(1, N+1)]
nCdf=stats.norm.cdf(da)
plt.figure(figsize=(3,2))
plt.plot(da, ecdf, color="blue", label="ECDF")
plt.plot(da, nCdf, color="red", label="normCDF")
plt.legend(loc="best")
plt.xlabel("x", weight="bold")
plt.ylabel("probility", weight="bold")
plt.show()
#fig538, Apply fig 536 data
s2=np.sort(s1.iloc[:,0])
ex2=np.sort(ex1.dropna().iloc[:,0])
n_s=len(s2)
n_ex=len(ex2)
ecdf_s=[i/n_s for i in range(1, n_s+1)]
ncdf_s=stats.norm.cdf(s2)
ecdf_ex=[i/n_ex for i in range(1, n_ex+1)]
ncdf_ex=stats.norm.cdf(ex2)
plt.figure(figsize=(7,3))
plt.subplot(1,2,1)
plt.plot(s2, ecdf_s, color="blue", label="ECDF")
plt.plot(s2, ncdf_s, color="red", label="NCDF")
plt.legend(loc="best")
plt.title("S&P", weight="bold")
plt.xlabel("x", weight="bold")
plt.ylabel("Prob.", weight="bold")
plt.subplot(1,2,2)
plt.plot(ex2, ecdf_ex, color="blue", label="ECDF")
plt.plot(ex2, ncdf_ex, color="red", label="NCDF")
plt.legend(loc="best")
plt.title("USD/EUR", weight="bold")
plt.xlabel("x", weight="bold")
plt.show()






댓글
댓글 쓰기