The following graphs are the codes for the figures included in Chapter 4 of the e-book Statistics with Python.
import numpy as np 
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
# fig 421
st=pd.Timestamp(2022,12, 1)
et=pd.Timestamp(2023, 4, 1)
da=yf.download("^DJI", st, et)
pop=(da['Close']-da['Open'])/da['Open']*100
pop.columns=['change']
pop.index=pd.DatetimeIndex(pop.index.date)
xBar=np.array([])
for i in range(20):
    x=pop.sample(5, replace=False, random_state=i)
    xBar=np.append(xBar, x.mean())
xBar2=np.array([])
for i in range(100):
    x=pop.sample(5, replace=False, random_state=i)
    xBar2=np.append(xBar2, x.mean())
plt.figure(figsize=(7, 3))
plt.subplot(1,2,1)
plt.hist(xBar, 10, rwidth=0.8, color="blue", label="n=20")
plt.axvline(-0.18, 0, 0.9, color="red", label=f"m={xBar.mean().round(2)}")
plt.xlabel("x\n(a)", weight="bold")
plt.ylabel("frequency", weight="bold")
plt.legend(loc='upper left')
plt.subplot(1,2,2)
plt.hist(xBar2, 10, rwidth=0.8, color="green", label="n=100")
plt.axvline(0, 0,0.9, color="red", label=f"m={xBar2.mean().round(2)}")
plt.xlabel("x\n(b)", weight="bold")
plt.legend(loc='best')
plt.show()
#fig422  
st=pd.Timestamp(2024,4, 1)
et=pd.Timestamp(2024, 5, 30)
da=yf.download("DX-Y.NYB", st, et)['Close']
da.columns=["D_index"]
da.index=range(len(da))
sample=np.array([])
for i in range(1000):
    x=da.sample(5, random_state=i)
    sample=np.append(sample, x.mean())
print(sample[:10].round(2))
s=np.reshape(sample,(-1,1))
scaler=StandardScaler().fit(s)
s_n=scaler.transform(s)
xbar, xs=s_n.mean(), s_n.std()
fig, ax=plt.subplots(figsize=(4,3))
ax.hist(s_n, bins=15, rwidth=0.9, color="g", alpha=0.3, label="histogram")
ax.set_xlabel("x")
ax.set_ylabel("frequency", color="g")
ax2=plt.twinx()
s_n2=np.sort(s_n, axis=0)
ax2.plot(s_n2, stats.norm.pdf(s_n2), color="b", label="N(0, 1)")
ax2.set_ylabel("pdf", color="b")
ax.legend(loc=(0.6, 0.8), frameon=False)
ax2.legend(loc=(0.6, 0.7), frameon=False)
plt.show()
#fig431
x=np.linspace(-3, 3, 1000)
plt.figure(figsize=(4,3))
plt.plot(x, stats.norm.pdf(x), color="g", label="N(0,1)")
x1=np.linspace(-1.96, 1.96, 100)
plt.fill_between(x1, stats.norm.pdf(x1), color="brown", alpha=0.3, label="p=0.95")
x2=np.linspace(-3, -1.96, 50)
plt.fill_between(x2, stats.norm.pdf(x2), color="blue", alpha=0.3, label=r"$\frac{\alpha}{2}$=0.025")
x3=np.linspace(1.96, 3, 50)
plt.fill_between(x3, stats.norm.pdf(x3), color="blue", alpha=0.3)
plt.text(-0.8, 0.15,"confidence\n Interval\n"+r"1-$\alpha$=0.95", color="r")
plt.legend(loc="upper right", frameon=False)
plt.xlabel("x")
plt.ylabel("PDF")
plt.show()
#fig 442
st=pd.Timestamp(2024, 2, 1)
et=pd.Timestamp(2024, 5, 30)
da=yf.download('^IXIC', st, et)["Close"]
da=da.dropna()
da1=da.iloc[:,0].pct_change()[1:]*100
da1.index=range(len(da1))
sample=np.array([da1.sample(n=10, replace=False, random_state=i).mean() for i in range(1000)])
scal=StandardScaler().fit(sample.reshape(-1,1))
d=scal.transform(sample.reshape(-1,1))
fig, ax=plt.subplots(figsize=(4, 3))
ax.hist(d, bins=15, color="g", alpha=0.3, label="histogram")
ax.set_xlabel("d")
ax.set_ylabel("frequency", color="g")
ax2=ax.twinx()
d1=np.sort(d, axis=0)
ax2.plot(d1, stats.norm.pdf(d1), color="b", label="N(0, 1)")
ax2.set_ylabel("pdf", color="b")
ax.legend(loc=(0.6, 0.8), frameon=False)
ax2.legend(loc=(0.6, 0.73), frameon=False)
plt.show()
#fig443, fig 442 데이터 적용
mu=da1.mean()
std=da1.std()
CI=stats.norm.interval(0.95)
mu_pop1=scal.transform(np.array(mu).reshape(-1,1))
CP_one=stats.norm.ppf(1-0.05)
fig, (ax1,ax3)=plt.subplots(nrows=1, ncols=2, figsize=(8, 3))
d2=np.sort(np.ravel(d))
p=stats.norm.pdf(d2)
x_two=d2[np.where((d2>=CI[0] )&(d2<=CI[1]))[0]]
p_two=stats.norm.pdf(x_two)
x_one=d2[np.where(d2<=CP_one)[0]]
p_one=stats.norm.pdf(x_one)
ax1.plot(d2, p, color="g", label="N(0,1)")
ax1.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$")
ax1.fill_between(x_two, p_two, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$")
ax1.set_xlabel("x\n(a) two side")
ax1.set_ylabel("pdf")
ax1.legend(loc="upper left", frameon=False)
ax1.text(-1, 0.1, f"{np.round(CI, 2)}", color="brown")
ax3.plot(d2, p, color="g", label=f"N(0, 1)")
ax3.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$")
ax3.fill_between(x_one, p_one, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$")
ax3.set_xlabel("x\n(b) one side")
ax3.legend(loc="upper left", frameon=False)
ax3.text(-0.6, 0.1, f"(oo, {round(CP_one,2)}]", color="brown")
plt.show()





댓글
댓글 쓰기