다음은 전자책 「파이썬과 함께하는 통계이야기」 4장에 수록된 그림들을 그리는 코드입니다.
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler
import FinanceDataReader as fdr
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
# fig 421
# Sampling distribution of the mean: repeatedly draw 5 daily returns from the
# "KS11" series and compare the histogram of 20 sample means (a) with the
# histogram of 100 sample means (b).
st = pd.Timestamp(2022, 12, 1)
et = pd.Timestamp(2023, 4, 1)
da = fdr.DataReader("KS11", st, et)
# Daily open-to-close change in percent; used as the "population" below.
pop = (da['Close'] - da['Open']) / da['Open'] * 100
np.random.seed(1)
# One sample mean per seed. random_state=i makes every draw reproducible, so
# the first 20 entries of xBar2 coincide with xBar.
xBar = np.array([pop.sample(5, random_state=i).mean() for i in range(20)])
xBar2 = np.array([pop.sample(5, random_state=i).mean() for i in range(100)])

fig, ax = plt.subplots(1, 2, figsize=(8, 3))
# (a) 20 sample means, with their average marked by the red vertical line.
ax[0].hist(xBar, 10, rwidth=0.8, color="blue", label="n=20")
ax[0].axvline(xBar.mean(), 0, 0.9, color="red", label=f"m={round(xBar.mean(), 2)}")
ax[0].set_xlabel("x", weight="bold")
ax[0].set_ylabel("frequency", weight="bold")
ax[0].set_title("(a)", loc="right")
ax[0].legend(loc='best', frameon=False)
# (b) 100 sample means, same layout.
ax[1].hist(xBar2, 10, rwidth=0.8, color="green", label="n=100")
ax[1].axvline(xBar2.mean(), 0, 0.9, color="red", label=f"m={round(xBar2.mean(),2)}")
ax[1].set_xlabel("x", weight="bold")
ax[1].set_title("(b)", loc="right")
ax[1].legend(loc='best', frameon=False)
plt.show()
# fig 422
# Histogram of 1000 standardized sample means (5 closing prices per sample)
# overlaid with the standard normal pdf on a secondary y-axis.
st = pd.Timestamp(2024, 4, 1)
et = pd.Timestamp(2024, 5, 30)
da = yf.download("KRW=X", st, et)['Close']
# One sample mean per seed, collected into a flat 1-D array. np.ravel accepts
# both a scalar mean (when `da` is a Series) and a length-1 Series mean (when
# `da` is a one-column DataFrame), which is the flattening np.append performed.
sample = np.concatenate(
    [np.ravel(da.sample(5, random_state=i).mean()) for i in range(1000)]
)
# StandardScaler expects a 2-D (n_samples, n_features) array.
s = sample.reshape(-1, 1)
scaler = StandardScaler().fit(s)
s_n = scaler.transform(s)
# Mean and standard deviation of the standardized means; not used by the plot.
xbar, xs = s_n.mean(), s_n.std()

fig, ax = plt.subplots(figsize=(4, 3))
ax.hist(s_n, bins=15, rwidth=0.9, color="g", alpha=0.3, label="histogram")
ax.set_xlabel("x")
ax.set_ylabel("frequency", color="g")
# Secondary y-axis for the density, created from `ax` as in fig 442.
ax2 = ax.twinx()
# Sort so the pdf is drawn as a single left-to-right curve.
s_n2 = np.sort(s_n, axis=0)
ax2.plot(s_n2, stats.norm.pdf(s_n2), color="b", label="N(0, 1)")
ax2.set_ylabel("pdf", color="b")
# Each axes has its own legend; the second is offset so they do not overlap.
ax.legend(loc=(0.6, 0.8), frameon=False)
ax2.legend(loc=(0.6, 0.7), frameon=False)
plt.show()
# fig 431
# Standard normal pdf on [-3, 3] with the central region between -1.96 and
# 1.96 and both outer tails shaded.
x = np.linspace(-3, 3, 1000)
x1 = np.linspace(-1.96, 1.96, 100)   # central region
x2 = np.linspace(-3, -1.96, 50)      # lower tail
x3 = np.linspace(1.96, 3, 50)        # upper tail

plt.figure(figsize=(4, 3))
plt.plot(x, stats.norm.pdf(x), color="g", label="N(0,1)")
plt.fill_between(
    x1, stats.norm.pdf(x1),
    color="brown", alpha=0.3, label="p=0.95",
)
# Only the lower tail carries a legend label, so the legend shows one entry
# for the two identically coloured tails.
plt.fill_between(
    x2, stats.norm.pdf(x2),
    color="blue", alpha=0.3, label=r"$\frac{\alpha}{2}$=0.025",
)
plt.fill_between(x3, stats.norm.pdf(x3), color="blue", alpha=0.3)
plt.text(
    -0.8, 0.15,
    "confidence\n Interval\n" + r"1-$\alpha$=0.95",
    color="r",
)
plt.legend(loc="upper right", frameon=False)
plt.xlabel("x")
plt.ylabel("PDF")
plt.show()
# fig 441
# Negative binomial pmf NB(10, 0.53) on k = 0..49 with a central band
# (k = 2..18) and two outer bands (k = 0..2 and k = 18..49) shaded, plus a
# dashed marker at k = 30.
r, p = 10, 0.53
x = range(0, 50)     # full support shown in the plot
x2 = range(2, 19)    # central band
x3 = range(18, 50)   # upper band
x4 = range(0, 3)     # lower band
# pmf evaluated on each of the four ranges, in the same order.
p1, p2, p3, p4 = (stats.nbinom.pmf(k, r, p) for k in (x, x2, x3, x4))

plt.figure(figsize=(4, 3))
plt.plot(x, p1, color="g", label="NB(10, 0.53)")
plt.fill_between(x2, p2, color="brown", alpha=0.3, label=r"1-$\alpha$=0.95")
# Only the upper band is labelled, so both blue bands share one legend entry.
plt.fill_between(x3, p3, color="b", alpha=0.3, label=r"$\frac{\alpha}{2}$=0.025")
plt.fill_between(x4, p4, color="b", alpha=0.3)
plt.vlines(30, 0, 0.02, ls="--", color="r", label="p-value")
plt.legend(loc="best")
# Text annotations naming the shaded regions.
plt.text(25, 0.01, "critical range(cr)", color="b")
plt.text(-2, 0.01, "cr", color="b")
plt.text(3, 0.02, "Confidence\nInterval ", color="brown")
plt.show()
# fig 442
# Histogram of 1000 standardized sample means (10 daily percentage changes
# per sample) overlaid with the standard normal pdf on a secondary y-axis.
# da1, scal and d are read again by the fig 443 section.
st = pd.Timestamp(2023, 12, 1)
et = pd.Timestamp(2024, 5, 30)
da = fdr.DataReader('091160', st, et)["Close"]
# Day-over-day change in percent; the first row is dropped because
# pct_change() has no previous value for it.
da1 = da.pct_change()[1:] * 100
# Replace the original index with plain positions 0..len-1.
da1.index = range(len(da1))
# One mean (rounded to 3 decimals) per seed, each from 10 values drawn
# without replacement.
sample = np.array(
    [
        round(da1.sample(n=10, replace=False, random_state=i).mean(), 3)
        for i in range(1000)
    ]
)
# StandardScaler expects a 2-D (n_samples, n_features) array.
scal = StandardScaler()
scal.fit(sample.reshape(-1, 1))
d = scal.transform(sample.reshape(-1, 1))

fig, ax = plt.subplots(figsize=(4, 3))
ax.hist(d, bins=15, color="g", alpha=0.3, label="histogram")
ax.set_xlabel("d")
ax.set_ylabel("frequency", color="g")
# Secondary y-axis for the density curve.
ax2 = ax.twinx()
# Sort so the pdf is drawn as a single left-to-right curve.
d1 = np.sort(d, axis=0)
ax2.plot(d1, stats.norm.pdf(d1), color="b", label="N(0, 1)")
ax2.set_ylabel("pdf", color="b")
# Each axes has its own legend; the second is offset so they do not overlap.
ax.legend(loc=(0.6, 0.8), frameon=False)
ax2.legend(loc=(0.6, 0.73), frameon=False)
plt.show()
# fig 443 (reuses da1, scal and d from the fig 442 section)
# Two-sided vs. one-sided 95% regions of N(0, 1), drawn over the standardized
# sample means, with the standardized mean of da1 marked as a dashed line.
mu = da1.mean()
std = da1.std()
# Two-sided 95% interval of the standard normal, as a (lower, upper) pair.
CI = stats.norm.interval(0.95)
# Express the mean of da1 on the standardized scale fitted in fig 442.
mu_pop1 = scal.transform(np.array(mu).reshape(-1, 1))
# Upper bound of the one-sided 95% region.
CP_one = stats.norm.ppf(1 - 0.05)

fig, (ax1, ax3) = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))
# Flatten and sort the standardized means so curves are drawn left to right.
d2 = np.sort(np.ravel(d))
p = stats.norm.pdf(d2)
# Points inside the two-sided interval, selected with a boolean mask.
x_two = d2[(d2 >= CI[0]) & (d2 <= CI[1])]
p_two = stats.norm.pdf(x_two)
# Points inside the one-sided region (everything up to CP_one).
x_one = d2[d2 <= CP_one]
p_one = stats.norm.pdf(x_one)

# (a) two-sided region
ax1.plot(d2, p, color="g", label="N(0,1)")
ax1.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$")
ax1.fill_between(x_two, p_two, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$")
ax1.set_xlabel("x\n(a) two side")
ax1.set_ylabel("pdf")
ax1.legend(loc="upper left", frameon=False)
ax1.text(-1, 0.1, f"{np.round(CI, 2)}", color="brown")

# (b) one-sided region
ax3.plot(d2, p, color="g", label="N(0, 1)")
ax3.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$")
ax3.fill_between(x_one, p_one, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$")
ax3.set_xlabel("x\n(b) one side")
ax3.legend(loc="upper left", frameon=False)
# The one-sided region is unbounded below, so its lower end is minus infinity
# (the previous label "(oo, ...]" had lost the minus sign).
ax3.text(-0.6, 0.1, rf"($-\infty$, {round(CP_one,2)}]", color="brown")
plt.show()






댓글
댓글 쓰기