The following graphs are the codes for the figures included in Chapter 4 of the e-book Statistics with Python.
import numpy as np import pandas as pd from scipy import stats from sklearn.preprocessing import StandardScaler import yfinance as yf import matplotlib.pyplot as plt import seaborn as sns sns.set_style("darkgrid")
# fig 421 st=pd.Timestamp(2022,12, 1) et=pd.Timestamp(2023, 4, 1) da=yf.download("^DJI", st, et) pop=(da['Close']-da['Open'])/da['Open']*100 pop.columns=['change'] pop.index=pd.DatetimeIndex(pop.index.date) xBar=np.array([]) for i in range(20): x=pop.sample(5, replace=False, random_state=i) xBar=np.append(xBar, x.mean()) xBar2=np.array([]) for i in range(100): x=pop.sample(5, replace=False, random_state=i) xBar2=np.append(xBar2, x.mean()) plt.figure(figsize=(7, 3)) plt.subplot(1,2,1) plt.hist(xBar, 10, rwidth=0.8, color="blue", label="n=20") plt.axvline(-0.18, 0, 0.9, color="red", label=f"m={xBar.mean().round(2)}") plt.xlabel("x\n(a)", weight="bold") plt.ylabel("frequency", weight="bold") plt.legend(loc='upper left') plt.subplot(1,2,2) plt.hist(xBar2, 10, rwidth=0.8, color="green", label="n=100") plt.axvline(0, 0,0.9, color="red", label=f"m={xBar2.mean().round(2)}") plt.xlabel("x\n(b)", weight="bold") plt.legend(loc='best') plt.show()
#fig422 st=pd.Timestamp(2024,4, 1) et=pd.Timestamp(2024, 5, 30) da=yf.download("DX-Y.NYB", st, et)['Close'] da.columns=["D_index"] da.index=range(len(da)) sample=np.array([]) for i in range(1000): x=da.sample(5, random_state=i) sample=np.append(sample, x.mean()) print(sample[:10].round(2)) s=np.reshape(sample,(-1,1)) scaler=StandardScaler().fit(s) s_n=scaler.transform(s) xbar, xs=s_n.mean(), s_n.std() fig, ax=plt.subplots(figsize=(4,3)) ax.hist(s_n, bins=15, rwidth=0.9, color="g", alpha=0.3, label="histogram") ax.set_xlabel("x") ax.set_ylabel("frequency", color="g") ax2=plt.twinx() s_n2=np.sort(s_n, axis=0) ax2.plot(s_n2, stats.norm.pdf(s_n2), color="b", label="N(0, 1)") ax2.set_ylabel("pdf", color="b") ax.legend(loc=(0.6, 0.8), frameon=False) ax2.legend(loc=(0.6, 0.7), frameon=False) plt.show()
#fig431 x=np.linspace(-3, 3, 1000) plt.figure(figsize=(4,3)) plt.plot(x, stats.norm.pdf(x), color="g", label="N(0,1)") x1=np.linspace(-1.96, 1.96, 100) plt.fill_between(x1, stats.norm.pdf(x1), color="brown", alpha=0.3, label="p=0.95") x2=np.linspace(-3, -1.96, 50) plt.fill_between(x2, stats.norm.pdf(x2), color="blue", alpha=0.3, label=r"$\frac{\alpha}{2}$=0.025") x3=np.linspace(1.96, 3, 50) plt.fill_between(x3, stats.norm.pdf(x3), color="blue", alpha=0.3) plt.text(-0.8, 0.15,"confidence\n Interval\n"+r"1-$\alpha$=0.95", color="r") plt.legend(loc="upper right", frameon=False) plt.xlabel("x") plt.ylabel("PDF") plt.show()
#fig 442 st=pd.Timestamp(2024, 2, 1) et=pd.Timestamp(2024, 5, 30) da=yf.download('^IXIC', st, et)["Close"] da=da.dropna() da1=da.iloc[:,0].pct_change()[1:]*100 da1.index=range(len(da1)) sample=np.array([da1.sample(n=10, replace=False, random_state=i).mean() for i in range(1000)]) scal=StandardScaler().fit(sample.reshape(-1,1)) d=scal.transform(sample.reshape(-1,1)) fig, ax=plt.subplots(figsize=(4, 3)) ax.hist(d, bins=15, color="g", alpha=0.3, label="histogram") ax.set_xlabel("d") ax.set_ylabel("frequency", color="g") ax2=ax.twinx() d1=np.sort(d, axis=0) ax2.plot(d1, stats.norm.pdf(d1), color="b", label="N(0, 1)") ax2.set_ylabel("pdf", color="b") ax.legend(loc=(0.6, 0.8), frameon=False) ax2.legend(loc=(0.6, 0.73), frameon=False) plt.show()
#fig443, fig 442 데이터 적용 mu=da1.mean() std=da1.std() CI=stats.norm.interval(0.95) mu_pop1=scal.transform(np.array(mu).reshape(-1,1)) CP_one=stats.norm.ppf(1-0.05) fig, (ax1,ax3)=plt.subplots(nrows=1, ncols=2, figsize=(8, 3)) d2=np.sort(np.ravel(d)) p=stats.norm.pdf(d2) x_two=d2[np.where((d2>=CI[0] )&(d2<=CI[1]))[0]] p_two=stats.norm.pdf(x_two) x_one=d2[np.where(d2<=CP_one)[0]] p_one=stats.norm.pdf(x_one) ax1.plot(d2, p, color="g", label="N(0,1)") ax1.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$") ax1.fill_between(x_two, p_two, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$") ax1.set_xlabel("x\n(a) two side") ax1.set_ylabel("pdf") ax1.legend(loc="upper left", frameon=False) ax1.text(-1, 0.1, f"{np.round(CI, 2)}", color="brown") ax3.plot(d2, p, color="g", label=f"N(0, 1)") ax3.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$") ax3.fill_between(x_one, p_one, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$") ax3.set_xlabel("x\n(b) one side") ax3.legend(loc="upper left", frameon=False) ax3.text(-0.6, 0.1, f"(oo, {round(CP_one,2)}]", color="brown") plt.show()
댓글
댓글 쓰기