다음은 전자책 『파이썬과 함께하는 통계이야기』 4장에 수록된 그림들을 그리는 코드입니다.
# Numerical and statistical stack.
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler

# Market-data sources.
import FinanceDataReader as fdr
import yfinance as yf

# Plotting.
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's "darkgrid" theme for every figure drawn below.
sns.set_style("darkgrid")
# fig 421: histograms of the means of repeated size-5 samples drawn from the
# daily open-to-close percentage change of "KS11", for 20 and for 100 samples.
st = pd.Timestamp(2022, 12, 1)
et = pd.Timestamp(2023, 4, 1)
da = fdr.DataReader("KS11", st, et)
# Population: (Close - Open) / Open of each row, expressed in percent.
pop = (da["Close"] - da["Open"]) / da["Open"] * 100
np.random.seed(1)


def _sample_means(n_draws):
    """Return the means of ``n_draws`` size-5 samples of ``pop``.

    Draw number ``k`` uses ``random_state=k``, so the result is reproducible.
    """
    return np.array(
        [pop.sample(5, random_state=seed).mean() for seed in range(n_draws)]
    )


xBar = _sample_means(20)
xBar2 = _sample_means(100)

fig, ax = plt.subplots(1, 2, figsize=(8, 3))
# (axes, sample means, bar colour, legend label, panel title)
panels = (
    (ax[0], xBar, "blue", "n=20", "(a)"),
    (ax[1], xBar2, "green", "n=100", "(b)"),
)
for axis, means, colour, tag, title in panels:
    axis.hist(means, 10, rwidth=0.8, color=colour, label=tag)
    # Red line marks the mean of the sample means.
    axis.axvline(
        means.mean(), 0, 0.9, color="red", label=f"m={round(means.mean(), 2)}"
    )
    axis.set_xlabel("x", weight="bold")
    axis.set_title(title, loc="right")
    axis.legend(loc="best", frameon=False)
# Only the left panel carries the y-axis label.
ax[0].set_ylabel("frequency", weight="bold")
plt.show()
# fig 422: standardized means of 1000 size-5 samples of the "KRW=X" close,
# drawn as a histogram with the N(0, 1) density on a twin y-axis.
st = pd.Timestamp(2024, 4, 1)
et = pd.Timestamp(2024, 5, 30)
da = yf.download("KRW=X", st, et)["Close"]

# One sample mean per seed; ravel keeps the collected means one-dimensional.
sample = np.ravel(
    [da.sample(5, random_state=seed).mean() for seed in range(1000)]
)

# StandardScaler expects a 2-D column, hence the reshape.
s = sample.reshape(-1, 1)
scaler = StandardScaler().fit(s)
s_n = scaler.transform(s)
xbar, xs = s_n.mean(), s_n.std()
# Sorted copy so the density is drawn as a single left-to-right curve.
s_n2 = np.sort(s_n, axis=0)

fig, ax = plt.subplots(figsize=(4, 3))
ax2 = ax.twinx()

ax.hist(s_n, bins=15, rwidth=0.9, color="g", alpha=0.3, label="histogram")
ax.set_xlabel("x")
ax.set_ylabel("frequency", color="g")

ax2.plot(s_n2, stats.norm.pdf(s_n2), color="b", label="N(0, 1)")
ax2.set_ylabel("pdf", color="b")

# Each axes has its own legend; the two are stacked by hand-placed anchors.
ax.legend(loc=(0.6, 0.8), frameon=False)
ax2.legend(loc=(0.6, 0.7), frameon=False)
plt.show()
# fig 431: standard normal density with the band between -1.96 and 1.96 and
# the two outer tails shaded.
plt.figure(figsize=(4, 3))

x = np.linspace(-3, 3, 1000)
plt.plot(x, stats.norm.pdf(x), color="g", label="N(0,1)")

x1 = np.linspace(-1.96, 1.96, 100)  # central band
x2 = np.linspace(-3, -1.96, 50)  # left tail
x3 = np.linspace(1.96, 3, 50)  # right tail
# Only one tail gets a legend entry so the alpha/2 label is not duplicated.
shaded = (
    (x1, {"color": "brown", "label": "p=0.95"}),
    (x2, {"color": "blue", "label": r"$\frac{\alpha}{2}$=0.025"}),
    (x3, {"color": "blue"}),
)
for grid, style in shaded:
    plt.fill_between(grid, stats.norm.pdf(grid), alpha=0.3, **style)

note = "confidence\n Interval\n" + r"1-$\alpha$=0.95"
plt.text(-0.8, 0.15, note, color="r")
plt.legend(loc="upper right", frameon=False)
plt.xlabel("x")
plt.ylabel("PDF")
plt.show()
# fig 441: pmf of the negative binomial NB(10, 0.53) with a central band and
# both tails shaded, plus a dashed marker at x=30.
r, p = 10, 0.53


def _nb_pmf(counts):
    """Evaluate the NB(r, p) probability mass function at ``counts``."""
    return stats.nbinom.pmf(counts, r, p)


x = range(50)  # full support that is drawn
p1 = _nb_pmf(x)
x2 = range(2, 19)  # central band
p2 = _nb_pmf(x2)
x3 = range(18, 50)  # upper tail
p3 = _nb_pmf(x3)
x4 = range(3)  # lower tail
p4 = _nb_pmf(x4)

plt.figure(figsize=(4, 3))
plt.plot(x, p1, color="g", label="NB(10, 0.53)")
plt.fill_between(x2, p2, color="brown", alpha=0.3, label=r"1-$\alpha$=0.95")
plt.fill_between(
    x3, p3, color="b", alpha=0.3, label=r"$\frac{\alpha}{2}$=0.025"
)
# The lower tail shares the upper tail's legend entry, so it has no label.
plt.fill_between(x4, p4, color="b", alpha=0.3)
plt.vlines(30, 0, 0.02, ls="--", color="r", label="p-value")
plt.legend(loc="best")

# (x, y, caption, colour) of the free-text annotations.
annotations = (
    (25, 0.01, "critical range(cr)", "b"),
    (-2, 0.01, "cr", "b"),
    (3, 0.02, "Confidence\nInterval ", "brown"),
)
for x_pos, y_pos, caption, colour in annotations:
    plt.text(x_pos, y_pos, caption, color=colour)
plt.show()
# fig 442: standardized means of 1000 size-10 samples of the daily percentage
# change of '091160', drawn as a histogram with the N(0, 1) density on a twin
# y-axis. da1, scal and d are reused by fig 443.
st = pd.Timestamp(2023, 12, 1)
et = pd.Timestamp(2024, 5, 30)
da = fdr.DataReader("091160", st, et)["Close"]

# Percentage change between consecutive closes; the first entry is dropped
# because it has no preceding value to compare against.
da1 = da.pct_change().iloc[1:] * 100
da1.index = range(len(da1))


def _rounded_sample_mean(seed):
    """Return the mean of one size-10 sample of ``da1``, rounded to 3 places.

    The sample is drawn without replacement using ``random_state=seed``.
    """
    drawn = da1.sample(n=10, replace=False, random_state=seed)
    return round(drawn.mean(), 3)


sample = np.array([_rounded_sample_mean(seed) for seed in range(1000)])

# StandardScaler expects a 2-D column, hence the reshape.
column = sample.reshape(-1, 1)
scal = StandardScaler().fit(column)
d = scal.transform(column)
# Sorted copy so the density is drawn as a single left-to-right curve.
d1 = np.sort(d, axis=0)

fig, ax = plt.subplots(figsize=(4, 3))
ax2 = ax.twinx()

ax.hist(d, bins=15, color="g", alpha=0.3, label="histogram")
ax.set_xlabel("d")
ax.set_ylabel("frequency", color="g")

ax2.plot(d1, stats.norm.pdf(d1), color="b", label="N(0, 1)")
ax2.set_ylabel("pdf", color="b")

# Each axes has its own legend; the two are stacked by hand-placed anchors.
ax.legend(loc=(0.6, 0.8), frameon=False)
ax2.legend(loc=(0.6, 0.73), frameon=False)
plt.show()
# fig 443 (uses da1, scal and d produced by the fig 442 code): the N(0, 1)
# density over the standardized sample means, with (a) the two-sided and
# (b) the one-sided 95% region shaded and the standardized mean of da1 marked.
mu = da1.mean()
std = da1.std()
# Two-sided 95% bounds of N(0, 1).
CI = stats.norm.interval(0.95)
# Mean of da1 expressed on the same standardized scale as d.
mu_pop1 = scal.transform(np.array(mu).reshape(-1, 1))
# Upper bound of the one-sided 95% region of N(0, 1).
CP_one = stats.norm.ppf(1 - 0.05)

d2 = np.sort(np.ravel(d))
p = stats.norm.pdf(d2)
# Boolean masks select the points that fall inside each region.
x_two = d2[(d2 >= CI[0]) & (d2 <= CI[1])]
p_two = stats.norm.pdf(x_two)
x_one = d2[d2 <= CP_one]
p_one = stats.norm.pdf(x_one)

fig, (ax1, ax3) = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))

# (a) two-sided region
ax1.plot(d2, p, color="g", label="N(0,1)")
ax1.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$")
ax1.fill_between(x_two, p_two, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$")
ax1.set_xlabel("x\n(a) two side")
ax1.set_ylabel("pdf")
ax1.legend(loc="upper left", frameon=False)
ax1.text(-1, 0.1, f"{np.round(CI, 2)}", color="brown")

# (b) one-sided region
ax3.plot(d2, p, color="g", label="N(0, 1)")
ax3.vlines(mu_pop1, 0, 0.4, ls="--", color="r", label=r"$\mu_{pop}$")
ax3.fill_between(x_one, p_one, color="brown", alpha=0.3, label=r"CI, 1-$\alpha$")
ax3.set_xlabel("x\n(b) one side")
ax3.legend(loc="upper left", frameon=False)
# The shaded region is everything at or below CP_one, so its lower end is
# minus infinity; the label previously omitted the minus sign.
ax3.text(-0.6, 0.1, f"(-oo, {round(CP_one, 2)}]", color="brown")
plt.show()
댓글
댓글 쓰기