다음 그래프들은 전자책 파이썬과 함께하는 통계이야기 5 장에 수록된 그림들의 코드들입니다.
import numpy as np import pandas as pd from scipy import stats from sklearn.preprocessing import StandardScaler import FinanceDataReader as fdr import yfinance as yf import matplotlib.pyplot as plt import seaborn as sns sns.set_style("darkgrid")
#fig 511 st=pd.Timestamp(2024, 4,20) et=pd.Timestamp(2024, 5, 30) da1=fdr.DataReader('091160', st, et)["Close"] da2=fdr.DataReader('005930', st, et)["Close"] da1=da1.pct_change()[1:]*100 da2=da2.pct_change()[1:]*100 da=pd.DataFrame([da1, da2], index=['data1', 'data2']).T da.index=range(len(da1)) mu1, sd1, n1=np.mean(da1), np.std(da1, ddof=1), len(da1) mu2, sd2, n2=np.mean(da2), np.std(da2, ddof=1), len(da2) s_p=np.sqrt(((n1-1)*sd1**2+(n2-1)*sd2**2)/(n1+n2-2)) se=s_p*np.sqrt((1/n1+1/n2)) se=s_p*np.sqrt((1/n1+1/n2)) df=n1+n2-2 mu=mu1-mu2 ci=stats.t.interval(0.95, df, mu, se) testStatic=((mu1-mu2)-0)/se x=np.linspace(-3, 3, 500) p=stats.t.pdf(x, df) l=stats.t.ppf(0.025, df) u=stats.t.ppf(0.975, df) idx=np.where((x>l)& (x<u))[0] idx2=np.where(x>testStatic)[0] f, ax=plt.subplots(1,2, figsize=(8, 3), sharey=True) ax[0].plot(x, p, color="g", label=f"t({df})") ax[0].fill_between(x[idx], stats.t.pdf(x[idx], df), color="brown", alpha=0.3, label="Conf. Int.") ax[0].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic") ax[0].legend(loc='upper right', prop={'size':8}) ax[0].set_xlabel("x") ax[0].set_ylabel("pdf") ax[0].set_title("(a) Confidence Interval") ax[1].plot(x, p, color="g", label=f"t({df})") ax[1].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic") ax[1].fill_between(x[idx2], stats.t.pdf(x[idx2], df), color="b", alpha=0.2, label=r"$\frac{p-value}{2}$") ax[1].legend(loc='upper right', prop={'size':8}) ax[1].set_xlabel("x") ax[1].set_title("(b) p_value") plt.show()
#fig 521 np.random.seed(0) x=np.linspace(0, 10, 100) y1=x+np.random.rand(100)*5 y2=-x+np.random.rand(100)*5 y3=np.random.rand(100)*10 plt.figure(figsize=(9, 4)) col=["blue","red","green"] lab=["a) direct","b) inverse", "c) no"] yT=[y1, y2, y3] for i in range(3): plt.subplot(1,3,i+1) plt.scatter(x, yT[i], s=15, color=col[i]) plt.title(f"{lab[i]} proportion", fontsize=15) plt.xticks([]) plt.yticks([]) plt.xlabel("x") if i==0: plt.ylabel("y") plt.show()
#fig 522 st=pd.Timestamp(2023,1,1) et=pd.Timestamp(2024, 5, 30) kos=yf.download('^KS11', st, et)["Close"] ex=yf.download('KRW=X',st, et)["Close"] kos=kos.pct_change()[1:]*100 ex=ex.pct_change()[1:]*100 data=pd.concat([ex, kos.shift(periods=-1)], join="inner", axis=1) data.index=range(len(data)) data.columns=['ex', 'kos'] data=data.dropna() plt.figure(figsize=(4,3)) sns.scatterplot(data=data, x="ex", y="kos", s=30) plt.xlabel("ex(%)") plt.ylabel("kos(%)") plt.show()
#fig 523 st=pd.Timestamp(2023,1,1) et=pd.Timestamp(2024, 5, 30) kos=yf.download('^KS11', st, et)["Close"] ex=yf.download('KRW=X',st, et)["Close"] data=pd.concat([ex, kos.shift(periods=-1)], join="inner", axis=1) data.index=range(len(data)) data.columns=['ex', 'kos'] data=data.dropna() scaler=StandardScaler().fit(data) da=scaler.transform(data) plt.figure(figsize=(4,3)) sns.scatterplot(x=da[:,0], y=da[:,1], s=30) plt.xlabel("ex") plt.ylabel("kos") plt.show()
#fig 531 x=np.linspace(-3, 3,1000) pdf=stats.norm.pdf(x) cdf=stats.norm.cdf(x) q=np.linspace(0, 1, 1000) ppf=stats.norm.ppf(q) plt.figure(figsize=(7,3)) plt.subplots_adjust(wspace=0.3) plt.subplot(1,2,1) plt.plot(x, pdf, color="blue", label="PDF") plt.plot(x, cdf, color="red", label="CDF") plt.xlabel("x (a)") plt.ylabel("Prob.", rotation="horizontal", labelpad=10) plt.legend(loc="best") plt.title("(a)", loc="left") plt.subplot(1,2,2) plt.plot(q, ppf, color="green", label="Inverse CDF") plt.xlabel("Prob.(q)") plt.ylabel("I(q)", rotation="horizontal") plt.legend(loc="best") plt.title("(b)", loc="left") plt.show()
#fig532 np.random.seed(1) x=np.random.rand(1000) x2=stats.norm.rvs(size=1000, random_state=3) plt.figure(figsize=(6,3)) ax1=plt.subplot(1,2,1) f1=stats.probplot(x, plot=plt, rvalue=True) ax1.set_title("(a) Q-Q plot: random") ax2=plt.subplot(1,2,2) f2=stats.probplot(x2, plot=plt, rvalue=True) ax2.set_ylabel("") ax2.set_title("(b) Q-Q plot: normal random") plt.show()
#fig 533, fig 532 데이터 적용 fig, (ax1, ax2)= plt.subplots(nrows=1, ncols=2, figsize=(8, 3)) ax1.hist(x, bins=30, rwidth=0.8, color="b") ax1.set_xlabel("x (a) random", loc="right", size="12") ax1.set_ylabel("Frequency", size="12") ax2.hist(x2, bins=30, rwidth=0.8, color="g") ax2.set_xlabel("x (b)normal random", loc="right", size="12") plt.show()
#fig 534 st=pd.Timestamp(2024,1,1) et=pd.Timestamp(2024, 5, 30) kos=yf.download("^KS11",st, et)["Close"] ex=yf.download("KRW=X", st, et)["Close"] kos1=(kos-kos.mean())/kos.std() ex1=(ex-ex.mean())/ex.std() plt.figure(figsize=(6, 3)) ax1 = plt.subplot(121) res = stats.probplot(np.ravel(kos1.values), plot=plt) ax1.set_title("Q-Q plot: KOSPI") ax2 = plt.subplot(122) res = stats.probplot(np.ravel(ex1.values), plot=plt) ax2.set_title("Q-Q plot: USDKRW") ax2.set_ylabel("") plt.show()
#fig 535 age=np.array([65,61,63,86,70,55,74,35,72,68,45,58]) plt.figure(figsize=(3,2)) stats.probplot(age, plot=plt, rvalue=True) plt.show()
#fig 536 st=pd.Timestamp(2024,1,1) et=pd.Timestamp(2024, 5, 30) kos=fdr.DataReader("KS11",st, et)["Close"] kq=fdr.DataReader("KQ11", st, et)["Close"] kos1=(kos-kos.mean())/kos.std() kq1=(kq-kq.mean())/kq.std() plt.figure(figsize=(6, 3)) ax1 = plt.subplot(121) res = stats.probplot(kos1, plot=plt, rvalue=True) ax1.set_title("Q-Q plot: KOSPI") ax2 = plt.subplot(122) res = stats.probplot(kq1, plot=plt, rvalue=True) ax2.set_title("Q-Q plot: kq") ax2.set_ylabel("") plt.show()
#fig 537 np.random.seed(3) N=100 da=np.sort(np.random.randn(N)) ecdf=[i/N for i in range(1, N+1)] nCdf=stats.norm.cdf(da) plt.figure(figsize=(3,2)) plt.plot(da, ecdf, color="blue", label="ECDF") plt.plot(da, nCdf, color="red", label="normCDF") plt.legend(loc="best") plt.xlabel("x", weight="bold") plt.ylabel("Prob.", weight="bold") plt.show()
#fig 538 st=pd.Timestamp(2024,1,1) et=pd.Timestamp(2024, 5, 30) kos=fdr.DataReader("KS11",st, et)["Close"] kq=fdr.DataReader("KQ11", st, et)["Close"] kos1=(kos-kos.mean())/kos.std() kq1=(kq-kq.mean())/kq.std() kos2=np.sort(kos1) kq2=np.sort(kq1.dropna()) n_kos=len(kos2) n_kq=len(kq2) ecdf_kos=[i/n_kos for i in range(1, n_kos+1)] ncdf_kos=stats.norm.cdf(kos2) ecdf_kq=[i/n_kq for i in range(1, n_kq+1)] ncdf_kq=stats.norm.cdf(kq2) plt.figure(figsize=(6,3)) plt.subplot(1,2,1) plt.plot(kos2, ecdf_kos, color="blue", label="ECDF") plt.plot(kos2, ncdf_kos, color="red", label="NCDF") plt.legend(loc="best") plt.title("Kospi", weight="bold") plt.xlabel("x", weight="bold") plt.ylabel("Prob.", weight="bold") plt.subplot(1,2,2) plt.plot(kq2, ecdf_kq, color="blue", label="ECDF") plt.plot(kq2, ncdf_kq, color="red", label="NCDF") plt.legend(loc="best") plt.title("USD/KRW", weight="bold") plt.xlabel("x", weight="bold") plt.show()
댓글
댓글 쓰기