The following graphs are the codes for the figures included in Chapter 5 of the e-book Statistics with Python.
import numpy as np import pandas as pd from scipy import stats from sklearn.preprocessing import StandardScaler import yfinance as yf import matplotlib.pyplot as plt import seaborn as sns sns.set_style("darkgrid")
#fig511 st=pd.Timestamp(2024, 4,20) et=pd.Timestamp(2024, 5, 30) da1=yf.download('GOOGL', st, et)["Close"] da2=yf.download('MSFT', st, et)["Close"] da1=da1.iloc[:,0].pct_change()[1:]*100 da2=da2.iloc[:,0].pct_change()[1:]*100 da=pd.DataFrame([da1, da2], index=['data1', 'data2']).T da.index=range(len(da1)) mu1, sd1, n1=np.mean(da1), np.std(da1, ddof=1), len(da1) mu2, sd2, n2=np.mean(da2), np.std(da2, ddof=1), len(da2) s_p=np.sqrt(((n1-1)*sd1**2+(n2-1)*sd2**2)/(n1+n2-2)) se=s_p*np.sqrt((1/n1+1/n2)) df=n1+n2-2 mu=mu1-mu2 testStatic=((mu1-mu2)-0)/se x=np.linspace(-3, 3, 500) p=stats.t.pdf(x, df) l=stats.t.ppf(0.025, df) u=stats.t.ppf(0.975, df) idx=np.where((x>l)& (x<u))[0] idx2=np.where(x>testStatic)[0] f, ax=plt.subplots(1,2, figsize=(8, 3), sharey=True) ax[0].plot(x, p, color="g", label=f"t({df})") ax[0].fill_between(x[idx], stats.t.pdf(x[idx], df), color="brown", alpha=0.3, label="Conf. Int.") ax[0].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic") ax[0].legend(loc='upper right', prop={'size':8}) ax[0].set_xlabel("x") ax[0].set_ylabel("pdf") ax[0].set_title("(a) Confidence Interval") ax[1].plot(x, p, color="g", label=f"t({df})") ax[1].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic") ax[1].fill_between(x[idx2], stats.t.pdf(x[idx2], df), color="b", alpha=0.2, label=r"$\frac{p-value}{2}$") ax[1].legend(loc='upper right', prop={'size':8}) ax[1].set_xlabel("x") ax[1].set_title("(b) p_value") plt.show()
#fig 521 np.random.seed(0) x=np.linspace(0, 10, 100) y1=x+np.random.rand(100)*5 y2=-x+np.random.rand(100)*5 y3=np.random.rand(100)*10 plt.figure(figsize=(9, 4)) col=["blue","red","green"] lab=["a) direct","b) inverse", "c) no"] yT=[y1, y2, y3] for i in range(3): plt.subplot(1,3,i+1) plt.scatter(x, yT[i], s=15, color=col[i]) plt.title(f"{lab[i]} proportion", fontsize=15) plt.xticks([]) plt.yticks([]) plt.xlabel("x") if i==0: plt.ylabel("y") plt.show()
#fig523 st=pd.Timestamp(2023,1,1) et=pd.Timestamp(2024, 5, 30) vix=yf.download('^vix', st, et)["Close"].dropna() ue=yf.download('EUR=X', st, et)["Close"].dropna() data=pd.concat([ue, vix.shift(periods=-1)], join="inner", axis=1) data.index=range(len(data)) data.columns=['ue', 'vix'] data=data.dropna() scaler=StandardScaler().fit(data) da=scaler.transform(data) plt.figure(figsize=(4,3)) sns.scatterplot(x=da[:,0], y=da[:,1], s=30) plt.xlabel("ue") plt.ylabel("vix") plt.show()
#fig 531 x=np.linspace(-3, 3,1000) pdf=stats.norm.pdf(x) cdf=stats.norm.cdf(x) q=np.linspace(0, 1, 1000) ppf=stats.norm.ppf(q) plt.figure(figsize=(7,3)) plt.subplots_adjust(wspace=0.3) plt.subplot(1,2,1) plt.plot(x, pdf, color="blue", label="PDF") plt.plot(x, cdf, color="red", label="CDF") plt.xlabel("x (a)") plt.ylabel("Prob.", rotation="horizontal", labelpad=10) plt.legend(loc="best") plt.title("(a)", loc="left") plt.subplot(1,2,2) plt.plot(q, ppf, color="green", label="Inverse CDF") plt.xlabel("Prob.(q)") plt.ylabel("I(q)", rotation="horizontal") plt.legend(loc="best") plt.title("(b)", loc="left") plt.show()
#fig532 np.random.seed(1) x=np.random.rand(1000) x2=stats.norm.rvs(size=1000, random_state=3) plt.figure(figsize=(6,3)) ax1=plt.subplot(1,2,1) f1=stats.probplot(x, plot=plt, rvalue=True) ax1.set_title("(a) Q-Q plot: random") ax2=plt.subplot(1,2,2) f2=stats.probplot(x2, plot=plt, rvalue=True) ax2.set_ylabel("") ax2.set_title("(b) Q-Q plot: normal random") plt.show()
#fig 533, fig 532 데이터 적용 fig, (ax1, ax2)= plt.subplots(nrows=1, ncols=2, figsize=(7, 3)) ax1.hist(x, bins=30, rwidth=0.8, color="b") ax1.set_xlabel("x (a) random", loc="right", size="12") ax1.set_ylabel("Frequency", size="12") ax2.hist(x2, bins=30, rwidth=0.8, color="g") ax2.set_xlabel("x (b)normal random", loc="right", size="12") plt.show()
#fig534 st=pd.Timestamp(2024,1,1) et=pd.Timestamp(2024, 5, 30) nd=yf.download("^IXIC",st, et)["Close"] ex=yf.download("EUR=X", st, et)["Close"] nd1=(nd-nd.mean())/nd.std() ex1=(ex-ex.mean())/ex.std() plt.figure(figsize=(7, 3)) ax1 = plt.subplot(121) res = stats.probplot(np.ravel(nd1.values), plot=plt) ax1.set_title("Q-Q plot: nd") ax2 = plt.subplot(122) res = stats.probplot(np.ravel(ex1.values), plot=plt) ax2.set_title("Q-Q plot: ex") ax2.set_ylabel("") plt.show()
#fig536 st=pd.Timestamp(2024,1,1) et=pd.Timestamp(2024, 5, 30) s=yf.download("^SPX",st, et)["Close"].dropna() ex=yf.download("EUR=X", st, et)["Close"].dropna() s1=(s-s.mean())/s.std() ex1=(ex-ex.mean())/ex.std() plt.figure(figsize=(7, 3)) ax1 = plt.subplot(121) res = stats.probplot(s1.iloc[:,0], plot=plt, rvalue=True) ax1.set_title("Q-Q plot: S&P") ax2 = plt.subplot(122) res = stats.probplot(ex1.iloc[:,0], plot=plt, rvalue=True) ax2.set_title("Q-Q plot: USD/EUR") ax2.set_ylabel("") plt.show()
#fig 537 np.random.seed(3) N=100 da=np.sort(np.random.randn(N)) ecdf=[i/N for i in range(1, N+1)] nCdf=stats.norm.cdf(da) plt.figure(figsize=(3,2)) plt.plot(da, ecdf, color="blue", label="ECDF") plt.plot(da, nCdf, color="red", label="normCDF") plt.legend(loc="best") plt.xlabel("x", weight="bold") plt.ylabel("probility", weight="bold") plt.show()
#fig538, Apply fig 536 data s2=np.sort(s1.iloc[:,0]) ex2=np.sort(ex1.dropna().iloc[:,0]) n_s=len(s2) n_ex=len(ex2) ecdf_s=[i/n_s for i in range(1, n_s+1)] ncdf_s=stats.norm.cdf(s2) ecdf_ex=[i/n_ex for i in range(1, n_ex+1)] ncdf_ex=stats.norm.cdf(ex2) plt.figure(figsize=(7,3)) plt.subplot(1,2,1) plt.plot(s2, ecdf_s, color="blue", label="ECDF") plt.plot(s2, ncdf_s, color="red", label="NCDF") plt.legend(loc="best") plt.title("S&P", weight="bold") plt.xlabel("x", weight="bold") plt.ylabel("Prob.", weight="bold") plt.subplot(1,2,2) plt.plot(ex2, ecdf_ex, color="blue", label="ECDF") plt.plot(ex2, ncdf_ex, color="red", label="NCDF") plt.legend(loc="best") plt.title("USD/EUR", weight="bold") plt.xlabel("x", weight="bold") plt.show()
댓글
댓글 쓰기