다음은 전자책 「파이썬과 함께하는 통계이야기」 5장에 수록된 그림들의 코드입니다.
import numpy as np
import pandas as pd
from scipy import stats
from sklearn.preprocessing import StandardScaler
import FinanceDataReader as fdr
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
# Global seaborn theme; applies to every figure drawn after this line.
sns.set_style("darkgrid")

# fig 511: pooled-variance two-sample t-test on the daily % changes of two
# tickers. Panel (a) shades the central 95% region of t(df); panel (b) shades
# the one-sided tail beyond the test statistic (half of the two-sided p-value).
st = pd.Timestamp(2024, 4, 20)
et = pd.Timestamp(2024, 5, 30)
da1 = fdr.DataReader('091160', st, et)["Close"]
da2 = fdr.DataReader('005930', st, et)["Close"]
# pct_change() leaves NaN in the first row, so that row is dropped.
da1 = da1.pct_change()[1:]*100
da2 = da2.pct_change()[1:]*100
da = pd.DataFrame([da1, da2], index=['data1', 'data2']).T
da.index = range(len(da1))

# Sample mean, sample standard deviation (ddof=1) and size of each series.
mu1, sd1, n1 = np.mean(da1), np.std(da1, ddof=1), len(da1)
mu2, sd2, n2 = np.mean(da2), np.std(da2, ddof=1), len(da2)

# Pooled standard deviation and the standard error of the mean difference.
s_p = np.sqrt(((n1-1)*sd1**2+(n2-1)*sd2**2)/(n1+n2-2))
se = s_p*np.sqrt((1/n1+1/n2))
df = n1+n2-2
mu = mu1-mu2
ci = stats.t.interval(0.95, df, mu, se)
testStatic = ((mu1-mu2)-0)/se

# Standard t(df) density on a fixed grid and its 2.5% / 97.5% quantiles.
x = np.linspace(-3, 3, 500)
p = stats.t.pdf(x, df)
l = stats.t.ppf(0.025, df)
u = stats.t.ppf(0.975, df)
idx = np.where((x > l) & (x < u))[0]
# Shade the tail on the side where the statistic lies; shading x > t for a
# negative statistic would cover 1 - p/2 instead of p/2.
if testStatic >= 0:
    idx2 = np.where(x > testStatic)[0]
else:
    idx2 = np.where(x < testStatic)[0]

f, ax = plt.subplots(1, 2, figsize=(8, 3), sharey=True)
ax[0].plot(x, p, color="g", label=f"t({df})")
ax[0].fill_between(x[idx], stats.t.pdf(x[idx], df), color="brown", alpha=0.3, label="Conf. Int.")
ax[0].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic")
ax[0].legend(loc='upper right', prop={'size': 8})
ax[0].set_xlabel("x")
ax[0].set_ylabel("pdf")
ax[0].set_title("(a) Confidence Interval")
ax[1].plot(x, p, color="g", label=f"t({df})")
ax[1].vlines(testStatic, 0, stats.t.pdf(testStatic, df), ls="--", color="red", label="test Statistic")
ax[1].fill_between(x[idx2], stats.t.pdf(x[idx2], df), color="b", alpha=0.2, label=r"$\frac{p-value}{2}$")
ax[1].legend(loc='upper right', prop={'size': 8})
ax[1].set_xlabel("x")
ax[1].set_title("(b) p_value")
plt.show()
# fig 521: three synthetic scatter plots showing an increasing trend, a
# decreasing trend and no trend between x and y.
np.random.seed(0)
x = np.linspace(0, 10, 100)
y1 = x + np.random.rand(100)*5    # y rises with x plus uniform noise
y2 = -x + np.random.rand(100)*5   # y falls with x plus uniform noise
y3 = np.random.rand(100)*10       # uniform noise only
plt.figure(figsize=(9, 4))
col = ["blue", "red", "green"]
lab = ["a) direct", "b) inverse", "c) no"]
yT = [y1, y2, y3]
# One panel per series; the loop body must be indented to be valid Python.
for i, (y, color, label) in enumerate(zip(yT, col, lab)):
    plt.subplot(1, 3, i+1)
    plt.scatter(x, y, s=15, color=color)
    plt.title(f"{label} proportion", fontsize=15)
    plt.xticks([])
    plt.yticks([])
    plt.xlabel("x")
    # Only the left-most panel carries the y-axis label.
    if i == 0:
        plt.ylabel("y")
plt.show()
# fig 522: scatter of the daily % change of KRW=X against the % change of
# ^KS11 taken from the following row (the index series is shifted up by one).
st = pd.Timestamp(2023, 1, 1)
et = pd.Timestamp(2024, 5, 30)
kos = yf.download('^KS11', st, et)["Close"]
ex = yf.download('KRW=X', st, et)["Close"]

# Percentage changes; the leading NaN row produced by pct_change is skipped.
kos = kos.pct_change().iloc[1:] * 100
ex = ex.pct_change().iloc[1:] * 100

# Keep only dates present in both series, then replace the dates with 0..n-1.
data = pd.concat([ex, kos.shift(periods=-1)], join="inner", axis=1)
data = data.reset_index(drop=True)
data.columns = ['ex', 'kos']
data = data.dropna()

fig, axis = plt.subplots(figsize=(4, 3))
sns.scatterplot(data=data, x="ex", y="kos", s=30, ax=axis)
axis.set_xlabel("ex(%)")
axis.set_ylabel("kos(%)")
plt.show()
# fig 523: the same pairing as fig 522 but on closing levels, with both
# columns standardized by StandardScaler before plotting.
st = pd.Timestamp(2023, 1, 1)
et = pd.Timestamp(2024, 5, 30)
kos = yf.download('^KS11', st, et)["Close"]
ex = yf.download('KRW=X', st, et)["Close"]

# Align on common dates; the index series is shifted up by one row.
data = pd.concat([ex, kos.shift(periods=-1)], join="inner", axis=1)
data = data.reset_index(drop=True)
data.columns = ['ex', 'kos']
data = data.dropna()

# Fit the scaler on the cleaned frame, then transform that same frame.
scaler = StandardScaler()
scaler.fit(data)
da = scaler.transform(data)

fig, axis = plt.subplots(figsize=(4, 3))
sns.scatterplot(x=da[:, 0], y=da[:, 1], s=30, ax=axis)
axis.set_xlabel("ex")
axis.set_ylabel("kos")
plt.show()
# fig 531: standard normal PDF and CDF (left) and the inverse CDF (right).
x = np.linspace(-3, 3, 1000)
pdf = stats.norm.pdf(x)
cdf = stats.norm.cdf(x)
q = np.linspace(0, 1, 1000)
ppf = stats.norm.ppf(q)

fig, (left, right) = plt.subplots(1, 2, figsize=(7, 3))
fig.subplots_adjust(wspace=0.3)

# (a) density and cumulative distribution over x
left.plot(x, pdf, color="blue", label="PDF")
left.plot(x, cdf, color="red", label="CDF")
left.set_xlabel("x (a)")
left.set_ylabel("Prob.", rotation="horizontal", labelpad=10)
left.legend(loc="best")
left.set_title("(a)", loc="left")

# (b) quantile function over probabilities
right.plot(q, ppf, color="green", label="Inverse CDF")
right.set_xlabel("Prob.(q)")
right.set_ylabel("I(q)", rotation="horizontal")
right.legend(loc="best")
right.set_title("(b)", loc="left")
plt.show()
# fig 532: normal probability (Q-Q) plots of a uniform sample and a normal
# sample. x and x2 are reused by the histogram figure that follows.
np.random.seed(1)
x = np.random.rand(1000)
x2 = stats.norm.rvs(size=1000, random_state=3)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 3))

# Uniform sample against normal quantiles.
f1 = stats.probplot(x, plot=ax1, rvalue=True)
ax1.set_title("(a) Q-Q plot: random")

# Normal sample against normal quantiles; y label dropped on the right panel.
f2 = stats.probplot(x2, plot=ax2, rvalue=True)
ax2.set_ylabel("")
ax2.set_title("(b) Q-Q plot: normal random")
plt.show()
# fig 533: histograms of the two samples (x, x2) generated for fig 532.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 3))
panels = (
    (ax1, x, "b", "x (a) random"),
    (ax2, x2, "g", "x (b)normal random"),
)
for axis, sample, colour, caption in panels:
    axis.hist(sample, bins=30, rwidth=0.8, color=colour)
    axis.set_xlabel(caption, loc="right", size="12")
# Only the left panel carries the y-axis label.
ax1.set_ylabel("Frequency", size="12")
plt.show()
# fig 534: normal Q-Q plots of the standardized ^KS11 and KRW=X closing series.
st = pd.Timestamp(2024, 1, 1)
et = pd.Timestamp(2024, 5, 30)
kos = yf.download("^KS11", st, et)["Close"]
ex = yf.download("KRW=X", st, et)["Close"]

# z-score each series with its own mean and standard deviation.
kos_mean, kos_sd = kos.mean(), kos.std()
ex_mean, ex_sd = ex.mean(), ex.std()
kos1 = (kos - kos_mean) / kos_sd
ex1 = (ex - ex_mean) / ex_sd

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 3))
# probplot needs a flat array, so the values are flattened first.
res = stats.probplot(kos1.to_numpy().ravel(), plot=ax1)
ax1.set_title("Q-Q plot: KOSPI")
res = stats.probplot(ex1.to_numpy().ravel(), plot=ax2)
ax2.set_title("Q-Q plot: USDKRW")
ax2.set_ylabel("")
plt.show()
# fig 535: normal Q-Q plot of a small age sample, with R^2 shown on the plot.
# These statements were collapsed onto the comment line and never executed.
age = np.array([65, 61, 63, 86, 70, 55, 74, 35, 72, 68, 45, 58])
plt.figure(figsize=(3, 2))
stats.probplot(age, plot=plt, rvalue=True)
plt.show()
# fig 536: normal Q-Q plots (with R^2) of the standardized KS11 and KQ11
# closing series.
st = pd.Timestamp(2024, 1, 1)
et = pd.Timestamp(2024, 5, 30)
kos = fdr.DataReader("KS11", st, et)["Close"]
kq = fdr.DataReader("KQ11", st, et)["Close"]

# z-score each series with its own mean and standard deviation.
kos_mean, kos_sd = kos.mean(), kos.std()
kq_mean, kq_sd = kq.mean(), kq.std()
kos1 = (kos - kos_mean) / kos_sd
kq1 = (kq - kq_mean) / kq_sd

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 3))
res = stats.probplot(kos1, plot=ax1, rvalue=True)
ax1.set_title("Q-Q plot: KOSPI")
res = stats.probplot(kq1, plot=ax2, rvalue=True)
ax2.set_title("Q-Q plot: kq")
ax2.set_ylabel("")
plt.show()
# fig 537: empirical CDF of 100 standard-normal draws against the normal CDF.
np.random.seed(3)
N = 100
da = np.random.randn(N)
da.sort()  # ascending, in place
# ECDF value at the i-th smallest observation is i/N.
ecdf = (np.arange(1, N + 1) / N).tolist()
nCdf = stats.norm.cdf(da)

fig, axis = plt.subplots(figsize=(3, 2))
axis.plot(da, ecdf, color="blue", label="ECDF")
axis.plot(da, nCdf, color="red", label="normCDF")
axis.legend(loc="best")
axis.set_xlabel("x", weight="bold")
axis.set_ylabel("Prob.", weight="bold")
plt.show()
# fig 538: empirical CDF versus the standard normal CDF for the standardized
# KS11 (left) and KQ11 (right) closing series.
st = pd.Timestamp(2024, 1, 1)
et = pd.Timestamp(2024, 5, 30)
kos = fdr.DataReader("KS11", st, et)["Close"]
kq = fdr.DataReader("KQ11", st, et)["Close"]

# z-score each series with its own mean and standard deviation.
kos1 = (kos-kos.mean())/kos.std()
kq1 = (kq-kq.mean())/kq.std()

# Drop missing values from BOTH series before sorting so NaNs neither end up
# in the sorted arrays nor inflate the ECDF denominators.
kos2 = np.sort(kos1.dropna())
kq2 = np.sort(kq1.dropna())
n_kos = len(kos2)
n_kq = len(kq2)

# ECDF value at the i-th smallest observation is i/n.
ecdf_kos = [i/n_kos for i in range(1, n_kos+1)]
ncdf_kos = stats.norm.cdf(kos2)
ecdf_kq = [i/n_kq for i in range(1, n_kq+1)]
ncdf_kq = stats.norm.cdf(kq2)

plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plt.plot(kos2, ecdf_kos, color="blue", label="ECDF")
plt.plot(kos2, ncdf_kos, color="red", label="NCDF")
plt.legend(loc="best")
plt.title("Kospi", weight="bold")
plt.xlabel("x", weight="bold")
plt.ylabel("Prob.", weight="bold")
plt.subplot(1, 2, 2)
plt.plot(kq2, ecdf_kq, color="blue", label="ECDF")
plt.plot(kq2, ncdf_kq, color="red", label="NCDF")
plt.legend(loc="best")
# This panel plots the KQ11 series, so it is titled accordingly
# (it was mislabelled "USD/KRW").
plt.title("Kosdaq", weight="bold")
plt.xlabel("x", weight="bold")
plt.show()












댓글
댓글 쓰기