%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.stats import norm


import warnings
warnings.filterwarnings("ignore")


def gaussian_distribution(x, mu, sigma):
    '''
    function：Gaussian distribution
 
    mu: mean
    sigma: std
    '''
    return 1 / (np.sqrt(2 * np.pi) * sigma) * np.exp(- (x - mu)**2 / (2 * np.power(sigma, 2) ))


total = 100000000

x = np.random.rand(total,1)
y = np.random.rand(total,1)
dist = np.sqrt(np.multiply(x, x) + np.multiply(y, y))

(dist < 1).sum() / total * 4

3.14166516


fig, ax = plt.subplots(1,2, figsize=(12,5))

theta = 2
x = np.arange(0,15,0.1)

y1 = theta * np.exp(-x * theta)
ax[0].plot(x, y1)
ax[0].set_title('PDF')
ax[0].set_xlabel('X')
ax[0].set_ylabel('Y')


y2 = 1 - np.exp(-x * theta)
ax[1].plot(x, y2)
ax[1].set_title('CDF')
ax[1].set_xlabel('X')
ax[1].set_ylabel('P')

Text(0, 0.5, 'P')


# 指数分布
theta = 2
x = np.arange(0,5,0.1)
y = theta * np.exp(-x * theta)

# 通过均匀分布进行采样
sample_num = 10000
z = np.random.uniform(0,1, sample_num)   # 均匀分布采样
sample_x = - np.log(1 - z) / theta       

plt.plot(x, y)
sns.distplot(sample_x)

<AxesSubplot:ylabel='Density'>


def p1(x):
    return 0.3 * np.exp(-(x-0.3)**2) + 0.7 * np.exp(-(x-2.)**2/0.3)

def p2(x, z=1.2113):
    return p1(x) / z

def kq(x, k):
    return k * norm.pdf(x, loc=1.4, scale=1.2)
#     return k * gaussian_distribution(x, mu=1.4, sigma=1.2)  # 自定义正态分布


x = np.arange(-4.,6.,0.01)
k = 2.5

# 对 p1(x) 可视化
plt.plot(x, p1(x),color = "red")

# 对 k*q(x) 可视化
plt.plot(x, kq(x, k), color = "blue")


plt.xticks([])
plt.yticks([])
plt.ylim(0,0.9)
plt.xlim(-4,6)
plt.plot([0.3,0.3],[0,0.54601532],color = "black")
plt.plot(0.3,0.54601532,'b.')
plt.fill_between(x,p1(x), kq(x, k),color = (0.7,0.7,0.7))
plt.annotate('$x_0$',xy=(0.,0),xytext=(0.2,-0.06),fontsize=15)
plt.annotate('$u_0$',xy=(0.,0.),xytext=(0.35,0.15),fontsize=15)
plt.annotate('$kq(x_0)$',xy=(0.,0.),xytext=(-1.2,0.55),fontsize=15)
plt.annotate('$p(x)$',xy=(0.,0.),xytext=(2,0.15),fontsize=15)
plt.annotate('$kq(x)$',xy=(0.,0.),xytext=(2.7,0.5),fontsize=15)
plt.show()


x = np.arange(-4.,6.,0.01)
# p1(x) 可视化
plt.plot(x, p1(x),color = "red")

size = 50000
mu = 1.4
sigma = 1.2
k = 2.5

# 1. 从 q(x) 采样，得到 sample_x
sample_x = np.random.normal(loc = mu, scale = sigma, size = size)

# 2. 计算 q
q = norm.pdf(sample_x, loc=mu, scale=sigma)
# q = 1/(np.sqrt(2*np.pi)*sigma)*np.exp(-0.5*(sample_x-mu)**2/sigma**2)

# 3. 计算 p
p = p1(sample_x)

# 4. 生成 0-1 随机数
u = np.random.uniform(low = 0, high = 1, size = size)

# 5. 选择采样点
sample = sample_x[p / (q * k) >= u]

sns.distplot(sample)

plt.show()


def theoretical():
    print('CDF计算的真实概率：', norm.cdf(-8, 0, 1))
    
def naive_monte():
    N = 10000
    rand_num = np.random.normal(loc=0, scale=1, size=N)
    sample = (rand_num > 8)
    print("直接对p(x)采样的概率：", np.mean(sample))
    
def importance_sampling():
    N = 10000
    rand_num = np.random.normal(loc=8, scale=1, size=N)
    sample = (rand_num > 8) * np.exp(-8 *rand_num + 32)
    print("重要性采样概率：", np.mean(sample))

# 1. 直接使用函数f(x)的CDF进行求解
theoretical()

# 2. 通过对函数f(x)进行采样，然后进行求解
naive_monte()

# 3. 通过重要性采样进行求解
importance_sampling()

CDF计算的真实概率： 6.22096057427174e-16
直接对p(x)采样的概率： 0.0
重要性采样概率： 6.200460782041351e-16

UDF¶

计算$\pi$¶

直接抽样（逆采样，Inverse Sampling)¶

接受-拒绝抽样¶

重要性抽样¶