In [69]:
import numpy as np
import torch
import torch.nn as nn

import torch.nn.functional as F

torch.manual_seed(7) 
Out[69]:
<torch._C.Generator at 0x7f39540686c0>

Binary Cross-Entropy

BCELoss

BCELoss requires the predicted outputs to be probabilities.

\begin{equation}
L=-\frac{1}{N} \sum_{i=1}^{N}\left[y_{i} \log \left(p_{i}\right)+\left(1-y_{i}\right) \log \left(1-p_{i}\right)\right]
\end{equation}

where $N$ is the total number of samples, $y_i$ is the class label of the $i$-th sample, and $p_i$ is the predicted probability for the $i$-th sample. For example:
| Sample   | $y_i$ | $p_i$ |
| -------- | ----- | ----- |
| Sample 1 | 1     | 0.8   |
| Sample 2 | 0     | 0.2   |
| Sample 3 | 0     | 0.4   |
In [5]:
# Compute directly from the formula
Loss = -((1*np.log(0.8)+(1-1)*np.log(1-0.8)) + (0*np.log(0.2)+(1-0)*np.log(1-0.2)) + (0*np.log(0.4)+(1-0)*np.log(1-0.4))) / 3
Loss
Out[5]:
0.3190375754648034
In [7]:
# BCELoss
bce_loss = nn.BCELoss()
pred = torch.tensor([0.8, 0.2, 0.4], dtype=torch.float)
label = torch.tensor([1, 0, 0], dtype=torch.float)

print(bce_loss(pred, label))
tensor(0.3190)
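
Note that nn.BCELoss assumes its inputs are already probabilities; values outside [0, 1] are rejected at runtime. A minimal check (the exact error message may vary by PyTorch version):

In [ ]:
# BCELoss only accepts probabilities in [0, 1]; a value like 1.5 raises an error
try:
    bce_loss(torch.tensor([1.5]), torch.tensor([1.0]))
except RuntimeError as e:
    print(e)  # e.g. "all elements of input should be between 0 and 1"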

BCEWithLogitsLoss

Unlike BCELoss, BCEWithLogitsLoss does not require the predictions to be probabilities; they may be any real-valued logits.
BCEWithLogitsLoss = Sigmoid + BCELoss, as the sketch below illustrates.
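
Besides convenience, fusing the sigmoid into the loss is also more numerically stable: BCEWithLogitsLoss uses the log-sum-exp trick internally, while a separate Sigmoid + BCELoss can saturate. A small sketch with deliberately extreme (arbitrary) logits:

In [ ]:
# In float32, sigmoid(20.) rounds to exactly 1.0, so a separate BCELoss step
# hits log(0), which PyTorch clamps to -100; the fused loss stays accurate.
logits = torch.tensor([20.0, -20.0])
labels = torch.tensor([0.0, 1.0])   # deliberately the "wrong" labels

print(nn.BCEWithLogitsLoss()(logits, labels))       # ~tensor(20.), the exact value
print(nn.BCELoss()(torch.sigmoid(logits), labels))  # inflated by the clamped log(0)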

Prepare the Data

In [24]:
pred = torch.randn(4,2)               # predictions (raw logits)
target = torch.rand(4).random_(0,2)   # ground-truth labels: random_(0, 2) overwrites with 0s and 1s

print(pred)
print(target)

# One-hot encode target by indexing rows of the 2x2 identity matrix
onehot_target = torch.eye(2)[target.long(), :]
print(onehot_target)
tensor([[-1.1595,  1.5483],
        [ 0.1839,  0.3613],
        [ 0.8999,  0.5699],
        [ 0.0102, -0.8208]])
tensor([0., 1., 0., 1.])
tensor([[1., 0.],
        [0., 1.],
        [1., 0.],
        [0., 1.]])
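
The torch.eye row-indexing above is one way to one-hot encode; F.one_hot (used again in the multi-class section below) produces the same result:

In [ ]:
# F.one_hot gives the same encoding as indexing rows of an identity matrix
onehot_via_f = F.one_hot(target.long(), num_classes=2).float()
print(torch.equal(onehot_target, onehot_via_f))  # True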

BCEWithLogitsLoss

In [32]:
bce_logits = nn.BCEWithLogitsLoss()
loss = bce_logits(pred, onehot_target)
loss
Out[32]:
tensor(0.9668)

Sigmoid+BCELoss

In [27]:
sigmoid = nn.Sigmoid()
sigmoid_pred = sigmoid(pred)
sigmoid_pred
Out[27]:
tensor([[0.2388, 0.8247],
        [0.5458, 0.5894],
        [0.7109, 0.6387],
        [0.5025, 0.3056]])
In [28]:
bce_loss = nn.BCELoss()
bce_loss(sigmoid_pred, onehot_target)
Out[28]:
tensor(0.9668)

Multi-Class Cross-Entropy

\begin{equation}
L=-\frac{1}{N} \sum_{i=1}^{N} \sum_{c=1}^{K} y_{i c} \log \left(p_{i c}\right)
\end{equation}

where $N$ is the number of samples, $K$ is the number of classes, $p_{ic}$ is the predicted probability that the $i$-th sample belongs to class $c$, with $\sum_{c=1}^{K} p_{i c}=1$ for $i=1,2,\ldots,N$, and $y_{ic} \in \{0,1\}$ is a one-hot label: $y_{ic}=1$ if the $i$-th sample belongs to class $c$, and $0$ otherwise.

Suppose $N=3$ and $K=3$, i.e., 3 samples and 3 classes in total, with the following data:

|          | $y_{i1}$ | $y_{i2}$ | $y_{i3}$ | $p_{i1}$ | $p_{i2}$ | $p_{i3}$ |
| -------- | -------- | -------- | -------- | -------- | -------- | -------- |
| Sample 1 | 0        | 1        | 0        | 0.2      | 0.3      | 0.5      |
| Sample 2 | 1        | 0        | 0        | 0.3      | 0.2      | 0.5      |
| Sample 3 | 0        | 0        | 1        | 0.4      | 0.4      | 0.2      |
\begin{equation}
\begin{gathered}
L_{1}=0 \cdot \log (0.2)+1 \cdot \log (0.3)+0 \cdot \log (0.5)=-1.2039 \\
L_{2}=1 \cdot \log (0.3)+0 \cdot \log (0.2)+0 \cdot \log (0.5)=-1.2039 \\
L_{3}=0 \cdot \log (0.4)+0 \cdot \log (0.4)+1 \cdot \log (0.2)=-1.6094
\end{gathered}
\end{equation}
\begin{equation}
L=-\frac{1}{3}\left(L_{1}+L_{2}+L_{3}\right)=1.3391
\end{equation}

Note that the $p_{ic}$ for each sample must sum to 1, i.e., the predictions must first be passed through softmax!
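
As a quick sanity check with arbitrary random scores, F.softmax along dim=1 turns each row into a probability distribution:

In [ ]:
scores = torch.randn(3, 3)          # arbitrary raw scores
probs = F.softmax(scores, dim=1)    # normalize each row
print(probs.sum(dim=1))             # tensor([1., 1., 1.])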

Manual Computation

In [36]:
# Predictions, assumed to already be softmax probabilities
pred=torch.tensor([[0.2,0.3,0.5],[0.3,0.2,0.5],[0.4,0.4,0.2]])

# Ground-truth class labels
target=torch.tensor([1,0,2])

# One-hot encode the ground-truth labels
one_hot = F.one_hot(target).float()
"""
one_hot:
tensor([[0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.]])
"""

# Take the log of the predictions
log = torch.log(pred)

# Compute the final result
res = -torch.sum(one_hot*log)/target.shape[0]

print(res)# tensor(1.3391)
tensor(1.3391)
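
Equivalently, instead of multiplying by the one-hot matrix, we can index the log-probabilities by each sample's true class; this is exactly what nll_loss in the next section does:

In [ ]:
# Select log p_{i, y_i} for every sample, then average and negate
res2 = -log[torch.arange(target.shape[0]), target].mean()
print(res2)  # tensor(1.3391)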

NLLLoss

The true labels need no one-hot encoding, but the predictions must be log-probabilities (log_softmax of raw scores, or here, the log of softmax outputs).

In [37]:
# Predictions, already softmax probabilities
pred=torch.tensor([[0.2,0.3,0.5],[0.3,0.2,0.5],[0.4,0.4,0.2]])

# Ground-truth class labels; no one-hot needed here, since nll_loss picks out the log-probability of each true class directly
target=torch.tensor([1,0,2])

# Take the log of the predictions
log=torch.log(pred)

# Compute the final result
res=F.nll_loss(log, target)
print(res)# tensor(1.3391)
tensor(1.3391)

CrossEntropyLoss

CrossEntropyLoss = log_softmax + nll_loss

In [67]:
torch.manual_seed(7)  # seed the CPU RNG for reproducibility

# 4 samples, 3 classes (raw, unnormalized scores)
pred=torch.rand(4,3)

# Ground-truth class labels
target=torch.tensor([0,1,0,2])

# Apply log_softmax to the predictions
logsoftmax = F.log_softmax(pred, dim=1)
print(logsoftmax)

res = F.nll_loss(logsoftmax,target)

print()
print('log_softmax + nll_loss:')
print(res) 
tensor([[-1.0462, -1.3823, -0.9219],
        [-0.8950, -1.3192, -1.1269],
        [-1.3076, -0.8850, -1.1494],
        [-1.0095, -1.0058, -1.3098]])

log_softmax + nll_loss:
tensor(1.2457)
In [68]:
print('Using F.cross_entropy directly:')
res=F.cross_entropy(pred, target)
print(res) 
Using F.cross_entropy directly:
tensor(1.2457)
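
The same loss is also available in module form as nn.CrossEntropyLoss; like F.cross_entropy, it takes raw logits and integer class labels:

In [ ]:
ce = nn.CrossEntropyLoss()
print(ce(pred, target))  # tensor(1.2457), identical to F.cross_entropy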