import torch
from d2l import torch as d2l
from mxnet import autograd, gluon, np, npx
from d2l import mxnet as d2l

from functools import partial
import jax
import optax
from jax import numpy as jnp
from d2l import jax as d2l
No GPU/TPU found, falling back to CPU. (Set TF_CPP_MIN_LOG_LEVEL=0 and rerun for more info.)
import tensorflow as tf
from d2l import tensorflow as d2l

4.3.1. The Classifier Class


class Classifier(d2l.Module): #@save
  """Common base class for classification models."""
  def validation_step(self, batch):
    """Plot the loss and accuracy obtained on one validation batch.

    The last element of ``batch`` is used as the labels; every preceding
    element is forwarded to the model as an input.
    """
    inputs, labels = batch[:-1], batch[-1]
    predictions = self(*inputs)
    val_loss = self.loss(predictions, labels)
    self.plot('loss', val_loss, train=False)
    val_acc = self.accuracy(predictions, labels)
    self.plot('acc', val_acc, train=False)

We define the Classifier class below. In the validation_step we report both the loss value and the classification accuracy on a validation batch. We draw an update for every num_val_batches batches. This has the benefit of generating the averaged loss and accuracy on the whole validation data. These average numbers are not exactly correct if the last batch contains fewer examples, but we ignore this minor difference to keep the code simple.

class Classifier(d2l.Module): #@save
  """Common base class for classification models."""
  def validation_step(self, batch):
    """Plot the loss and accuracy obtained on one validation batch.

    The last element of ``batch`` is used as the labels; every preceding
    element is forwarded to the model as an input.
    """
    inputs, labels = batch[:-1], batch[-1]
    predictions = self(*inputs)
    val_loss = self.loss(predictions, labels)
    self.plot('loss', val_loss, train=False)
    val_acc = self.accuracy(predictions, labels)
    self.plot('acc', val_acc, train=False)

We define the Classifier class below. In the validation_step we report both the loss value and the classification accuracy on a validation batch. We draw an update for every num_val_batches batches. This has the benefit of generating the averaged loss and accuracy on the whole validation data. These average numbers are not exactly correct if the last batch contains fewer examples, but we ignore this minor difference to keep the code simple.

We also redefine the training_step method for JAX since all models that will subclass Classifier later will have a loss that returns auxiliary data. This auxiliary data can be used for models with batch normalization (to be explained in Section 8.5), while in all other cases we will make the loss also return a placeholder (empty dictionary) to represent the auxiliary data.

class Classifier(d2l.Module): #@save
  """The base class of classification models."""
  def training_step(self, params, batch, state):
    """Compute the loss and its gradients for one training batch.

    The last element of ``batch`` is passed to ``self.loss`` as the labels
    and the preceding elements as the inputs.

    Returns:
      ``(value, grads)`` where ``value`` is the ``(loss, auxiliary data)``
      pair returned by ``self.loss`` and ``grads`` is the gradient computed
      by ``jax.value_and_grad``.
    """
    # Here value is a tuple since models with BatchNorm layers require
    # the loss to return auxiliary data
    value, grads = jax.value_and_grad(
      self.loss, has_aux=True)(params, batch[:-1], batch[-1], state)
    loss, _ = value
    self.plot("loss", loss, train=True)
    return value, grads

  def validation_step(self, params, batch, state):
    """Plot the loss and accuracy obtained on one validation batch."""
    # Discard the second returned value. It is used for training models
    # with BatchNorm layers since loss also returns auxiliary data
    loss, _ = self.loss(params, batch[:-1], batch[-1], state)
    self.plot('loss', loss, train=False)
    # The original call was cut off after the trailing comma; it is closed
    # with train=False so the accuracy is plotted as a validation metric,
    # matching the loss plotted just above.
    self.plot('acc', self.accuracy(params, batch[:-1], batch[-1], state),
              train=False)

We define the Classifier class below. In the validation_step we report both the loss value and the classification accuracy on a validation batch. We draw an update for every num_val_batches batches. This has the benefit of generating the averaged loss and accuracy on the whole validation data. These average numbers are not exactly correct if the last batch contains fewer examples, but we ignore this minor difference to keep the code simple.

class Classifier(d2l.Module): #@save
  """Common base class for classification models."""
  def validation_step(self, batch):
    """Plot the loss and accuracy obtained on one validation batch.

    The last element of ``batch`` is used as the labels; every preceding
    element is forwarded to the model as an input.
    """
    inputs, labels = batch[:-1], batch[-1]
    predictions = self(*inputs)
    val_loss = self.loss(predictions, labels)
    self.plot('loss', val_loss, train=False)
    val_acc = self.accuracy(predictions, labels)
    self.plot('acc', val_acc, train=False)


@d2l.add_to_class(d2l.Module) #@save
def configure_optimizers(self):
  """Build a ``torch.optim.SGD`` optimizer over ``self.parameters()``.

  The learning rate is taken from ``self.lr``.
  """
  optimizer = torch.optim.SGD(self.parameters(), lr=self.lr)
  return optimizer
@d2l.add_to_class(d2l.Module) #@save
def configure_optimizers(self):
  """Build the SGD optimizer matching the kind of parameters in use.

  When ``self.parameters()`` returns a plain list, ``d2l.SGD`` is used;
  otherwise a ``gluon.Trainer`` configured for ``'sgd'`` is returned. Both
  take their learning rate from ``self.lr``.
  """
  model_params = self.parameters()
  if not isinstance(model_params, list):
    return gluon.Trainer(model_params, 'sgd', {'learning_rate': self.lr})
  return d2l.SGD(model_params, self.lr)
@d2l.add_to_class(d2l.Module) #@save
def configure_optimizers(self):
  """Build an ``optax.sgd`` optimizer using ``self.lr``."""
  step_size = self.lr
  return optax.sgd(step_size)
@d2l.add_to_class(d2l.Module) #@save
def configure_optimizers(self):
  """Build a ``tf.keras.optimizers.SGD`` optimizer using ``self.lr``."""
  step_size = self.lr
  return tf.keras.optimizers.SGD(step_size)

4.3.2. 準確性

給定預測概率分布y_hat,每當我們必須輸出硬預測時,我們通常會選擇預測概率最高的類別。事實上,許多應用程序需要我們做出選擇。例如,Gmail 必須將電子郵件分類為“主要”、“社交”、“更新”、“論壇”或“垃圾郵件”。它可能會在內部估計概率,但最終它必須在類別中選擇一個。

當預測與標籤類別 y 一致時,預測即為正確。分類準確度是正確預測在所有預測中所占的比例。儘管直接優化準確度可能很困難(它不可微分),但它通常是我們最關心的性能指標,也通常是基準測試中的相關數量。因此,我們幾乎總是在訓練分類器時報告它。

準確度計算如下。首先,如果 y_hat 是一個矩陣,我們假設第二個維度存儲每個類別的預測分數。我們使用 argmax 取得每行中最大條目的索引,以此作為預測類別。然後我們將預測類別與真實標籤 y 逐元素進行比較。由於相等運算符 == 對數據類型敏感,因此我們轉換 y_hat 的數據類型以匹配 y 的數據類型。結果是一個包含條目 0(假)和 1(真)的張量。求和得出正確預測的數量。

@d2l.add_to_class(Classifier) #@save
def accuracy(self, Y_hat, Y, averaged=True):
  """Compare the arg-max predictions in ``Y_hat`` against the labels ``Y``.

  ``Y_hat`` is flattened to two dimensions, keeping its last axis, and the
  arg-max along axis 1 is taken as the predicted class. ``Y`` is flattened
  to one dimension before the comparison.

  Returns:
    The mean of the float32 0/1 match indicators when ``averaged`` is true,
    otherwise the indicators themselves.
  """
  num_outputs = Y_hat.shape[-1]
  scores = Y_hat.reshape((-1, num_outputs))
  # Cast to the label dtype before comparing with ==.
  predicted = scores.argmax(axis=1).type(Y.dtype)
  labels = Y.reshape(-1)
  matches = (predicted == labels).type(torch.float32)
  if averaged:
    return matches.mean()
  return matches
@d2l.add_to_class(Classifier) #@save
def accuracy(self, Y_hat, Y, averaged=True):
  """Compare the arg-max predictions in ``Y_hat`` against the labels ``Y``.

  ``Y_hat`` is flattened to two dimensions, keeping its last axis, and the
  arg-max along axis 1 is taken as the predicted class. ``Y`` is flattened
  to one dimension before the comparison.

  Returns:
    The mean of the float32 0/1 match indicators when ``averaged`` is true,
    otherwise the indicators themselves.
  """
  num_outputs = Y_hat.shape[-1]
  scores = Y_hat.reshape((-1, num_outputs))
  # Cast to the label dtype before comparing with ==.
  predicted = scores.argmax(axis=1).astype(Y.dtype)
  labels = Y.reshape(-1)
  matches = (predicted == labels).astype(np.float32)
  if averaged:
    return matches.mean()
  return matches

@d2l.add_to_class(d2l.Module) #@save
def get_scratch_params(self):
  """Collect the ndarray attributes of this module and of nested modules.

  Every attribute listed by ``dir(self)`` is inspected: ``np.ndarray``
  attributes are added to the result, and attributes that are themselves
  ``d2l.Module`` instances contribute their own scratch parameters
  recursively.

  Returns:
    A list of the collected arrays.
  """
  params = []
  for attr in dir(self):
    a = getattr(self, attr)
    # NOTE(review): both `if` bodies were missing from the source (leaving
    # the function unparsable); they are restored so that `params` is
    # actually populated -- confirm against the upstream implementation.
    if isinstance(a, np.ndarray):
      params.append(a)
    if isinstance(a, d2l.Module):
      params.extend(a.get_scratch_params())
  return params

@d2l.add_to_class(d2l.Module) #@save
def parameters(self):
  """Return the parameters of this module.

  Returns:
    The result of ``self.collect_params()`` when it is a non-empty
    ``gluon.parameter.ParameterDict``; otherwise the list produced by
    ``self.get_scratch_params()``.
  """
  params = self.collect_params()
  # NOTE(review): the original return statement was dedented to module level
  # and truncated after `and`. The non-empty check and the fallback to
  # get_scratch_params() are reconstructed -- confirm against upstream.
  if isinstance(params, gluon.parameter.ParameterDict) and len(params.keys()):
    return params
  return self.get_scratch_params()

