// Tutorial snippet v1: convert the raw training set into deeplearn.js NDArrays.
import {
Array1D,
Graph,
Session,
NDArrayMathGPU,
} from 'deeplearn';
// Shared math context; executes NDArray operations on the GPU (WebGL).
const math = new NDArrayMathGPU();
class ColorAccessibilityModel {
// Session and graph tensors created during the setup phase (shown elsewhere).
session;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
...
// Turns { rawInputs, rawTargets } into Array1D NDArrays.
// math.scope() disposes intermediate NDArrays when the callback returns.
prepareTrainingSet(trainingSet) {
math.scope(() => {
const { rawInputs, rawTargets } = trainingSet;
// NOTE(review): normalizeColor presumably scales RGB channels into [0, 1] — defined elsewhere in the file.
const inputArray = rawInputs.map(v => Array1D.new(this.normalizeColor(v)));
const targetArray = rawTargets.map(v => Array1D.new(v));
});
}
...
}
export default ColorAccessibilityModel;
第三,shuffle 輸入和目標陣列。shuffle 的時候,deeplearn.js 提供的 shuffler 會將二者保持同步(輸入與目標的對應關係不會被打亂)。每次訓練迭代都會出現 shuffle,以饋送不同的輸入作為神經網絡的 batch。整個 shuffle 流程可以改善訓練算法,因為它更可能通過避免過擬合來實現泛化。
// Tutorial snippet v2: shuffle inputs and targets while keeping them in sync.
import {
Array1D,
InCPUMemoryShuffledInputProviderBuilder,
Graph,
Session,
NDArrayMathGPU,
} from 'deeplearn';
// Shared math context; executes NDArray operations on the GPU (WebGL).
const math = new NDArrayMathGPU();
class ColorAccessibilityModel {
// Session and graph tensors created during the setup phase (shown elsewhere).
session;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
...
// Converts the raw training set into NDArrays and builds shuffled providers.
prepareTrainingSet(trainingSet) {
math.scope(() => {
const { rawInputs, rawTargets } = trainingSet;
// NOTE(review): normalizeColor presumably scales RGB channels into [0, 1] — defined elsewhere in the file.
const inputArray = rawInputs.map(v => Array1D.new(this.normalizeColor(v)));
const targetArray = rawTargets.map(v => Array1D.new(v));
// Passing both arrays to one builder keeps input/target pairs aligned
// after shuffling; a fresh shuffle happens on each training iteration.
const shuffledInputProviderBuilder = new InCPUMemoryShuffledInputProviderBuilder([
inputArray,
targetArray
]);
// Providers come back in the same order the arrays were given.
const [
inputProvider,
targetProvider,
] = shuffledInputProviderBuilder.getInputProviders();
});
}
...
}
export default ColorAccessibilityModel;
最后,饋送條目(feed entries)是訓練階段中神經網絡前饋算法的最終輸入。它將數據提供者與張量(其形狀已在設置階段定義)一一匹配。
import {
Array1D,
InCPUMemoryShuffledInputProviderBuilder
Graph,
Session,
NDArrayMathGPU,
} from 'deeplearn';
const math = new NDArrayMathGPU();
class ColorAccessibilityModel {
session;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
feedEntries;
...
prepareTrainingSet(trainingSet) {
math.scope(() => {
const { rawInputs, rawTargets } = trainingSet;
const inputArray = rawInputs.map(v => Array1D.new(this.normalizeColor(v)));
const targetArray = rawTargets.map(v => Array1D.new(v));
const shuffledInputProviderBuilder = new InCPUMemoryShuffledInputProviderBuilder([
inputArray,
targetArray
]);
const [
inputProvider,
targetProvider,
] = shuffledInputProviderBuilder.getInputProviders();
this.feedEntries = [
{ tensor: this.inputTensor, data: inputProvider },
{ tensor: this.targetTensor, data: targetProvider },
];
});
}
...
}
export default ColorAccessibilityModel;
這樣,神經網絡的設置就結束了。神經網絡的所有層和單元都實現了,訓練集也準備好進行訓練了。現在只需要添加兩個配置神經網絡行為的超參數,它們適用于下個階段:訓練階段。
// Tutorial snippet v4: add the two training hyperparameters and the optimizer.
import {
Array1D,
InCPUMemoryShuffledInputProviderBuilder,
Graph,
Session,
SGDOptimizer,
NDArrayMathGPU,
} from 'deeplearn';
// Shared math context; executes NDArray operations on the GPU (WebGL).
const math = new NDArrayMathGPU();
class ColorAccessibilityModel {
session;
// Stochastic gradient descent optimizer, constructed with the learning rate below.
optimizer;
// Number of training-set data points fed through the network per iteration.
batchSize = 300;
// Starting learning rate for SGD; controls how fast cost minimization converges.
initialLearningRate = 0.06;
inputTensor;
targetTensor;
predictionTensor;
costTensor;
feedEntries;
constructor() {
this.optimizer = new SGDOptimizer(this.initialLearningRate);
}
...
}
export default ColorAccessibilityModel;
第一個參數是學習速率(learning rate)。學習速率決定算法的收斂速度,以最小化成本。我們希望它的數值較高,但又不能太高。否則梯度下降就不會收斂,因為找不到局部最優值。
第二個參數是批尺寸(batch size)。它定義每次迭代里有多少個訓練集的數據點通過神經網絡。一個 epoch 等于一批數據點的一次正向傳播和一次反向傳播。以批次的方式訓練神經網絡有兩個好處:第一,這樣可以防止密集計算,因為算法訓練時使用了內存中的少量數據點;第二,這樣可以讓神經網絡更快地進行訓練,因為權重會隨著每個批次的數據點進行調整——而不是等到整個數據集訓練完之后再進行改動。
訓練階段
評論