Hallo,
ich habe versucht, den Backpropagation-Algorithmus umzusetzen (was offensichtlich nicht funktioniert), und mir dabei folgende Formeln überlegt, wobei W' und b' jeweils die neuen Gewichte bzw. Biases sind:

Die Matrix W (in diesem Fall für den Layer mit den Neuronen 3 und 4) sieht dabei wie folgt aus:

Sollten diese Formeln stimmen, muss der Fehler in meiner Implementierung liegen:
[CODE lang="java" title="Backpropagation Algorithmus"]/**
*
* @param net {@link NeuralNetwork} that will be trained
* @param in {@link Matrix} of input data; each row represents one sample
* @param out {@link Matrix} of output data; each row represents the expected
* output vector of the referring input
* @param alpha learning rate
* @return the trained {@link NeuralNetwork}
*/
public NeuralNetwork backprop(NeuralNetwork net, Matrix in, Matrix out, double alpha) {
    Objects.requireNonNull(net, "The specified NeuralNetwork may not be null");
    Objects.requireNonNull(in, "The specified input Matrix may not be null");
    Objects.requireNonNull(out, "The specified output Matrix may not be null");
    if (in.numberOfRows() != out.numberOfRows()) {
        throw new IllegalArgumentException(
                "The number of rows of the input matrix must equal the number of rows of the output matrix");
    }
    // Fixed training length; one epoch is a full pass over all samples.
    final int epochs = 1_000_000;
    for (int epoch = 0; epoch < epochs; epoch++) {
        // Per-sample training: the weights are committed after every single sample.
        for (int s = 0; s < in.numberOfRows(); s++) {
            Vector sample = in.getRow(s);
            Vector expectedOutput = out.getRow(s);
            // Forward pass. NOTE(review): presumably predict() feeds the sample through
            // every layer so that getInput()/getOutput() are fresh - confirm in NeuralNetwork.
            Vector output = net.predict(sample);
            // Sign convention: error = expected - actual, so the corrections are ADDED below.
            Vector error = expectedOutput.sub(output);

            Layer outputLayer = net.getOutputLayer();
            // Use the output layer's own activation function, exactly as the hidden layers
            // do, instead of a network-wide one that may differ from it.
            ActivationFunction outputFct = outputLayer.getActivationFunction();
            // delta of the output layer = error (element-wise) g'(weighted input)
            outputLayer.setError(error.elementProduct(
                    outputLayer.getInput().map(x -> outputFct.applyDerivation(x))));
            // Stage (do not apply yet) the new output-layer weights and biases.
            calculateWeightsInOutputLayer(outputLayer, alpha, sample);

            // Walk backwards through the hidden layers. Each one reads the still
            // unmodified weights and the error of its successor, which is why the
            // new values are only staged here.
            for (Layer hidden = outputLayer.getPreviousLayer(); hidden != null;
                    hidden = hidden.getPreviousLayer()) {
                calculateWeightsInHiddenLayer(hidden, alpha, sample);
            }

            // All deltas are computed: commit the staged values, output layer first.
            for (Layer layer = outputLayer; layer != null; layer = layer.getPreviousLayer()) {
                layer.updateWeightsAndBiases();
            }
        }
    }
    return net;
}
/**
 * Stages the new input weights and biases of the output layer without applying
 * them. The layer's error vector must already have been set by the caller.
 *
 * <p>Staged weights: {@code W + alpha * (error * inputs^T)}; staged biases:
 * {@code b + alpha * error}.
 *
 * @param l      the output {@link Layer}
 * @param alpha  learning rate
 * @param sample the current input sample; only used when {@code l} has no
 *               previous layer
 */
private void calculateWeightsInOutputLayer(Layer l, double alpha, Vector sample) {
    Vector delta = l.getError();
    Layer previous = l.getPreviousLayer();
    // What was fed into this layer: the previous layer's output, or the raw
    // sample when this is the first layer of the network.
    Vector activations = previous == null ? sample : previous.getOutput();
    // NOTE(review): assumes toMatrix() yields a column matrix, so this product is
    // the outer product with one row per neuron - confirm in Vector/Matrix.
    Matrix gradient = delta.toMatrix().mul(activations.toMatrix().transpose());
    // The values are only staged; Layer.updateWeightsAndBiases() commits them.
    l.setNewInpWeights(l.getInputWeights().add(gradient.mul(alpha)));
    l.setNewBiases(l.getBiases().add(delta.mul(alpha)));
}
/**
 * Computes the error vector of a hidden layer from its successor and stages
 * the new input weights and biases without applying them.
 *
 * <p>Error: {@code g'(input) (element-wise) (W_next^T * error_next)}, where
 * {@code W_next} are the successor's current input weights. Staged weights:
 * {@code W + alpha * (error * inputs^T)}; staged biases: {@code b + alpha * error}.
 *
 * @param l      the hidden {@link Layer}
 * @param alpha  learning rate
 * @param sample the current input sample; only used when {@code l} has no
 *               previous layer
 * @throws NullPointerException if {@code l} has no next layer
 */
private void calculateWeightsInHiddenLayer(Layer l, double alpha, Vector sample) {
    ActivationFunction fct = l.getActivationFunction();
    // Fail with a readable message instead of an anonymous NPE when the layers
    // were never linked in the forward direction.
    Layer next = Objects.requireNonNull(l.getNextLayer(),
            "A hidden layer must have a next layer; was setNextLayer called?");
    // Propagate the successor's error back through its input weights and scale it
    // by the derivative of this layer's activation at its weighted input.
    Vector delta = l.getInput().map(x -> fct.applyDerivation(x))
            .elementProduct(next.getInputWeights().transpose().mul(next.getError()));
    l.setError(delta);

    Layer previous = l.getPreviousLayer();
    // What was fed into this layer: the previous layer's output, or the raw
    // sample when this is the first layer of the network.
    Vector activations = previous == null ? sample : previous.getOutput();
    // NOTE(review): assumes toMatrix() yields a column matrix, so this product is
    // the outer product with one row per neuron - confirm in Vector/Matrix.
    Matrix gradient = delta.toMatrix().mul(activations.toMatrix().transpose());
    // The values are only staged; Layer.updateWeightsAndBiases() commits them.
    l.setNewInpWeights(l.getInputWeights().add(gradient.mul(alpha)));
    l.setNewBiases(l.getBiases().add(delta.mul(alpha)));
}[/CODE]
Und hier noch die Klasse Layer:
[CODE lang="java" title="Layer"]package neural_network;
import data.function.activation.ActivationFunction;
import data.math.Matrix;
import data.math.Vector;
/**
 * A single layer of a neural network. It holds the input weights and biases of
 * its neurons, the values of the most recent forward pass, the error vector
 * used during training, and links to the neighbouring layers.
 *
 * <p>New weights and biases are first staged via {@link #setNewInpWeights(Matrix)}
 * and {@link #setNewBiases(Vector)} and only become active once
 * {@link #updateWeightsAndBiases()} is called.
 */
public class Layer {
    private final ActivationFunction fct;
    /** Active input weights: one row per neuron, one column per input. */
    private Matrix inpWeights;
    /** Staged input weights, or null if none have been staged yet. */
    private Matrix newInpWeights;
    /** Active biases, one entry per neuron. */
    private Vector biases;
    /** Staged biases, or null if none have been staged yet. */
    private Vector newBiases;
    private final Layer prevLayer;
    private Layer nextLayer;
    /** Weighted input (weights * input + biases) of the last forward pass; null before the first. */
    private Vector input;
    /** Activation of the last forward pass; null before the first. */
    private Vector output;
    private Vector error;

    /**
     * Initializes all weights and biases with random values between the specified
     * limits.
     *
     * <p>This constructor only stores the given neighbours; it does not register
     * the new layer as the next layer of {@code prevLayer}. Use
     * {@link #setNextLayer(Layer)} for that.
     *
     * @param numberOfNeurons number of neurons in the layer
     * @param numberOfInputs  number of inputs to the layer (= the number of outputs
     *                        in the previous {@link Layer}; = the number of input
     *                        weights to each neuron)
     * @param lowerLimit      lower limit for random values
     * @param upperLimit      upper limit for random values
     * @param fct             {@link ActivationFunction}
     * @param prevLayer       the previous {@link Layer}
     * @param nextLayer       the next {@link Layer}
     */
    public Layer(int numberOfNeurons, int numberOfInputs, double lowerLimit, double upperLimit,
            ActivationFunction fct, Layer prevLayer, Layer nextLayer) {
        this.fct = fct;
        this.biases = Vector.random(numberOfNeurons, lowerLimit, upperLimit);
        this.inpWeights = Matrix.random(numberOfNeurons, numberOfInputs, lowerLimit, upperLimit);
        this.prevLayer = prevLayer;
        this.nextLayer = nextLayer;
    }

    /**
     * Takes a {@link Vector} and feeds the data through the {@link Layer} and
     * returns the result-vector. The weighted input and the resulting output are
     * remembered and can be read via {@link #getInput()} and {@link #getOutput()}.
     *
     * @param input {@link Vector} of inputs
     * @return a {@link Vector} with the results
     */
    public Vector feedForward(Vector input) {
        // Note: the field stores the weighted sum, not the raw argument.
        this.input = inpWeights.mul(input).add(biases);
        output = this.input.map(x -> fct.apply(x));
        return output;
    }

    /**
     * Makes the staged weights and biases the active ones. A value that has never
     * been staged is left unchanged, so calling this method early can no longer
     * replace the active weights or biases with {@code null}.
     */
    public void updateWeightsAndBiases() {
        if (newInpWeights != null) {
            inpWeights = newInpWeights;
        }
        if (newBiases != null) {
            biases = newBiases;
        }
    }

    /**
     * @return the number of neurons in the {@link Layer}
     */
    public int numberOfNeurons() {
        return biases.dimension();
    }

    /**
     * @return the {@link Matrix} of input weights
     */
    public Matrix getInputWeights() {
        return inpWeights;
    }

    /**
     * @return the {@link Matrix} of inputs weights of the next {@link Layer} (=
     *         {@link Matrix} of output weights of the current {@link Layer}) or
     *         null if the next {@link Layer} is null
     */
    public Matrix getOutputWeights() {
        return nextLayer == null ? null : nextLayer.inpWeights;
    }

    /**
     * @return the {@link Vector} of biases
     */
    public Vector getBiases() {
        return biases;
    }

    /**
     * Stages new input weights; they become active with
     * {@link #updateWeightsAndBiases()}.
     *
     * @param newInpWeights the input weights to stage
     */
    public void setNewInpWeights(Matrix newInpWeights) {
        this.newInpWeights = newInpWeights;
    }

    /**
     * Stages new biases; they become active with {@link #updateWeightsAndBiases()}.
     *
     * @param newBiases the biases to stage
     */
    public void setNewBiases(Vector newBiases) {
        this.newBiases = newBiases;
    }

    /**
     * @return the previous {@link Layer} as passed to the constructor
     */
    public Layer getPreviousLayer() {
        return prevLayer;
    }

    /**
     * @return the next {@link Layer} as currently set
     */
    public Layer getNextLayer() {
        return nextLayer;
    }

    /**
     * @param nextLayer the {@link Layer} that follows this one
     */
    public void setNextLayer(Layer nextLayer) {
        this.nextLayer = nextLayer;
    }

    /**
     * @return the weighted input (weights * input + biases) of the last call to
     *         {@link #feedForward(Vector)}, or null if it was never called
     */
    public Vector getInput() {
        return input;
    }

    /**
     * @return the result of the last call to {@link #feedForward(Vector)}, or
     *         null if it was never called
     */
    public Vector getOutput() {
        return output;
    }

    /**
     * @return the {@link ActivationFunction} of this {@link Layer}
     */
    public ActivationFunction getActivationFunction() {
        return fct;
    }

    /**
     * @return the error {@link Vector} last stored via {@link #setError(Vector)}
     */
    public Vector getError() {
        return error;
    }

    /**
     * @param error the error {@link Vector} to store for this {@link Layer}
     */
    public void setError(Vector error) {
        this.error = error;
    }
}
[/CODE]
Leider bin ich etwas ratlos, wo der Fehler liegt.
ich habe versucht, den Backpropagation-Algorithmus umzusetzen (was offensichtlich nicht funktioniert), und mir dabei folgende Formeln überlegt, wobei W' und b' jeweils die neuen Gewichte bzw. Biases sind:

Die Matrix W (in diesem Fall für den Layer mit den Neuronen 3 und 4) sieht dabei wie folgt aus:

Sollten diese Formeln stimmen, muss der Fehler in meiner Implementierung liegen:
[CODE lang="java" title="Backpropagation Algorithmus"]/**
*
* @param net {@link NeuralNetwork} that will be trained
* @param in {@link Matrix} of input data; each row represents one sample
* @param out {@link Matrix} of output data; each row represents the expected
* output vector of the referring input
* @param alpha learning rate
* @return the trained {@link NeuralNetwork}
*/
public NeuralNetwork backprop(NeuralNetwork net, Matrix in, Matrix out, double alpha) {
    Objects.requireNonNull(net, "The specified NeuralNetwork may not be null");
    Objects.requireNonNull(in, "The specified input Matrix may not be null");
    Objects.requireNonNull(out, "The specified output Matrix may not be null");
    if (in.numberOfRows() != out.numberOfRows()) {
        throw new IllegalArgumentException(
                "The number of rows of the input matrix must equal the number of rows of the output matrix");
    }
    // Fixed training length; one epoch is a full pass over all samples.
    final int epochs = 1_000_000;
    for (int epoch = 0; epoch < epochs; epoch++) {
        // Per-sample training: the weights are committed after every single sample.
        for (int s = 0; s < in.numberOfRows(); s++) {
            Vector sample = in.getRow(s);
            Vector expectedOutput = out.getRow(s);
            // Forward pass. NOTE(review): presumably predict() feeds the sample through
            // every layer so that getInput()/getOutput() are fresh - confirm in NeuralNetwork.
            Vector output = net.predict(sample);
            // Sign convention: error = expected - actual, so the corrections are ADDED below.
            Vector error = expectedOutput.sub(output);

            Layer outputLayer = net.getOutputLayer();
            // Use the output layer's own activation function, exactly as the hidden layers
            // do, instead of a network-wide one that may differ from it.
            ActivationFunction outputFct = outputLayer.getActivationFunction();
            // delta of the output layer = error (element-wise) g'(weighted input)
            outputLayer.setError(error.elementProduct(
                    outputLayer.getInput().map(x -> outputFct.applyDerivation(x))));
            // Stage (do not apply yet) the new output-layer weights and biases.
            calculateWeightsInOutputLayer(outputLayer, alpha, sample);

            // Walk backwards through the hidden layers. Each one reads the still
            // unmodified weights and the error of its successor, which is why the
            // new values are only staged here.
            for (Layer hidden = outputLayer.getPreviousLayer(); hidden != null;
                    hidden = hidden.getPreviousLayer()) {
                calculateWeightsInHiddenLayer(hidden, alpha, sample);
            }

            // All deltas are computed: commit the staged values, output layer first.
            for (Layer layer = outputLayer; layer != null; layer = layer.getPreviousLayer()) {
                layer.updateWeightsAndBiases();
            }
        }
    }
    return net;
}
/**
 * Stages the new input weights and biases of the output layer without applying
 * them. The layer's error vector must already have been set by the caller.
 *
 * <p>Staged weights: {@code W + alpha * (error * inputs^T)}; staged biases:
 * {@code b + alpha * error}.
 *
 * @param l      the output {@link Layer}
 * @param alpha  learning rate
 * @param sample the current input sample; only used when {@code l} has no
 *               previous layer
 */
private void calculateWeightsInOutputLayer(Layer l, double alpha, Vector sample) {
    Vector delta = l.getError();
    Layer previous = l.getPreviousLayer();
    // What was fed into this layer: the previous layer's output, or the raw
    // sample when this is the first layer of the network.
    Vector activations = previous == null ? sample : previous.getOutput();
    // NOTE(review): assumes toMatrix() yields a column matrix, so this product is
    // the outer product with one row per neuron - confirm in Vector/Matrix.
    Matrix gradient = delta.toMatrix().mul(activations.toMatrix().transpose());
    // The values are only staged; Layer.updateWeightsAndBiases() commits them.
    l.setNewInpWeights(l.getInputWeights().add(gradient.mul(alpha)));
    l.setNewBiases(l.getBiases().add(delta.mul(alpha)));
}
/**
 * Computes the error vector of a hidden layer from its successor and stages
 * the new input weights and biases without applying them.
 *
 * <p>Error: {@code g'(input) (element-wise) (W_next^T * error_next)}, where
 * {@code W_next} are the successor's current input weights. Staged weights:
 * {@code W + alpha * (error * inputs^T)}; staged biases: {@code b + alpha * error}.
 *
 * @param l      the hidden {@link Layer}
 * @param alpha  learning rate
 * @param sample the current input sample; only used when {@code l} has no
 *               previous layer
 * @throws NullPointerException if {@code l} has no next layer
 */
private void calculateWeightsInHiddenLayer(Layer l, double alpha, Vector sample) {
    ActivationFunction fct = l.getActivationFunction();
    // Fail with a readable message instead of an anonymous NPE when the layers
    // were never linked in the forward direction.
    Layer next = Objects.requireNonNull(l.getNextLayer(),
            "A hidden layer must have a next layer; was setNextLayer called?");
    // Propagate the successor's error back through its input weights and scale it
    // by the derivative of this layer's activation at its weighted input.
    Vector delta = l.getInput().map(x -> fct.applyDerivation(x))
            .elementProduct(next.getInputWeights().transpose().mul(next.getError()));
    l.setError(delta);

    Layer previous = l.getPreviousLayer();
    // What was fed into this layer: the previous layer's output, or the raw
    // sample when this is the first layer of the network.
    Vector activations = previous == null ? sample : previous.getOutput();
    // NOTE(review): assumes toMatrix() yields a column matrix, so this product is
    // the outer product with one row per neuron - confirm in Vector/Matrix.
    Matrix gradient = delta.toMatrix().mul(activations.toMatrix().transpose());
    // The values are only staged; Layer.updateWeightsAndBiases() commits them.
    l.setNewInpWeights(l.getInputWeights().add(gradient.mul(alpha)));
    l.setNewBiases(l.getBiases().add(delta.mul(alpha)));
}[/CODE]
Und hier noch die Klasse Layer:
[CODE lang="java" title="Layer"]package neural_network;
import data.function.activation.ActivationFunction;
import data.math.Matrix;
import data.math.Vector;
/**
 * A single layer of a neural network. It holds the input weights and biases of
 * its neurons, the values of the most recent forward pass, the error vector
 * used during training, and links to the neighbouring layers.
 *
 * <p>New weights and biases are first staged via {@link #setNewInpWeights(Matrix)}
 * and {@link #setNewBiases(Vector)} and only become active once
 * {@link #updateWeightsAndBiases()} is called.
 */
public class Layer {
    private final ActivationFunction fct;
    /** Active input weights: one row per neuron, one column per input. */
    private Matrix inpWeights;
    /** Staged input weights, or null if none have been staged yet. */
    private Matrix newInpWeights;
    /** Active biases, one entry per neuron. */
    private Vector biases;
    /** Staged biases, or null if none have been staged yet. */
    private Vector newBiases;
    private final Layer prevLayer;
    private Layer nextLayer;
    /** Weighted input (weights * input + biases) of the last forward pass; null before the first. */
    private Vector input;
    /** Activation of the last forward pass; null before the first. */
    private Vector output;
    private Vector error;

    /**
     * Initializes all weights and biases with random values between the specified
     * limits.
     *
     * <p>This constructor only stores the given neighbours; it does not register
     * the new layer as the next layer of {@code prevLayer}. Use
     * {@link #setNextLayer(Layer)} for that.
     *
     * @param numberOfNeurons number of neurons in the layer
     * @param numberOfInputs  number of inputs to the layer (= the number of outputs
     *                        in the previous {@link Layer}; = the number of input
     *                        weights to each neuron)
     * @param lowerLimit      lower limit for random values
     * @param upperLimit      upper limit for random values
     * @param fct             {@link ActivationFunction}
     * @param prevLayer       the previous {@link Layer}
     * @param nextLayer       the next {@link Layer}
     */
    public Layer(int numberOfNeurons, int numberOfInputs, double lowerLimit, double upperLimit,
            ActivationFunction fct, Layer prevLayer, Layer nextLayer) {
        this.fct = fct;
        this.biases = Vector.random(numberOfNeurons, lowerLimit, upperLimit);
        this.inpWeights = Matrix.random(numberOfNeurons, numberOfInputs, lowerLimit, upperLimit);
        this.prevLayer = prevLayer;
        this.nextLayer = nextLayer;
    }

    /**
     * Takes a {@link Vector} and feeds the data through the {@link Layer} and
     * returns the result-vector. The weighted input and the resulting output are
     * remembered and can be read via {@link #getInput()} and {@link #getOutput()}.
     *
     * @param input {@link Vector} of inputs
     * @return a {@link Vector} with the results
     */
    public Vector feedForward(Vector input) {
        // Note: the field stores the weighted sum, not the raw argument.
        this.input = inpWeights.mul(input).add(biases);
        output = this.input.map(x -> fct.apply(x));
        return output;
    }

    /**
     * Makes the staged weights and biases the active ones. A value that has never
     * been staged is left unchanged, so calling this method early can no longer
     * replace the active weights or biases with {@code null}.
     */
    public void updateWeightsAndBiases() {
        if (newInpWeights != null) {
            inpWeights = newInpWeights;
        }
        if (newBiases != null) {
            biases = newBiases;
        }
    }

    /**
     * @return the number of neurons in the {@link Layer}
     */
    public int numberOfNeurons() {
        return biases.dimension();
    }

    /**
     * @return the {@link Matrix} of input weights
     */
    public Matrix getInputWeights() {
        return inpWeights;
    }

    /**
     * @return the {@link Matrix} of inputs weights of the next {@link Layer} (=
     *         {@link Matrix} of output weights of the current {@link Layer}) or
     *         null if the next {@link Layer} is null
     */
    public Matrix getOutputWeights() {
        return nextLayer == null ? null : nextLayer.inpWeights;
    }

    /**
     * @return the {@link Vector} of biases
     */
    public Vector getBiases() {
        return biases;
    }

    /**
     * Stages new input weights; they become active with
     * {@link #updateWeightsAndBiases()}.
     *
     * @param newInpWeights the input weights to stage
     */
    public void setNewInpWeights(Matrix newInpWeights) {
        this.newInpWeights = newInpWeights;
    }

    /**
     * Stages new biases; they become active with {@link #updateWeightsAndBiases()}.
     *
     * @param newBiases the biases to stage
     */
    public void setNewBiases(Vector newBiases) {
        this.newBiases = newBiases;
    }

    /**
     * @return the previous {@link Layer} as passed to the constructor
     */
    public Layer getPreviousLayer() {
        return prevLayer;
    }

    /**
     * @return the next {@link Layer} as currently set
     */
    public Layer getNextLayer() {
        return nextLayer;
    }

    /**
     * @param nextLayer the {@link Layer} that follows this one
     */
    public void setNextLayer(Layer nextLayer) {
        this.nextLayer = nextLayer;
    }

    /**
     * @return the weighted input (weights * input + biases) of the last call to
     *         {@link #feedForward(Vector)}, or null if it was never called
     */
    public Vector getInput() {
        return input;
    }

    /**
     * @return the result of the last call to {@link #feedForward(Vector)}, or
     *         null if it was never called
     */
    public Vector getOutput() {
        return output;
    }

    /**
     * @return the {@link ActivationFunction} of this {@link Layer}
     */
    public ActivationFunction getActivationFunction() {
        return fct;
    }

    /**
     * @return the error {@link Vector} last stored via {@link #setError(Vector)}
     */
    public Vector getError() {
        return error;
    }

    /**
     * @param error the error {@link Vector} to store for this {@link Layer}
     */
    public void setError(Vector error) {
        this.error = error;
    }
}
[/CODE]
Leider bin ich etwas ratlos, wo der Fehler liegt.