NB. File: src/neur/backprop.ijs
NB. Backpropagation
NB. See https://www.miximum.fr/blog/introduction-au-deep-learning-2/
load 'stats'
NB. sigma y : logistic function 1 % 1 + ^ - y, applied element-wise to y.
NB. Tacit pipeline, read right to left: negate, exponential, increment, reciprocal.
sigma =: %@:>:@:^@:-
NB. sigmaprime y : derivative of sigma at y, i.e. s * (1 - s) with s = sigma y.
NB. The hook (* -.) computes s * -. s, where -. s is 1 - s; sigma is evaluated once.
sigmaprime =: (* -.)@:sigma
NB. ---- Network dimensions and hyper-parameters ----
nl    =: 5          NB. number of layers
npl   =: 6          NB. neurons in each layer
n     =: npl * nl   NB. total number of neurons
p     =: 10         NB. random inputs/targets are drawn as (? p) % p, i.e. multiples of 1 % p
alpha =: 3          NB. learning rate
nX    =: 3          NB. number of input samples (columns of X and T)

NB. ---- Boolean masks (1 marks an entry that may be non-zero) ----
NB. Connections: entry (i,j) is 1 when the layer of neuron i equals 1 + the layer of neuron j,
NB. so each neuron feeds only the neurons of the next layer.
NB. The layer of a neuron is <. index % npl ; (=/ >:)~ L  is  L =/ 1 + L.
maskW =: (=/ >:)~ <. (i. n) % npl
NB. Inputs: only the first npl neurons (first layer).
maskX =: npl > i. n
NB. Biases: every neuron except the input neurons.
maskB =: -. maskX
NB. Outputs: only the last npl neurons (last layer), i.e. maskX reversed.
maskO =: |. maskX
NB. Connection weights: n-by-n matrix of normalrand draws, zeroed outside maskW.
NB. The entry at row i, column j is the weight of the connection from neuron j to neuron i.
W =: maskW * (2 # n) $ normalrand n ^ 2
NB. Biases: one normalrand draw per neuron, zeroed for the input neurons.
B =: maskB * normalrand n
NB. Inputs: n-by-nX table, column k is the k-th input vector; values are (? p) % p,
NB. kept only on the first-layer rows.
X =: maskX * p %~ ? (n , nX) $ p
NB. Expected outputs: same construction, kept only on the last-layer rows.
T =: maskO * p %~ ? (n , nX) $ p
NB. step y : one forward-propagation step (y is ignored).
NB. Updates the globals Z (aggregation) and A (activation); the result is the new A.
step =: 3 : 0
NB. Aggregation: biases plus the matrix product of the weights by the current activations.
Z =: B + W +/ . * A
NB. Activation: input neurons keep their fixed values from X ...
fed =. maskX * X
NB. ... every other neuron takes sigma of its aggregation.
computed =. (-. maskX) * sigma Z
A =: fed + computed
)
NB. One step of backpropagation
NB. Output layer L : delta^L_i = sigma'(z^L_i) * (a^L_i - t_i)
NB. Other layers l : delta^l_i = sigma'(z^l_i) * sum_j(w^{l+1}_{ji} delta^{l+1}_j)
NB. stepdelta y : one backward-propagation step (y is ignored).
NB. Updates the global delta from Z, A, T, W and the previous delta; the result is the new delta.
stepdelta =: 3 : 0
NB. Error injected at the output neurons only (maskO zeroes every other row).
outErr =. maskO * A - T
NB. Error carried back through the transposed weights from the current delta.
backErr =. (|: W) +/ . * delta
NB. Alternative kept from the original, where the output error is not scaled by sigmaprime:
NB. delta =: (maskO * A - T) + (sigmaprime Z) * (|: W) +/ . * delta
delta =: (sigmaprime Z) * outErr + backErr
)
NB. Step of learning
steplearn =: 3 : 0
NB. One learning step (y is ignored): forward pass, backward pass, gradient-descent update.
NB. Updates the globals A, Z, delta, avgdelta, GW, GB, W and B; the result is the new B.

NB. Forward pass: start from the inputs, then aggregate and activate nl-1 times.
A =: X
(step^:(<: nl)) 0

NB. Backward pass: start from a zero delta and back-propagate nl times.
NB. delta^l_i = sigma'(z^l_i) * sum_j(w^{l+1}_{ji} delta^{l+1}_j)
delta =: (n , nX) $ 0
(stepdelta^:nl) 0

NB. Delta of each neuron averaged over the nX inputs.
avgdelta =: nX %~ +/ |: delta

NB. Weight gradient averaged over the nX inputs, restricted to existing connections:
NB. dC/dw^l_{ij} = a^{l-1}_j delta^l_i
GW =: maskW * delta +/ . * |: nX %~ A

NB. Bias gradient averaged over the nX inputs, restricted to neurons that have a bias:
NB. dC/db_i = delta^l_i
GB =: maskB * avgdelta

NB. Modify weights and biases: move against the gradient, scaled by the learning rate.
W =: W - alpha * GW
B =: B - alpha * GB
)
NB. Training: apply steplearn 10000 times.
(steplearn^:10000) 0
NB. Report the last npl rows (the output layer) of the expected outputs T,
NB. of the computed activations A, and of their difference.
NB. (- npl) {. M takes the last npl rows of M.
NB. Whole-table variant of the difference: echo maskO * A - T
echo 'T :'
echo (- npl) {. T
echo 'A :'
echo (- npl) {. A
echo 'A - T :'
echo (- npl) {. A - T