Minimalistic Neural Network

tnlogy · March 22, 2024, 1:26pm

Hi, since I’m trying to refresh my knowledge of machine learning, I wrote a small neural network in Codea. There isn’t much to see yet, but it can learn from a set of training examples, which is quite nice for a neural network in under 200 lines of code.

Credits go to Karpathy for creating micrograd and explaining it in his YouTube video.

--# Main
-- NN

-- Builds a 3 -> 4 -> 4 -> 1 network of tanh neurons, trains it on four
-- hand-written examples with per-example gradient descent and prints the
-- loss every 20 epochs followed by the final predictions.
-- NOTE(review): presumably invoked by Codea as the project entry point.
function setup()
    -- Neural network of 3 layers taking 3 inputs
    local layer1 = Layer(3, 4) -- layer of 4 neurons with 3 inputs
    local layer2 = Layer(4, 4) -- layer of 4 neurons with 4 inputs
    local layer3 = Layer(4, 1) -- layer of 1 neuron with 4 inputs

    -- try to learn the output values y_train for the given training
    -- examples x_train (kept local so they do not leak into the global table)
    local x_train = {
        {2.0, 3.0, -1.0},
        {3.0, -1.0, 0.5},
        {0.5, 1.0, 1.0},
        {1.0, 1.0, -1.0}
    }

    local y_train = {1.0, -1.0, -1.0, 1.0}

    -- Wraps a row of plain numbers in fresh Value nodes.
    local function to_values(row)
        local input = {}
        for _, x in ipairs(row) do
            table.insert(input, Value(x))
        end
        return input
    end

    -- Runs one forward pass through all three layers and returns the
    -- single output Value of the last layer.
    local function forward(row)
        return layer3:call(layer2:call(layer1:call(to_values(row))))[1]
    end

    local alpha = 0.01 -- learning rate
    for epoch = 1, 200 do
        local total_loss = 0
        -- for each training example, calculate the loss and update the weights
        for i, x_in in ipairs(x_train) do
            local out = forward(x_in)
            -- squared error between prediction and target
            local loss = (out - Value(y_train[i])):pow(Value(2))
            loss:backward()
            total_loss = total_loss + loss.value

            -- learn new values for the weights of the network
            layer1:update_grad(alpha)
            layer2:update_grad(alpha)
            layer3:update_grad(alpha)
        end
        if epoch % 20 == 0 then
            print("epoch:", epoch, " total loss:", total_loss)
        end
    end

    -- show what the trained network predicts for every training example
    for i, x_in in ipairs(x_train) do
        print("NN output:", forward(x_in).value, "Actual value:", y_train[i])
    end
end

-- Fits w and b of the line w * x + b to the target 4 * x - 5 by gradient
-- descent on the squared error, printing the summed loss after every epoch
-- and the learned parameters at the end.
function example_linear_regression()
    -- try to learn values for this function
    -- (local, so running the example does not define a global f_target)
    local function f_target(x)
        return 4 * x - 5
    end

    -- arbitrary starting guesses for slope and intercept
    local w = Value(3)
    local b = Value(4)

    local alpha = 0.01 -- learning rate
    for i = 1, 300 do
        local total_loss = 0
        for x_in = 0, 10 do
            local prediction = w * Value(x_in) + b
            local target = Value(f_target(x_in))
            local loss = (prediction - target):pow(Value(2))
            -- backward() zeroes every grad in the graph before accumulating,
            -- so w.grad and b.grad belong to this sample only
            loss:backward()
            w.value = w.value - alpha * w.grad -- learn new values for w and b
            b.value = b.value - alpha * b.grad
            total_loss = total_loss + loss.value
        end
        print("epoch:", i, " total loss:", total_loss)
    end
    print("Learned w:", w.value, " b:", b.value)
end

--# Value
-- Value: a scalar node of the computation graph used for backpropagation.
Value = class()

-- Operation tags stored in a node's `op` field. Value:init falls back to
-- VALUE when no operation is given; the other tags select the derivative
-- rule applied in Value:calculate_local_gradient.
VALUE, ADD, SUB, MUL, POW, TANH = 0, 1, 2, 3, 4, 5

-- Initialises a graph node.
--   value : number held by the node
--   op    : operation tag that produced the node (defaults to VALUE)
--   left  : first operand node, or nil
--   right : second operand node, or nil
-- The gradient starts at 0. The first construction also installs the
-- *, + and - metamethods on the instance metatable.
function Value:init(value, op, left, right)
    self.value, self.op, self.left, self.right = value, op or VALUE, left, right
    self.grad = 0

    local mt = getmetatable(self)
    -- Install the operators only once per metatable instead of building
    -- three new closures for every node. rawget avoids any __index lookup
    -- on the metatable itself.
    if rawget(mt, "__mul") == nil then
        -- Lets an operand be a plain number, e.g. Value(2) * 3 or 3 * Value(2).
        local function as_value(v)
            if type(v) == "number" then
                return Value(v)
            end
            return v
        end

        mt.__mul = function(a, b)
            a, b = as_value(a), as_value(b)
            return Value(a.value * b.value, MUL, a, b)
        end
        mt.__add = function(a, b)
            a, b = as_value(a), as_value(b)
            return Value(a.value + b.value, ADD, a, b)
        end
        mt.__sub = function(a, b)
            a, b = as_value(a), as_value(b)
            return Value(a.value - b.value, SUB, a, b)
        end
    end
end

-- Raises this node to the power held by another node.
--   other : Value whose `value` field is the exponent
-- Returns a new Value tagged POW with self and other as its operands.
function Value:pow(other)
    local base, exponent = self.value, other.value
    return Value(base ^ exponent, POW, self, other)
end

-- Applies the hyperbolic tangent to this node.
-- Returns a new Value tagged TANH with self as its only operand.
function Value:tanh()
    local x = self.value
    local t
    -- Keep the exponent non-positive: exp(2 * x) overflows to inf for large
    -- x and (inf - 1) / (inf + 1) would be NaN rather than 1.
    if x >= 0 then
        local e = math.exp(-2 * x)
        t = (1 - e) / (1 + e)
    else
        local e = math.exp(2 * x)
        t = (e - 1) / (e + 1)
    end
    return Value(t, TANH, self)
end

-- Backpropagates from this node: resets the gradient of every node
-- reachable through left/right links to 0, seeds this node's gradient
-- with 1 and then lets each node push its gradient to its operands,
-- walking the nodes in reverse topological order.
function Value:backward()
    local seen, order = {}, {}

    -- Depth-first post-order walk: operands are appended before the node
    -- that uses them.
    local function visit(node)
        if seen[node] then
            return
        end
        seen[node] = true
        if node.left then visit(node.left) end
        if node.right then visit(node.right) end
        node.grad = 0 -- zero grad
        order[#order + 1] = node
    end
    visit(self)

    self.grad = 1
    local idx = #order
    while idx >= 1 do
        order[idx]:calculate_local_gradient()
        idx = idx - 1
    end
end

-- Adds this node's contribution to the gradients of its operands using the
-- chain rule for the operation stored in `op`. Nodes tagged VALUE have no
-- operands and are left untouched.
function Value:calculate_local_gradient()
    local op, left, right, grad = self.op, self.left, self.right, self.grad
    if op == ADD then
        left.grad = left.grad + grad
        right.grad = right.grad + grad
    elseif op == SUB then
        left.grad = left.grad + grad
        -- d(a - b)/db = -1, so the right operand receives the negated gradient
        right.grad = right.grad - grad
    elseif op == MUL then
        left.grad = left.grad + right.value * grad
        right.grad = right.grad + left.value * grad
    elseif op == POW then
        -- Power rule for the base only; the exponent is treated as a
        -- constant and receives no gradient.
        left.grad = left.grad + (right.value * left.value ^ (right.value - 1)) * grad
    elseif op == TANH then
        -- d tanh(x)/dx = 1 - tanh(x)^2, and self.value already holds tanh(x)
        left.grad = left.grad + (1 - self.value ^ 2) * grad
    end
end

--# NeuralNetwork
-- Neuron: weighted sum of its inputs plus a bias, squashed by tanh.
Neuron = class()

-- Creates a neuron with `number_of_inputs` random weights and a zero bias.
function Neuron:init(number_of_inputs)
    self.w = {}
    for i = 1, number_of_inputs do
        -- math.random(m, n) is the integer form and would only ever yield
        -- -1, 0 or 1; scale the float form instead for a uniform weight
        -- in [-1, 1).
        table.insert(self.w, Value(math.random() * 2 - 1))
    end
    self.b = Value(0)
end

-- Evaluates the neuron for the inputs x.
--   x : array of Values, one per weight
-- Returns tanh(b + sum of w[i] * x[i]) as a new Value.
function Neuron:call(x)
    local weights = self.w
    local acc = self.b
    for idx = 1, #weights do
        acc = acc + weights[idx] * x[idx]
    end
    return acc:tanh()
end

-- Takes one gradient-descent step: moves every weight and the bias against
-- its stored gradient, scaled by the learning rate alpha.
function Neuron:update_grad(alpha)
    for _, weight in ipairs(self.w) do
        weight.value = weight.value - alpha * weight.grad
    end
    local bias = self.b
    bias.value = bias.value - alpha * bias.grad
end

-- Layer: a group of neurons that all receive the same inputs.
Layer = class()

-- Creates `number_of_outputs` neurons, each taking `number_of_inputs` inputs.
function Layer:init(number_of_inputs, number_of_outputs)
    local neurons = {}
    self.neurons = neurons
    for idx = 1, number_of_outputs do
        neurons[idx] = Neuron(number_of_inputs)
    end
end

-- Feeds the inputs x to every neuron of the layer.
-- Returns an array with one output per neuron, in neuron order.
function Layer:call(x)
    local outputs = {}
    for _, neuron in ipairs(self.neurons) do
        outputs[#outputs + 1] = neuron:call(x)
    end
    return outputs
end

-- Applies one gradient-descent step with learning rate alpha to every
-- neuron in the layer.
function Layer:update_grad(alpha)
    local neurons = self.neurons
    for idx = 1, #neurons do
        neurons[idx]:update_grad(alpha)
    end
end

UberGoober · March 22, 2024, 3:24pm

If it’s not too much to ask, can you explain how I would use this for something?

tnlogy · March 22, 2024, 3:57pm

I’ll try to make a better example later on. I recommend looking at the linked video of micrograd if you want to understand how it works.

But you could use it to learn to recognize different numbers you write on screen or use reinforcement learning to create an agent in a game.

Steppers · March 25, 2024, 9:20pm

@tnlogy This looks great! I’ve been tinkering with something similar also based on micrograd.

I’ve also added GPU inference using textures and shaders in my implementation. No GPU training though unfortunately.

@UberGoober This example could drop in to be used for the ‘AI’ in the finger chase game on WebRepo. Given additional context in the training (and longer training time), you could probably also devise some simple obstacle avoidance too.

In general you could probably ‘train’ a simple game AI using a small neural net to make decisions.

CodeaAI-0.1a.zip (9.2 KB) It takes a few seconds to start up for some reason so give it a moment.

UberGoober · March 25, 2024, 11:30pm

The decimal in the file name borks my ability to open it in Codea directly from the forum. Can you re-upload it without the decimal?

tnlogy · March 26, 2024, 8:49am

@Steppers that’s brilliant! Fun to see that you are writing something similar!

gpu.lua:106: bad argument #2 to 'pack' (number expected, got nil)

I get this error when I try to run the code on my iPhone though.

It will be interesting to see how you used the GPU. I planned to use it as well, but haven’t had time for it. My idea was to store the nodes on a stack, so that I might be able to calculate some parts in parallel, namely the nodes at the same depth.

Looking forward to checking out your code.

UberGoober · March 28, 2024, 4:42am

I had to download the file to Files and then open it from there to get it on my Codea, but I did it. (would be great if you could get rid of that extra period).

The project crashes if I touch the screen three times or so.

Steppers · March 28, 2024, 8:50am

@tnlogy Honestly not sure what’s going on there. Does that happen every single time?

@UberGoober Updated zip with no decimals:
CodeaAI-v1.zip (9.2 KB)

tnlogy · March 28, 2024, 11:31am

Hmm, I haven’t had time to figure it out. It works on my MacBook Pro when I run Codea on it. It seems like a value v is missing when you pack the data. Weird. Could it be some difference in architecture between devices?

Thanks for the code; it’s interesting to see how you made the GPU version. There’s a lot of bit handling to get the data into and out of the texture.