Commit

Merge pull request #12 from shivendrra/dev
pulling new changes from "dev" branch
shivendrra committed Jul 28, 2024
2 parents 6d45151 + eafafba commit 6298c6f
Showing 97 changed files with 4,641 additions and 155 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -13,7 +13,6 @@ __pycache__/

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -3,6 +3,7 @@ project(engine)

set(CMAKE_CXX_STANDARD 14)

set(pybind11_DIR "C:/Users/shivh/AppData/Roaming/Python/Python311/site-packages/pybind11/share/cmake/pybind11")
find_package(pybind11 REQUIRED)

set(CMAKE_C_COMPILER "C:/Program Files (x86)/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.XX.XXXXX/bin/Hostx64/x64/cl.exe")
68 changes: 24 additions & 44 deletions README.md
@@ -2,17 +2,7 @@

![axonlogo.png](https://github.com/shivendrra/axon/blob/main/logo.png)

You have seen [Micrograd](https://github.com/karpathy/micrograd) by Karpathy, this is the upgraded version of micrograd written in c/c++ & is more functions & operational support. A light weight scalar-level autograd engine written in c/c++ & python

## Features

- Element-wise operations (addition, multiplication, etc.)
- Matrix multiplication
- Broadcasting
- Activation functions (ReLU, tanh, sigmoid, GELU)
- Reshape, transpose, flatten
- Data type conversion
and many more.. *(work in progress)*
You may have seen [Micrograd](https://github.com/karpathy/micrograd) by Karpathy; this is an upgraded version of micrograd written in C/C++ with more functions and operational support: a lightweight scalar-level autograd engine written in C/C++ and Python.

## Installation

@@ -23,51 +13,33 @@ git clone https://github.com/shivendrra/axon.git
cd axon
```

or

```shell
pip install axon
```

## Usage

Here is a sample example showing some operations on two 2D arrays:
You can use this much like micrograd to build a simple neural network or do scalar-level backprop.

```python
from axon.base import array

# Create two 2D arrays
a = array([[1, 2], [3, 4]], dtype='int32')
b = array([[5, 6], [7, 8]], dtype='int32')
from axon import value

# Addition
c = a + b
print("Addition:\n", c)
a = value(2)
b = value(3)

# Multiplication
c = a + b
d = a * b
print("Multiplication:\n", d)
e = c.relu()
f = d ** 2.0

# Matrix Multiplication
e = a @ b
print("Matrix Multiplication:\n", e)
```
f.backward()

### Output:

```
Addition:
array([6, 8], [10, 12], dtype=int32)
Multiplication:
array([5, 12], [21, 32], dtype=int32)
Matrix Multiplication:
array([19, 22], [43, 50], dtype=int32)
print(a)
print(b)
print(c)
print(d)
print(e)
print(f)
```
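
Since `e = c.relu()` does not feed into `f`, backprop from `f` leaves `c` and `e` with zero gradient; working the chain rule by hand (`f = (a*b)**2`, so `d.grad = 2*d = 12`, `a.grad = b * d.grad = 36`, `b.grad = a * d.grad = 24`) gives the expected output below. Exact float formatting may differ by build:

```
Value(data=2.0, grad=36.0)
Value(data=3.0, grad=24.0)
Value(data=5.0, grad=0.0)
Value(data=6.0, grad=12.0)
Value(data=5.0, grad=0.0)
Value(data=36.0, grad=1.0)
```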

Anyway, refer to the documentation for a detailed usage guide:

1. [axon.doc](https://github.com/shivendrra/axon/blob/main/docs/axon.md): for development purposes
2. [usage.doc](https://github.com/shivendrra/axon/blob/main/docs/usage.md): for using it like numpy

You can even check out the [example](https://github.com/shivendrra/axon/tree/main/examples) neural networks to run on your system, or build your own :-D.

## Forking the Repository

@@ -97,6 +69,14 @@ git push origin my-feature-branch

6. Create a pull request on the original repository.

## Testing

To run the unit tests you will have to install PyTorch, which the tests use as a reference for verifying the correctness of the calculated gradients. Then simply:

```shell
python -m pytest
```
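
As an illustration, a gradient check in this style might look like the sketch below; the actual test files and function names in the repo may differ:

```python
import torch
from axon import value

def test_mul_pow_backward():
  # axon side: f = (a*b)^2
  a, b = value(2.0), value(3.0)
  f = (a * b) ** 2.0
  f.backward()
  # PyTorch reference for the same expression
  at = torch.tensor(2.0, requires_grad=True)
  bt = torch.tensor(3.0, requires_grad=True)
  ((at * bt) ** 2).backward()
  assert abs(a.grad - at.grad.item()) < 1e-6
  assert abs(b.grad - bt.grad.item()) < 1e-6
```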

## Contributing

We welcome contributions! Please follow these steps to contribute:
3 changes: 2 additions & 1 deletion axon/__init__.py
@@ -1 +1,2 @@
from .base import Value
from .base import value
from .nn import *
68 changes: 55 additions & 13 deletions axon/base.py
@@ -2,25 +2,64 @@

class value:
  def __init__(self, data):
    self.value = engine.Value(data)
    if isinstance(data, engine.Value):
      self.value = data
    else:
      self.value = engine.Value(float(data))

  @property
  def data(self):
    return self.value.data

  def add(self, other):
    return value(engine.Value.add(self.value, other.value))
  @data.setter
  def data(self, new_data):
    self.value.data = new_data

  def mul(self, other):
    return value(engine.Value.mul(self.value, other.value))
  @property
  def grad(self):
    return self.value.grad

  @grad.setter
  def grad(self, new_data):
    self.value.grad = new_data

  def pow_val(self, exp):
    return value(engine.Value.pow_val(self.value, exp))
  def __add__(self, other):
    if isinstance(other, value):
      return value(engine.Value.add(self.value, other.value))
    return value(engine.Value.add(self.value, engine.Value(float(other))))

  def __radd__(self, other):
    return self + other

  def negate(self):
  def __mul__(self, other):
    if isinstance(other, value):
      return value(engine.Value.mul(self.value, other.value))
    return value(engine.Value.mul(self.value, engine.Value(float(other))))

  def __rmul__(self, other):
    return self * other

  def __pow__(self, exp):
    return value(engine.Value.pow(self.value, exp))

  def __neg__(self):
    return value(engine.Value.negate(self.value))

  def sub(self, other):
    return value(engine.Value.sub(self.value, other.value))
  def __sub__(self, other):
    if isinstance(other, value):
      return value(engine.Value.sub(self.value, other.value))
    return value(engine.Value.sub(self.value, engine.Value(float(other))))

  def __rsub__(self, other):
    return value(engine.Value.sub(engine.Value(float(other)), self.value))

  def truediv(self, other):
    return value(engine.Value.truediv(self.value, other.value))
  def __truediv__(self, other):
    if isinstance(other, value):
      return value(engine.Value.truediv(self.value, other.value))
    return value(engine.Value.truediv(self.value, engine.Value(float(other))))

  def __rtruediv__(self, other):
    return value(engine.Value.truediv(engine.Value(float(other)), self.value))

  def relu(self):
    return value(engine.Value.relu(self.value))
@@ -29,4 +68,7 @@ def backward(self):
    engine.Value.backward(self.value)

  def __repr__(self):
    self.value.print_value()
    return f"Value(data={self.value.data}, grad={self.value.grad})"

  def __str__(self):
    return self.__repr__()
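
As a quick sketch of what these overloads enable (assuming the compiled `engine` extension has been built; the numbers are illustrative):

```python
from axon import value

x = value(4)
y = 2 * x + 1       # __rmul__ / __add__ accept plain Python numbers
z = (y / 3) ** 2.0  # __truediv__ and __pow__
z.backward()

print(z)            # __repr__ -> Value(data=9.0, grad=1.0)
print(x.grad)       # dz/dx = (4/9)*(2x+1) = 4.0 at x = 4
```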
4 changes: 3 additions & 1 deletion axon/csrc/bindings.cpp
@@ -8,7 +8,9 @@ PYBIND11_MODULE(engine, m) {
    .def(py::init<double>())
    .def_readwrite("data", &Value::data)
    .def_readwrite("grad", &Value::grad)
    .def("print_value", &Value::print_value)
    .def("repr", &Value::repr)
    .def("get_data", &Value::get_data)
    .def("get_grad", &Value::get_grad)
    .def_static("add", &Value::add, py::return_value_policy::reference)
    .def_static("mul", &Value::mul, py::return_value_policy::reference)
    .def_static("pow", &Value::pow_val, py::return_value_policy::reference)
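
For context, these bindings expose `Value` to Python with static methods that mirror the C++ API. A direct-use sketch, assuming the compiled module is importable as `engine` (which is how `axon/base.py` appears to use it):

```python
import engine  # the compiled pybind11 extension

a = engine.Value(2.0)
b = engine.Value(3.0)
c = engine.Value.add(a, b)    # c = a + b
d = engine.Value.pow(c, 2.0)  # d = (a + b)^2
engine.Value.backward(d)      # topological backward pass from d
print(d.repr())               # Value(data=25.000000, grad=1.000000)
print(a.grad)                 # 10.0, exposed via def_readwrite
```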
12 changes: 10 additions & 2 deletions axon/csrc/engine.cpp
@@ -89,6 +89,14 @@ void Value::backward(Value* v) {
  }
}

void Value::print_value() const {
  std::cout << "Value(data=" << data << ", grad=" << grad << ")\n";
std::string Value::repr() const {
  return "Value(data=" + std::to_string(data) + ", grad=" + std::to_string(grad) + ")";
}

double Value::get_data() const {
  return data;
}

double Value::get_grad() const {
  return grad;
}
7 changes: 5 additions & 2 deletions axon/csrc/engine.h
@@ -2,6 +2,7 @@
#define ENGINE_H

#include <vector>
#include <string>

class Value {
public:
@@ -12,7 +13,7 @@ class Value {
  void (*_backward)(Value*);

  Value(double data);

  static void noop_backward(Value* v);

  static Value* add(Value* a, Value* b);
@@ -34,7 +35,9 @@
  static void build_topo(Value* v, std::vector<Value*>& topo, std::vector<Value*>& visited);
  static void backward(Value* v);

  void print_value() const;
  std::string repr() const;
  double get_data() const;
  double get_grad() const;
};

#endif
96 changes: 95 additions & 1 deletion axon/nn.py
@@ -1,5 +1,6 @@
from .base import value
import pickle
import random

class Module:
  def zero_grad(self):
@@ -22,4 +23,97 @@ def save(self, filename='model.pickle'):
  def load(self, filename='model.pickle'):
    with open(filename, 'rb') as f:
      state = pickle.load(f)
    self.load_dict(state)
    self.load_dict(state)

class Neuron(Module):
  def __init__(self, nin, nonlin=True) -> None:
    super().__init__()
    self.w = [value(random.uniform(-0.5,0.5)) for _ in range(nin)]
    self.b = value(0)
    self.nonlin = nonlin

  def __call__(self, x):
    act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
    return act.relu() if self.nonlin else act

  def parameters(self):
    return self.w + [self.b]

  def __repr__(self):
    return f"{'ReLU' if self.nonlin else 'Linear'}Neuron({len(self.w)})"

class Layer(Module):
  def __init__(self, _in, _out, **kwargs) -> None:
    super().__init__()
    self.neurons = [Neuron(_in, **kwargs) for _ in range(_out)]

  def __call__(self, x):
    out = [n(x) for n in self.neurons]
    return out[0] if len(out) == 1 else out

  def parameters(self):
    return [p for n in self.neurons for p in n.parameters()]

  def __repr__(self) -> str:
    return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"

class MLP(Module):
  def __init__(self, nin, nouts):
    sz = [nin] + nouts
    self.layers = [Layer(sz[i], sz[i+1], nonlin=i!=len(nouts)-1) for i in range(len(nouts))]

  def __call__(self, x):
    for layer in self.layers:
      x = layer(x)
    return x

  def parameters(self):
    return [p for layer in self.layers for p in layer.parameters()]

  def __repr__(self):
    return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

class RNNCell(Module):
  def __init__(self, input_size, hidden_size, nonlin=True):
    super().__init__()
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.wx = [value(random.uniform(-0.5, 0.5)) for _ in range(input_size * hidden_size)]
    self.wh = [value(random.uniform(-0.5, 0.5)) for _ in range(hidden_size * hidden_size)]
    self.b = [value(0) for _ in range(hidden_size)]
    self.nonlin = nonlin

  def __call__(self, x, h):
    # compute each hidden unit from its row of the input->hidden and
    # hidden->hidden weight matrices, then add the per-unit bias
    out = []
    for j in range(self.hidden_size):
      wx = sum((self.wx[j * self.input_size + i] * x[i] for i in range(self.input_size)), value(0))
      wh = sum((self.wh[j * self.hidden_size + i] * h[i] for i in range(self.hidden_size)), value(0))
      act = wx + wh + self.b[j]
      out.append(act.relu() if self.nonlin else act)
    return out

  def parameters(self):
    return self.wx + self.wh + self.b

  def __repr__(self):
    return f"{'ReLU' if self.nonlin else 'Linear'}RNNCell({self.input_size}, {self.hidden_size})"


class RNN(Module):
  def __init__(self, input_size, hidden_size, output_size, num_layers=1):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.rnn_cells = [RNNCell(input_size, hidden_size) if i == 0 else RNNCell(hidden_size, hidden_size)
                      for i in range(num_layers)]
    self.output_layer = Layer(hidden_size, output_size)

  def __call__(self, x, h=None):
    if h is None:
      h = [value(0) for _ in range(self.hidden_size)]
    inp = x
    for rnn_cell in self.rnn_cells:
      # each stacked layer consumes the previous layer's hidden state as its input
      h = rnn_cell(inp, h)
      inp = h
    return self.output_layer(h)

  def parameters(self):
    return [p for rnn_cell in self.rnn_cells for p in rnn_cell.parameters()] + self.output_layer.parameters()

  def __repr__(self):
    return f"RNN of [{', '.join(str(rnn_cell) for rnn_cell in self.rnn_cells)}, {self.output_layer}]"
Empty file added build/extra.txt