From e7e9241d23e8d29b80f6b756b061e49689be5a44 Mon Sep 17 00:00:00 2001
From: monoid
Date: Sat, 13 Feb 2021 00:26:53 +0900
Subject: [PATCH] broadcasting gradient

---
 layer.py | 62 ++++++++++++++++++++++++++++----------------------------
 p2.py    | 22 +++++++++++---------
 2 files changed, 43 insertions(+), 41 deletions(-)

diff --git a/layer.py b/layer.py
index 0e3e9bd..94235f2 100644
--- a/layer.py
+++ b/layer.py
@@ -6,6 +6,31 @@ import io
 
 #only scalar gradient
 #op must be a tree. Supporting a general graph would require a topological sort before traversal, so that is not done here.
+def broadcasting_be(a,b):
+    i = len(a)-1
+    j = len(b)-1
+    abroad = []
+    bbroad = []
+    while i >= 0 and j >= 0:
+        if a[i] == b[j]:
+            abroad.insert(0,1)
+            bbroad.insert(0,1)
+        elif a[i] == 1 or b[j] == 1:
+            abroad.insert(0,b[j])
+            bbroad.insert(0,a[i])
+        else:
+            raise ValueError
+        i -= 1
+        j -= 1
+    while i >= 0:
+        bbroad.insert(0,a[i])
+        i -= 1
+    while j >= 0:
+        abroad.insert(0,b[j])
+        j -= 1
+    return abroad, bbroad
+
+
 class NonExistVarableError(ValueError):
     pass
 
@@ -91,13 +116,15 @@ class AddOp(OpTree):
         return self.v
 
     def backprop(self,seed):
-        #borad_casted = self.a.shape != self.b.shape
-        #np.ones((1,b.shape[1]))
         #a + b
+        ashape, bshape = broadcasting_be(self.a.numpy().shape,self.b.numpy().shape)
+        aai = np.where(np.array(ashape) != 1)
+        bbi = np.where(np.array(bshape) != 1)
         if isinstance(self.a,OpTree):
-            self.a.backprop(seed)
+            self.a.backprop(np.sum(seed,axis=tuple(aai[0])))
         if isinstance(self.b,OpTree):
-            self.b.backprop(seed)
+            self.b.backprop(np.sum(seed,axis=tuple(bbi[0])))
+
 
 def addmul(a,b):
     return AddOp(a,b)
@@ -163,30 +190,3 @@ class Variable(OpTree):
         writer.write(f'{id(self)}["Variable{self.x.shape}"]\n')
     def backprop(self,seed):
         self.grad = seed
-
-
-"""
-input_var = Variable(np.array([[1],[2],[3]]))
-weight = Variable(np.array([[2,-1,1]]))
-v = relu(weight @ input_var)
-print(f"result : {v.numpy()}")
-v.backprop(np.ones(()))
-print(f"grad input : {input_var.grad}, w : {weight.grad}")
-"""
-
-#input_diff = Variable(np.array([[1.01],[2],[3]]))
-#v_diff = relu(weight @ input_diff)
-#print(f"diff 1 : {(np.sum(v_diff.numpy()) - v.numpy()) / 0.01}")
-
-#i -= grad * delta
-
-"""
-graph TD
-2284612545696["Variable(1, 3)"]
-2284612545696-->2284612624880[MatmulOp]
-2284612544496["Variable(3, 2)"]
-2284612544496-->2284612624880[MatmulOp]
-2284612624880-->2284612625072[FunctionReluOp]
-2284612625072-->2284612627856[MatmulOp]
-2284612627856-->Result
-"""
\ No newline at end of file
diff --git a/p2.py b/p2.py
index 62248b4..d745fd5 100644
--- a/p2.py
+++ b/p2.py
@@ -1,7 +1,7 @@
 from layer import *
 import numpy as np
 import pickle
-
+"""
 DIMENTION = 3
 VAR_RANGE = 1
 N = 10
@@ -18,15 +18,17 @@ y = or_weight @ input_x + or_bias
 error = gen.normal(0,SIGMA,size = (1,N))
 y += error
 print(y)
+"""
+input_var = Variable(np.array([[1,2,3],[1,5,0]]))
+weight = Variable(np.array([[2],[-1],[1]]))
+bias = Variable(np.array([1]))
+v = relu((input_var @ weight) + bias)
+#print(v.numpy())
+#print(v.numpy().shape, np.array([[1,1]]).shape)
+k = matmul(np.array([[1,1]]), v)
+print(make_mermaid_graph(k))
 
-input_var = Variable(np.array([[1],[2],[3]]))
-weight = Variable(np.array([[2,-1,1]]))
-bias = Variable(np.array([[1]]))
-v = ((weight @ input_var) + bias)
-
-print(make_mermaid_graph(v))
-
-print(f"result : {v.numpy()}")
-v.backprop(np.ones(()))
+print(f"result : {k.numpy()}")
+k.backprop(np.ones(()))
 print(f"grad input : {input_var.grad}, w : {weight.grad}, b : {bias.grad}")
 
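
Note on the change (an illustrative sketch, not part of the patch): when an operand of AddOp was broadcast in the forward pass, its gradient must be the upstream seed summed over the broadcast axes so that it ends up with the operand's own shape. broadcasting_be returns, for each operand shape, a list whose non-1 entries mark exactly those axes, and AddOp.backprop sums the seed over them. A minimal plain-NumPy demonstration using the same shapes as the p2.py example, a (2, 1) matmul output plus a (1,) bias; the variable names here are hypothetical:

import numpy as np

# Forward: out = a + b, where b (the bias) is broadcast along axis 0.
a = np.array([[4.0], [8.0]])       # shape (2, 1), e.g. input_var @ weight
b = np.array([1.0])                # shape (1,),  e.g. the bias
out = a + b                        # shape (2, 1)

# Backward: the upstream seed has the shape of out.
seed = np.ones_like(out)           # shape (2, 1)

# broadcasting_be((2, 1), (1,)) returns ([1], [2, 1]); entries != 1 mark the
# axes along which that operand was repeated, i.e. the axes to sum over.
grad_a = np.sum(seed, axis=())     # no broadcast axes: seed passes through, shape (2, 1)
grad_b = np.sum(seed, axis=(0,))   # reduce over the broadcast axis: shape (1,) matches b

print(grad_a.shape, grad_b.shape)  # (2, 1) (1,)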