broadcasting gradient
parent d8198c522b
commit e7e9241d23
layer.py | 62
@@ -6,6 +6,31 @@ import io
#only scalar gradient
#op must be a tree. Supporting a general graph would require a topological sort before traversal, so that is not done here.

def broadcasting_be(a,b):
    # Compare two shapes from the trailing dimension and record, for each operand,
    # the factor by which broadcasting stretches it (1 = no stretch). The non-1
    # positions are the axes the gradient must be summed over for that operand.
    i = len(a)-1
    j = len(b)-1
    abroad = []
    bbroad = []
    while i >= 0 and j >= 0:
        if a[i] == b[j]:
            abroad.insert(0,1)
            bbroad.insert(0,1)
        elif a[i] == 1 or b[j] == 1:
            abroad.insert(0,b[j])
            bbroad.insert(0,a[i])
        else:
            raise ValueError
        i -= 1
        j -= 1
    while i >= 0:
        bbroad.insert(0,a[i])
        i -= 1
    while j >= 0:
        abroad.insert(0,b[j])
        j -= 1
    return abroad, bbroad


class NonExistVarableError(ValueError):
    pass

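As a quick illustration of what broadcasting_be computes (a sketch, assuming the layer.py from this commit is importable and NumPy is installed): the non-1 entries in each returned list mark the axes over which the incoming gradient has to be summed for that operand.

import numpy as np
from layer import broadcasting_be

# a has shape (2, 3), b has shape (1, 3): b is stretched along axis 0.
abroad, bbroad = broadcasting_be((2, 3), (1, 3))
print(abroad, bbroad)              # [1, 1] [2, 1]

# Axes where an operand was stretched are the axes its gradient is summed over.
bbi = np.where(np.array(bbroad) != 1)
print(tuple(bbi[0]))               # (0,)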
@@ -91,13 +116,15 @@ class AddOp(OpTree):
        return self.v

    def backprop(self,seed):
        #broad_casted = self.a.shape != self.b.shape
        #np.ones((1,b.shape[1]))
        #a + b
        ashape, bshape = broadcasting_be(self.a.numpy().shape,self.b.numpy().shape)
        aai = np.where(np.array(ashape) != 1)
        bbi = np.where(np.array(bshape) != 1)
        if isinstance(self.a,OpTree):
            self.a.backprop(np.sum(seed,axis=tuple(aai[0])))
        if isinstance(self.b,OpTree):
            self.b.backprop(np.sum(seed,axis=tuple(bbi[0])))


def addmul(a,b):
    return AddOp(a,b)
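The sum over the broadcast axes is what makes the gradient shapes line up again: every element of a stretched operand is reused by several output elements, so their upstream gradients have to be accumulated. A minimal NumPy-only check of that reasoning (the epsilon and variable names here are illustrative, not part of layer.py):

import numpy as np

a = np.array([[1.0, 2.0, 3.0],
              [4.0, 5.0, 6.0]])
b = np.array([[10.0, 20.0, 30.0]])       # broadcast along axis 0 in a + b

seed = np.ones_like(a + b)               # upstream gradient of an elementwise sum
grad_b = np.sum(seed, axis=0)            # what the new backprop passes to b

# Finite-difference check on b[0, 1]:
eps = 1e-6
b_pert = b.copy()
b_pert[0, 1] += eps
numeric = (np.sum(a + b_pert) - np.sum(a + b)) / eps
print(grad_b[1], numeric)                # both ~2.0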
@@ -163,30 +190,3 @@ class Variable(OpTree):
        writer.write(f'{id(self)}["Variable{self.x.shape}"]\n')

    def backprop(self,seed):
        self.grad = seed


"""
input_var = Variable(np.array([[1],[2],[3]]))
weight = Variable(np.array([[2,-1,1]]))
v = relu(weight @ input_var)
print(f"result : {v.numpy()}")
v.backprop(np.ones(()))
print(f"grad input : {input_var.grad}, w : {weight.grad}")
"""

#input_diff = Variable(np.array([[1.01],[2],[3]]))
#v_diff = relu(weight @ input_diff)
#print(f"diff 1 : {(np.sum(v_diff.numpy()) - v.numpy()) / 0.01}")

#i -= grad * delta

"""
graph TD
2284612545696["Variable(1, 3)"]
2284612545696-->2284612624880[MatmulOp]
2284612544496["Variable(3, 2)"]
2284612544496-->2284612624880[MatmulOp]
2284612624880-->2284612625072[FunctionReluOp]
2284612625072-->2284612627856[MatmulOp]
2284612627856-->Result
"""
p2.py | 22
@@ -1,7 +1,7 @@
from layer import *
import numpy as np
import pickle

"""
DIMENTION = 3
VAR_RANGE = 1
N = 10
@@ -18,15 +18,17 @@ y = or_weight @ input_x + or_bias
error = gen.normal(0,SIGMA,size = (1,N))
y += error
print(y)
"""

input_var = Variable(np.array([[1,2,3],[1,5,0]]))
weight = Variable(np.array([[2],[-1],[1]]))
bias = Variable(np.array([1]))
v = relu((input_var @ weight) + bias)
#print(v.numpy())
#print(v.numpy().shape, np.array([[1,1]]).shape)
k = matmul(np.array([[1,1]]), v)
print(make_mermaid_graph(k))

input_var = Variable(np.array([[1],[2],[3]]))
weight = Variable(np.array([[2,-1,1]]))
bias = Variable(np.array([[1]]))
v = ((weight @ input_var) + bias)

print(make_mermaid_graph(v))

print(f"result : {v.numpy()}")
v.backprop(np.ones(()))
print(f"result : {k.numpy()}")
k.backprop(np.ones(()))
print(f"grad input : {input_var.grad}, w : {weight.grad}, b : {bias.grad}")