最近面试有要求手撕SGD，这里顺便就把梯度下降、随机梯度下降、批次梯度下降给写出来了。
有几个注意点：
1. 求梯度时注意label[i]和pred[i]不要搞反，否则会导致模型发散
2. 如果跑了几千个epoch还是没有收敛，可能是学习率太小了
# Xn*k
# Y: n*1import random
import numpyclass GD:def __init__(self,w_dim,r):# 随机初始化self.w [random.random() for _ in range(w_dim)]self.bias random.random()self.learningRate rprint(foriginal w is {self.w}, original bias is {self.bias})def forward(self,x):# 前馈网络ans []for i in range(len(x)):y0for j in range(len(x[0])):yself.w[j]*x[i][j]ans.append(yself.bias)return ansdef bp(self,X,pred,label,opGD):# 计算均方差loss 0for i in range(len(pred)):loss(label[i]-pred[i])**2loss loss/len(X)# 计算梯度# 梯度下降if opGD:grad_w [0 for _ in range(len(self.w))]grad_bias0for i in range(len(X)):grad_bias-2*(label[i]-pred[i])for j in range(len(self.w)):grad_w[j]-2*(label[i]-pred[i])*X[i][j] # 反向传播更新梯度self.biasself.bias-self.learningRate*grad_bias/len(X)for i in range(len(self.w)):self.w[i]-self.learningRate*grad_w[i]/len(X)# 随机梯度下降if opSGD:grad_w [0 for _ in range(len(self.w))]grad_bias0randInd random.randint(0,len(X)-1)grad_bias-2*(label[randInd]-pred[randInd])for j in range(len(self.w)):grad_w[j]-2*(label[randInd]-pred[randInd])*X[randInd][j] # 反向传播更新梯度self.biasself.bias-self.learningRate*grad_biasfor i in range(len(self.w)):self.w[i]-self.learningRate*grad_w[i]# 批次梯度下降if opBGD: grad_w [0 for _ in range(len(self.w))]grad_bias0BS8randInd random.randint(0,len(X)/BS-1)X X[BS*randInd:BS*(randInd1)]label label[BS*randInd:BS*(randInd1)]pred pred[BS*randInd:BS*(randInd1)]for i in range(len(X)):grad_bias-2*(label[i]-pred[i])for j in range(len(self.w)):grad_w[j]-2*(label[i]-pred[i])*X[i][j] # 反向传播更新梯度self.biasself.bias-self.learningRate*grad_bias/len(X)for i in range(len(self.w)):self.w[i]-self.learningRate*grad_w[i]/len(X)return lossdef testY(X,w):Y []for x in X:y0for i in range(len(x)):yw[i]*x[i]Y.append(y)return Y# 构建数据
# Build the training data: n samples of dimension 2, labels from the
# ground-truth weights w (testY applies no bias, so B is unused by Y).
n = 1000
X = [[random.random() for _ in range(2)] for _ in range(n)]
w = [0.2, 0.3]
B = 0.4
Y = testY(X, w)

# Sample dimension is 2.
k = 2
lr = GD(k, 0.01)

Loss = 0
epochs = 2000
for e in range(epochs):
    Loss = 0
    pred = lr.forward(X)
    # Train with mini-batch gradient descent; "GD"/"SGD" also work.
    loss = lr.bp(X, pred, Y, "BGD")
    Loss += loss
    if (e % 100) == 0:
        print(f"step:{e},Loss:{Loss}")

# Sanity-check the fit on two fresh random samples.
X_test = [[random.random() for _ in range(2)] for _ in range(2)]
Y_test = testY(X_test, w)
print("X_test", X_test)
print("Y_test", Y_test)
print("Y_pred", lr.forward(X_test))
测试效果如下 也还行