在參考了百度上的兩篇博客後, 一共用這兩種方式實現了梯度反轉
1. 版本一:
import tensorflow as tf
from tensorflow import keras as K
# Gradient reversal, version 1: compute the gradients of each sub-graph
# manually, flip the sign of the feature-extractor gradients that come from
# the domain-classifier loss, then apply all (grad, var) pairs at once.
# x = tf.placeholder(dtype=tf.float32, shape=(1, 1))  # NOTE(review): the
#     placeholder version raised "you must feed a value for placeholder" —
#     presumably because sess.run below passes no feed_dict; confirm.
x = tf.constant([[1.]], dtype=tf.float32)
# All layers are 1x1 dense layers with fixed constant weights so the gradient
# arithmetic below can be checked by hand.
# Feature extractor f(x), initial weight 1.0.
df = K.layers.Dense(1, use_bias=False, kernel_initializer=K.initializers.constant([[1.]]))
f = df(x)
# Domain classifier l(f), initial weight 10.0.
dl = K.layers.Dense(1, use_bias=False, kernel_initializer=K.initializers.constant([[10.]]))
l = dl(f)
# Label classifier y(f), initial weight 2.0.
dy = K.layers.Dense(1, use_bias=False, kernel_initializer=K.initializers.constant([[2.]]))
y = dy(f)
opt = tf.train.GradientDescentOptimizer(0.1)
loss1 = y  # label-classifier loss
loss2 = l  # domain-classifier loss
# Gradients of the label-classifier loss w.r.t. the feature layer (df) and the
# label-classifier layer (dy).  var_list is an explicitly flattened list of
# variables rather than a list of lists.  gate_gradients=2 == GATE_GRAPH.
y_g = opt.compute_gradients(loss1, df.trainable_variables + dy.trainable_variables, gate_gradients=2)
# Gradient of the domain-classifier loss w.r.t. its own layer (dl) — kept as-is.
l_dl_g = opt.compute_gradients(loss2, dl.trainable_variables, gate_gradients=2)
# Gradient of the domain-classifier loss w.r.t. the feature layer (df);
# this is the part that gets reversed.
l_df_g = opt.compute_gradients(loss2, df.trainable_variables, gate_gradients=2)
# With the reversal, df's weight updates to 1.8; without this line it would be
# -0.2.  The -1 factor can be any -n to scale the reversed gradient.
l_df_g = [(-1 * g, v) for (g, v) in l_df_g]
# Merge all (grad, var) pairs and build a single training op.
grad_vars = y_g + l_dl_g + l_df_g
op = opt.apply_gradients(grad_vars)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("f, l, y, x:", end=" ")
    [print(x, end=" ") for x in sess.run([f, l, y, x])]
    print("\nfw, lw, yw:", end=" ")
    [print(x[0], end=" ") for x in sess.run([df.weights, dl.weights, dy.weights])]
    sess.run(op)
    print("\nf, l, y, x:", end=" ")
    [print(x, end=" ") for x in sess.run([f, l, y, x])]
    print("\nfw, lw, yw:", end=" ")
    [print(x[0], end=" ") for x in sess.run([df.weights, dl.weights, dy.weights])]
2. 版本二:
import tensorflow as tf
from tensorflow import keras as K
# 梯度反轉:版本2, 實現一個梯度反轉層類, 然後覆寫tf.identity op
class GradientReversal:
    """Gradient-reversal layer (DANN-style).

    The forward pass is the identity; the backward pass multiplies the
    incoming gradient by ``-s``.  Implemented by registering a custom
    gradient function and overriding the gradient of the ``Identity`` op
    through the graph's ``gradient_override_map``.
    """

    def __init__(self, name="GradRevIdentity"):
        # Counter so that repeated calls register distinct gradient-op names;
        # tf.RegisterGradient raises if the same name is registered twice.
        self.call_num = 0
        self.name = name

    def call(self, x, s=1.0):
        """Return ``tf.identity(x)`` whose gradient is ``-s`` times the
        upstream gradient.

        Args:
            x: input tensor (forward value is passed through unchanged).
            s: scale factor applied to the reversed gradient.
        """
        op_name = self.name + "_" + str(self.call_num)
        self.call_num += 1

        @tf.RegisterGradient(op_name)
        def reverse_grad(op, grad):
            # Flip the sign and scale by s on the way back.
            return [-grad * s]

        g = tf.get_default_graph()
        # Make the Identity op created below use the gradient function
        # registered above instead of the standard identity gradient.
        with g.gradient_override_map({"Identity": op_name}):
            y = tf.identity(x)
        return y

    def __call__(self, x, s=1.0):
        return self.call(x, s)
# Gradient reversal, version 2: use the GradientReversal layer so that a plain
# optimizer.minimize() call is enough — no manual gradient surgery needed.
# x = tf.placeholder(dtype=tf.float32, shape=(1, 1))  # NOTE(review): the
#     placeholder version raised "you must feed a value for placeholder" —
#     presumably because sess.run below passes no feed_dict; confirm.
x = tf.constant([[1.]], dtype=tf.float32)
# Feature extractor with a fixed constant weight so the expected updates are
# easy to verify by hand.
df = K.layers.Dense(1, use_bias=False, kernel_initializer=K.initializers.constant([[1.]]))
gr = GradientReversal()
f = df(x)  # f is the extracted feature
# Reverse the gradient flowing back through f; changing s scales the reversed
# gradient, which makes this approach flexible.
f_gr = gr(f, 1)
dl = K.layers.Dense(1, use_bias=False, kernel_initializer=K.initializers.constant([[10.]]))
# The domain classifier consumes the gradient-reversed feature.  Only the
# gradient is reversed — the forward value of f itself is unchanged.
l = dl(f_gr)
dy = K.layers.Dense(1, use_bias=False, kernel_initializer=K.initializers.constant([[2.]]))
y = dy(f)  # the label classifier uses the non-reversed feature
loss1 = y  # label-classifier loss
loss2 = l  # domain-classifier loss
opt = tf.train.GradientDescentOptimizer(0.1)
# The reversal is baked into the graph, so minimizing the summed loss applies
# the reversed gradient to df automatically.
op = opt.minimize(loss1 + loss2)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print("f, l, y, x:", end=" ")
    [print(x, end=" ") for x in sess.run([f, l, y, x])]
    print("\nfw, lw, yw:", end=" ")
    [print(x[0], end=" ") for x in sess.run([df.weights, dl.weights, dy.weights])]
    sess.run(op)
    print("\nfw, lw, yw:", end=" ")
    [print(x[0], end=" ") for x in sess.run([df.weights, dl.weights, dy.weights])]
3. 結果簡單分析:
如果不加入梯度反轉, 則df.weights更新後的值應該是-0.2=1.0 - 0.1(2+10) 的, 加入後都變成了1.8=1.0 - 0.1(2-10)
參考文獻:
https://www.e-learn.cn/content/qita/2350448
https://blog.csdn.net/jiongnima/article/details/78393613