-
Notifications
You must be signed in to change notification settings - Fork 3
/
ops.py
72 lines (64 loc) · 2.59 KB
/
ops.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from contextlib import contextmanager
import tensorflow as tf
import numpy as np
import os
import shutil
def conv_layer(inputs, filter_shape, stride, name = 'conv_layer'):
with tf.variable_scope(name):
init = tf.contrib.layers.xavier_initializer()
filter1 = tf.get_variable(name = 'filt_weights', shape = filter_shape, dtype = tf.float32, initializer = init)
output = tf.nn.conv2d(inputs, filter1, strides = stride, padding = 'SAME')
return output
def average_gradients(tower_grads):
""" Calculate the average gradient for each shared variable across towers.
Note that this function provides a sync point across al towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer
list is over individual gradients. The inner list is over the gradient
calculation for each tower.
Returns:
List of pairs of (gradient, variable) where the gradient has been
averaged across all towers.
"""
with tf.name_scope('average_gradients'):
average_grads = []
for grad_and_vars in zip(*tower_grads):
# each grad is ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN))
grads = []
for g, _ in grad_and_vars:
# Add 0 dim to gradients to represent tower
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension that we will average over below
grads.append(expanded_g)
# Build the tensor and average along tower dimension
grad = tf.concat(grads, 0)
grad = tf.reduce_mean(grad, 0)
# The Variables are redundant because they are shared across towers
# just return first tower's pointer to the Variable
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
@contextmanager
def variables_on_first_device(device_name):
old_fn = tf.get_variable
def new_fn(*args, **kwargs):
with tf.device(device_name):
return old_fn(*args, **kwargs)
tf.get_variable = new_fn
yield
tf.get_variable = old_fn
def scalar_summary(name, x):
try:
summ = tf.summary.scalar(name, x)
except AttributeError:
summ = tf.scalar_summary(name, x)
return summ
def histogram_summary(name, x):
try:
summ = tf.summary.histogram(name, x)
except AttributeError:
summ = tf.histogram_summary(name, x)
return summ
def leakyrelu(x, alpha=0.3, name='lrelu'):
return tf.maximum(x, alpha * x, name=name)