model.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. import os
  2. import random
  3. from tqdm import trange, tqdm
  4. from scipy.misc import imsave
  5. import tensorflow as tf
  6. import numpy as np
  7. from generator import Generator
  8. from encoder import Encoder
  9. from discriminator import Discriminator
  10. from utils import logger
  11. class BicycleGAN(object):
  12. def __init__(self, args):
  13. self._log_step = args.log_step
  14. self._batch_size = args.batch_size
  15. self._image_size = args.image_size
  16. self._latent_dim = args.latent_dim
  17. self._coeff_reconstruct = args.coeff_reconstruct
  18. self._coeff_latent = args.coeff_latent
  19. self._coeff_kl = args.coeff_kl
  20. self._norm = 'instance' if args.instance_normalization else 'batch'
  21. self._augment_size = self._image_size + (30 if self._image_size == 256 else 15)
  22. self._image_shape = [self._image_size, self._image_size, 3]
  23. self.is_train = tf.placeholder(tf.bool, name='is_train')
  24. self.lr = tf.placeholder(tf.float32, name='lr')
  25. self.global_step = tf.train.get_or_create_global_step(graph=None)
  26. image_a = self.image_a = \
  27. tf.placeholder(tf.float32, [self._batch_size] + self._image_shape, name='image_a')
  28. image_b = self.image_b = \
  29. tf.placeholder(tf.float32, [self._batch_size] + self._image_shape, name='image_b')
  30. z = self.z = \
  31. tf.placeholder(tf.float32, [self._batch_size, self._latent_dim], name='z')
  32. # Data augmentation
  33. seed = random.randint(0, 2**31 - 1)
  34. def augment_image(image):
  35. image = tf.image.resize_images(image, [self._augment_size, self._augment_size])
  36. image = tf.random_crop(image, [self._batch_size] + self._image_shape, seed=seed)
  37. image = tf.map_fn(lambda x: tf.image.random_flip_left_right(x, seed), image)
  38. return image
  39. image_a = tf.cond(self.is_train,
  40. lambda: augment_image(image_a),
  41. lambda: image_a)
  42. image_b = tf.cond(self.is_train,
  43. lambda: augment_image(image_b),
  44. lambda: image_b)
  45. # Generator
  46. G = Generator('G', is_train=self.is_train,
  47. norm=self._norm, image_size=self._image_size)
  48. # Discriminator
  49. D = Discriminator('D', is_train=self.is_train,
  50. norm=self._norm, activation='leaky',
  51. image_size=self._image_size)
  52. # Encoder
  53. E = Encoder('E', is_train=self.is_train,
  54. norm=self._norm, activation='relu',
  55. image_size=self._image_size, latent_dim=self._latent_dim)
  56. # conditional VAE-GAN: B -> z -> B'
  57. z_encoded, z_encoded_mu, z_encoded_log_sigma = E(image_b)
  58. image_ab_encoded = G(image_a, z_encoded)
  59. # conditional Latent Regressor-GAN: z -> B' -> z'
  60. image_ab = self.image_ab = G(image_a, z)
  61. z_recon, z_recon_mu, z_recon_log_sigma = E(image_ab)
  62. # Discriminate real/fake images
  63. D_real = D(image_b)
  64. D_fake = D(image_ab)
  65. D_fake_encoded = D(image_ab_encoded)
  66. loss_vae_gan = (tf.reduce_mean(tf.squared_difference(D_real, 0.9)) +
  67. tf.reduce_mean(tf.square(D_fake_encoded)))
  68. loss_image_cycle = tf.reduce_mean(tf.abs(image_b - image_ab_encoded))
  69. loss_gan = (tf.reduce_mean(tf.squared_difference(D_real, 0.9)) +
  70. tf.reduce_mean(tf.square(D_fake)))
  71. loss_latent_cycle = tf.reduce_mean(tf.abs(z - z_recon))
  72. loss_kl = -0.5 * tf.reduce_mean(1 + 2 * z_encoded_log_sigma - z_encoded_mu ** 2 -
  73. tf.exp(2 * z_encoded_log_sigma))
  74. loss = loss_vae_gan - self._coeff_reconstruct * loss_image_cycle + \
  75. loss_gan - self._coeff_latent * loss_latent_cycle - \
  76. self._coeff_kl * loss_kl
  77. # Optimizer
  78. update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  79. with tf.control_dependencies(update_ops):
  80. self.optimizer_D = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5) \
  81. .minimize(loss, var_list=D.var_list, global_step=self.global_step)
  82. self.optimizer_G = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5) \
  83. .minimize(-loss, var_list=G.var_list)
  84. self.optimizer_E = tf.train.AdamOptimizer(learning_rate=self.lr, beta1=0.5) \
  85. .minimize(-loss, var_list=E.var_list)
  86. # Summaries
  87. self.loss_vae_gan = loss_vae_gan
  88. self.loss_image_cycle = loss_image_cycle
  89. self.loss_latent_cycle = loss_latent_cycle
  90. self.loss_gan = loss_gan
  91. self.loss_kl = loss_kl
  92. self.loss = loss
  93. tf.summary.scalar('loss/vae_gan', loss_vae_gan)
  94. tf.summary.scalar('loss/image_cycle', loss_image_cycle)
  95. tf.summary.scalar('loss/latent_cycle', loss_latent_cycle)
  96. tf.summary.scalar('loss/gan', loss_gan)
  97. tf.summary.scalar('loss/kl', loss_kl)
  98. tf.summary.scalar('loss/total', loss)
  99. tf.summary.scalar('model/D_real', tf.reduce_mean(D_real))
  100. tf.summary.scalar('model/D_fake', tf.reduce_mean(D_fake))
  101. tf.summary.scalar('model/D_fake_encoded', tf.reduce_mean(D_fake_encoded))
  102. tf.summary.scalar('model/lr', self.lr)
  103. tf.summary.image('image/A', image_a[0:1])
  104. tf.summary.image('image/B', image_b[0:1])
  105. tf.summary.image('image/A-B', image_ab[0:1])
  106. tf.summary.image('image/A-B_encoded', image_ab_encoded[0:1])
  107. self.summary_op = tf.summary.merge_all()
  108. def train(self, sess, summary_writer, data_A, data_B):
  109. logger.info('Start training.')
  110. logger.info(' {} images from A'.format(len(data_A)))
  111. logger.info(' {} images from B'.format(len(data_B)))
  112. assert len(data_A) == len(data_B), \
  113. 'Data size mismatch dataA(%d) dataB(%d)' % (len(data_A), len(data_B))
  114. data_size = len(data_A)
  115. num_batch = data_size // self._batch_size
  116. epoch_length = num_batch * self._batch_size
  117. num_initial_iter = 8
  118. num_decay_iter = 2
  119. lr = lr_initial = 0.0002
  120. lr_decay = lr_initial / num_decay_iter
  121. initial_step = sess.run(self.global_step)
  122. num_global_step = (num_initial_iter + num_decay_iter) * epoch_length
  123. t = trange(initial_step, num_global_step,
  124. total=num_global_step, initial=initial_step)
  125. for step in t:
  126. #TODO: resume training with global_step
  127. epoch = step // epoch_length
  128. iter = step % epoch_length
  129. if epoch > num_initial_iter:
  130. lr = max(0.0, lr_initial - (epoch - num_initial_iter) * lr_decay)
  131. if iter == 0:
  132. data = zip(data_A, data_B)
  133. random.shuffle(data)
  134. data_A, data_B = zip(*data)
  135. image_a = np.stack(data_A[iter*self._batch_size:(iter+1)*self._batch_size])
  136. image_b = np.stack(data_B[iter*self._batch_size:(iter+1)*self._batch_size])
  137. sample_z = np.random.normal(size=(self._batch_size, self._latent_dim))
  138. fetches = [self.loss, self.optimizer_D,
  139. self.optimizer_G, self.optimizer_E]
  140. if step % self._log_step == 0:
  141. fetches += [self.summary_op]
  142. fetched = sess.run(fetches, feed_dict={self.image_a: image_a,
  143. self.image_b: image_b,
  144. self.is_train: True,
  145. self.lr: lr,
  146. self.z: sample_z})
  147. if step % self._log_step == 0:
  148. z = np.random.normal(size=(1, self._latent_dim))
  149. image_ab = sess.run(self.image_ab, feed_dict={self.image_a: image_a,
  150. self.z: z,
  151. self.is_train: False})
  152. imsave('results/r_{}.jpg'.format(step), np.squeeze(image_ab, axis=0))
  153. summary_writer.add_summary(fetched[-1], step)
  154. summary_writer.flush()
  155. t.set_description('Loss({:.3f})'.format(fetched[0]))
  156. def test(self, sess, data_A, data_B, base_dir):
  157. step = 0
  158. for (dataA, dataB) in tqdm(zip(data_A, data_B)):
  159. step += 1
  160. image_a = np.expand_dims(dataA, axis=0)
  161. image_b = np.expand_dims(dataB, axis=0)
  162. images_random = []
  163. images_random.append(image_a)
  164. images_random.append(image_b)
  165. images_linear = []
  166. images_linear.append(image_a)
  167. images_linear.append(image_b)
  168. for i in range(23):
  169. z = np.random.normal(size=(1, self._latent_dim))
  170. image_ab = sess.run(self.image_ab, feed_dict={self.image_a: image_a,
  171. self.z: z,
  172. self.is_train: False})
  173. images_random.append(image_ab)
  174. z = np.zeros((1, self._latent_dim))
  175. z[0][0] = (i / 23.0 - 0.5) * 2.0
  176. image_ab = sess.run(self.image_ab, feed_dict={self.image_a: image_a,
  177. self.z: z,
  178. self.is_train: False})
  179. images_linear.append(image_ab)
  180. image_rows = []
  181. for i in range(5):
  182. image_rows.append(np.concatenate(images_random[i*5:(i+1)*5], axis=2))
  183. images = np.concatenate(image_rows, axis=1)
  184. images = np.squeeze(images, axis=0)
  185. imsave(os.path.join(base_dir, 'random_{}.jpg'.format(step)), images)
  186. image_rows = []
  187. for i in range(5):
  188. image_rows.append(np.concatenate(images_linear[i*5:(i+1)*5], axis=2))
  189. images = np.concatenate(image_rows, axis=1)
  190. images = np.squeeze(images, axis=0)
  191. imsave(os.path.join(base_dir, 'linear_{}.jpg'.format(step)), images)