Source code for beagles.backend.net.hyperparameters.cyclic_learning_rate

import os
import math
import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.eager import context
import numpy as np

@tf.function
def cyclic_learning_rate(global_step,
                         learning_rate=0.01,
                         max_lr=0.1,
                         step_size=20.,
                         gamma=0.99994,
                         mode='triangular',
                         name=None):
    if global_step is None:
        raise ValueError("global_step is required for cyclic_learning_rate.")
    with ops.name_scope(name, None, [learning_rate, global_step]) as name:
        learning_rate = ops.convert_to_tensor(learning_rate, name="learning_rate")
        dtype = learning_rate.dtype
        max_lr = math_ops.cast(max_lr, dtype)
        global_step = math_ops.cast(global_step, dtype)
        step_size = math_ops.cast(step_size, dtype)

        def cyclic_lr():
            """Helper to recompute the learning rate; most helpful in eager mode."""
            # computing: cycle = floor(1 + global_step / (2 * step_size))
            # print(global_step.numpy())
            double_step = math_ops.multiply(2., step_size)
            global_div_double_step = math_ops.divide(global_step, double_step)
            cycle = math_ops.floor(math_ops.add(1., global_div_double_step))
            # computing: x = abs(global_step / step_size - 2 * cycle + 1)
            double_cycle = math_ops.multiply(2., cycle)
            global_div_step = math_ops.divide(global_step, step_size)
            tmp = math_ops.subtract(global_div_step, double_cycle)
            x = math_ops.abs(math_ops.add(1., tmp))
            # computing: clr = learning_rate + (max_lr - learning_rate) * max(0, 1 - x)
            a1 = math_ops.maximum(0., math_ops.subtract(1., x))
            a2 = math_ops.subtract(max_lr, learning_rate)
            clr = math_ops.multiply(a1, a2)
            if mode == 'triangular2':
                clr = math_ops.divide(clr, math_ops.cast(
                    math_ops.pow(2, math_ops.cast(cycle - 1, tf.int32)), tf.float32))
            if mode == 'exp_range':
                clr = math_ops.multiply(math_ops.pow(gamma, global_step), clr)
            return math_ops.add(clr, learning_rate, name=name)

        if not context.executing_eagerly():
            cyclic_lr = cyclic_lr()
        # tf.summary.scalar("/".join([self.flags.trainer,
        #                             'cyclic_learning_rate']), cyclic_lr)
        return cyclic_lr
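
The snippet below is a minimal usage sketch, not part of the original module: it assumes TensorFlow 2.x and simply evaluates the schedule above at a few global steps with the default triangular policy. The explicit name argument and the 'clr_demo' label are illustrative assumptions (the name scope above declares no default name).

for step in (0., 10., 20., 30., 40.):
    # defaults: learning_rate=0.01, max_lr=0.1, step_size=20
    lr = cyclic_learning_rate(tf.constant(step), name='clr_demo')
    # rises from 0.01 at step 0 to max_lr=0.1 at step 20, then falls back to 0.01 at step 40
    print(int(step), float(lr))
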
from tensorflow.keras.callbacks import *
from tensorflow.keras import backend as K
import numpy as np


class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).

    The method cycles the learning rate between two boundaries with some
    constant frequency, as detailed in this paper
    (https://arxiv.org/abs/1506.01186). The amplitude of the cycle can be
    scaled on a per-iteration or per-cycle basis.

    This class has three built-in policies, as put forth in the paper:

    "triangular": A basic triangular cycle with no amplitude scaling.
    "triangular2": A basic triangular cycle that scales the initial amplitude
        by half each cycle.
    "exp_range": A cycle that scales the initial amplitude by
        gamma**(cycle iterations) at each cycle iteration.

    For more detail, please see the paper.

    # Example
    ```python
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                   step_size=2000., mode='triangular')
    model.fit(X_train, Y_train, callbacks=[clr])
    ```

    The class also supports custom scaling functions:
    ```python
    clr_fn = lambda x: 0.5 * (1 + np.sin(x * np.pi / 2.))
    clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                   step_size=2000., scale_fn=clr_fn, scale_mode='cycle')
    model.fit(X_train, Y_train, callbacks=[clr])
    ```

    # Arguments
        base_lr: initial learning rate, which is the lower boundary of the
            cycle.
        max_lr: upper boundary of the cycle. Functionally, it defines the
            cycle amplitude (max_lr - base_lr). The lr at any point is the
            sum of base_lr and some scaling of the amplitude; therefore
            max_lr may not actually be reached, depending on the scaling
            function.
        step_size: number of training iterations per half cycle. The authors
            suggest setting step_size to 2-8 x the number of training
            iterations per epoch.
        mode: one of {'triangular', 'triangular2', 'exp_range'}. Default is
            'triangular'. Values correspond to the policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in the 'exp_range' scaling function:
            gamma**(cycle iterations).
        scale_fn: custom scaling policy defined by a single-argument lambda
            function, where 0 <= scale_fn(x) <= 1 for all x >= 0. The mode
            parameter is ignored.
        scale_mode: one of {'cycle', 'iterations'}. Defines whether scale_fn
            is evaluated on the cycle number or on cycle iterations (training
            iterations since the start of the cycle). Default is 'cycle'.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000.,
                 mode='triangular', gamma=1., scale_fn=None,
                 scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1 / (2. ** (x - 1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma ** x
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None, new_step_size=None):
        """Resets cycle iterations.

        Optional boundary/step size adjustment.
        """
        if new_base_lr is not None:
            self.base_lr = new_base_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_step_size is not None:
            self.step_size = new_step_size
        self.clr_iterations = 0.
    def clr(self):
        cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size))
        x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr - self.base_lr) * \
                np.maximum(0, (1 - x)) * self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr - self.base_lr) * \
                np.maximum(0, (1 - x)) * self.scale_fn(self.clr_iterations)

    def on_train_begin(self, logs={}):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())

    def on_batch_end(self, epoch, logs=None):
        logs = logs or {}

        self.trn_iterations += 1
        self.clr_iterations += 1

        self.history.setdefault('lr', []).append(
            K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

        K.set_value(self.model.optimizer.lr, self.clr())
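
The snippet below is a standalone sketch, not part of the original callback: it steps clr_iterations by hand to trace the 'triangular2' schedule without building a Keras model. The step_size of 5 and the variable names are illustrative assumptions.

schedule = CyclicLR(base_lr=0.001, max_lr=0.006, step_size=5., mode='triangular2')
lrs = []
for _ in range(30):
    schedule.clr_iterations += 1   # on_batch_end() would normally do this
    lrs.append(schedule.clr())
# Peaks land at iterations 5, 15, 25, and the amplitude above base_lr halves
# each cycle: 0.006, 0.0035, 0.00225.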