diff --git a/config/config_cim_cnn_param.py b/config/config_cim_cnn_param.py index adc432ef53372f72288271a913ebf20c8b1a8b06..4fdd7cfd631a390a74e900f123ae40aac6e58e9f 100644 --- a/config/config_cim_cnn_param.py +++ b/config/config_cim_cnn_param.py @@ -33,7 +33,7 @@ Nimg_save = 128; epochs = 30 batch_size = 128*10 # batch_size = 128 -lr = 0.01 +lr = 0.005 decay = 0.000025 # Decay & lr factors decay_at_epoch = [15, 75, 150 ] @@ -48,21 +48,22 @@ finetune = False ######### Hardware information ######### ######################################## # ARCHITECTURE-RELATED PARAMETERS -arch = '6T'; +cim_type = 'charge'; +arch = '10TC'; tech = 'GF22nmFDX'; typeT = 'RVT'; # SUPPLY and BACK-BIAS VDD = 0.8; -Vmax_beta = 0.12; +Vmax_beta = 0.05; BBN = 0; BBP = 0; # CIM-SRAM I/O RESOLUTION -IAres = 1; +IAres = 4; Wres = 1; -OAres = 32; +OAres = 4; # ABN resolution (if enabled) -r_gamma = 6; -r_beta = 3; +r_gamma = 5; +r_beta = 5; # MAXIMUM INPUT VECTOR SIZE for ALL layers Nrows = 1152; Ncols = 512; @@ -81,9 +82,9 @@ IS_EMBEDDED = 0; # Ideal or effective ABN HW model IDEAL_ABN = 1; # ABN model includes ADC behaviour -ABN_INC_ADC = 1; +ABN_INC_ADC = 0; # Use post-layout model instead of pre-layour versions -FLAG_PL = 1; +FLAG_PL = 0; # Enable saving SAVE_EN = 1; # Is first layer FC (depends on network_struct) diff --git a/layers/analog_BN_charge_interp_PL.py b/layers/analog_BN_charge_interp_PL.py new file mode 100644 index 0000000000000000000000000000000000000000..ab959f9f2b5137e039801a2896f3606b6db2d478 --- /dev/null +++ b/layers/analog_BN_charge_interp_PL.py @@ -0,0 +1,353 @@ +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// +# /////////////////////////// Custom batchnorm implementing actual hardware ABN ////////////////////////////// +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// + +# Inspired from https://stackoverflow.com/questions/54101593/conditional-batch-normalization-in-keras + +import numpy as np +import math + +import tensorflow as tf +import keras.backend as K + +from keras import regularizers, initializers, constraints +#from keras.legacy import interfaces +from keras.layers import Layer, Input, InputSpec +from keras.models import Model + +# Current ABN model +from models.ABN_charge import makeLookupABN, doInterpABN +from models.ABN_charge import round_through, floor_through + + +class Analog_BN(Layer): + """ Analog batchnorm layer + """ + # /// Init layer /// +# @interfaces.legacy_batchnorm_support + def __init__(self, + axis=-1, + momentum=0.99, + epsilon=1e-5, + center=True, + scale=True, + renorm = True, + beta_initializer='zeros', + gamma_initializer=tf.keras.initializers.Constant(value=3), + moving_mean_initializer='zeros', + moving_variance_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + activity_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + hardware = None, + NB = None, + m_sigma = 1, + Npoints = 401, + EN_NOISE = 0, + **kwargs): + + super(Analog_BN, self).__init__(**kwargs) + self.axis = axis + self.momentum = momentum + self.epsilon = epsilon + self.center = center + self.scale = scale + self.renorm = renorm + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.moving_mean_initializer = initializers.get(moving_mean_initializer) + self.moving_variance_initializer = (initializers.get(moving_variance_initializer)) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + self.hardware = hardware; + self.EN_NOISE = EN_NOISE; + self.m_sigma = m_sigma; + # -- Interpolation info -- + self.Npoints = Npoints; + self.ABN_lookup = None; + self.sig_ABN_lookup = None; + + # /// Build layer /// + def build(self,input_shape): + dim = input_shape[self.axis]; + + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer received an input with shape ' + + str(input_shape) + '.') + shape = (dim,) + + if self.scale: + # gamma_constraint = Clip(0.0,4.0) + + self.gamma = self.add_weight(shape = (1,), + name = 'gamma', + initializer = self.gamma_initializer, + regularizer = self.gamma_regularizer, + constraint = self.gamma_constraint) + else: + self.gamma = None + + if self.center: + # beta_constraint = Clip(-100.0,100.0); + + self.beta = self.add_weight(shape = shape, + name = 'beta', + initializer = self.beta_initializer, + regularizer = self.beta_regularizer, + constraint = self.beta_constraint) + + else: + self.beta = None + + if self.renorm: + self.moving_mean_DP = self.add_weight( + shape=shape, + name='moving_mean_DP', + initializer=self.moving_mean_initializer, + trainable=False) + self.moving_variance_DP = self.add_weight( + shape=shape, + name='moving_variance_DP', + initializer=self.moving_variance_initializer, + trainable=False) + else: + self.moving_mean_DP = K.variable(0.0) + self.moving_variance_DP = K.variable(1.0) + + self.m_sigma = self.add_weight(shape = (1,), + name = 'm_sigma', + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_sigma)), + trainable=False) + + # Spice-extracted lookup table between D_OUT, V_DP and T_ABN (hardcoded hardware has to match CIM params) + print('Retrieving actual charge-domain ABN response...') + self.ABN_lookup = self.hardware.sramInfo.ABN_LUT; + self.sig_ABN_lookup = self.hardware.sramInfo.sig_ABN_LUT; + print('Done !') + + super(Analog_BN, self).build(input_shape) + + # /// Call layer (train or inference) /// + def call(self,inputs,training=None): + + input_shape = K.int_shape(inputs); + # print(input_shape) + + # Prepare broadcasting shape. + ndim = len(input_shape) + reduction_axes = list(range(len(input_shape))) + del reduction_axes[self.axis] + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] + + # Determines whether broadcasting is needed. + needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) + + def normalize_inference(): + # Explicitely broadcast parameters when required. + if needs_broadcasting: + # Norm params + if self.renorm: + broadcast_moving_mean_DP = K.reshape(self.moving_mean_DP, + broadcast_shape); + broadcast_moving_variance_DP = K.reshape(self.moving_variance_DP, + broadcast_shape); + else: + broadcast_moving_mean_DP = None; + broadcast_moving_variance_DP = None; + # Scale param + if self.scale: + broadcast_gamma = K.reshape(self.gamma,broadcast_shape); + else: + broadcast_gamma = None + # Offset param + if self.center: + broadcast_beta = K.reshape(self.beta,broadcast_shape); + else: + broadcast_beta = None + + # Return batchnorm + return ABN( + inputs, + self.ABN_lookup, + self.sig_ABN_lookup, + self.V_DP_half_LUT, + self.devGainLUT, + broadcast_moving_mean_DP, + broadcast_moving_variance_DP, + broadcast_beta, + broadcast_gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + Npoints = self.Npoints, + EN_NOISE=self.EN_NOISE, + training=training) + else: + return ABN( + inputs, + self.ABN_lookup, + self.sig_ABN_lookup, + self.V_DP_half_LUT, + self.devGainLUT, + self.moving_mean_DP, + self.moving_variance_DP, + self.beta, + self.gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + Npoints = self.Npoints, + EN_NOISE=self.EN_NOISE, + training=training) + + # If the learning phase is *static* and set to inference: + if training in {0, False}: + return normalize_inference() + + + # If the learning is either dynamic, or set to training: + (normed_training,mean_DP,variance_DP) = \ + norm_ABN_in_train( + inputs,self.ABN_lookup,self.sig_ABN_lookup,self.V_DP_half_LUT, self.devGainLUT, self.beta, self.gamma, self.renorm, reduction_axes, + epsilon=self.epsilon,m_sigma=self.m_sigma,hardware=self.hardware,Npoints=self.Npoints,EN_NOISE=self.EN_NOISE,training=training) + # ??? + if K.backend() != 'cntk': + sample_size = K.prod([K.shape(inputs)[axis] + for axis in reduction_axes]) + sample_size = K.cast(sample_size, dtype=K.dtype(inputs)) + if K.backend() == 'tensorflow' and sample_size.dtype != 'float32': + sample_size = K.cast(sample_size, dtype='float32') + + # sample variance - unbiased estimator of population variance + variance_DP *= sample_size / (sample_size - (1.0 + self.epsilon)) + + # Update moving mean and variance during training + self.add_update([K.moving_average_update(self.moving_mean_DP, + mean_DP, + self.momentum), + K.moving_average_update(self.moving_variance_DP, + variance_DP, + self.momentum)]) + + # Pick ABN result for either training or inference + return K.in_train_phase(normed_training, + normalize_inference, + training=training) + + + def get_config(self): + config = { + 'axis': self.axis, + 'momentum': self.momentum, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'renorm': self.renorm, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'moving_mean_initializer': + initializers.serialize(self.moving_mean_initializer), + 'moving_variance_initializer': + initializers.serialize(self.moving_variance_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint) + } + base_config = super(Analog_BN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + + return input_shape[1] +############################################## Internal functions ################################################## + +# Perform ABN +def ABN(V_DP,ABN_lookup,sig_ABN_lookup,V_DP_half_LUT,devGainLUT,mov_mean_DP=0.0,mov_variance_DP=1.0, beta=0.0,gamma=0.0,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,Npoints=401,EN_NOISE=0,training=False): + + # Get hardware parameters + VDD = hardware.sramInfo.VDD.data; + + r_gamma = hardware.sramInfo.r_gamma; Ns_gamma = 2**r_gamma; + r_beta = hardware.sramInfo.r_beta; Ns_beta = 2**r_beta; + OAres = hardware.sramInfo.OAres; + + Vmax_beta = hardware.sramInfo.Vmax_beta; + Vlsb_beta = Vmax_beta/2**(r_beta-1) + + # Set 'None' parameters to their initial values + if gamma is None: + gamma = K.constant(1.0); + if beta is None: + beta = K.constant(0.0); + if mov_mean_DP is None: + mov_mean_DP = K.constant(DR_tuple[1]); + if mov_variance_DP is None: + mov_variance_DP = K.constant(1.0); + + # // Specify non-centernormalized correction factors // + mu_goal = 0; + sigma_goal = VDD/m_sigma; var_goal = sigma_goal*sigma_goal; + + # // Equivalent gain computation // + # Get custom renorm factors (single gain for all columns) + mov_variance_DP_t = K.mean(mov_variance_DP)/var_goal; + sigma_DP_t = K.sqrt(mov_variance_DP_t); + # Get equivalent coefficients + gamma_eq = gamma/(sigma_DP_t + epsilon); + # Add Bernouilli matrices to regularize gain training (not mandatory) +# bern_matrix = tf.random.uniform(shape=tf.shape(gamma_eq),maxval=1); +# bern_matrix = tf.math.greater(bern_matrix,0.2); bern_matrix = tf.cast(bern_matrix,dtype='float32'); +# gamma_eq = bern_matrix*round_through(gamma_eq)+(1-bern_matrix)*gamma_eq; + + # // Equivalent offset computation // + sigma_DP = K.sqrt(mov_variance_DP); + mov_mean_DP_t = mov_mean_DP - mu_goal/sigma_goal*sigma_DP; + beta_eq = beta/gamma_eq - mov_mean_DP; + # Convert into voltage domain and add to ABN input + V_beta = K.clip(round_through(beta_eq/Vlsb_beta)*Vlsb_beta,-Vmax_beta,Vmax_beta); + V_DP = V_DP + V_beta; + + # Restrict gain factor to power-of-2 + log_gamma_eq = round_through(tf.math.log2(gamma_eq)); + gamma_eq = K.pow(2,log_gamma_eq); + + + # // Get ABN distribution from LUTs based on the gain/offset mapping // + D_OUT = doInterpABN(ABN_lookup,gamma_eq,V_DP,Ns_gamma,Ns_gamma,VDD,Npoints); + if(EN_NOISE): + sig_D_OUT = doInterpABN(sig_ABN_lookup,gamma_eq,V_DP,Ns_gamma,Ns_gamma,VDD,Npoints); + sig_D_OUT = sig_D_OUT*K.random_normal(shape=tf.shape(D_OUT),mean=0.,stddev=1.,dtype='float32'); + D_OUT = D_OUT + sig_D_OUT; + + # Reshape into the right order + return D_OUT; + +# Compute mean and variance of the batch then perform ABN with it, when enabled +def norm_ABN_in_train(V_DP,ABN_lookup,sig_ABN_lookup,V_DP_half_LUT,devGainLUT,beta=0.0,gamma=1.0,renorm=True,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,Npoints=401,EN_NOISE=0,training=False): + # Compute mean and variance of each batch when desired + if(renorm): + # Eventually reshape V_DP in case of CONV2D operation + Ncols = K.int_shape(V_DP)[-1]; + V_DP_flat = tf.reshape(V_DP,(-1,Ncols)); + # Get mean and variance + mean_DP = K.mean(V_DP_flat,axis=0); + variance_DP = K.var(V_DP_flat,axis=0); + else: + mean_DP = K.constant(0.0); + variance_DP = K.constant(1.0); + # Compute ABN with specified mean and variance + V_DP_BN = ABN(V_DP,ABN_lookup,sig_ABN_lookup,V_DP_half_LUT,devGainLUT,mean_DP,variance_DP,beta,gamma,axis,epsilon,m_sigma,hardware,Npoints,EN_NOISE,training); + # Return a tuple of BN_result, mean and variance + return (V_DP_BN,mean_DP,variance_DP); + diff --git a/layers/analog_BN_charge_model.py b/layers/analog_BN_charge_model.py new file mode 100644 index 0000000000000000000000000000000000000000..42d291e1b3a9588a7067ade6bd1af45965fe52bb --- /dev/null +++ b/layers/analog_BN_charge_model.py @@ -0,0 +1,330 @@ +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// +# /////////////////////////// Custom batchnorm implementing actual hardware ABN ////////////////////////////// +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// + +# Inspired from https://stackoverflow.com/questions/54101593/conditional-batch-normalization-in-keras + +import numpy as np +import math + +import tensorflow as tf +import keras.backend as K + +from keras import regularizers, initializers, constraints +#from keras.legacy import interfaces +from keras.layers import Layer, Input, InputSpec +from keras.models import Model + +from models.ABN_charge import round_through, floor_through + +class Analog_BN(Layer): + """ Analog batchnorm layer + """ + # /// Init layer /// +# @interfaces.legacy_batchnorm_support + def __init__(self, + axis=-1, + momentum=0.99, + epsilon=1e-5, + center=True, + scale=True, + renorm = True, + beta_initializer='zeros', + gamma_initializer='ones', + moving_mean_initializer='zeros', + moving_variance_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + activity_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + hardware = None, + NB = None, + m_sigma = 1, + **kwargs): + + super(Analog_BN, self).__init__(**kwargs) + self.axis = axis + self.momentum = momentum + self.epsilon = epsilon + self.center = center + self.scale = scale + self.renorm = renorm + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.moving_mean_initializer = initializers.get(moving_mean_initializer) + self.moving_variance_initializer = (initializers.get(moving_variance_initializer)) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + self.hardware = hardware; + self.m_sigma = m_sigma; + + # /// Build layer /// + def build(self,input_shape): + dim = input_shape[self.axis]; + + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer received an input with shape ' + + str(input_shape) + '.') + shape = (dim,) + + if self.scale: + # gamma_constraint = Clip(0.0,4.0) + + self.gamma = self.add_weight(shape = (1,), + name = 'gamma', + initializer = self.gamma_initializer, + regularizer = self.gamma_regularizer, + constraint = self.gamma_constraint) + else: + self.gamma = None + + if self.center: + # beta_constraint = Clip(-100.0,100.0); + + self.beta = self.add_weight(shape = shape, + name = 'beta', + initializer = self.beta_initializer, + regularizer = self.beta_regularizer, + constraint = self.beta_constraint) + else: + self.beta = None + + if self.renorm: + self.moving_mean_DP = self.add_weight( + shape=shape, + name='moving_mean_DP', + initializer=self.moving_mean_initializer, + trainable=False) + self.moving_variance_DP = self.add_weight( + shape=shape, + name='moving_variance_DP', + initializer=self.moving_variance_initializer, + trainable=False) + else: + self.moving_mean_DP = K.variable(0.0) + self.moving_variance_DP = K.variable(1.0) + + + super(Analog_BN, self).build(input_shape) + + # /// Call layer (train or inference) /// + def call(self,inputs,training=None): + + input_shape = K.int_shape(inputs) + + # Prepare broadcasting shape. + ndim = len(input_shape) + reduction_axes = list(range(len(input_shape))) + del reduction_axes[self.axis] + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] + + # Determines whether broadcasting is needed. + needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) + + def normalize_inference(): + # Explicitely broadcast parameters when required. + if needs_broadcasting: + # Norm params + if self.renorm: + broadcast_moving_mean_DP = K.reshape(self.moving_mean_DP, + broadcast_shape); + broadcast_moving_variance_DP = K.reshape(self.moving_variance_DP, + broadcast_shape); + else: + broadcast_moving_mean_DP = None; + broadcast_moving_variance_DP = None; + # Scale param + if self.scale: + broadcast_gamma = K.reshape(self.gamma,broadcast_shape); + else: + broadcast_gamma = None + # Offset param + if self.center: + broadcast_beta = K.reshape(self.beta,broadcast_shape); + else: + broadcast_beta = None + # Return batchnorm + return ABN( + inputs, + broadcast_moving_mean_DP, + broadcast_moving_variance_DP, + broadcast_beta, + broadcast_gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + training=training) + else: + return ABN( + inputs, + self.moving_mean_DP, + self.moving_variance_DP, + self.beta, + self.gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + training=training) + + # If the learning phase is *static* and set to inference: + if training in {0, False}: + return normalize_inference() + + + # If the learning is either dynamic, or set to training: + (normed_training,mean_DP,variance_DP) = \ + norm_ABN_in_train( + inputs, self.beta, self.gamma, self.renorm, reduction_axes, + epsilon=self.epsilon,m_sigma=self.m_sigma,hardware=self.hardware,training=training); + # ??? + if K.backend() != 'cntk': + sample_size = K.prod([K.shape(inputs)[axis] + for axis in reduction_axes]) + sample_size = K.cast(sample_size, dtype=K.dtype(inputs)) + if K.backend() == 'tensorflow' and sample_size.dtype != 'float32': + sample_size = K.cast(sample_size, dtype='float32') + + # sample variance - unbiased estimator of population variance + variance_DP *= sample_size / (sample_size - (1.0 + self.epsilon)) + + # Update moving mean and variance during training + self.add_update([K.moving_average_update(self.moving_mean_DP, + mean_DP, + self.momentum), + K.moving_average_update(self.moving_variance_DP, + variance_DP, + self.momentum)]) + + # Pick ABN result for either training or inference + return K.in_train_phase(normed_training, + normalize_inference, + training=training) + + + def get_config(self): + config = { + 'axis': self.axis, + 'momentum': self.momentum, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'renorm': self.renorm, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'moving_mean_initializer': + initializers.serialize(self.moving_mean_initializer), + 'moving_variance_initializer': + initializers.serialize(self.moving_variance_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint) + } + base_config = super(Analog_BN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + + return input_shape[1] +############################################## Internal functions ################################################## + +# Perform ABN +def ABN(V_DP,mov_mean_DP=0.0,mov_variance_DP=1.0,beta=0.0,gamma=0.0,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,training=False): + # Get min and max DR limits + VDD = hardware.sramInfo.VDD.data; + + r_gamma = hardware.sramInfo.r_gamma; + r_beta = hardware.sramInfo.r_beta; + OAres = hardware.sramInfo.OAres; + + Vmax_beta = hardware.sramInfo.Vmax_beta; + Vlsb_beta = Vmax_beta/2**(r_beta-1); + Vadc_step = VDD/(2**OAres); + + # Set 'None' parameters to their initial values + if gamma is None: + gamma = K.constant(1.0); + if beta is None: + beta = K.constant(0.0); + if mov_mean_DP is None: + mov_mean_DP = K.constant(DR_tuple[1]); + if mov_variance_DP is None: + mov_variance_DP = K.constant(1.0); + + # Specify non-centernormalized correction factors +# mu_goal = VDD/2; + sigma_goal = VDD/m_sigma; var_goal = sigma_goal*sigma_goal; + +# # Get custom renorm factors +# sigma_DP = K.sqrt(mov_variance_DP); +# mov_mean_DP_t = mov_mean_DP - mu_goal/sigma_goal*sigma_DP; +# # mov_mean_DP_t = K.zeros_like(mov_mean_DP); + mov_variance_DP_t = K.mean(mov_variance_DP)/var_goal; +# mov_variance_DP_t = mov_variance_DP/var_goal; +# # Get equivalent coefficients +# sigma_DP_t = K.sqrt(mov_variance_DP_t); + + gamma_eq = gamma/(K.sqrt(mov_variance_DP_t) + epsilon); + beta_eq = beta/gamma_eq - mov_mean_DP; + + # Restrict gain factor to power-of-2 + log_gamma_eq = round_through(tf.math.log(gamma_eq)/tf.math.log(2.)); + gamma_eq = K.pow(2.,log_gamma_eq); + + # Quantize results + gamma_eq = K.clip(round_through(gamma_eq),1,2**r_gamma); + V_beta = K.clip(round_through(beta_eq/Vlsb_beta)*Vlsb_beta,-Vmax_beta,Vmax_beta); + + # Model transfer function + V_ABN_temp = K.mean(gamma_eq)*((V_DP-VDD/2)+V_beta); + V_ABN = V_ABN_temp + VDD/2; + + # Quantize output + D_OUT = K.clip(floor_through(V_ABN/Vadc_step),0,2**OAres-1); + + # Debug + # tf.print("V_DP",V_DP[0:8,0]); + # tf.print("gamma",gamma); + # tf.print("gamma_eq",gamma_eq); + # tf.print("beta_eq",beta_eq[0:8]); + # tf.print("log_gamma",log_gamma_eq); + # tf.print("V_ABN",V_ABN[0:8,0]); + # tf.print("D_OUT",D_OUT[0:8,0]); + + + # Return (unclipped) result + return D_OUT; + # return V_ABN + +# Compute mean and variance of the batch then perform ABN with it, when enabled +def norm_ABN_in_train(V_DP,beta=0.0,gamma=1.0,renorm=True,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,training=False): + # Get min and max DR limits + VDD = hardware.sramInfo.VDD.data; + + # Compute mean and variance of each batch when desired + if(renorm): + # Model transfer function + V_out = V_DP-VDD/2; + + # Get mean and variance + mean_DP = K.mean(V_out,axis=0); + variance_DP = K.var(V_DP,axis=0); + else: + mean_DP = K.constant(0.0); + variance_DP = K.constant(1.0); + # Compute ABN with specified mean and variance + V_DP_BN = ABN(V_DP,mean_DP,variance_DP,beta,gamma,axis,epsilon,m_sigma,hardware,training); + # Return a tuple of BN_result, mean and variance + return (V_DP_BN,mean_DP,variance_DP); + + + diff --git a/layers/analog_BN_current_interp_PL.py b/layers/analog_BN_current_interp_PL.py index a11607dfc8c95788ebf8d8f1cb8a3edd713effcc..8ed80d4ca67ed4e858dbe34fb61ba2f8953b85e2 100644 --- a/layers/analog_BN_current_interp_PL.py +++ b/layers/analog_BN_current_interp_PL.py @@ -15,11 +15,6 @@ from keras import regularizers, initializers, constraints from keras.layers import Layer, Input, InputSpec from keras.models import Model -# Temporary folder -import tempfile -import sys -import subprocess -import time # Current ABN model from models.ABN_current import makeLookupABN, doInterpABN diff --git a/layers/quantized_layers_IMC.py b/layers/quantized_layers_IMC.py index 4f13a923eabdfc976ae916f85feac38d914fa59b..1267e4e2c7b04658146f357dbd992dab7c0ae9dd 100644 --- a/layers/quantized_layers_IMC.py +++ b/layers/quantized_layers_IMC.py @@ -11,14 +11,18 @@ from keras import initializers # Quantization functions from layers.binary_ops import binarize as binarize # Analog MAC operator -#from models.MAC_current import MAC_op_diff, MAC_op_se -from models.MAC_current import MAC_op_diff_num as MAC_op_diff -from models.MAC_current import MAC_op_se_num as MAC_op_se -from models.CONV_current import CONV_op_diff_num as CONV_op_diff -from models.CONV_current import CONV_op_se_num as CONV_op_se +#from models.MAC_current import MAC_op_current_diff, MAC_op_current_se +from models.MAC_current import MAC_op_diff_num as MAC_op_current_diff +from models.MAC_current import MAC_op_se_num as MAC_op_current_se +from models.MAC_charge import MAC_op_se_ana as MAC_op_charge_se +from models.CONV_current import CONV_op_diff_num as CONV_op_current_diff +from models.CONV_current import CONV_op_se_num as CONV_op_current_se +from models.CONV_charge import CONV_op_se_ana as CONV_op_charge_se + +from models.DTSE import DTSE_ideal, DTSE_PL +from models.MBIT_unit import MBIT_W_ideal, MBIT_W_num + from models.ADC import quant_uni -from models.DTSE import DTSE_PL -from models.DTSE import DTSE_ideal # Hardware parameters generation from utils.config_hardware_model import genHardware # Rounding facility @@ -48,14 +52,16 @@ class Clip(constraints.Constraint): "min_value": self.min_value, "max_value": self.max_value} +### --- FC (dense) CIM-aware layers --- ### -class QuantizedDense(Dense): - ''' Quantized Dense layer - References: +# --- Current-domain CIM --- +class CIM_current_dense(Dense): + ''' Current-domain CIM dense layer + Based on: "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] ''' def __init__(self, units, H=1., nb=16, m_T_DP=1., sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): - super(QuantizedDense, self).__init__(units, **kwargs) + super(CIM_current_dense, self).__init__(units, **kwargs) self.H = H self.nb = nb self.kernel_lr_multiplier = kernel_lr_multiplier @@ -74,7 +80,7 @@ class QuantizedDense(Dense): self.DTSE_LUT = None - super(QuantizedDense, self).__init__(units, **kwargs) + super(CIM_current_dense, self).__init__(units, **kwargs) def build(self, input_shape): assert len(input_shape) >= 2 @@ -159,15 +165,12 @@ class QuantizedDense(Dense): FLAG_PL = self.FLAG_PL; # Wrap correct MAC_op function - if(archType == '6T'): - MAC_op = MAC_op_diff; - elif(archType == '8T'): - MAC_op = MAC_op_se; + if(FLAG_SE_DP): + MAC_op = MAC_op_current_se; else: - raise NameError('Error: selected architecture (cell type) not supported during FC layer compute !\n'); - + MAC_op = MAC_op_current_diff; + # Apply T_DP transform - # T_DP_conf = K.clip(round_through(self.m_T_DP),1,8)/8; T_DP_conf = K.clip(round_through(self.m_T_DP),0,8-1); # Emulate CIM-SRAM analog DP operation, possibly with parallel macros @@ -196,9 +199,9 @@ class QuantizedDense(Dense): C_int_dtse = self.hardware.sramInfo.C_int_dtse.data; C_L_dtse = self.hardware.sramInfo.C_L_dtse.data; # Apply DTSE - V_DP = DTSE_PL(V_BL_bin,V_BLB_bin,C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb); + V_DP.append(DTSE_PL(V_BL_bin,V_BLB_bin,C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb)); else: - V_DP = DTSE_ideal(V_BL_bin,V_BLB_bin,VDD_DTSE,0,self.nb); + V_DP.append(DTSE_ideal(V_BL_bin,V_BLB_bin,VDD_DTSE,0,self.nb)); # Combine the result as if ideal charge-sharing (--> could implement actual charge-sharing !) if(FLAG_SE_DP): @@ -225,9 +228,9 @@ class QuantizedDense(Dense): C_int_dtse = self.hardware.sramInfo.C_int_dtse.data; C_L_dtse = self.hardware.sramInfo.C_L_dtse.data; # Apply DTSE - V_DP = DTSE_PL(V_DP_bin[0],V_DP_bin[1],C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb); + V_DP = DTSE_PL(V_BL_bin,V_BLB_bin,C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb); else: - V_DP = DTSE_ideal(V_DP_bin[0],V_DP_bin[1],VDD_DTSE,0,self.nb); + V_DP = DTSE_ideal(V_BL_bin,V_BLB_bin,VDD_DTSE,0,self.nb); # Add bias to PA (if used) if self.use_bias: @@ -239,7 +242,7 @@ class QuantizedDense(Dense): if(self.FLAG_QUANT): IAres = self.hardware.sramInfo.IAres OAres = self.hardware.sramInfo.OAres - NB = self.hardware.sramInfo.NB.data + NB = self.input_dim[-1] PAmax = (2**IAres-1)*NB DRval = self.hardware.sramInfo.DR.data; VDD = self.hardware.sramInfo.VDD.data; @@ -256,18 +259,212 @@ class QuantizedDense(Dense): config = {'H': self.H, 'kernel_lr_multiplier': self.kernel_lr_multiplier, 'bias_lr_multiplier': self.bias_lr_multiplier} - base_config = super(QuantizedDense, self).get_config() + base_config = super(CIM_current_dense, self).get_config() return dict(list(base_config.items()) + list(config.items())) + +# --- Charge-domain CIM --- +class CIM_charge_dense(Dense): + ''' Charge-domain CIM dense layer + Based on: + "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] + ''' + def __init__(self, units, H=1., nb=16, m_T_DP=1., sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): + super(CIM_charge_dense, self).__init__(units, **kwargs) + self.H = H + self.nb = nb + self.kernel_lr_multiplier = kernel_lr_multiplier + self.bias_lr_multiplier = bias_lr_multiplier + self.m_T_DP_init = m_T_DP; + + self.EN_NOISE = EN_NOISE + self.FLAG_QUANT = FLAG_QUANT + self.FLAG_PL = FLAG_PL + self.IS_TRAINABLE_DP = IS_TRAINABLE_DP + + self.sramInfo = sramInfo + self.hardware = None + self.MoM_noise = None + self.input_dim = None + + self.MBIT_LUT = None + + super(CIM_charge_dense, self).__init__(units, **kwargs) + + def build(self, input_shape): + assert len(input_shape) >= 2 + input_dim = input_shape[1] + self.input_dim = input_dim + if self.H == 'Glorot': + self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) + #print('Glorot H: {}'.format(self.H)) + if self.kernel_lr_multiplier == 'Glorot': + self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units))) + #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) + + self.kernel_constraint = Clip(-self.H, self.H) + self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) + # Binary values, nb-quantized weights in parallel columns + self.kernel = self.add_weight(shape=(input_dim, self.units*self.nb), + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) + if self.use_bias: + self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] + self.bias = self.add_weight(shape=(self.units,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + else: + self.lr_multipliers = [self.kernel_lr_multiplier] + self.bias = None + + # Train DP timing config or not + if self.IS_TRAINABLE_DP: + self.m_T_DP = self.add_weight(shape=(1,), + initializer = initializers.get(tf.keras.initializers.Constant(value=1.)), + name = 'T_DP_conf', + regularizer = None, + constraint = None); + else: + self.m_T_DP = 1.; + + # Get DP electrical quantities for this layer + Nrows = self.sramInfo.Nrows.data + N_cim = int(math.ceil((input_dim-1)/Nrows)); + + # Generate hardware parameters + hardware = genHardware(self.sramInfo) + self.hardware = hardware + # Create V_th distribution + if(self.sramInfo.noise_at_inf): + if(self.sramInfo.IS_NUM): + self.MoM_noise = K.random_normal(shape=(K.int_shape(self.kernel)[-1],),mean=0.,stddev=1.); + else: + sig_MoM = self.hardware.sig_MoM + self.MoM_noise = K.random_normal(shape=K.int_shape(self.kernel),mean=0,stddev=sig_MoM) + else: + if(self.sramInfo.IS_NUM): + self.MoM_noise = K.zeros_like(K.int_shape(self.kernel)[-1]); + else: + self.MoM_noise = K.zeros_like(K.int_shape(self.kernel)); + + # Recover DTSE best-fitting coefficients + self.MBIT_LUT = self.sramInfo.MBIT_W_LUT; + + # Perform build + self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) + self.built = True + + + def call(self, inputs): + # Quantize weights in parallel columns + W_bin = binarize(self.kernel, H=self.H); + # Check if a single CIM-SRAM is sufficient, or ideal charge-share of their analog outputs + Nrows = self.hardware.sramInfo.Nrows.data; + N_cim = int(math.ceil((self.input_dim-1)/Nrows)); + # Retrieve architecture type (diff or se) and derive flag + archType = self.hardware.sramInfo.arch.name; + FLAG_SE_DP = (archType == '10TC'); + FLAG_PL = self.FLAG_PL; + + # Wrap correct MAC_op function + if(FLAG_SE_DP): + MAC_op = MAC_op_charge_se; + else: + raise NameError('Differential charge-domain operations not supported !'); + + # Apply T_DP transform + T_DP_conf = K.clip(round_through(self.m_T_DP),0,4-1); + + # Emulate CIM-SRAM analog DP operation, possibly with parallel macros + if(N_cim > 1): + # Separate inputs and weights in sub-matrices + inputs = tf.unstack(K.reshape(inputs,(-1,int(self.input_dim/N_cim),N_cim)),axis=-1) + W_bin = K.permute_dimensions(K.reshape(K.permute_dimensions(W_bin,(1,0)),(-1,int(self.input_dim/N_cim),N_cim)),(1,2,0)) + W_bin = tf.unstack(W_bin,axis=1) + # Perform CIM-SRAM operations over all sub-matrices (i.e. different CIM-SRAMs) + V_DP = []; V_BL = []; V_BLB = []; + for i in range(N_cim): + # Compute analog dot-product + V_DP_bin = MAC_op(self.hardware,inputs[i],W_bin[i],self.MoM_noise,T_DP_conf,self.EN_NOISE); + # Reshape outputs and apply spatial mbit weighting, whenever relevant + if(FLAG_SE_DP): + V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + # Perform mbit-weight accumulation + if(FLAG_PL): + V_DP.append(MBIT_W_num(V_DP_bin,self.sramInfo,self.nb,self.MBIT_LUT)); + else: + V_DP.append(MBIT_W_ideal(V_DP_bin,self.sramInfo,self.nb)); + else: + raise NameError('Differential charge-domain operations not supported !'); + # Combine the result as if ideal charge-sharing between macro's + if(FLAG_SE_DP): + V_DP = K.sum(tf.stack(V_DP,axis=2),axis=2)/N_cim; + else: + raise NameError('Differential charge-domain operations not supported !'); + + # Single CIM-SRAM tile sufficient + else: + # Compute analog dot-product + V_DP_bin = MAC_op(self.hardware,inputs,W_bin,self.MoM_noise,T_DP_conf,self.EN_NOISE); + # Reshape outputs and apply DTSE, whenever relevant + if(FLAG_SE_DP): + V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + # Perform mbit-weight accumulation + if(FLAG_PL): + V_DP = MBIT_W_num(V_DP_bin,self.sramInfo,self.MBIT_LUT); + else: + V_DP = MBIT_W_ideal(V_DP_bin,self.sramInfo); + else: + raise NameError('Differential charge-domain operations not supported !'); + + + # Add bias to DP result (if used) + if self.use_bias: + if(self.FLAG_QUANT): + V_DP = K.bias_add(V_DP, self.bias) + else: + V_DP = K.bias_add(V_DP,self.bias); + + # DBN: ADC quantization occurs at this point + if(self.FLAG_QUANT): + IAres = self.hardware.sramInfo.IAres; + OAres = self.hardware.sramInfo.OAres; + NB = self.input_dim; + PAmax = (2**IAres-1)*NB; + DRval = self.hardware.sramInfo.DR.data; + VDD = self.hardware.sramInfo.VDD.data; + #DO = V_DP; + DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); + # Return quantized output + return DO + # ABN: propagate the analog value + else: + return V_DP + + + def get_config(self): + config = {'H': self.H, + 'kernel_lr_multiplier': self.kernel_lr_multiplier, + 'bias_lr_multiplier': self.bias_lr_multiplier} + base_config = super(CIM_charge_dense, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + -class QuantizedConv2D(Conv2D): +### --- CONV-2D CIM-aware layers --- +# --- Current-domain CIM --- +class CIM_current_conv2D(Conv2D): '''Quantized Convolution2D layer References: "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] ''' def __init__(self, filters, m_T_DP=1, nRep=0, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, H=1., nb=16, padding_num=0, sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, **kwargs): - super(QuantizedConv2D, self).__init__(filters, **kwargs) + super(CIM_current_Conv2D, self).__init__(filters, **kwargs) self.H = H self.nb = nb self.padding_num = padding_num @@ -395,9 +592,9 @@ class QuantizedConv2D(Conv2D): # Wrap correct CONV_op function if(archType == '6T'): - CONV_op = CONV_op_diff; + CONV_op = CONV_op_current_diff; elif(archType == '8T'): - CONV_op = CONV_op_se; + CONV_op = CONV_op_current_se; else: raise NameError('Error: selected architecture (cell type) not supported during 2DCONV layer compute !\n'); @@ -465,10 +662,194 @@ class QuantizedConv2D(Conv2D): config = {'H': self.H, 'kernel_lr_multiplier': self.kernel_lr_multiplier, 'bias_lr_multiplier': self.bias_lr_multiplier} - base_config = super(QuantizedConv2D, self).get_config() + base_config = super(CIM_current_Conv2D, self).get_config() return dict(list(base_config.items()) + list(config.items())) +# --- Charge-domain CIM --- +class CIM_charge_conv2D(Conv2D): + '''Quantized Convolution2D layer + References: + "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] + ''' + def __init__(self, filters, m_T_DP=1, nRep=0, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', + bias_lr_multiplier=None, H=1., nb=16, padding_num=0, sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, **kwargs): + super(CIM_current_Conv2D, self).__init__(filters, **kwargs) + self.H = H + self.nb = nb + self.padding_num = padding_num + self.m_T_DP_init = m_T_DP + self.nRep = 2**nRep; + self.kernel_lr_multiplier = kernel_lr_multiplier + self.bias_lr_multiplier = bias_lr_multiplier + self.activity_regularizer =activity_regularizer + self.kernel_regularizer = kernel_regularizer + + self.sramInfo = sramInfo + self.hardware = None + self.MoM_noise = None + + self.EN_NOISE = EN_NOISE + self.FLAG_QUANT = FLAG_QUANT + self.FLAG_PL = FLAG_PL + self.IS_TRAINABLE_DP = IS_TRAINABLE_DP + + self.MBIT_LUT = None + + def build(self, input_shape): + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis] is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined. Found `None`.') + + # Replicate inputs along channel axis + input_dim = input_shape[channel_axis]; + kernel_shape = self.kernel_size + (input_dim, self.filters*self.nb) + + base = self.kernel_size[0] * self.kernel_size[1] + if self.H == 'Glorot': + nb_input = int(input_dim * base) + nb_output = int(self.filters * base) + self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) + #print('Glorot H: {}'.format(self.H)) + + if self.kernel_lr_multiplier == 'Glorot': + nb_input = int(input_dim * base) + nb_output = int(self.filters * base) + self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5/ (nb_input + nb_output))) + + self.kernel_constraint = Clip(-self.H, self.H) + self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) + self.kernel = self.add_weight(shape=kernel_shape, + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) -# Aliases + if self.use_bias: + self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] + self.bias = self.add_weight((self.filters*self.nb,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + + else: + self.lr_multipliers = [self.kernel_lr_multiplier] + self.bias = None -QuantizedConvolution2D = QuantizedConv2D + # Train DP timing config or not + if self.IS_TRAINABLE_DP: + self.m_T_DP = self.add_weight(shape=(1,), + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_T_DP_init)), + regularizer = None, + constraint = None); + else: + self.m_T_DP = 1.; + + # Get DP electrical quantities for this layer + self.sramInfo.NB.data = base*input_dim; + # Generate hardware parameters + hardware = genHardware(self.sramInfo); + self.hardware = hardware; + # Create V_th distribution + if(self.sramInfo.noise_at_inf): + if(self.sramInfo.IS_NUM): + self.MoM_noise = K.random_normal(shape=(K.int_shape(self.kernel)[-1],),mean=0.,stddev=1.); + else: + sig_MoM = self.hardware.sig_MoM + self.MoM_noise = K.random_normal(shape=K.int_shape(self.kernel),mean=0,stddev=sig_MoM) + else: + if(self.sramInfo.IS_NUM): + self.MoM_noise = K.zeros_like(K.int_shape(self.kernel)[-1]); + else: + self.MoM_noise = K.zeros_like(K.int_shape(self.kernel)); + + # Recover DTSE best-fitting coefficients + self.MBIT_LUT = self.sramInfo.MBIT_W_LUT; + + # Set input spec. + self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) + self.built = True + + def call(self, inputs): + binary_kernel = binarize(self.kernel, H=self.H); + if self.data_format == 'channels_first': + binary_kernel = tf.tile(binary_kernel,(self.nRep,1,1,1)); + else: + binary_kernel = tf.tile(binary_kernel,(1,1,self.nRep,1)); + + # Retrieve architecture type (diff or se) and derive flag + archType = self.hardware.sramInfo.arch.name; + FLAG_SE_DP = (archType == '10TC') or self.FLAG_QUANT; + + # Apply T_DP transform + T_DP_conf = K.clip(round_through(self.m_T_DP),0,4-1); + + # Replicate inputs along channel axis + if self.data_format == 'channels_first': + inputs = tf.tile(inputs,[1,self.nRep,1,1]); + else: + inputs = tf.tile(inputs,[1,1,1,self.nRep]); + + # Wrap correct CONV_op function + if(FLAG_SE_DP): + CONV_op = CONV_op_charge_se; + else: + raise NameError('Error: selected architecture (cell type) not supported during 2DCONV layer compute !\n'); + + inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier + inputs_qnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\ + * inverse_kernel_lr_multiplier + + # Compute analog-like column-wise CIM-SRAM equivalent conv2d + outputs_qnn_gradient = CONV_op( + self.hardware, + inputs_qnn_gradient, + binary_kernel, + self.Vt_noise, + T_DP_conf, + self.data_format, + self.padding_num, + self.EN_NOISE) + + if(FLAG_SE_DP): + V_DP_bin = (outputs_qnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient))\ + * self.kernel_lr_multiplier + else: + raise NameError('Differential charge-domain operations not supported !'); + # Apply MBIT conversion, when relevant + if(FLAG_SE_DP): + V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + # Perform mbit-weight accumulation + if(FLAG_PL): + V_DP = MBIT_W_num(V_DP_bin,self.sramInfo,self.MBIT_LUT); + else: + V_DP = MBIT_W_ideal(V_DP_bin,self.sramInfo); + else: + raise NameError('Differential charge-domain operations not supported !'); + + # DBN: apply ADC quantization + if(self.FLAG_QUANT): + IAres = self.hardware.sramInfo.IAres + OAres = self.hardware.sramInfo.OAres + NB = self.hardware.sramInfo.NB.data + PAmax = (2**IAres-1)*NB + DRval = self.hardware.sramInfo.DR.data; + VDD = self.hardware.sramInfo.VDD.data; + # Return digitized output + DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); + return DO + # ABN: propagate analog value + else: + return V_DP + + + def get_config(self): + config = {'H': self.H, + 'kernel_lr_multiplier': self.kernel_lr_multiplier, + 'bias_lr_multiplier': self.bias_lr_multiplier} + base_config = super(CIM_charge_Conv2D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/models/ABN_charge.py b/models/ABN_charge.py new file mode 100644 index 0000000000000000000000000000000000000000..492a0229862368c76cc9c4cb769cb0a759a64b8a --- /dev/null +++ b/models/ABN_charge.py @@ -0,0 +1,77 @@ +########################################################## +######### HARDWARE MODEL FOR CURRENT-BASED ABN ########### +########################################################## +import sys +import numpy as np +import keras.backend as K +import tensorflow as tf +import tensorflow_probability as tfp + + +# /// Make 2D-lookup of coefficients for bilinear interpolation /// +# /// Bilinear equation: f(x,y) = a0 + a1*x +a2*y+a3*x*y +def makeLookupABN(ABN_lookup,x1_max,N1,x2_max,N2): + # Deduce input vectors + x1_vec = np.linspace(1,x1_max,N1); + x2_vec = np.linspace(0,x2_max,N2); + # Fill-in lookup for model coefficients + linearLookup = np.zeros((N1-1,N2-1,4)); + for i in range(N1-1): + for j in range(N2-1): + # Compute common den + den = (x1_vec[i]-x1_vec[i+1])*(x2_vec[j]-x2_vec[j+1]); + # Compute coefficients + a0 = ABN_lookup[i,j]*x1_vec[i+1]*x2_vec[j+1]-ABN_lookup[i,j+1]*x1_vec[i+1]*x2_vec[j] \ + -ABN_lookup[i+1,j]*x1_vec[i]*x2_vec[j+1]+ABN_lookup[i+1,j+1]*x1_vec[i]*x2_vec[j]; + a0 = a0/den; + a1 = -ABN_lookup[i,j]*x2_vec[j+1]+ABN_lookup[i,j+1]*x2_vec[j] \ + +ABN_lookup[i+1,j]*x2_vec[j+1]-ABN_lookup[i+1,j+1]*x2_vec[j]; + a1 = a1/den; + a2 = -ABN_lookup[i,j]*x1_vec[i+1]+ABN_lookup[i,j+1]*x1_vec[i+1] \ + +ABN_lookup[i+1,j]*x1_vec[i]-ABN_lookup[i+1,j+1]*x1_vec[i]; + a2 = a2/den; + a3 = ABN_lookup[i,j]-ABN_lookup[i,j+1]-ABN_lookup[i+1,j]+ABN_lookup[i+1,j+1]; + a3 = a3/den; + # Fill lookup + linearLookup[i,j,::] = np.array([a0,a1,a2,a3]); + # Make numpy array into constant tensor + linearLookup = linearLookup.astype("float32"); + linearLookup = tf.constant(linearLookup); + # Return table + return linearLookup; + +# /// Apply custom 2D interpolation /// +def doInterpABN(ABN_lookup,x1,x2,x1_max,N1,x2_max,N2): + # Possibly reshape x2 if CONV layer + x2_shape = tf.shape(x2); + Vlsb = x2_max/(N2-1); + x2 = floor_through(tf.reshape(x2/Vlsb,(-1,x2_shape[-1])))*Vlsb; + # Get indices + ind_x1 = K.clip(tf.math.floor(x1/x1_max*N1),0,(N1-1)-1); ind_x1 = K.cast(ind_x1,"int32"); + ind_x2 = K.clip(tf.math.floor(x2/x2_max*(N2-1)),0,(N2-1)-1); ind_x2 = K.cast(ind_x2,"int32"); + # Get corresponding coefficients + coef_vec = tf.gather_nd(ABN_lookup,tf.stack([ind_x1*K.ones_like(ind_x2),ind_x2],axis=2)); + # Perform interpolation + V_ABN = coef_vec[...,0]+coef_vec[...,1]*x1+coef_vec[...,2]*x2+coef_vec[...,3]*x1*x2; + + # Possibly reshape V_ABN to CONV format + V_ABN = tf.reshape(V_ABN,x2_shape); + # Return interpolated result + return V_ABN + +## ///////////////////// Internal functions ////////////////////// +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; + +def round_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + rounded = K.round(x); + rounded_through = x + K.stop_gradient(rounded - x); + return rounded_through; diff --git a/models/Analog_DP.py b/models/Analog_DP.py index b860aec2eed461de0901af7008a956439683534b..0c69eaaf4fdf147e29c8c2c1ada8db2f5e2df29c 100644 --- a/models/Analog_DP.py +++ b/models/Analog_DP.py @@ -7,7 +7,9 @@ import keras.backend as K import tensorflow as tf import numpy as np import math + from utils.linInterp import doInterpDP_2D +from models.MBIT_unit import MBIT_IN_actual ########################### NUMERICAL, INTERPOLATION-BASED MODELS ############################### def int_BL_num(hardware,IA,W_array,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE): @@ -19,14 +21,12 @@ def int_BL_num(hardware,IA,W_array,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE # Compute ideal BL/BLB results to get indexes BL_ind = K.dot(IA,W_array); y_shape = tf.shape(BL_ind); - # tf.print("BL index",BL_ind[0][0:8]); # Perform 3D interpolation (--- 2D until we get 3D tables ---) V_BL_nom_0 = doInterpDP_2D(BL_LUT[...,0],T_DP_conf,BL_ind[...,0::4],T_DP_vec,Ndp,Nrows//8); V_BL_nom_1 = doInterpDP_2D(BL_LUT[...,1],T_DP_conf,BL_ind[...,1::4],T_DP_vec,Ndp,Nrows//8); V_BL_nom_2 = doInterpDP_2D(BL_LUT[...,2],T_DP_conf,BL_ind[...,2::4],T_DP_vec,Ndp,Nrows//8); V_BL_nom_3 = doInterpDP_2D(BL_LUT[...,3],T_DP_conf,BL_ind[...,3::4],T_DP_vec,Ndp,Nrows//8); V_BL_nom = tf.stack([V_BL_nom_0,V_BL_nom_1,V_BL_nom_2,V_BL_nom_3],axis=-1); - # tf.print("V_BL",V_BL_nom[0][0:8]); # Reshape into the right order V_BL_nom = tf.reshape(V_BL_nom,y_shape); @@ -35,12 +35,37 @@ def int_BL_num(hardware,IA,W_array,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE # Reshape into the right order dist_train = K.random_normal(shape=tf.shape(V_BL_nom),mean=0.,stddev=1.,dtype='float32'); sig_V_BL = sig_V_BL*dist_train; - # tf.print("sig V_BL",sig_V_BL[0]); else: sig_V_BL = K.zeros_like(V_BL_nom); V_BL = V_BL_nom + sig_V_BL; return V_BL; +def int_DP_num(hardware,IA,W_array,DP_LUT,T_DP_conf,dist_inf,sig_DP_LUT,EN_NOISE): + # Local variables + Nrows = hardware.sramInfo.Nrows.data; + IAres = hardware.sramInfo.IAres; + Ndp = (2**IAres-1)*Nrows; + T_DP_vec = hardware.sramInfo.T_DP_vec; + # Compute ideal BL/BLB results to get indexes + DP_ind = K.dot(IA,W_array); + y_shape = tf.shape(DP_ind); + # Perform 3D interpolation (--- 2D until we get 3D tables ---) + V_DP_nom = doInterpDP_2D(DP_LUT,T_DP_conf,DP_ind,T_DP_vec,Ndp,Nrows//8); + # Add nose, when enabled + if(EN_NOISE): + sig_V_DP = doInterpDP_2D(sig_DP_LUT,T_DP_conf,DP_ind,T_DP_vec,Ndp,Nrows//8); + # Reshape into the right order + dist_train = K.random_normal(shape=tf.shape(V_DP_nom),mean=0.,stddev=1.,dtype='float32'); + sig_V_DP = sig_V_DP*dist_train; + else: + sig_V_DP = K.zeros_like(V_DP_nom); + V_DP = V_DP_nom + sig_V_DP; + + # Perform multi-bit accumulation + V_DP = MBIT_IN_actual(V_DP,hardware.sramInfo); + + return V_DP; + ##################################### ANALYTICAL MODELS ######################################### # This model supposes the following: # - Access transistors always in saturation @@ -105,7 +130,6 @@ def int_BL_DAC(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): if(Vov_max >= 0): # Compute equivalent ON conductance if(EN_NOISE): - # Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1) + 2/sqrt(Nstates-1)*Vov_max*sig_Vt_mat*K.sqrt(K.clip(MAC_val,1e-10,None)) + (sig_Vt_mat*sig_Vt_mat)*N_on; Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1) + 2/(Nstates-1)*Vov_max*K.sum(Non_vec,axis=-1)*sig_Vt_mat + (sig_Vt_mat*sig_Vt_mat)*N_on; else: Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1); @@ -113,8 +137,6 @@ def int_BL_DAC(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): gamma = beta*W/L/C_BL/2*(a1*Geq_tot+a2*Geq_tot*Geq_tot); # Compute BL integrated voltage V_BL = V0*K.exp(-gamma/Vea*T_DP)-(Vea-V_Q0)*(1-K.exp(-gamma/Vea*T_DP))+b1; - # tf.print("FC one-sided DP value: ",K.transpose(MAC_val[1,:]),summarize=-1) - # tf.print("FC BL voltage: ",K.transpose(V_BL[1,:]),summarize=-1) else: # Get the expression of the equivelent ON-conductance if(EN_NOISE): @@ -129,7 +151,6 @@ def int_BL_DAC(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): V_BL = K.clip(V_BL,0,VDD); return V_BL - # Voltage integration Model with PWM input def int_BL_PWM(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): # Retrieve resolution @@ -175,7 +196,6 @@ def int_BL_PWM(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): if(Vov_max >= 0): sig_Vt_mat = K.random_normal(shape=tf.shape(MAC_val),mean=0.,stddev=hardware.sig_Vth,dtype='float32'); sig_Vt_mat = K.in_train_phase(sig_Vt_mat,sig_Vt_inf[0]); - # sig_Vt_mat = K.in_train_phase(sig_Vt_mat,K.zeros_like(sig_Vt_mat)); else: sig_Vt_mat = K.random_normal(shape=tf.shape(W_array),mean=0.,stddev=hardware.sig_Vth,dtype='float32'); bern_matrix = tf.random.uniform(shape=tf.shape(W_array),maxval=1); @@ -196,8 +216,6 @@ def int_BL_PWM(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): gamma = beta*W/L/C_BL/2*(a1*Geq_tot+a2*Geq_tot*Geq_tot); # Compute BL integrated voltage V_BL = V0*K.exp(-gamma/Vea*T_DP)-(Vea-V_Q0)*(1-K.exp(-gamma/Vea*T_DP))+b1; - # tf.print("FC one-sided DP value: ",K.transpose(MAC_val[1,:]),summarize=-1) - # tf.print("FC BL voltage: ",K.transpose(V_BL[1]),summarize=5) else: # Compute equivalent ON conductance I_S0 = 0.2e-4; @@ -219,8 +237,38 @@ def int_BL_PWM(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): # Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1); V_BL = Ut*K.log(1+(K.exp(V0/Ut)-1)*K.exp(-a1*tau_DP*alpha_I/(Nstates-1)*((MAC_val+a4*MAC_val*MAC_val)*K.exp(K.constant(a2*(Vov_max+a3)/(n_body*Ut),dtype="float32")) + (Nrem+a4*Nrem*Nrem)*K.exp(K.constant(a2*(-mu_Vt+a3)/(n_body*Ut),dtype="float32")))+b1))-2.5e-4*V_WL_max*(NB-K.dot(IA_zero,K.ones_like(W_array))); - # tf.print("MAC",MAC_val[0],summarize=5); - # tf.print("V_BL",V_BL[0],summarize=5); # Clip result, waiting for triode V_BL = K.clip(V_BL,0,VDD); return V_BL + +# Charge-based DP model +def int_DP_cap(hardware,IA,W,Nunit,sig_MoM,EN_NOISE): + # Retrieve parameters + VDDL = hardware.sramInfo.VDD.data/2; + IAres = hardware.sramInfo.IAres; + Nrows = hardware.sramInfo.Nrows.data; + C_unit = hardware.sramInfo.C_unit; + Cc = hardware.sramInfo.Cc; + C_array = hardware.sramInfo.C_array; #32f (?) + C_adc = hardware.sramInfo.C_adc; #25.2f + C_mbit = hardware.sramInfo.C_mbit; #14.2f + # Get total amortization cap + Cp = (Nunit*C_unit)*Cc+Nunit/(Nrows/C_unit)*C_array+C_adc+C_mbit; + + # Perform DP operation + dim_IA = K.int_shape(IA); + IA = tf.linalg.matrix_transpose(IA); + V_DP = VDDL*(1+(Cc/Cp)*K.dot(IA,W)); + V_DP = tf.linalg.matrix_transpose(V_DP); + + # Accumulate multi-bit inputs + V_MBIT = MBIT_IN_actual(V_DP,hardware.sramInfo); + + # Debugs + #tf.print("V_DP",V_DP[0:8,0]); + #tf.print("V_MBIT",V_MBIT[0:8,0]); + + # Return mbit accumulated DP + return V_MBIT; + + \ No newline at end of file diff --git a/models/CONV_charge.py b/models/CONV_charge.py new file mode 100644 index 0000000000000000000000000000000000000000..acb80ac7ecccd5efa33d4bbee94b710422b95268 --- /dev/null +++ b/models/CONV_charge.py @@ -0,0 +1,170 @@ +############################################## +###### This file models the 6T CIM-SRAM ###### +###### stochastic MAC operation ###### +############################################## +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import numpy as np +from models.Analog_DP import int_DP_num, int_DP_cap + +########################### NUMERICAL, INTERPOLATION-BASED MODELS ############################### +# Convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_se_num(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOISE=False,EN_QUANT=False): + # Check channel position and always put it second + if(data_format == 'channels_last'): + input_size = K.int_shape(IA)[1]; + n_channels_in = K.int_shape(IA)[-1]; + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + raise ValueError('Unknown data format: ' + str(data_format)) + # Retrieve LUTs + DP_LUT = hardware.sramInfo.DP_LUT; + sig_DP_LUT = hardware.sramInfo.sig_DP_LUT; + # Retrieve physical quantities & config + IAres = hardware.sramInfo.IAres; + C_unit = hardware.sramInfo.C_unit; + sparse_mode = (hardware.sramInfo.input_mode == 'sparse'); + # Derive all dimensions + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + # padding_size = int(tf.math.ceil((filter_size-1)/2)) + padding_size = padding; + inc_dim = padding_size-(kern_size[0]-1); + # Get corresponding LUTs + C_in_conf = np.ceil(np.log2(np.ceil(filter_size*filter_size*n_channels_in//C_unit))); + C_in_conf = C_in_conf.astype("int32"); + DP_LUT = DP_LUT[C_in_conf]; + sig_DP_LUT = sig_DP_LUT[C_in_conf]; + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size,padding_size); + # Reshape IA_t to add the channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,(input_size+inc_dim)*(input_size+inc_dim),filter_size*filter_size*n_channels_in)); + # One-hot input vector for bit serial processing + IA_t = tf.math.floormod(tf.bitwise.right_shift(tf.expand_dims(IA_t,-1), tf.range(IAres)), 2);# Mode-dependent input values + if(sparse_mode): + IA_t = IA_t; + else: + IA_t = 2*IA_t-1; + + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + + # Perform differential computation + V_DP = int_DP_num(hardware,IA_t,W_BL,DP_LUT,T_DP_conf,dist_inf,sig_DP_LUT,EN_NOISE) + + # Reshape output + V_DP = K.permute_dimensions(V_DP,(0,2,1)) + V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): + V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) + + + +##################################### ANALYTICAL MODELS ######################################### +# Convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_se_ana(hardware,IA,W,sig_MoM_inf,data_format,EN_NOISE,EN_QUANT): + # Check channel position and always put it second + if(data_format == 'channels_last'): + input_size = K.int_shape(IA)[1]; + n_channels_in = K.int_shape(IA)[-1]; + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + raise ValueError('Unknown data format: ' + str(data_format)) + + # Retrieve physical quantities & config + C_unit = hardware.sramInfo.C_unit.data; + sparse_mode = (hardware.sramInfo.input_mode == 'sparse'); + # Derive all dimensions + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + Nunit = np.ceil(filter_size*filter_size*n_channels_in//C_unit); + + padding_size = int(tf.math.ceil((filter_size-1)/2)) + conv_size = input_size+padding_size + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size); + # Reshape IA_t to add the channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,input_size*input_size,filter_size*filter_size*n_channels_in)); + # One-hot input vector for bit serial processing + IA_t = tf.unpackbits(IA_t); + # Mode-dependent input values + if(sparse_mode): + IA_t = IA_t; + else: + IA_t = 2*IA_t-1; + + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + # Compute parallel dot-products corresponding to zero-padded convolutions + V_DP = int_DP_cap(hardware,IA_t,W,Nunit,sig_MoM_inf,EN_NOISE) + + # Reshape output + V_DP = K.permute_dimensions(V_DP,(0,2,1)) + V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): + V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) + + +############################### INTERNAL FUNCTIONS (IM2COL FACILITIES) #################################### + +# Transform multi-D input feature map into batchsize+2D matrix +def toeplitz(x,filter_size,padding_size): + # Get x dimensions (input feature map assumed to be have square 2D dims) + x_dim = K.int_shape(x); + # Get filter dimensions + x_size = x_dim[-1]; + c_in = x_dim[1]; + inc_dim = (filter_size-2)-padding_size; + # Get output + conv_size = x_size+2*padding_size; # Static padding of the input vector + x_pad = K.spatial_2d_padding(x,padding=((padding_size,padding_size),(padding_size,padding_size)),data_format='channels_first') + # Create set of toeplitz matrices from padded matrix rows + x_col = tf.unstack(x_pad,axis=-1); + + #shift_list = [-shift_val for shift_val in range(filter_size)]; + x_shift = [] + for i in range(filter_size): + x_temp = K.permute_dimensions(tf.stack(x_col[i:i+(x_size-2*inc_dim)],axis=2),(0,1,3,2)) + x_shift.append(tf.unstack(x_temp,axis=2)) + # print(K.int_shape(x_temp)) + # print(K.int_shape(x_shift[i])) + + x_toep = []; + for i in range(conv_size): + x_temp = [] + for j in range(filter_size): + # print('conv size: '+str(i)) + # print('filt size: '+str(j)) + # print(K.int_shape(x_shift[j][i])); + + x_temp.append(x_shift[j][i]) + x_toep.append(K.permute_dimensions(tf.stack(x_temp,axis=2),(0,1,3,2))) +# print(K.eval(x_toep[i])) + # Create final input matrix from the Toeplitz submatrices + x_b = []; + for i in range(filter_size): + x_b.append(K.concatenate(x_toep[i:i+(x_size-2*inc_dim)],axis=2)) + #print(K.eval(x_b[i][1,1,:,:])) + # Final tensor with size (batch_size,c_in,output_dim,output_dim) + x_out = K.concatenate(x_b,axis=-1); + # Permute dimensions to perform the correct DP + return x_out; + +# Image to column transform +def im2col(x,filter_size,padding_type): + image_patches = tf.image.extract_patches(x, + [1, filter_size, filter_size, 1], + [1, 1, 1, 1], [1, 1, 1, 1], + padding=padding_type); + print(K.int_shape(image_patches)); + return image_patches; + diff --git a/models/DTSE.py b/models/DTSE.py index 16ea55f37ebf0f95ffc4f58246a52c5e5dfe26ba..c3faa4d7252d3fe1ecc5fc708d54ccb55665261a 100644 --- a/models/DTSE.py +++ b/models/DTSE.py @@ -10,8 +10,8 @@ import math ## /// Simple DP binary-weighting /// def MBIT_weight(V_DP,Wres): - weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL)); - V_DP = weightsVec*V_DP/K.sum(weightsVec); + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL))/(2**Wres); + V_DP = K.sum(V_DP*weightsVec,axis=-1); # Return binary-weighted value return V_DP; @@ -19,8 +19,8 @@ def DTSE_ideal(V_BL,V_BLB,VDD,V_beta,Wres): # Perform single-bit DTSE V_DP = (VDD+V_beta)/2 + (V_BL-V_BLB)/2; # Weight DTSE as should be - weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL)); - V_DP = weightsVec*V_DP/K.sum(weightsVec); + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL))/(2**Wres); + V_DP = K.sum(V_DP*weightsVec,axis=-1); # Return single-ended value return V_DP @@ -36,7 +36,7 @@ def DTSE_paras(V_BL,V_BLB,VDD,V_beta,C_int_BL,C_int_BLB,C_L,Cp1,Cp2,Cp3,offset,W # Parasitic-aware DTSE model V_DP = 1/(C_int_BLB+C_int_BL+C_L+Cp1+4*Cp2+Cp3)*((C_int_BLB+Cp1)*(VDD+V_beta) - + (C_int_BL+C_L+2*Cp2+Cp3)*V_BL - C_int_BLB*V_BLB) + offset; + + (C_int_BL+C_L+2*Cp2+Cp3)*V_BL - C_int_BLB*V_BLB) + offset; # ! Outdated model ! # Return DP result return V_DP; diff --git a/models/MAC_charge.py b/models/MAC_charge.py new file mode 100644 index 0000000000000000000000000000000000000000..86a3c79f24d4576b10e520e88175d38f1eb610d1 --- /dev/null +++ b/models/MAC_charge.py @@ -0,0 +1,96 @@ +############################################## +###### This file models the 6T CIM-SRAM ###### +###### stochastic MAC operation ###### +############################################## +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import numpy as np +from models.Analog_DP import int_BL_num, int_BL_DAC, int_BL_PWM +from models.Analog_DP import int_DP_num, int_DP_cap + + + +########################### NUMERICAL, INTERPOLATION-BASED MODELS ############################### +def MAC_op_se_num(hardware,IA,W,dist_inf,T_DP_conf,EN_NOISE): + # Retrieve LUTs + DP_LUT = hardware.sramInfo.DP_LUT; + sig_DP_LUT = hardware.sramInfo.sig_DP_LUT; + # Retrieve physical quantities & config + IAres = hardware.sramInfo.IAres; + C_unit = hardware.sramInfo.C_unit; + sparse_mode = (hardware.sramInfo.input_mode == 'sparse'); + # Retrieve inputs dimensions & select desired LUT + dim_IA = K.int_shape(IA); + C_in_conf = np.ceil(np.log2(np.ceil(dim_IA[-1]//C_unit))); + C_in_conf = C_in_conf.astype("int32"); + DP_LUT = DP_LUT[C_in_conf]; + sig_DP_LUT = sig_DP_LUT[C_in_conf]; + + # One-hot input vector for bit-serial input processing + IA = tf.reshape(tf.repeat(IA,IAres,axis = -1),(-1,dim_IA[1],IAres)); + IA = tf.math.floormod(IA, 2**np.arange(IAres)); + IA = K.permute_dimensions(K.reverse(IA,axes=-1),(0,2,1)); + # Mode-dependent input values + if(sparse_mode): + IA = 2*IA-1; + IA = tf.expand_dims(IA[...,0],-1)*IA - tf.expand_dims((1-IA[...,0]),-1)*(1-IA); + else: + IA = 2*IA-1; + # Get actual values from LUTs using indexes + V_DP = int_DP_num(hardware,IA,W,DP_LUT,T_DP_conf,dist_inf,sig_DP_LUT,EN_NOISE); + # debug + # tf.print("DP conf",T_DP_conf); + return V_DP + + +##################################### ANALYTICAL MODELS ######################################### +# Differential MAC operation +def MAC_op_se_ana(hardware,IA,W,sig_MoM_inf,T_DP_conf,EN_NOISE): + # Get parameters + IAres = hardware.sramInfo.IAres; + C_unit = hardware.sramInfo.C_unit; + sparse_mode = (hardware.sramInfo.input_mode == 'sparse'); + # Retrieve inputs dimensions & select desired LUT + dim_IA = K.int_shape(IA); + Nunit = 2**np.ceil(np.log(np.ceil(dim_IA[-1]//C_unit))/np.log(2)); + Nunit = Nunit.astype("int32"); + + # One-hot input vector for bit-serial input processing + #tf.print("IA init",IA[0,0:12],summarize=50); + expVec = K.cast(K.arange(IAres),dtype=K.dtype(IA)); + IA = tf.repeat(tf.expand_dims(IA,axis=-1),IAres,axis=-1); + IA = tf.math.floormod(floor_through(IA/K.pow(2.,expVec)),2.); + IA = K.reverse(IA,axes=-1); + +# IA_flat = []; +# for i in range(IAres): +# if(i==0): +# IA_flat.append(tf.where(K.greater_equal(IA,2**(IAres-1)),K.ones_like(IA),K.zeros_like(IA))); +# else: +# IA_flat.append(tf.where(K.greater_equal(tf.math.floormod(IA,2**i),2**(IAres-i-1)),K.ones_like(IA),K.zeros_like(IA))); + +# IA = K.reverse(tf.stack(IA_flat,axis=-1),axes=-1); + + #tf.print("IA trans",IA[0,0:12],summarize=50); + # Mode-dependent input values + if(sparse_mode): + IA = 2*IA-1; + IA = tf.expand_dims(IA[...,0],-1)*IA - tf.expand_dims((1-IA[...,0]),-1)*(1-IA); + else: + IA = 2*IA-1; + # Get actual values from LUTs using indexes + V_DP = int_DP_cap(hardware,IA,W,Nunit,sig_MoM_inf,EN_NOISE); + # Return result + return V_DP + + +## ///////////////////// Internal functions ////////////////////// +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; + diff --git a/models/MBIT_unit.py b/models/MBIT_unit.py new file mode 100644 index 0000000000000000000000000000000000000000..3c5783d35e0356487ad666c059ad1dce72ce5dca --- /dev/null +++ b/models/MBIT_unit.py @@ -0,0 +1,87 @@ +############################################# +######### HARDWARE MODEL FOR DTSE ########### +############################################# +import sys +import numpy as np +import keras.backend as K +import tensorflow as tf +import math + +## --- Sequential input accumulation --- +# /// Ideal model /// +def MBIT_IN_ideal(V_DP,sramInfo): + # Retrieve inputs precision + IAres = sramInfo.IAres; + VDDL = sramInfo.VDD.data/2; + # Get accumulated input + if(IAres == 1): + V_DP = tf.squeeze(V_DP,axis=-1); + else: + V_DP = VDDL+K.dot(V_DP-VDDL,K.pow(2,K.arange(0,IAres)))/K.pow(2,IAres); + return V_DP; + +# /// Actual model /// +def MBIT_IN_actual(V_DP,sramInfo): + # Retrieve inputs precision + IAres = sramInfo.IAres; + VDDL = sramInfo.VDD.data/2; + C_accdp = sramInfo.C_accdp; #40f + C_adc = sramInfo.C_adc; #25.2f + C_mbit = sramInfo.C_mbit; #14.2f + # Get non-half attenuation + alpha = C_accdp/(C_accdp+C_adc+C_mbit); + # Get accumulated input + if(IAres == 1): + V_DP = tf.squeeze(V_DP,axis=-1); + else: + V_DP = K.pow(alpha,IAres)*VDDL+(1-alpha)*K.dot(V_DP,tf.expand_dims(K.pow(alpha,K.arange(0.,IAres)),-1)); + V_DP = tf.squeeze(V_DP,axis=-1); + return V_DP; + +# /// LUT-based numerical model /// +def MBIT_IN_num(V_DP,sramInfo): + # do stuff + + return V_DP + + +## --- Spatial binary weighting model --- +# /// Simple DP binary-weighting /// +def MBIT_W_ideal(V_DP,sramInfo): + # Retrieve weights precision + Wres = sramInfo.Wres; + # Get accumulated result + if(Wres == 1): + V_DP = tf.squeeze(V_DP,axis=-1); + else: + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_DP)); + V_DP = K.dot(V_DP,tf.expand_dims(weightsVec,axis=-1)); + V_DP = tf.squeeze(V_DP,axis=-1)/K.sum(weightsVec); + # Return binary-weighted value + return V_DP; + +# /// Actual cap-wise binary weighting (model) /// +def MBIT_W_actual(V_DP,sramInfo): + # Retrieve weights precision + Wres = sramInfo.Wres; + VDDL = sramInfo.VDD.data/2; + C_accdp = sramInfo.C_accdp; #40f + C_adc = sramInfo.C_adc; #25.2f + C_mbit = sramInfo.C_mbit; #14.2f + # Get accumulated result + if(Wres == 1): + V_DP = tf.squeeze(V_DP,axis=-1); + else: + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_DP)); + V_DP = K.dot(V_DP,tf.expand_dims(weightsVec,axis=-1)); + V_DP = tf.squeeze(V_DP,axis=-1)/K.sum(weightsVec); + # Return binary-weighted value + return V_DP; + +# /// Actual cap-wise binary weighting (LUT) /// +def MBIT_W_num(V_DP,sramInfo,LUT): + # Retrieve weights precision + Wres = sramInfo.Wres; + # Return binary-weighted value + return V_DP; + diff --git a/models/makeModel.py b/models/makeModel.py index 92821e278ab55917d84e4ca74ce3fc3c2f0b2b8b..b0acac26c8d749df79425134953acaf1f767166d 100644 --- a/models/makeModel.py +++ b/models/makeModel.py @@ -25,13 +25,13 @@ def make_model(model_type,cf,Conv_,Conv,Dens_,Dens,Act,Quant,BatchNormalization, #model.add(Dropout(0.5)) model.add(Dens_(512,cf.dim,cf.channels,6.)) #model.add(Quant((pow(2,cf.abits)-1)*cf.dim*cf.dim*cf.channels)) - model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4.)) model.add(Act()) model.add(Dropout(0.)) model.add(Dens(256,4.)) #model.add(Quant((pow(2,cf.abits)-1)*512)) - model.add(BatchNormalization(512,2)) + model.add(BatchNormalization(512,2.)) model.add(Act()) model.add(Dropout(0.)) @@ -301,52 +301,5 @@ def make_model(model_type,cf,Conv_,Conv,Dens_,Dens,Act,Quant,BatchNormalization, model.summary() return model - -### Return the input size of all (unique) layers if a model ### -def getModelSize(modelType,cf): - if(modelType == 'TEST_MLP'): - first_dim = cf.dim*cf.dim*cf.channels; - model_size = np.array([first_dim]); - elif(modelType == 'MLP_512'): - model_size = np.array([first_dim,512]); - elif(modelType == '2C2D'): - first_size = cf.kern_size*cf.kern_size*cf.channels; - model_size = np.array([first_size, - cf.kern*cf.kern*64, - cf.dim/2*cf.dim/2*64, - 512]); - elif(modelType == 'BinaryNet'): - first_size = cf.kern_size*cf.kern_size*cf.channels; - model_size = np.array([first_size, - cf.kern_size*cf.kern_size*64, - cf.kern_size*cf.kern_size*64/4, - cf.kern_size*cf.kern_size*128, - cf.kern_size*cf.kern_size*128/4, - cf.kern_size*cf.kern_size*256, - cf.kern_size*cf.kern_size*256/4, - cf.kern_size*cf.kern_size*512, - cf.dim/2*cf.dim/2*512, - 1024, - 1024]); - else: - raise NameError('Network topology not supported, please select a valid one !\n'); - - return model_size; - -### Return a 1D-array with the position indexes of the FC/CONV layers in the selected sequential CNN ### -def getIndexOut(modelType): - if(modelType == 'TEST_MLP'): - indX_out = np.array([0,3]); - elif(modelType == 'MLP_three_stage_dbn'): - indX_out = np.array([0,4,8]); - elif(modelType == 'MLP_512'): - indX_out = np.array([0,4,8,12,16]); - elif(modelType == '2C2D'): - indX_out = np.array([0,3,8,12]); - elif(modelType == 'Valavi_2019_MNIST'): - indX_out = np.array([0,3,7,10,16]); - else: - raise NameError('Network topology not supported, please implement it or select a valid one !\n'); - - return indX_out; + \ No newline at end of file diff --git a/models/model_IMC.py b/models/model_IMC.py index 0dabd287de5ac31495935e9eac979e365ad36656..d675cb482a345f887d04f7def8d838aad72b9282 100644 --- a/models/model_IMC.py +++ b/models/model_IMC.py @@ -5,10 +5,14 @@ from keras.layers.advanced_activations import LeakyReLU from keras.regularizers import l2 import numpy as np -from layers.analog_BN_current_model import Analog_BN as Analog_BN_ideal -from layers.analog_BN_current_interp_PL import Analog_BN as Analog_BN_nonideal -from layers.quantized_layers_IMC import QuantizedConv2D,QuantizedDense -#from layers.quantized_layers_IMC_ABN import QuantizedDenseABN +from layers.analog_BN_current_model import Analog_BN as Analog_BN_current_ideal +from layers.analog_BN_current_interp_PL import Analog_BN as Analog_BN_current_nonideal +from layers.analog_BN_charge_model import Analog_BN as Analog_BN_charge_ideal +from layers.analog_BN_charge_interp_PL import Analog_BN as Analog_BN_charge_nonideal +from layers.quantized_layers_IMC import CIM_current_conv2D,CIM_charge_conv2D +from layers.quantized_layers_IMC import CIM_current_dense,CIM_charge_dense + +#from layers.quantized_layers_IMC_ABN import CIM_DenseABN from layers.quantized_ops import my_quantized_relu as quantize_op from models.ADC import quant_uni,Quant_train from models.makeModel import make_model @@ -30,7 +34,18 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): FLAG_QUANT = FLAGS[1]; IDEAL_ABN = FLAGS[2]; ABN_INC_ADC = FLAGS[3]; + + # Select the correct DP layer model based on the CIM type + if(sramInfo.cim_type == 'current'): + CIM_Conv2D = CIM_current_conv2D; + CIM_Dense = CIM_current_dense; + elif(sramInfo.cim_type == 'charge'): + CIM_Conv2D = CIM_charge_conv2D; + CIM_Dense = CIM_charge_dense; + else: + raise NameError('Selected CIM type not supported !') + print('###################################################') print('########### BUILDING CIM-SRAM NETWORK #############') print('###################################################') @@ -55,32 +70,34 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): Dens_ = lambda n,i,c: Dense(n,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) elif cf.network_type=='qnn': - Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', + Conv_ = lambda s,f,i,c,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', + Conv = lambda s,f,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) if(FLAG_QUANT): - # Act = lambda: LeakyReLU() Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) else: - # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) - Act = lambda: Quant_train(sramInfo) - # Act = lambda: Activation(lambda x: quant_uni(x,maxVal=0,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0,archType=sramInfo.arch.name)); - + if(sramInfo.cim_type == 'current'): + Act = lambda: Quant_train(sramInfo,ABN_INC_ADC) + elif(sramInfo.cim_type == 'charge'): + Act = lambda: Activation('linear'); + else: + raise NameError('Selected CIM type not supported !'); + Quant = lambda p: Activation('linear'); Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens = lambda n,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Dens_ = lambda n,i,c,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens_ = lambda n,i,c,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) elif cf.network_type=='full-qnn': - Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', + Conv_ = lambda s,f,i,c,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', + Conv = lambda s,f,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv_FP_ = lambda s, f, i, c: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', @@ -89,40 +106,46 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): if(FLAG_QUANT): Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) else: - # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) - Act = lambda: Quant_train(sramInfo,ABN_INC_ADC) + if(sramInfo.cim_type == 'current'): + Act = lambda: Quant_train(sramInfo,ABN_INC_ADC) + elif(sramInfo.cim_type == 'charge'): + Act = lambda: Activation('linear'); + else: + raise NameError('Selected CIM type not supported !'); # Act = lambda: Activation(lambda x: quant_uni(x,maxVal=0,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0,archType=sramInfo.arch.name)); - # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.5*dynRange/n,archType=sramInfo.arch.name)) Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.,archType=sramInfo.arch.name)) Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens = lambda n,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Dens_ = lambda n,i,c,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens_ = lambda n,i,c,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) # elif cf.network_type=='full-qnn-embedded': -# Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', +# Conv_ = lambda s,f,i,c,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', # activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), # kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) -# Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', +# Conv = lambda s,f,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', # activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), # kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) # if(FLAG_QUANT): # Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) # else: -# # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) -# Act = lambda: Quant_train(sramInfo) -# # Act = lambda: Activation(lambda x: quant_uni(x,maxVal=0,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0,archType=sramInfo.arch.name)); -# +# if(cim_type == 'current'): +# Act = lambda: Quant_train(sramInfo,ABN_INC_ADC) +# elif(cim_type == 'charge'): +# Act = lambda: Activation('linear'); +# else: +# raise NameError('Selected CIM type not supported !'); +# # # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.5*dynRange/n,archType=sramInfo.arch.name)) # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.,archType=sramInfo.arch.name)) # # Dens_FP = lambda n: Dense(n,use_bias=False) # -# Dens = lambda n,m: QuantizedDenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,m_sigma=4) +# Dens = lambda n,m: CIM_DenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,m_sigma=4) # -# Dens_ = lambda n,i,c,m: QuantizedDenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,m_sigma=4) +# Dens_ = lambda n,i,c,m: CIM_DenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,m_sigma=4) else: print('wrong network type, the supported network types in this repo are float, qnn, full-qnn, bnn and full-bnn') @@ -133,10 +156,19 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): # BatchNorm = lambda n,m: Activation('linear'); # elif(IDEAL_ABN): if(IDEAL_ABN): - BatchNorm = lambda n,m: Analog_BN_ideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m); + if(sramInfo.cim_type == 'current'): + BatchNorm = lambda n,m: Analog_BN_current_ideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m); + elif(sramInfo.cim_type == 'charge'): + BatchNorm = lambda n,m: Analog_BN_charge_ideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m); + else: + raise NameError('Selected CIM type not supported !'); else: - BatchNorm = lambda n,m: Analog_BN_nonideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m,EN_NOISE=EN_NOISE); - + if(sramInfo.cim_type == 'current'): + BatchNorm = lambda n,m: Analog_BN_current_nonideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m,EN_NOISE=EN_NOISE); + elif(sramInfo.cim_type == 'charge'): + BatchNorm = lambda n,m: Analog_BN_charge_nonideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m,EN_NOISE=EN_NOISE); + else: + raise NameError('Selected CIM type not supported !'); BatchNorm_FP = lambda: BatchNormalization(momentum=0.1,epsilon=1e-5) model = make_model(model_type,cf,Conv_,Conv,Dens_,Dens,Act,Quant,BatchNorm,Dens_FP,BatchNorm_FP,Conv_FP_); diff --git a/train_cim_qnn.py b/train_cim_qnn.py index 871695488041a5c5e4284135a767c58dad5ee725..ef04446795241c711bb03dbe97e54d74522ab8b3 100644 --- a/train_cim_qnn.py +++ b/train_cim_qnn.py @@ -10,10 +10,9 @@ from models.model_IMC import build_model, load_weights from utils.config_utils import Config from utils.load_data import load_dataset -from utils.config_hardware_model import SramInfo +from utils.config_hardware_model import SramInfo_current, SramInfo_charge from config.config_cim_cnn_param import* - # // Override configuration // override = {} override_dir = {} @@ -27,6 +26,13 @@ override = override_dir # Create config object cf = Config(config_path,cmd_args = override) +# // SramInfo class selection +if(cim_type == 'current'): + SramInfo = SramInfo_current; +elif(cim_type == 'charge'): + SramInfo = SramInfo_charge; +else: + raise NameError('Selected CIM type not supported !'); ############################ INTERNAL FUNCTIONS ############################## @@ -93,16 +99,16 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): ### Numpy input quantization ### def quant_input(x,IAres): # Quantize between 0 and 2^IAres - m = pow(2,IAres)-1; + print(x) + m = pow(2,IAres); y = m*(x+1)/2; - return np.around(y,decimals=0); + return K.clip(np.floor(y),0,m-1); ### Input pre-processing ### def process_input(dataset,IS_FL_MLP,precisions): # Get input resolution IAres = precisions[0]; # Get training and testing sets - print('loading data\n') train_data, test_data = load_dataset(dataset) # Reshape for first layer FC if(IS_FL_MLP): diff --git a/utils/config_hardware_model.py b/utils/config_hardware_model.py index f568d3bedcf5733de53d17f31ed4d3827abbdaf7..8eb93a42dc9b32b098eb4379e3c53eb50c693ebf 100644 --- a/utils/config_hardware_model.py +++ b/utils/config_hardware_model.py @@ -7,7 +7,7 @@ import tensorflow as tf from utils.linInterp import makeLookup2D, makeLookup3D -class SramInfo: +class SramInfo_current: def __init__(self,arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,CONF_FLAGS): # // Retrieve config flags // IS_EMBEDDED = CONF_FLAGS[0]; @@ -34,6 +34,7 @@ class SramInfo: self.r_gamma = r_gamma # [bits] self.r_beta = r_beta # [bits] # SRAM architecture & techno information (required by ELDO) + self.cim_type = 'current'; self.arch = SpiceObj(name=arch) # String ('6T','8T',...) self.tech = SpiceObj(name=tech); # String ('ST65nmBulk',...) self.typeT = SpiceObj(name=typeT); # String ('RVT','LVT',...) @@ -355,6 +356,154 @@ class SramInfo: # // Resistive ladder information self.Nconf_beta_ladder = 5; + +class SramInfo_charge: + def __init__(self,arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,CONF_FLAGS): + # // Retrieve config flags // + IS_EMBEDDED = CONF_FLAGS[0]; + ABN_INC_ADC = CONF_FLAGS[1]; + + # // Software options // + # Simulator + self.simulator = "eldo"; + # LUT files location + dir_LUT = './LUTs'; + # Noisy foward path in training + self.noise_at_inf = True; + # T_DP fixed by hardware + self.is_const_T_DP = SpiceObj(name="is_const_T_DP",data=True); + # Analog DP type: model or numerical + self.IS_NUM = True; + + # // Architecture information // + # IN/WEIGHT/OUT Resolution + self.IAres = IAres; # [bits] + self.Wres = Wres; # [bits] + self.OAres = OAres; # [bits] + # ABN resolution + self.r_gamma = r_gamma # [bits] + self.r_beta = r_beta # [bits] + # SRAM architecture & techno information (required by ELDO) + self.cim_type = 'charge'; + self.arch = SpiceObj(name=arch) # String ('6T','8T',...) + self.tech = SpiceObj(name=tech); # String ('ST65nmBulk',...) + self.typeT = SpiceObj(name=typeT); # String ('RVT','LVT',...) + # Input conversion type & mode + self.inputType = SpiceObj(name="InnerSerial"); # String ('DAC','PWM',...) + self.input_mode = 'accurate'; # One of "accurate" or "sparse" + # Size information + self.Nrows = SpiceObj('Nrows',Nrows); + self.C_unit = 36; + # Supply information + self.VDD = SpiceObj('VDD_VAL',VDD); # [V] + self.GND = SpiceObj('GND_VAL',0); + self.BBN = SpiceObj('BBN_VAL',BBN); + self.BBP = SpiceObj('BBP_VAL',BBP); + + # // Analog DP // + # --- Hardware data --- + # Timing information + self.T_DP_vec = np.arange(4); self.T_DP_vec = self.T_DP_vec.astype("float32"); + self.T_rise = SpiceObj('T_rise',25e-12) # [s] + self.T_fall = SpiceObj('T_fall',25e-12) # [s] + self.T_start = SpiceObj('T_start',0.1e-9) # [s] + # DR & activities + self.DR = SpiceObj('DR',7/8*self.VDD.data) # [V] + # Computation and parasitic caps + self.Cc = 0.697e-15; + self.C_array = 32.5e-15; # to be checked + # --- DP post-layout LUT --- + C_in_conf = 5; + # - Nominal result - + DP_LUT_temp = []; + for i in range(C_in_conf): +# DP_vec = np.arange(-self.C_unit*np.pow(2,C_in_conf),self.C_unit*np.pow(2,C_in_conf)+1,np.pow(2,C_in_conf)); +# # Path to DP transfer function from SPICE (update when necessary) +# path_dir = dir_LUT+'/DP/TF_DP_10TC_1152cells_LUT_PL.txt'; +# #path_dir = dir_LUT+'<insert_DP_TF_filename>.txt'; +# data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); +# temp_LUT = data_temp[::,:len(T_DP_vec)]+(data_temp[::,-1]-data_temp[::,-2]); +# temp_LUT = temp_LUT.astype("float32"); +# # Get LUT +# DP_LUT_temp.append(makeLookup2D(temp_LUT,DP_vec,self.T_DP_vec)); + DP_LUT_temp.append(None); + self.DP_LUT = DP_LUT_temp; + # - Deviation per DP result - + for i in range(C_in_conf): +# DP_vec = np.arange(-self.C_unit*np.pow(2,C_in_conf),self.C_unit*np.pow(2,C_in_conf)+1,np.pow(2,C_in_conf)); +# # Path to DP transfer function from SPICE (update when necessary) +# path_dir = dir_LUT + '/DP/TF_DP_10TC_1152cells_LUT_PL_dev.txt'; +# #path_dir = dir_LUT+'/DP/<insert_DP_dev_filename>.txt' +# data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); +# temp_LUT = data_temp[::,:len(T_DP_vec)]; +# temp_LUT = temp_LUT.astype("float32"); +# # Get LUT +# DP_LUT_temp.append(makeLookup2D(temp_LUT,DP_vec,self.T_DP_vec)); + DP_LUT_temp.append(None); + self.sig_DP_LUT = DP_LUT_temp; + + # // MBIT information (seqIn-spatW processing) // + # --- Hardware data --- + self.C_mbit = 14.5e-15; + self.C_accdp = 40e-15; + # --- MBIT input post-layout LUT --- + MBIT_in_vec = np.linspace(0,self.VDD.data,400); + # Retrieve data +# path_dir = dir_LUT+'/MBIT/TF_MBIT_IN.txt'; +# #path_dir = dir_LUT+'<path_to_DTSE_filename>.txt'; +# data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); +# data_temp = np.reshape(data_temp,(len(MBIT_in_vec),len(MBIT_in_vec))); +# # Get LUT +# self.MBIT_IN_LUT = makeLookup2D(temp_LUT,MBIT_in_vec,MBIT_in_vec); + self.MBIT_IN_LUT = None; + # --- MBIT weights post-layout LUT --- +# MBIT_w_vec = np.linspace(0,self.VDD.data,400); +# # Retrieve data +# path_dir = dir_LUT+'/MBIT/TF_MBIT_W.txt'; +# #path_dir = dir_LUT+'<path_to_DTSE_filename>.txt'; +# data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); +# data_temp = np.reshape(data_temp,(len(MBIT_w_vec),len(MBIT_w_vec))); +# # Get LUT +# self.MBIT_W_LUT = makeLookup2D(temp_LUT,MBIT_w_vec,MBIT_w_vec); + self.MBIT_W_LUT = None; + + # // ABN information // + # --- Hardware data --- + # Supply voltage + self.Vmax_beta = 0.02; + # Timing + self.T_ABN = SpiceObj(name='T_ADC',data=5e-9); + # Load capacitance + self.C_adc = 25.4e-15; + +# path_dir = dir_LUT+'/ABN/mean_TF_ABN_PL_int.txt'; +# #path_dir = dir_LUT+'<path_to_ABN_TF_filename>.txt'; +# # Get & reshape data +# data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); +# temp_LUT = np.reshape(data_temp,(2**r_gamma,Nabn,8)); +# if(ABN_INC_ADC): +# temp_LUT = temp_LUT[...,4:8]; +# else: +# raise NameError('ABN with zoom-ADC must include ADC !'); +# temp_LUT = temp_LUT.astype("float32"); temp_LUT = np.flip(temp_LUT,axis=-1); +# # Make 2D lookup of linear interpolations +# self.ABN_LUT = makeLookup2D(temp_LUT,np.arange(0,2**r_gamma),np.linspace(0,self.VDD,Nabn)); +# # ABN mismatch +# path_dir = dir_LUT+'/ABN/std_TF_ABN_PL_int.txt'; +# #path_dir = dir_LUT+'<path_to_ABN_dev_filename>.txt'; +# data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); +# temp_LUT = np.reshape(data_temp,(2**r_gamma,Nabn,8)); +# if(ABN_INC_ADC): +# temp_LUT = temp_LUT[...,4:8]; +# else: +# raise NameError('ABN with zoom-ADC must include ADC !'); +# temp_LUT = temp_LUT.astype("float32"); temp_LUT = np.flip(temp_LUT,axis=-1); +# # Make 2D lookup of linear interpolations +# self.sig_ABN_LUT = makeLookup2D(temp_LUT,np.arange(0,2**r_gamma),np.linspace(0,self.VDD,Nabn)); + + self.ABN_LUT = None; + self.sig_ABN_LUT = None; + class SpiceObj: def __init__(self,name=None,data=None): @@ -481,12 +630,22 @@ def genHardware(sramInfo): n_body = 1.15; # Total BL capitalize (not really techno-relevant, depends on Nrows --> should be cap/cell) C_int = 10e-15/256; - C_par = sramInfo.C_LBL.data; + if(sramInfo.cim_type == 'current'): + C_par = sramInfo.C_LBL.data; + elif(sramInfo.cim_type == 'charge'): + C_par = sramInfo.C_array; + else: + raise NameError('Selected CIM type not supported !'); C_tot = Nrows*(C_int+C_par); # Precharge & Non-linear mapping - V_WL0 = sramInfo.act_WL.data*sramInfo.VDD.data; - V_BL0 = sramInfo.act_BL.data*sramInfo.VDD_BL.data; - V_Q0 = sramInfo.V_Q0.data; + if(sramInfo.cim_type == 'current'): + V_WL0 = sramInfo.act_WL.data*sramInfo.VDD.data; + V_BL0 = sramInfo.act_BL.data*sramInfo.VDD_BL.data; + V_Q0 = sramInfo.V_Q0.data; + else: + V_WL0 = None; + V_BL0 = None; + V_Q0 = None; else: raise NameError('Selected flavor not supported !\n') else: