diff --git a/.gitignore b/.gitignore
index 7ed5a41b372e12797af9e5489f615ae77c730c51..6573e3881aa7628fa2ee30efaa5bf9f2357ce1a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,9 @@
 my_datasets/
 # Saved files
 saved_models/
+
+# Logs
+logs/
+
+# Python cached files
+*/__pycache__/
diff --git a/config/config_cim_cnn_param.py b/config/config_cim_cnn_param.py
index 2d4a663e6459d0d9c9401c53b6db0a862405dedc..adc432ef53372f72288271a913ebf20c8b1a8b06 100644
--- a/config/config_cim_cnn_param.py
+++ b/config/config_cim_cnn_param.py
@@ -3,7 +3,7 @@
 ### Dataset & Neural net information ###
 ########################################
 # // Dataset //
-config_path = "config_qnn_cluster"
+config_path = "config_cim_cnn_param"
 dataset_name = "MNIST";
 dim=28
 channels=1
@@ -11,7 +11,7 @@ classes=10
 # // Network structure //
 network_type = "full-qnn";
 # network_struct = "1C1D"
-network_struct = "MLP_128_64_10"
+network_struct = "MLP_three_stage_abn"
 OP_TYPE = "FC";
 C_IN_VEC = [1024,128];
 C_OUT_VEC = [128,64];
@@ -78,8 +78,12 @@ EN_NOISE = 0;
 ANALOG_BN = 1;
 # Embedded ABN
 IS_EMBEDDED = 0;
+# Ideal or effective ABN HW model
+IDEAL_ABN = 1;
 # ABN model includes ADC behaviour
 ABN_INC_ADC = 1;
+# Use post-layout model instead of pre-layout versions
+FLAG_PL = 1;
 # Enable saving
 SAVE_EN = 1;
 # Is first layer FC (depends on network_struct)
diff --git a/layers/analog_BN.py b/layers/analog_BN.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ea47feacd3adad14609a78d6ceed82d50e2ca36
--- /dev/null
+++ b/layers/analog_BN.py
@@ -0,0 +1,354 @@
+# ////////////////////////////////////////////////////////////////////////////////////////////////////////////
+# /////////////////////////// Custom batchnorm implementing actual hardware ABN //////////////////////////////
+# ////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+# Inspired from https://stackoverflow.com/questions/54101593/conditional-batch-normalization-in-keras
+
+import numpy as np
+import math
+
+import tensorflow as tf
+import keras.backend as K
+
+from keras import regularizers, initializers, constraints
+#from keras.legacy import interfaces
+from keras.layers import Layer, Input, InputSpec
+from keras.models import Model
+
+class Analog_BN(Layer):
+    """ Analog batchnorm layer
+    """
+    # /// Init layer ///
+#    @interfaces.legacy_batchnorm_support
+    def __init__(self,
+                 axis=-1,
+                 momentum=0.99,
+                 epsilon=1e-5,
+                 center=True,
+                 scale=True,
+                 renorm = True,
+                 beta_initializer='zeros',
+                 gamma_initializer='ones',
+                 moving_mean_initializer='zeros',
+                 moving_variance_initializer='ones',
+                 beta_regularizer=None,
+                 gamma_regularizer=None,
+                 activity_regularizer=None,
+                 beta_constraint=None,
+                 gamma_constraint=None,
+                 hardware = None,
+                 NB = None,
+                 **kwargs):
+
+        super(Analog_BN, self).__init__(**kwargs)
+        self.axis = axis
+        self.momentum = momentum
+        self.epsilon = epsilon
+        self.center = center
+        self.scale = scale
+        self.renorm = renorm
+        self.beta_initializer = initializers.get(beta_initializer)
+        self.gamma_initializer = initializers.get(gamma_initializer)
+        self.moving_mean_initializer = initializers.get(moving_mean_initializer)
+        self.moving_variance_initializer = (initializers.get(moving_variance_initializer))
+        self.beta_regularizer = regularizers.get(beta_regularizer)
+        self.gamma_regularizer = regularizers.get(gamma_regularizer)
+        self.activity_regularizer = regularizers.get(activity_regularizer)
+        self.beta_constraint = constraints.get(beta_constraint)
+        self.gamma_constraint =
constraints.get(gamma_constraint) + self.DRlim = (hardware.sramInfo.GND.data,hardware.sramInfo.VDD.data); + self.gamma_range = 4*math.sqrt(NB) + self.ABNstates = (2**hardware.sramInfo.r_gamma,2**hardware.sramInfo.r_beta) + self.IS_DIFF = (hardware.sramInfo.arch.name == '6T'); # Update with other arch types + + # /// Build layer /// + def build(self,input_shape): + dim = input_shape[self.axis]; + + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer received an input with shape ' + + str(input_shape) + '.') + shape = (dim,) + + if self.scale: + # gamma_constraint = Clip(0.0,4.0) + + self.gamma = self.add_weight(shape = shape, + name = 'gamma', + initializer = self.gamma_initializer, + regularizer = self.gamma_regularizer, + constraint = self.gamma_constraint) + else: + self.gamma = None + + if self.center: + # beta_constraint = Clip(-100.0,100.0); + + self.beta = self.add_weight(shape = shape, + name = 'beta', + initializer = self.beta_initializer, + regularizer = self.beta_regularizer, + constraint = self.beta_constraint) + else: + self.beta = None + + if self.renorm: + self.moving_mean_DP = self.add_weight( + shape=shape, + name='moving_mean_DP', + initializer=self.moving_mean_initializer, + trainable=False) + self.moving_variance_DP = self.add_weight( + shape=shape, + name='moving_variance_DP', + initializer=self.moving_variance_initializer, + trainable=False) + else: + self.moving_mean_DP = K.variable(0.0) + self.moving_variance_DP = K.variable(1.0) + + super(Analog_BN, self).build(input_shape) + + # /// Call layer (train or inference) /// + def call(self,inputs,training=None): + + input_shape = K.int_shape(inputs[0]) + + # Prepare broadcasting shape. + ndim = len(input_shape) + reduction_axes = list(range(len(input_shape))) + del reduction_axes[self.axis] + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] + + # Determines whether broadcasting is needed. + needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) + + def normalize_inference(): + # Explicitely broadcast parameters when required. 
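# (Illustrative note, assumption: this mirrors Keras' stock BatchNormalization.
#  Broadcasting is only needed when the normalised axis is not the last one; the
#  1-D statistics are then reshaped to broadcast_shape -- all ones except at
#  self.axis -- e.g. a channels-first feature map (N, C, H, W) with axis=1 gives
#  broadcast_shape = [1, C, 1, 1] so gamma/beta line up with the channel axis.)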
+ if needs_broadcasting: + # Norm params + if self.renorm: + broadcast_moving_mean_DP = K.reshape(self.moving_mean_DP, + broadcast_shape); + broadcast_moving_variance_DP = K.reshape(self.moving_variance_DP, + broadcast_shape); + else: + broadcast_moving_mean_DP = None; + broadcast_moving_variance_DP = None; + # Scale param + if self.scale: + broadcast_gamma = K.reshape(self.gamma,broadcast_shape); + else: + broadcast_gamma = None + # Offset param + if self.center: + broadcast_beta = K.reshape(self.beta,broadcast_shape); + else: + broadcast_beta = None + # Return batchnorm + return ABN( + inputs, + broadcast_moving_mean_DP, + broadcast_moving_variance_DP, + broadcast_beta, + broadcast_gamma, + axis = self.axis, + epsilon = self.epsilon, + DR_tuple = self.DRlim, + gamma_range = self.gamma_range, + ABNstates = self.ABNstates, + IS_DIFF = self.IS_DIFF, + training=training) + else: + return ABN( + inputs, + self.moving_mean_DP, + self.moving_variance_DP, + self.beta, + self.gamma, + axis = self.axis, + epsilon = self.epsilon, + DR_tuple = self.DRlim, + gamma_range = self.gamma_range, + ABNstates = self.ABNstates, + IS_DIFF = self.IS_DIFF, + training=training) + + # If the learning phase is *static* and set to inference: + if training in {0, False}: + return normalize_inference() + + + # If the learning is either dynamic, or set to training: + (normed_training,mean_DP,variance_DP) = \ + norm_ABN_in_train( + inputs, self.beta, self.gamma, self.renorm, reduction_axes, + epsilon=self.epsilon,DR_tuple=self.DRlim,gamma_range=self.gamma_range,ABNstates=self.ABNstates,IS_DIFF=self.IS_DIFF,training=training) + # ??? + if K.backend() != 'cntk': + sample_size = K.prod([K.shape(inputs[0])[axis] + for axis in reduction_axes]) + sample_size = K.cast(sample_size, dtype=K.dtype(inputs[0])) + if K.backend() == 'tensorflow' and sample_size.dtype != 'float32': + sample_size = K.cast(sample_size, dtype='float32') + + # sample variance - unbiased estimator of population variance + variance_DP *= sample_size / (sample_size - (1.0 + self.epsilon)) + + # Update moving mean and variance during training + self.add_update([K.moving_average_update(self.moving_mean_DP, + mean_DP, + self.momentum), + K.moving_average_update(self.moving_variance_DP, + variance_DP, + self.momentum)]) + + # Pick ABN result for either training or inference + return K.in_train_phase(normed_training, + normalize_inference, + training=training) + + + def get_config(self): + config = { + 'axis': self.axis, + 'momentum': self.momentum, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'renorm': self.renorm, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'moving_mean_initializer': + initializers.serialize(self.moving_mean_initializer), + 'moving_variance_initializer': + initializers.serialize(self.moving_variance_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint), + 'DRlim': self.DRlim, + 'IS_DIFF': self.IS_DIFF + } + base_config = super(Analog_BN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + + return input_shape[1] +############################################## Internal functions 
################################################## + +# Perform ABN +def ABN(x_in,mov_mean_DP=0.0,mov_variance_DP=1.0,beta=0.0,gamma=0.0,axis=-1,epsilon=1e-5,DR_tuple=None,gamma_range=None,ABNstates=None,IS_DIFF=True,training=False): + # Retrieve differential or se output + if(IS_DIFF): + V_BL = x_in[0]; + V_BLB = x_in[1]; + else: + V_BL = x_in; + # tf.print("V_RBL",V_BL[0],summarize=10) + + # Get min and max DR limits + minDR = DR_tuple[0]; + maxDR = DR_tuple[1]; + + # Set 'None' parameters to their initial values + if gamma is None: + gamma = K.constant(1.0); + if beta is None: + beta = K.constant(0.0); + if mov_mean_DP is None: + mov_mean_DP = K.constant(DR_tuple[1]); + if mov_variance_DP is None: + mov_variance_DP = K.constant(1.0); + + # Specify non-centernormalized correction factors + mu_goal = maxDR/2; + sigma_goal = maxDR; var_goal = sigma_goal*sigma_goal; + + # Compute differential or single-ended DP with switched-cap unit + if(IS_DIFF): + V_DP = maxDR/2 + (V_BL-V_BLB)/2 + else: + V_DP = V_BL; + # Get custom renorm factors + sigma_DP = K.sqrt(mov_variance_DP); + mov_mean_DP_t = mov_mean_DP - mu_goal/sigma_goal*sigma_DP; + mov_variance_DP_t = mov_variance_DP/var_goal; + # Get equivalent coefficients + sigma_DP_t = K.sqrt(mov_variance_DP_t); + gamma_eq = gamma/(sigma_DP_t + epsilon); + beta_eq = beta - gamma*mov_mean_DP_t/(sigma_DP_t + epsilon); + beta_eq_norm = beta_eq/gamma_eq + maxDR/2; + # Quantize gamma and beta + Ns_gamma = ABNstates[0]; + Ns_beta = ABNstates[1]; + gamma_eq = K.clip(floor_through(gamma_eq),0,Ns_gamma-1); + # beta_eq_norm = K.clip(floor_through(beta_eq_norm/(2*maxDR/5)*256)*(maxDR)/256,-maxDR/2,maxDR/2) - maxDR/2; + beta_eq_norm = beta_eq_norm - maxDR/2 + # Apply (ideal, for now) equivalent coefficient to get ABN result. 
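# (Illustrative note, not from the original source: the statements above are the
#  usual batch-norm folding. For y = gamma*(x - mu)/sigma + beta, one equivalent
#  gain/offset pair is
#      gamma_eq = gamma / (sigma + eps)
#      beta_eq  = beta - gamma * mu / (sigma + eps)
#  and the extra mu_goal/sigma_goal terms re-target the dot-product distribution
#  to mid-rail with a full-swing spread, so the hardware ABN only has to apply
#  the single multiply-add computed below.)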
+ V_ABN = gamma_eq*(V_DP+beta_eq_norm); + # Return (unclipped) result + return V_ABN; + +# Compute mean and variance of the batch then perform ABN with it, when enabled +def norm_ABN_in_train(x_tuple,beta=0.0,gamma=1.0,renorm=True,axis=-1,epsilon=1e-5,DR_tuple=None,gamma_range=None,ABNstates=None,IS_DIFF=True,training=False): + # Retrieve differential tensors + V_BL = x_tuple[0]; + V_BLB = x_tuple[1]; + # Retrieve max DR (VDD by default) + maxDR = DR_tuple[1]; + # Compute mean and variance of each batch when desired + if(renorm): + # Compute differential or single-ended DP with switched-cap unit + if(IS_DIFF): + V_DP = maxDR/2 + (V_BL-V_BLB)/2 + else: + V_DP = V_BL; + # Get mean and variance + mean_DP = K.mean(V_DP,axis=0); + variance_DP = K.var(V_DP,axis=0); + else: + mean_DP = K.constant(0.0); + variance_DP = K.constant(1.0); + # Compute ABN with specified mean and variance + V_DP_BN = ABN(x_tuple,mean_DP,variance_DP,beta,gamma,axis,epsilon,DR_tuple,gamma_range,ABNstates,IS_DIFF,training); + # Return a tuple of BN_result, mean and variance + return (V_DP_BN,mean_DP,variance_DP); + +# Gamma & Beta constaints + +class Clip(constraints.Constraint): + def __init__(self, min_value, max_value=None): + self.min_value = min_value + self.max_value = max_value + if not self.max_value: + self.max_value = -self.min_value + if self.min_value > self.max_value: + self.min_value, self.max_value = self.max_value, self.min_value + + def __call__(self, p): + return K.clip(p, self.min_value, self.max_value) + + def get_config(self): + return {"name": self.__call__.__name__, + "min_value": self.min_value, + "max_value": self.max_value} + + +# Truncated normal phi function +def phi_exp(x): + return 1/math.sqrt(2*math.pi)*K.exp(-0.5*(x*x)); +def phi_erf(x): + return 0.5*(1+tf.math.erf(x/math.sqrt(2))); + +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. 
+ A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; diff --git a/layers/analog_BN_current_interp_PL.py b/layers/analog_BN_current_interp_PL.py new file mode 100644 index 0000000000000000000000000000000000000000..a11607dfc8c95788ebf8d8f1cb8a3edd713effcc --- /dev/null +++ b/layers/analog_BN_current_interp_PL.py @@ -0,0 +1,450 @@ +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// +# /////////////////////////// Custom batchnorm implementing actual hardware ABN ////////////////////////////// +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// + +# Inspired from https://stackoverflow.com/questions/54101593/conditional-batch-normalization-in-keras + +import numpy as np +import math + +import tensorflow as tf +import keras.backend as K + +from keras import regularizers, initializers, constraints +#from keras.legacy import interfaces +from keras.layers import Layer, Input, InputSpec +from keras.models import Model + +# Temporary folder +import tempfile +import sys +import subprocess +import time + +# Current ABN model +from models.ABN_current import makeLookupABN, doInterpABN +from models.ABN_current import round_through, floor_through + + +class Analog_BN(Layer): + """ Analog batchnorm layer + """ + # /// Init layer /// +# @interfaces.legacy_batchnorm_support + def __init__(self, + axis=-1, + momentum=0.99, + epsilon=1e-5, + center=True, + scale=True, + renorm = True, + beta_initializer='zeros', + gamma_initializer=tf.keras.initializers.Constant(value=3), + moving_mean_initializer='zeros', + moving_variance_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + activity_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + hardware = None, + NB = None, + m_sigma = 1, + Npoints = 401, + EN_NOISE = 0, + **kwargs): + + super(Analog_BN, self).__init__(**kwargs) + self.axis = axis + self.momentum = momentum + self.epsilon = epsilon + self.center = center + self.scale = scale + self.renorm = renorm + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.moving_mean_initializer = initializers.get(moving_mean_initializer) + self.moving_variance_initializer = (initializers.get(moving_variance_initializer)) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + self.hardware = hardware; + self.DRlim = (hardware.sramInfo.GND.data,hardware.sramInfo.VDD.data); + self.gamma_range = 4*math.sqrt(NB) + self.ABNstates = (2**hardware.sramInfo.r_gamma,2**hardware.sramInfo.r_beta) + self.IS_DIFF = (hardware.sramInfo.arch.name == '6T'); # Update with other arch types + self.EN_NOISE = EN_NOISE; + self.m_sigma = m_sigma; + # -- Interpolation info -- + self.Npoints = Npoints; + self.ABN_lookup = None; + self.sig_ABN_lookup = None; + self.V_DP_half_LUT = None; + self.devGainLUT = None; + + # /// Build layer /// + def build(self,input_shape): + dim = input_shape[self.axis]; + + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer 
received an input with shape ' + + str(input_shape) + '.') + shape = (dim,) + + if self.scale: + # gamma_constraint = Clip(0.0,4.0) + + self.gamma = self.add_weight(shape = (1,), + name = 'gamma', + initializer = self.gamma_initializer, + regularizer = self.gamma_regularizer, + constraint = self.gamma_constraint) + else: + self.gamma = None + + if self.center: + # beta_constraint = Clip(-100.0,100.0); + + self.beta = self.add_weight(shape = shape, + name = 'beta', + initializer = self.beta_initializer, + regularizer = self.beta_regularizer, + constraint = self.beta_constraint) + + else: + self.beta = None + + if self.renorm: + self.moving_mean_DP = self.add_weight( + shape=shape, + name='moving_mean_DP', + initializer=self.moving_mean_initializer, + trainable=False) + self.moving_variance_DP = self.add_weight( + shape=shape, + name='moving_variance_DP', + initializer=self.moving_variance_initializer, + trainable=False) + else: + self.moving_mean_DP = K.variable(0.0) + self.moving_variance_DP = K.variable(1.0) + + self.m_sigma = self.add_weight(shape = (1,), + name = 'm_sigma', + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_sigma)), + trainable=False) + + # Spice-extracted lookup table between V_ABN, V_DP and T_ABN (hardcoded hardware has to match CIM params) + print('Retrieving actual current-mode ABN response...') + self.ABN_lookup = self.hardware.sramInfo.ABN_LUT; + self.sig_ABN_lookup = self.hardware.sramInfo.sig_ABN_LUT; + self.V_DP_half_LUT = self.hardware.sramInfo.V_DP_half_LUT; + self.devGainLUT = self.hardware.sramInfo.gainABN_LUT; + print('Done !') + + super(Analog_BN, self).build(input_shape) + + # /// Call layer (train or inference) /// + def call(self,inputs,training=None): + + input_shape = K.int_shape(inputs); + # print(input_shape) + + # Prepare broadcasting shape. + ndim = len(input_shape) + reduction_axes = list(range(len(input_shape))) + del reduction_axes[self.axis] + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] + + # Determines whether broadcasting is needed. + needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) + + def normalize_inference(): + # Explicitely broadcast parameters when required. 
+ if needs_broadcasting: + # Norm params + if self.renorm: + broadcast_moving_mean_DP = K.reshape(self.moving_mean_DP, + broadcast_shape); + broadcast_moving_variance_DP = K.reshape(self.moving_variance_DP, + broadcast_shape); + else: + broadcast_moving_mean_DP = None; + broadcast_moving_variance_DP = None; + # Scale param + if self.scale: + broadcast_gamma = K.reshape(self.gamma,broadcast_shape); + else: + broadcast_gamma = None + # Offset param + if self.center: + broadcast_beta = K.reshape(self.beta,broadcast_shape); + else: + broadcast_beta = None + + # Return batchnorm + return ABN( + inputs, + self.ABN_lookup, + self.sig_ABN_lookup, + self.V_DP_half_LUT, + self.devGainLUT, + broadcast_moving_mean_DP, + broadcast_moving_variance_DP, + broadcast_beta, + broadcast_gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + DR_tuple = self.DRlim, + gamma_range = self.gamma_range, + ABNstates = self.ABNstates, + Npoints = self.Npoints, + IS_DIFF = self.IS_DIFF, + EN_NOISE=self.EN_NOISE, + training=training) + else: + return ABN( + inputs, + self.ABN_lookup, + self.sig_ABN_lookup, + self.V_DP_half_LUT, + self.devGainLUT, + self.moving_mean_DP, + self.moving_variance_DP, + self.beta, + self.gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + DR_tuple = self.DRlim, + gamma_range = self.gamma_range, + ABNstates = self.ABNstates, + Npoints = self.Npoints, + IS_DIFF = self.IS_DIFF, + EN_NOISE=self.EN_NOISE, + training=training) + + # If the learning phase is *static* and set to inference: + if training in {0, False}: + return normalize_inference() + + + # If the learning is either dynamic, or set to training: + (normed_training,mean_DP,variance_DP) = \ + norm_ABN_in_train( + inputs,self.ABN_lookup,self.sig_ABN_lookup,self.V_DP_half_LUT, self.devGainLUT, self.beta, self.gamma, self.renorm, reduction_axes, + epsilon=self.epsilon,m_sigma=self.m_sigma,hardware=self.hardware,DR_tuple=self.DRlim, + gamma_range=self.gamma_range,ABNstates=self.ABNstates,Npoints=self.Npoints,IS_DIFF=self.IS_DIFF,EN_NOISE=self.EN_NOISE,training=training) + # ??? 
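# (Illustrative note, assumption based on Keras' stock BatchNormalization: the
#  block below rescales the batch variance by n/(n - 1) -- e.g. 256/255 for a
#  batch of 256 samples -- so that moving_variance_DP tracks the unbiased
#  population variance before the K.moving_average_update calls fold it in.)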
+ if K.backend() != 'cntk': + sample_size = K.prod([K.shape(inputs)[axis] + for axis in reduction_axes]) + sample_size = K.cast(sample_size, dtype=K.dtype(inputs)) + if K.backend() == 'tensorflow' and sample_size.dtype != 'float32': + sample_size = K.cast(sample_size, dtype='float32') + + # sample variance - unbiased estimator of population variance + variance_DP *= sample_size / (sample_size - (1.0 + self.epsilon)) + + # Update moving mean and variance during training + self.add_update([K.moving_average_update(self.moving_mean_DP, + mean_DP, + self.momentum), + K.moving_average_update(self.moving_variance_DP, + variance_DP, + self.momentum)]) + + # Pick ABN result for either training or inference + return K.in_train_phase(normed_training, + normalize_inference, + training=training) + + + def get_config(self): + config = { + 'axis': self.axis, + 'momentum': self.momentum, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'renorm': self.renorm, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'moving_mean_initializer': + initializers.serialize(self.moving_mean_initializer), + 'moving_variance_initializer': + initializers.serialize(self.moving_variance_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint), + 'DRlim': self.DRlim, + 'IS_DIFF': self.IS_DIFF + } + base_config = super(Analog_BN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + + return input_shape[1] +############################################## Internal functions ################################################## + +# Perform ABN +def ABN(V_DP,ABN_lookup,sig_ABN_lookup,V_DP_half_LUT,devGainLUT,mov_mean_DP=0.0,mov_variance_DP=1.0, beta=0.0,gamma=0.0,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,DR_tuple=None,gamma_range=None,ABNstates=None,Npoints=401,IS_DIFF=True,EN_NOISE=0,training=False): + + # Get min and max DR limits + minDR = DR_tuple[0]; + maxDR = DR_tuple[1]; + # Get resolution + Ns_gamma = ABNstates[0]; + Ns_beta = ABNstates[1]; + # Get ABN input supply voltage (i.e. 
DTSE voltage range) + VDD = hardware.sramInfo.VDD_BL.data; + # Get global beta range + Nconf_beta_ladder = hardware.sramInfo.Nconf_beta_ladder; + Vmax_beta_g = hardware.sramInfo.Vmax_beta_g; + Vmin_beta_g = hardware.sramInfo.Vmin_beta_g; + Vlsb_global = Vmax_beta_g/Nconf_beta_ladder; + # Get local beta range + Vmax_beta_l = hardware.sramInfo.Vmax_beta_l; + Vmin_beta_l = hardware.sramInfo.Vmin_beta_l; + Vlsb_local = Vmax_beta_l/Ns_beta; + + # Get weights resolution + R_W = hardware.sramInfo.Wres; + + # Set 'None' parameters to their initial values + if gamma is None: + gamma = K.constant(1.0); + if beta is None: + beta = K.constant(0.0); + if mov_mean_DP is None: + mov_mean_DP = K.constant(DR_tuple[1]); + if mov_variance_DP is None: + mov_variance_DP = K.constant(1.0); + + # // Specify non-centernormalized correction factors // + mu_goal = 0; + sigma_goal = maxDR/m_sigma; var_goal = sigma_goal*sigma_goal; + + # // Equivalent gain computation // + # Get custom renorm factors (single gain for all columns) + mov_variance_DP_t = K.mean(mov_variance_DP)/var_goal; + sigma_DP_t = K.sqrt(mov_variance_DP_t); + # Get equivalent coefficients + gamma_eq = gamma/(sigma_DP_t + epsilon); + # Add Bernouilli matrices to regularize gain training (not mandatory) +# bern_matrix = tf.random.uniform(shape=tf.shape(gamma_eq),maxval=1); +# bern_matrix = tf.math.greater(bern_matrix,0.2); bern_matrix = tf.cast(bern_matrix,dtype='float32'); +# gamma_eq = bern_matrix*round_through(gamma_eq)+(1-bern_matrix)*gamma_eq; + gamma_eq = round_through(gamma_eq); + gamma_eq = K.clip(gamma_eq,0,Ns_gamma-1); + # Get ABN configuration from gain mapping (ideal case: Tabn_conf = gamma_eq) + Tabn_conf_0 = K.cast(K.argmin(K.abs(gamma_eq-devGainLUT[::,0])),"float32"); + Tabn_conf_1 = K.cast(K.argmin(K.abs(gamma_eq-devGainLUT[::,1])),"float32"); + Tabn_conf_2 = K.cast(K.argmin(K.abs(gamma_eq-devGainLUT[::,2])),"float32"); + Tabn_conf_3 = K.cast(K.argmin(K.abs(gamma_eq-devGainLUT[::,3])),"float32"); + + # tf.print("T_ABN_conf_0",Tabn_conf_0); + + # // Equivalent offset computation // + # Offset from distribution re-centering to fit target gain + indABN = K.cast(Tabn_conf_0,"int32") + V_DP_center_0 = tf.gather(V_DP_half_LUT[::,0],indABN); + indABN = K.cast(Tabn_conf_1,"int32") + V_DP_center_1 = tf.gather(V_DP_half_LUT[::,1],indABN); + indABN = K.cast(Tabn_conf_2,"int32") + V_DP_center_2 = tf.gather(V_DP_half_LUT[::,2],indABN); + indABN = K.cast(Tabn_conf_3,"int32") + V_DP_center_3 = tf.gather(V_DP_half_LUT[::,3],indABN); + # Reshape into the right order + V_DP_center = tf.stack([V_DP_center_0,V_DP_center_1,V_DP_center_2,V_DP_center_3],axis=0) + tenShape = K.int_shape(V_DP); + V_DP_center = tf.reshape(V_DP_center,[-1]); + V_DP_center = tf.tile(V_DP_center,[tenShape[-1]//4]); + + # Total full-precision offset + mean_tot = mov_mean_DP - V_DP_center; + dVt = beta/gamma_eq - mean_tot; + + # Get quantized global offset part + mean_dVt = K.mean(dVt); + dVt_glob = floor_through(K.clip(mean_dVt-Vmin_beta_g,0,Vmax_beta_g)/Vlsb_global)*Vlsb_global+Vmin_beta_g; + # Get quantized local offset + dVt_loc = dVt-mean_dVt; + dVt_loc = floor_through(K.clip(dVt_loc-Vmin_beta_l,0,Vmax_beta_l)/Vlsb_local)*Vlsb_local+Vmin_beta_l; + # Get actual quantized offset on the DP output + dVt = dVt_glob + dVt_loc; + + # // Get ABN distribution from LUTs based on the gain/offset mapping // + if(R_W == 4): + V_ABN = doInterpABN(ABN_lookup[::,::,::,0],Tabn_conf_0,V_DP[...,::]+dVt[::],ABNstates[0],ABNstates[0],VDD,Npoints); + if(EN_NOISE): + sig_ABN = 
doInterpABN(sig_ABN_lookup[::,::,::,0],Tabn_conf_0,V_DP[...,::]+dVt[::],ABNstates[0],ABNstates[0],VDD,Npoints); + sig_ABN = sig_ABN*K.random_normal(shape=tf.shape(V_ABN),mean=0.,stddev=1.,dtype='float32'); + V_ABN = V_ABN+sig_ABN; + else: + V_ABN_0 = doInterpABN(ABN_lookup[::,::,::,0],Tabn_conf_0,V_DP[...,0::4]+dVt[0::4],ABNstates[0],ABNstates[0],VDD,Npoints); + V_ABN_1 = doInterpABN(ABN_lookup[::,::,::,1],Tabn_conf_1,V_DP[...,1::4]+dVt[1::4],ABNstates[0],ABNstates[0],VDD,Npoints); + V_ABN_2 = doInterpABN(ABN_lookup[::,::,::,2],Tabn_conf_2,V_DP[...,2::4]+dVt[2::4],ABNstates[0],ABNstates[0],VDD,Npoints); + V_ABN_3 = doInterpABN(ABN_lookup[::,::,::,3],Tabn_conf_3,V_DP[...,3::4]+dVt[3::4],ABNstates[0],ABNstates[0],VDD,Npoints); + V_ABN = tf.stack([V_ABN_0,V_ABN_1,V_ABN_2,V_ABN_3],axis=-1); + if(EN_NOISE): + sig_ABN_0 = doInterpABN(sig_ABN_lookup[::,::,::,0],Tabn_conf_0,V_DP[...,0::4]+dVt[0::4],ABNstates[0],ABNstates[0],VDD,Npoints); + sig_ABN_1 = doInterpABN(sig_ABN_lookup[::,::,::,1],Tabn_conf_1,V_DP[...,1::4]+dVt[1::4],ABNstates[0],ABNstates[0],VDD,Npoints); + sig_ABN_2 = doInterpABN(sig_ABN_lookup[::,::,::,2],Tabn_conf_2,V_DP[...,2::4]+dVt[2::4],ABNstates[0],ABNstates[0],VDD,Npoints); + sig_ABN_3 = doInterpABN(sig_ABN_lookup[::,::,::,3],Tabn_conf_3,V_DP[...,3::4]+dVt[3::4],ABNstates[0],ABNstates[0],VDD,Npoints); + sig_ABN = tf.stack([sig_ABN_0,sig_ABN_1,sig_ABN_2,sig_ABN_3],axis=-1); + sig_ABN = sig_ABN*K.random_normal(shape=tf.shape(sig_ABN),mean=0.,stddev=1.,dtype='float32'); + # Add noise + V_ABN = V_ABN+sig_ABN; + + # // Debug (uncomment desired lines) // + # tf.print("1/sigma",1/sigma_DP_t); + # tf.print("gamma",gamma); + # tf.print("total gain",gamma_eq); + # tf.print("Tabn_conf_0",Tabn_conf_0); + # tf.print("V_DP",V_DP[0][0:12]); + # tf.print("V_DP center",V_DP_center); + # tf.print("V_DP shifted",V_DP[0]+dVt[0]); + # tf.print("V_ABN_0",V_ABN_0[0][0:8]); + # tf.print("V_ABN_1",V_ABN_1[0]); + # tf.print("V_ABN_2",V_ABN_2[0]); + # tf.print("V_ABN_3",V_ABN_3[0]); + # tf.print("dVt",dVt) + # tf.print("dVt",(dVt_abs[0:12]-Vmin_beta_l)/Vlsb_local,summarize=8) + + # Reshape into the right order + V_ABN = tf.reshape(V_ABN,tf.shape(V_DP)); + # tf.print("V_ABN",V_ABN[0][0:12],summarize=8); + # print(K.int_shape(V_ABN)) + + return V_ABN; + +# Compute mean and variance of the batch then perform ABN with it, when enabled +def norm_ABN_in_train(V_DP,ABN_lookup,sig_ABN_lookup,V_DP_half_LUT,devGainLUT,beta=0.0,gamma=1.0,renorm=True,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,DR_tuple=None,gamma_range=None,ABNstates=None,Npoints=401,IS_DIFF=True,EN_NOISE=0,training=False): + # Compute mean and variance of each batch when desired + if(renorm): + # Eventually reshape V_DP in case of CONV2D operation + Ncols = K.int_shape(V_DP)[-1]; + V_DP_flat = tf.reshape(V_DP,(-1,Ncols)); + # Get mean and variance + mean_DP = K.mean(V_DP_flat,axis=0); + variance_DP = K.var(V_DP_flat,axis=0); + + else: + mean_DP = K.constant(0.0); + variance_DP = K.constant(1.0); + # Compute ABN with specified mean and variance + V_DP_BN = ABN(V_DP,ABN_lookup,sig_ABN_lookup,V_DP_half_LUT,devGainLUT,mean_DP,variance_DP,beta,gamma,axis,epsilon,m_sigma,hardware,DR_tuple,gamma_range,ABNstates,Npoints,IS_DIFF,EN_NOISE,training); + # Return a tuple of BN_result, mean and variance + return (V_DP_BN,mean_DP,variance_DP); + diff --git a/layers/analog_BN_current_model.py b/layers/analog_BN_current_model.py new file mode 100644 index 0000000000000000000000000000000000000000..ba0df01961b28363cc59e4060a9d93662f3658f8 --- /dev/null +++ 
b/layers/analog_BN_current_model.py @@ -0,0 +1,353 @@ +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// +# /////////////////////////// Custom batchnorm implementing actual hardware ABN ////////////////////////////// +# //////////////////////////////////////////////////////////////////////////////////////////////////////////// + +# Inspired from https://stackoverflow.com/questions/54101593/conditional-batch-normalization-in-keras + +import numpy as np +import math + +import tensorflow as tf +import keras.backend as K + +from keras import regularizers, initializers, constraints +#from keras.legacy import interfaces +from keras.layers import Layer, Input, InputSpec +from keras.models import Model + + +class Analog_BN(Layer): + """ Analog batchnorm layer + """ + # /// Init layer /// +# @interfaces.legacy_batchnorm_support + def __init__(self, + axis=-1, + momentum=0.99, + epsilon=1e-5, + center=True, + scale=True, + renorm = True, + beta_initializer='zeros', + gamma_initializer='ones', + moving_mean_initializer='zeros', + moving_variance_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + activity_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + hardware = None, + NB = None, + m_sigma = 1, + **kwargs): + + super(Analog_BN, self).__init__(**kwargs) + self.axis = axis + self.momentum = momentum + self.epsilon = epsilon + self.center = center + self.scale = scale + self.renorm = renorm + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.moving_mean_initializer = initializers.get(moving_mean_initializer) + self.moving_variance_initializer = (initializers.get(moving_variance_initializer)) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + self.hardware = hardware; + self.DRlim = (hardware.sramInfo.GND.data,hardware.sramInfo.VDD.data/2); + self.gamma_range = 4*math.sqrt(NB) + self.ABNstates = (2**hardware.sramInfo.r_gamma,2**hardware.sramInfo.r_beta) + self.IS_DIFF = (hardware.sramInfo.arch.name == '6T'); # Update with other arch types + self.m_sigma = m_sigma; + + # /// Build layer /// + def build(self,input_shape): + dim = input_shape[self.axis]; + + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer received an input with shape ' + + str(input_shape) + '.') + shape = (dim,) + + if self.scale: + # gamma_constraint = Clip(0.0,4.0) + + self.gamma = self.add_weight(shape = (1,), + name = 'gamma', + initializer = self.gamma_initializer, + regularizer = self.gamma_regularizer, + constraint = self.gamma_constraint) + else: + self.gamma = None + + if self.center: + # beta_constraint = Clip(-100.0,100.0); + + self.beta = self.add_weight(shape = shape, + name = 'beta', + initializer = self.beta_initializer, + regularizer = self.beta_regularizer, + constraint = self.beta_constraint) + else: + self.beta = None + + if self.renorm: + self.moving_mean_DP = self.add_weight( + shape=shape, + name='moving_mean_DP', + initializer=self.moving_mean_initializer, + trainable=False) + self.moving_variance_DP = self.add_weight( + shape=shape, + name='moving_variance_DP', + 
initializer=self.moving_variance_initializer, + trainable=False) + else: + self.moving_mean_DP = K.variable(0.0) + self.moving_variance_DP = K.variable(1.0) + + # Dummy value to match PL layer + self.m_sigma = self.add_weight(shape = (1,), + name = 'm_sigma', + initializer = initializers.get(tf.keras.initializers.Constant(value=0.)), + trainable=False); + + + super(Analog_BN, self).build(input_shape) + + # /// Call layer (train or inference) /// + def call(self,inputs,training=None): + + input_shape = K.int_shape(inputs) + + # Prepare broadcasting shape. + ndim = len(input_shape) + reduction_axes = list(range(len(input_shape))) + del reduction_axes[self.axis] + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] + + # Determines whether broadcasting is needed. + needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) + + def normalize_inference(): + # Explicitely broadcast parameters when required. + if needs_broadcasting: + # Norm params + if self.renorm: + broadcast_moving_mean_DP = K.reshape(self.moving_mean_DP, + broadcast_shape); + broadcast_moving_variance_DP = K.reshape(self.moving_variance_DP, + broadcast_shape); + else: + broadcast_moving_mean_DP = None; + broadcast_moving_variance_DP = None; + # Scale param + if self.scale: + broadcast_gamma = K.reshape(self.gamma,broadcast_shape); + else: + broadcast_gamma = None + # Offset param + if self.center: + broadcast_beta = K.reshape(self.beta,broadcast_shape); + else: + broadcast_beta = None + # Return batchnorm + return ABN( + inputs, + broadcast_moving_mean_DP, + broadcast_moving_variance_DP, + broadcast_beta, + broadcast_gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + DR_tuple = self.DRlim, + gamma_range = self.gamma_range, + ABNstates = self.ABNstates, + IS_DIFF = self.IS_DIFF, + training=training) + else: + return ABN( + inputs, + self.moving_mean_DP, + self.moving_variance_DP, + self.beta, + self.gamma, + axis = self.axis, + epsilon = self.epsilon, + m_sigma = self.m_sigma, + hardware = self.hardware, + DR_tuple = self.DRlim, + gamma_range = self.gamma_range, + ABNstates = self.ABNstates, + IS_DIFF = self.IS_DIFF, + training=training) + + # If the learning phase is *static* and set to inference: + if training in {0, False}: + return normalize_inference() + + + # If the learning is either dynamic, or set to training: + (normed_training,mean_DP,variance_DP) = \ + norm_ABN_in_train( + inputs, self.beta, self.gamma, self.renorm, reduction_axes, + epsilon=self.epsilon,m_sigma=self.m_sigma,hardware=self.hardware,DR_tuple=self.DRlim, + gamma_range=self.gamma_range,ABNstates=self.ABNstates,IS_DIFF=self.IS_DIFF,training=training) + # ??? 
+ if K.backend() != 'cntk': + sample_size = K.prod([K.shape(inputs)[axis] + for axis in reduction_axes]) + sample_size = K.cast(sample_size, dtype=K.dtype(inputs)) + if K.backend() == 'tensorflow' and sample_size.dtype != 'float32': + sample_size = K.cast(sample_size, dtype='float32') + + # sample variance - unbiased estimator of population variance + variance_DP *= sample_size / (sample_size - (1.0 + self.epsilon)) + + # Update moving mean and variance during training + self.add_update([K.moving_average_update(self.moving_mean_DP, + mean_DP, + self.momentum), + K.moving_average_update(self.moving_variance_DP, + variance_DP, + self.momentum)]) + + # Pick ABN result for either training or inference + return K.in_train_phase(normed_training, + normalize_inference, + training=training) + + + def get_config(self): + config = { + 'axis': self.axis, + 'momentum': self.momentum, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'renorm': self.renorm, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'moving_mean_initializer': + initializers.serialize(self.moving_mean_initializer), + 'moving_variance_initializer': + initializers.serialize(self.moving_variance_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint), + 'DRlim': self.DRlim, + 'IS_DIFF': self.IS_DIFF + } + base_config = super(Analog_BN, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + + return input_shape[1] +############################################## Internal functions ################################################## + +# Perform ABN +def ABN(V_DP,mov_mean_DP=0.0,mov_variance_DP=1.0,beta=0.0,gamma=0.0,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,DR_tuple=None,gamma_range=None,ABNstates=None,IS_DIFF=True,training=False): + # Get min and max DR limits + minDR = DR_tuple[0]; + maxDR = DR_tuple[1]; + + r_gamma = hardware.sramInfo.r_gamma; + r_beta = hardware.sramInfo.r_beta; + + Vmax_beta = hardware.sramInfo.Vmax_beta_g; + Vlsb_beta = Vmax_beta/2**(r_beta-1); + + # Set 'None' parameters to their initial values + if gamma is None: + gamma = K.constant(1.0); + if beta is None: + beta = K.constant(0.0); + if mov_mean_DP is None: + mov_mean_DP = K.constant(DR_tuple[1]); + if mov_variance_DP is None: + mov_variance_DP = K.constant(1.0); + + # Specify non-centernormalized correction factors +# mu_goal = maxDR/2; +# sigma_goal = maxDR/m_sigma; var_goal = sigma_goal*sigma_goal; +# +# # Get custom renorm factors +# sigma_DP = K.sqrt(mov_variance_DP); +# mov_mean_DP_t = mov_mean_DP - mu_goal/sigma_goal*sigma_DP; +# # mov_mean_DP_t = K.zeros_like(mov_mean_DP); +# mov_variance_DP_t = K.mean(mov_variance_DP)/var_goal; +# # Get equivalent coefficients +# sigma_DP_t = K.sqrt(mov_variance_DP_t); + + gamma_eq = gamma/(K.sqrt(mov_variance_DP) + epsilon); + beta_eq = beta/gamma_eq - mov_mean_DP; + + # Quantize results + gamma_eq = K.clip(round_through(gamma_eq),0,2**r_gamma); + beta_eq = K.clip(round_through(beta_eq/Vlsb_beta)*Vlsb_beta,-Vmax_beta,Vmax_beta); + + # Model transfer function + V_ABN_temp = gamma_eq*((V_DP-maxDR/2)+beta_eq); + V_ABN = V_ABN_temp + maxDR/2; + + # Return (unclipped) result + return V_ABN; + +# 
Compute mean and variance of the batch then perform ABN with it, when enabled +def norm_ABN_in_train(V_DP,beta=0.0,gamma=1.0,renorm=True,axis=-1,epsilon=1e-5,m_sigma=1,hardware=None,DR_tuple=None,gamma_range=None,ABNstates=None,IS_DIFF=True,training=False): + # Get min and max DR limits + minDR = DR_tuple[0]; + maxDR = DR_tuple[1]; + + # Compute mean and variance of each batch when desired + if(renorm): + # Model transfer function + V_out = V_DP-maxDR/2; + + # Get mean and variance + mean_DP = K.mean(V_out,axis=0); + variance_DP = K.var(V_DP,axis=0); + else: + mean_DP = K.constant(0.0); + variance_DP = K.constant(1.0); + # Compute ABN with specified mean and variance + V_DP_BN = ABN(V_DP,mean_DP,variance_DP,beta,gamma,axis,epsilon,m_sigma,hardware,DR_tuple,gamma_range,ABNstates,IS_DIFF,training); + # Return a tuple of BN_result, mean and variance + return (V_DP_BN,mean_DP,variance_DP); + +# Floor-through locally redefined (would be better to have it somewhere) +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; + +def round_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + rounded = tf.math.round(x); + rounded_through = x + K.stop_gradient(rounded - x); + return rounded_through; + +def round_through_lut(x): + rounded = K.cast(K.round(x),"int32"); + rounded_through = x + K.stop_gradient(rounded - x); + return rounded_through; + + + diff --git a/layers/binary_layers_IMC.py b/layers/binary_layers_IMC.py deleted file mode 100644 index 1bb4f77b3800508803bcd98f550254ef6b46ad95..0000000000000000000000000000000000000000 --- a/layers/binary_layers_IMC.py +++ /dev/null @@ -1,415 +0,0 @@ -# -*- coding: utf-8 -*- -import numpy as np -import math - -from keras import backend as K -import tensorflow as tf - -from keras.layers import InputSpec, Layer, Dense, Conv2D -from keras import constraints -from keras import initializers -# Binarization functions -from layers.binary_ops import binarize, binarize_exp, binarize_ssb -from layers.binary_ops import binary_sigmoid_p -# Analog MAC operator -from models.MAC_current import MAC_op_se_ana as MAC_op_se -from models.MAC_current import MAC_op_diff_ana as MAC_op_diff -from models.CONV_current import CONV_op_se_ana as CONV_op_se -from models.CONV_current import CONV_op_diff_ana as CONV_op_diff -# ADC model -from models.ADC import quant_uni -# Hardware parameters generation -from utils.config_hardware_model import genHardware -# Temporary dir -import tempfile -import sys -import subprocess -import time -# Modeling files -import os -scriptpath = "../lib_modelcim/" -sys.path.append(os.path.abspath(scriptpath)); -from preProc_wrapper import preProcSat as getHardwareData -from fit_spice import DP_fit - -class Clip(constraints.Constraint): - def __init__(self, min_value, max_value=None): - self.min_value = min_value - self.max_value = max_value - if not self.max_value: - self.max_value = -self.min_value - if self.min_value > self.max_value: - self.min_value, self.max_value = self.max_value, self.min_value - - def __call__(self, p): - return K.clip(p, self.min_value, self.max_value) - - def get_config(self): - return {"name": self.__call__.__name__, - "min_value": self.min_value, - "max_value": self.max_value} - - -class 
BinaryDense(Dense): - ''' Binarized Dense layer - References: - "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] - ''' - def __init__(self, units, H=1.,sramInfo=None, EN_NOISE=0, EN_QUANT=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): - super(BinaryDense, self).__init__(units, **kwargs) - self.H = H - self.kernel_lr_multiplier = kernel_lr_multiplier - self.bias_lr_multiplier = bias_lr_multiplier - - self.EN_NOISE = EN_NOISE - self.EN_QUANT = EN_QUANT - - self.sramInfo = sramInfo - self.hardware = None - self.Vt_noise = None - self.input_dim = None - - super(BinaryDense, self).__init__(units, **kwargs) - - def build(self, input_shape): - assert len(input_shape) >= 2 - input_dim = input_shape[1] - self.input_dim = input_dim; - - if self.H == 'Glorot': - self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) - #print('Glorot H: {}'.format(self.H)) - if self.kernel_lr_multiplier == 'Glorot': - self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units))) - #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) - - # Retrieve architecture type (diff or se) and derive flag - archType = self.sramInfo.arch.name; - # if(archType == '6T'): - self.kernel_constraint = Clip(-self.H, self.H) - self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) - # elif(archType == '8T'): - # self.kernel_constraint = Clip(0, self.H) - # self.kernel_initializer = initializers.RandomUniform(0, self.H) - # else: - # error('Unsupported cell type during binary weights initialization !'); - - self.kernel = self.add_weight(shape=(input_dim, self.units), - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - if self.use_bias: - self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] - self.bias = self.add_weight(shape=(self.output_dim,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.lr_multipliers = [self.kernel_lr_multiplier] - self.bias = None - - # Get DP electrical quantities for this layer - Nrows = self.sramInfo.Nrows.data - N_cim = int(math.ceil((input_dim-1)/Nrows)); - self.sramInfo.NB.data = int(input_dim/N_cim); - print(f'######## FC layer with {self.sramInfo.NB.data} cells/op supplied at {self.sramInfo.VDD.data:.2f}V ######## ') - path_dir = '/export/home/adkneip/Documents/PhD/ELDO/IMC_PYTHON/CURRENT_MAC/'+self.sramInfo.arch.name+'_CELL/' - ################################################# USE TEMPORARY SIM DIRECTORY ##################################################### - with tempfile.TemporaryDirectory(dir=path_dir,prefix='SimFolder_') as path_to_file: - #print(path_to_file) - # Copy .cir files into temporary simu folder -- '*' sumbol bugs for some reason - if(self.sramInfo.simulator == "eldo"): - file_table = np.array(['MAC_DC.cir','MAC_NL.cir','MAC_satCal.cir','MAC_time.cir','MAC_train_MC.cir']); - elif(self.sramInfo.simulator == "spectre"): - file_table = np.array(['MAC_DC.scs','MAC_satCal.scs','MAC_time.scs', - 'MAC_DC.mdl','MAC_satCal.mdl','MAC_time.mdl']); - else: - sys.exit('Error: selected simulator not supported !\n'); - for file_temp in file_table: - commandLine = ['cp',path_dir+'RefFolder/'+file_temp,path_to_file+'/']; - proc = subprocess.run(commandLine); - if(proc.returncode != 0): - sys.exit('Error: could not copy reference files into 
temporary sim folder !\n'); - # Create temporary data file - commandLine = ['mkdir',path_to_file+'/data']; - proc = subprocess.run(commandLine); - if(proc.returncode != 0): - sys.exit('Error: could not copy reference files into temporary sim folder !\n'); - # Perform Spice simulations - self.sramInfo = getHardwareData(path_to_file,self.sramInfo) - # time.sleep(300); # For debug - ################################################################################################################################### - # Generate hardware parameters - hardware = genHardware(self.sramInfo) - # Compute the appropriate curve-fitting factors - # hardware.a1 = 1; hardware.a2 = 1; hardware.b1 = 1; - # self.hardware = hardware - print(f'######## Performing three-parametric best curve-fitting ######## ') - self.hardware = DP_fit(path_dir,'early',hardware) - # Create V_th distribution - mu_Vth = self.hardware.mu_Vth - sig_Vth = self.hardware.sig_Vth - # self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=sig_Vth) - self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=0) - - # Perform build - self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) - self.built = True - - def call(self, inputs): - # Binarize weights - W_bin = binarize(self.kernel, H=self.H); - # Check if a single CIM-SRAM is sufficient, or ideal charge-share of their analog outputs - Nrows = self.hardware.sramInfo.Nrows.data - N_cim = int(math.ceil((self.input_dim-1)/Nrows)); - # Retrieve architecture type (diff or se) and derive flag - archType = self.hardware.sramInfo.arch.name; - IS_SE_OUT = (archType == '8T') or self.EN_QUANT; - # Wrap correct MAC_op function - if(archType == '6T'): - MAC_op = MAC_op_diff; - elif(archType == '8T'): - MAC_op = MAC_op_se; - else: - raise NameError('Error: selected architecture (cell type) not supported during FC layer compute !\n'); - # Emulate 6T-based CIM-SRAM analog MAC operation, possibly with parallel macros - if(N_cim > 1): - # Separate inputs and weights in sub-matrices - inputs = tf.unstack(K.reshape(inputs,(-1,int(self.input_dim/N_cim),N_cim)),axis=-1) - W_bin = K.permute_dimensions(K.reshape(K.permute_dimensions(W_bin,(1,0)),(-1,int(self.input_dim/N_cim),N_cim)),(1,2,0)) - W_bin = tf.unstack(W_bin,axis=1) - # Perform CIM-SRAM operations over all sub-matrices (i.e. different CIM-SRAMs) - V_DP = []; - for i in range(N_cim): - V_DP.append(MAC_op(self.hardware,inputs[i],W_bin[i],self.Vt_noise,self.EN_NOISE,self.EN_QUANT)) - # Combine the result as if ideal charge-sharing (--> could implement actual charge-sharing !) 
- if(IS_SE_OUT): - V_DP = K.sum(tf.stack(V_DP,axis=2),axis=2)/N_cim; - else: - V_BL = K.sum(tf.stack(V_DP[0],axis=2),axis=2)/N_cim; - V_BLB = K.sum(tf.stack(V_DP[1],axis=2),axis=2)/N_cim; - else: - if(IS_SE_OUT): - V_DP = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,self.EN_NOISE,self.EN_QUANT); - else: - (V_BL,V_BLB) = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,self.EN_NOISE,self.EN_QUANT); - # Add bias to PA - if self.use_bias: - if(IS_SE_OUT): - V_DP = K.bias_add(V_DP, self.bias) - else: - V_BL = K.bias_add(V_BL,self.bias) - V_BLB = K.bias_add(V_BLB,self.bias) - - # Quantify the PA to get the digitized OA - IAres = self.hardware.sramInfo.IAres; - OAres = self.hardware.sramInfo.OAres; - NB = self.hardware.sramInfo.NB.data; - PAmax = (2**IAres-1)*NB; - DRval = self.hardware.sramInfo.DR.data; - VDD = self.hardware.sramInfo.VDD.data; - if(self.EN_QUANT): - DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); - # Return quantized output - return DO - elif(archType == '8T'): - return V_DP - else: - # Return unquantized differential output - return K.concatenate([V_BL[np.newaxis,...],V_BLB[np.newaxis,...]],axis=0) - - def get_config(self): - config = {'H': self.H, - 'kernel_lr_multiplier': self.kernel_lr_multiplier, - 'bias_lr_multiplier': self.bias_lr_multiplier} - base_config = super(BinaryDense, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class BinaryConv2D(Conv2D): - '''Binarized Convolution2D layer - References: - "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] - ''' - def __init__(self, filters, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', - bias_lr_multiplier=None, H=1.,sramInfo=None, EN_NOISE=0, EN_QUANT=1, **kwargs): - super(BinaryConv2D, self).__init__(filters, **kwargs) - self.H = H - self.kernel_lr_multiplier = kernel_lr_multiplier - self.bias_lr_multiplier = bias_lr_multiplier - self.activity_regularizer = activity_regularizer - self.kernel_regularizer = kernel_regularizer - - self.sramInfo = sramInfo - self.hardware = None - self.Vt_noise = None - - self.EN_NOISE = EN_NOISE - self.EN_QUANT = EN_QUANT - - def build(self, input_shape): - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. Found `None`.') - - input_dim = input_shape[channel_axis] - kernel_shape = self.kernel_size + (input_dim, self.filters) - #kernel_shape = self.kernel_size + (self.filters,) - - base = self.kernel_size[0] * self.kernel_size[1] - if self.H == 'Glorot': - nb_input = int(input_dim * base) - nb_output = int(self.filters * base) - self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) - #print('Glorot H: {}'.format(self.H)) - - if self.kernel_lr_multiplier == 'Glorot': - nb_input = int(input_dim * base) - nb_output = int(self.filters * base) - self.kernel_lr_multiplier = np.float32(1. 
/ np.sqrt(1.5/ (nb_input + nb_output))) - #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier)) - - self.kernel_constraint = Clip(-self.H, self.H) - self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) - #self.bias_initializer = initializers.RandomUniform(-self.H, self.H) - self.kernel = self.add_weight(shape=kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) -# print(K.int_shape(self.kernel)) - - if self.use_bias: - self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] - self.bias = self.add_weight((self.filters,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - else: - self.lr_multipliers = [self.kernel_lr_multiplier] - self.bias = None - - # Get DP electrical quantities for this layer - self.sramInfo.NB.data = base*input_dim; - print(f'######## 2D-CONV layer with {self.sramInfo.NB.data} cells/op supplied at {self.sramInfo.VDD.data:.2f}V ######## ') - path_dir = '/export/home/adkneip/Documents/PhD/ELDO/IMC_PYTHON/CURRENT_MAC/'+self.sramInfo.arch.name+'_CELL/' - ################################################# USE TEMPORARY SIM DIRECTORY ##################################################### - with tempfile.TemporaryDirectory(dir=path_dir,prefix='SimFolder_') as path_to_file: - #print(path_to_file) - # Copy .cir files into temporary simu folder -- '*' sumbol bugs for some reason - if(self.sramInfo.simulator == "eldo"): - file_table = np.array(['MAC_DC.cir','MAC_NL.cir','MAC_satCal.cir','MAC_time.cir','MAC_train_MC.cir']); - elif(self.sramInfo.simulator == "spectre"): - file_table = np.array(['MAC_DC.scs','MAC_satCal.scs','MAC_time.scs', - 'MAC_DC.mdl','MAC_satCal.mdl','MAC_time.mdl']); - else: - sys.exit('Error: selected simulator not supported !\n'); - for file_temp in file_table: - commandLine = ['cp',path_dir+'RefFolder/'+file_temp,path_to_file+'/']; - proc = subprocess.run(commandLine); - if(proc.returncode != 0): - sys.exit('Error: could not copy reference files into temporary sim folder !\n'); - # Create temporary data file - commandLine = ['mkdir',path_to_file+'/data']; - proc = subprocess.run(commandLine); - if(proc.returncode != 0): - sys.exit('Error: could not copy reference files into temporary sim folder !\n'); - # Perform Spice simulations - self.sramInfo = getHardwareData(path_to_file,self.sramInfo) - ################################################################################################################################### - # Generate hardware parameters - hardware = genHardware(self.sramInfo) - # Compute the appropriate curve-fitting factors - # hardware.a1 = 1; hardware.a2 = 1; hardware.b1 = 1; - # self.hardware = hardware - print(f'######## Performing three-parametric best curve-fitting ######## ') - self.hardware = DP_fit(path_dir,'early',hardware) - # Create V_th distribution - sig_Vth = self.hardware.sig_Vth - #self.Vt_noise = K.random_normal(shape=(input_dim,),mean=0,stddev=sig_Vth) - self.Vt_noise = K.random_normal(shape=(input_dim,),mean=0,stddev=0) - - - # Set input spec. 
- self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs): - binary_kernel = binarize(self.kernel, H=self.H) - # Retrieve architecture type (diff or se) and derive flag - archType = self.hardware.sramInfo.arch.name; - IS_SE_OUT = (archType == '8T') or self.EN_QUANT; - # Wrap correct CONV_op function - if(archType == '6T'): - CONV_op = CONV_op_diff; - elif(archType == '8T'): - CONV_op = CONV_op_se; - else: - raise NameError('Error: selected architecture (cell type) not supported during 2DCONV layer compute !\n'); - - inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier - inputs_bnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\ - * inverse_kernel_lr_multiplier - - outputs_bnn_gradient = CONV_op( - self.hardware, - inputs_bnn_gradient, - binary_kernel, - self.Vt_noise, - self.data_format, - self.EN_NOISE, - self.EN_QUANT) - - if(IS_SE_OUT): - V_DP = (outputs_bnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient))\ - * self.kernel_lr_multiplier - else: - V_BL = (outputs_bnn_gradient[0] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient[0]))\ - * self.kernel_lr_multiplier - V_BLB = (outputs_bnn_gradient[1] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient[1]))\ - * self.kernel_lr_multiplier - - if self.use_bias: - if(IS_SE_OUT): - V_DP = K.bias_add(V_DP,self.bias,data_format=self.data_format); - else: - V_BL = K.bias_add(V_BL,self.bias,data_format=self.data_format); - V_BLB = K.bias_add(V_BLB,self.bias,data_format=self.data_format); - - # Quantify the PA to get the digitized OA - IAres = self.hardware.sramInfo.IAres - OAres = self.hardware.sramInfo.OAres - NB = self.hardware.sramInfo.NB.data - PAmax = (2**IAres-1)*NB - DRval = self.hardware.sramInfo.DR.data; - VDD = self.hardware.sramInfo.VDD.data; - if(self.EN_QUANT): - DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); - # Return digitized output - return DO - elif(archType == '8T'): - return V_DP - else: - # Return unquantized differential output - return (V_BL,V_BLB) - - def get_config(self): - config = {'H': self.H, - 'kernel_lr_multiplier': self.kernel_lr_multiplier, - 'bias_lr_multiplier': self.bias_lr_multiplier} - base_config = super(BinaryConv2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -# Aliases - -BinaryConvolution2D = BinaryConv2D diff --git a/layers/binary_ops.py b/layers/binary_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..8b14e76c7751473bd26959cd4a18e9d09501b977 --- /dev/null +++ b/layers/binary_ops.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import +import keras.backend as K + +import tensorflow as tf +from tensorflow.math import exp + +def round_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + rounded = K.round(x) + return x + K.stop_gradient(rounded - x) + +def round_through_p(x,p): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + rounded = K.switch(K.greater_equal(x,0),K.ones_like(x),K.zeros_like(x)); + return x + K.stop_gradient(rounded - x) + + +def _hard_sigmoid(x): + '''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid). 
+ + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + x = 1*(0.5 * x) + 0.5 + return K.clip(x, 0, 1) + +def _hard_sigmoid_p(x,p): + '''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid). + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + return K.clip(x,-p, p) + #x = 10*x; + #x = p*((exp(x)-exp(-x))/(exp(x)+exp(-x))) + #return x + +def _hard_sigmoid_asym(x): + '''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid). + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + return K.clip(x, 0, 1) + + +def binary_sigmoid(x): + '''Binary hard sigmoid for training binarized neural network. + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + return round_through(_hard_sigmoid(x)) + +def binary_sigmoid_abn(x,p): + '''Binary hard sigmoid for training binarized neural network. + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + return round_through_p(_hard_sigmoid_p(x,p),p) + +def binary_sigmoid_asym(x): + '''Binary hard sigmoid for training binarized neural network. + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + return round_through(_hard_sigmoid_asym(x)) + +def binary_sigmoid_p(x,p): + '''Binary hard sigmoid for training binarized neural network. + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + return round_through_p(_hard_sigmoid_p(x,p),p) + + +def binary_tanh(x): + '''Binary hard sigmoid for training binarized neural network. + The neurons' activations binarization function + It behaves like the sign function during forward propagation + And like: + hard_tanh(x) = 2 * _hard_sigmoid(x) - 1 + clear gradient when |x| > 1 during back propagation + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + x = 2 * round_through(_hard_sigmoid(x)) - 1 + #x = tf.Print(x,[x],summarize=10,first_n=2) + return x + +def binary_tanh_p(x,p): + '''Binary hard sigmoid for training binarized neural network. + The neurons' activations binarization function + It behaves like the sign function during forward propagation + And like: + hard_tanh(x) = 2 * _hard_sigmoid(x) - 1 + clear gradient when |x| > 1 during back propagation + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 
2016](http://arxiv.org/abs/1602.02830} + + ''' + x = 2 * round_through(_hard_sigmoid_p(x,p)) - p + # x = round_through(_hard_sigmoid_p(x,p)) + #x = tf.Print(x,[x],summarize=10,first_n=2) + return x + + +def binarize(W, H=1): + '''The weights' binarization function, + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + # [-H, H] -> -H or H + Wb = H * binary_tanh(W / H) + #Wb = tf.Print(Wb,[Wb,W],summarize=5,first_n=2) + return Wb + +def binarize_exp(W, H=1): + '''The weights' binarization function, + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + # [-H, H] -> -H or H + Wb = K.abs(round_through(K.clip(W,-1,1))); + #Wb = tf.Print(Wb,[Wb,W],summarize=5,first_n=2) + return Wb + +def binarize_stoch_quant(W, H=1): + '''The weights' binarization function, + + # Reference: + - [BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + # [-H, H] -> -H or H + bern_matrix = tf.random.uniform(shape=tf.shape(W),maxval=1); + bern_matrix = tf.math.greater(bern_matrix,0.2); bern_matrix = tf.cast(bern_matrix,dtype='float32'); + Wb = bern_matrix*round_through(W)+(1-bern_matrix)*W; + Wb = K.clip(Wb,-1,1) + #Wb = tf.Print(Wb,[Wb,W],summarize=5,first_n=2) + return Wb + +def binarize_ssb(W,H=1,b=1.0): + ''' S. Darabi 2020, SSb activation ''' + W = W/H; + sigmo = K.sigmoid(b*W) + W_ssb = H*(2*sigmo*(1+b*W*(1-sigmo))-1); + return W_ssb + K.stop_gradient(K.switch(K.greater_equal(W_ssb,0),K.ones_like(W_ssb),-K.ones_like(W_ssb)) - W_ssb); + +def _mean_abs(x, axis=None, keepdims=False): + return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) + + +def xnorize(W, H=1., axis=None, keepdims=False): + Wb = binarize(W, H) + Wa = _mean_abs(W, axis, keepdims) + + return Wa, Wb diff --git a/layers/quantized_layers_IMC.py b/layers/quantized_layers_IMC.py new file mode 100644 index 0000000000000000000000000000000000000000..4f13a923eabdfc976ae916f85feac38d914fa59b --- /dev/null +++ b/layers/quantized_layers_IMC.py @@ -0,0 +1,474 @@ +# -*- coding: utf-8 -*- +import numpy as np +import math + +from keras import backend as K +import tensorflow as tf + +from keras.layers import InputSpec, Layer, Dense, Conv2D +from keras import constraints +from keras import initializers +# Quantization functions +from layers.binary_ops import binarize as binarize +# Analog MAC operator +#from models.MAC_current import MAC_op_diff, MAC_op_se +from models.MAC_current import MAC_op_diff_num as MAC_op_diff +from models.MAC_current import MAC_op_se_num as MAC_op_se +from models.CONV_current import CONV_op_diff_num as CONV_op_diff +from models.CONV_current import CONV_op_se_num as CONV_op_se +from models.ADC import quant_uni +from models.DTSE import DTSE_PL +from models.DTSE import DTSE_ideal +# Hardware parameters generation +from utils.config_hardware_model import genHardware +# Rounding facility +from layers.quantized_ops import round_through +# Temporary dir +import tempfile +import sys +import subprocess +import time + + +class Clip(constraints.Constraint): + def __init__(self, min_value, max_value=None): + self.min_value = min_value + self.max_value = max_value + if not self.max_value: + self.max_value = -self.min_value + if self.min_value > self.max_value: + 
self.min_value, self.max_value = self.max_value, self.min_value + + def __call__(self, p): + #todo: switch for clip through? + return K.clip(p, self.min_value, self.max_value) + + def get_config(self): + return {"name": self.__call__.__name__, + "min_value": self.min_value, + "max_value": self.max_value} + + +class QuantizedDense(Dense): + ''' Quantized Dense layer + References: + "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] + ''' + def __init__(self, units, H=1., nb=16, m_T_DP=1., sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): + super(QuantizedDense, self).__init__(units, **kwargs) + self.H = H + self.nb = nb + self.kernel_lr_multiplier = kernel_lr_multiplier + self.bias_lr_multiplier = bias_lr_multiplier + self.m_T_DP_init = m_T_DP; + + self.EN_NOISE = EN_NOISE + self.FLAG_QUANT = FLAG_QUANT + self.FLAG_PL = FLAG_PL + self.IS_TRAINABLE_DP = IS_TRAINABLE_DP + + self.sramInfo = sramInfo + self.hardware = None + self.Vt_noise = None + self.input_dim = None + + self.DTSE_LUT = None + + super(QuantizedDense, self).__init__(units, **kwargs) + + def build(self, input_shape): + assert len(input_shape) >= 2 + input_dim = input_shape[1] + self.input_dim = input_dim + + if self.H == 'Glorot': + self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) + #print('Glorot H: {}'.format(self.H)) + if self.kernel_lr_multiplier == 'Glorot': + self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units))) + #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) + + self.kernel_constraint = Clip(-self.H, self.H) + self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) + # Binary values, nb-quantized weights in parallel columns + self.kernel = self.add_weight(shape=(input_dim, self.units*self.nb), + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) + if self.use_bias: + self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] + self.bias = self.add_weight(shape=(self.units,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + else: + self.lr_multipliers = [self.kernel_lr_multiplier] + self.bias = None + + # Train DP timing config or not + if self.IS_TRAINABLE_DP: + self.m_T_DP = self.add_weight(shape=(1,), + initializer = initializers.get(tf.keras.initializers.Constant(value=1.)), + name = 'T_DP_conf', + regularizer = None, + constraint = None); + else: + self.m_T_DP = 1.; + + # Get DP electrical quantities for this layer + Nrows = self.sramInfo.Nrows.data + N_cim = int(math.ceil((input_dim-1)/Nrows)); + self.sramInfo.NB.data = int(input_dim/N_cim); + + # Generate hardware parameters + hardware = genHardware(self.sramInfo) + self.hardware = hardware + # Create V_th distribution + if(self.sramInfo.noise_at_inf): + if(self.sramInfo.IS_NUM): + self.Vt_noise = K.random_normal(shape=(K.int_shape(self.kernel)[-1],),mean=0.,stddev=1.); + else: + sig_Vth = self.hardware.sig_Vth + self.Vt_noise = K.random_normal(shape=K.int_shape(self.kernel),mean=0,stddev=sig_Vth) + # self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=0) + else: + if(self.sramInfo.IS_NUM): + self.Vt_noise = K.zeros_like(K.int_shape(self.kernel)[-1]); + else: + self.Vt_noise = K.zeros_like(K.int_shape(self.kernel)); + + # 
Recover DTSE best-fitting coefficients + self.DTSE_LUT = self.sramInfo.DTSE_LUT; + + # Perform build + self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) + self.built = True + + + def call(self, inputs): + # Quantize weights in parallel columns + W_bin = binarize(self.kernel, H=self.H); + # Check if a single CIM-SRAM is sufficient, or ideal charge-share of their analog outputs + Nrows = self.hardware.sramInfo.Nrows.data; + N_cim = int(math.ceil((self.input_dim-1)/Nrows)); + # Retrieve architecture type (diff or se) and derive flag + archType = self.hardware.sramInfo.arch.name; + FLAG_SE_DP = (archType == '8T') or self.FLAG_QUANT; + FLAG_PL = self.FLAG_PL; + + # Wrap correct MAC_op function + if(archType == '6T'): + MAC_op = MAC_op_diff; + elif(archType == '8T'): + MAC_op = MAC_op_se; + else: + raise NameError('Error: selected architecture (cell type) not supported during FC layer compute !\n'); + + # Apply T_DP transform + # T_DP_conf = K.clip(round_through(self.m_T_DP),1,8)/8; + T_DP_conf = K.clip(round_through(self.m_T_DP),0,8-1); + + # Emulate CIM-SRAM analog DP operation, possibly with parallel macros + if(N_cim > 1): + # Separate inputs and weights in sub-matrices + inputs = tf.unstack(K.reshape(inputs,(-1,int(self.input_dim/N_cim),N_cim)),axis=-1) + W_bin = K.permute_dimensions(K.reshape(K.permute_dimensions(W_bin,(1,0)),(-1,int(self.input_dim/N_cim),N_cim)),(1,2,0)) + W_bin = tf.unstack(W_bin,axis=1) + # Perform CIM-SRAM operations over all sub-matrices (i.e. different CIM-SRAMs) + V_DP = []; V_BL = []; V_BLB = []; + for i in range(N_cim): + # Compute analog dot-product + V_DP_bin = MAC_op(self.hardware,inputs[i],W_bin[i],self.Vt_noise,T_DP_conf,self.EN_NOISE,self.FLAG_QUANT); + + # Reshape outputs and apply DTSE, whenever relevant + if(FLAG_SE_DP): + V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + V_DP.append(MBIT_weight(V_DP_bin,self.nb)); + else: + V_BL_bin = K.reshape(V_DP_bin[0],(-1,self.units,self.nb)); + V_BLB_bin = K.reshape(V_DP_bin[1],(-1,self.units,self.nb)); + # Perform DTSE + VDD_DTSE = self.hardware.sramInfo.VDD_DTSE; + if(FLAG_PL): + # Retrieve actual DTSE params + C_int_dtse = self.hardware.sramInfo.C_int_dtse.data; + C_L_dtse = self.hardware.sramInfo.C_L_dtse.data; + # Apply DTSE + V_DP = DTSE_PL(V_BL_bin,V_BLB_bin,C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb); + else: + V_DP = DTSE_ideal(V_BL_bin,V_BLB_bin,VDD_DTSE,0,self.nb); + + # Combine the result as if ideal charge-sharing (--> could implement actual charge-sharing !) 
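# --- Editor's illustration (not part of the patch) --------------------------
# A minimal NumPy sketch of the macro-splitting step above, assuming input_dim
# is an exact multiple of N_cim and ideal charge-sharing between macros; the
# helper name `split_dp_across_macros` is illustrative only.
import numpy as np

def split_dp_across_macros(x, w, n_cim):
    # Macro k receives the interleaved input columns k, k+n_cim, k+2*n_cim, ...
    # and the matching weight rows, mirroring the reshape/unstack logic above.
    partial = [x[:, k::n_cim] @ w[k::n_cim, :] for k in range(n_cim)]
    # Ideal charge-sharing: the analog outputs of the macros are averaged.
    return sum(partial) / n_cim

# Example: a 1024-input dot-product mapped onto two 512-row macros.
x_demo = np.random.randint(0, 4, size=(8, 1024)).astype(np.float32)
w_demo = np.sign(np.random.randn(1024, 64)).astype(np.float32)
v_dp_demo = split_dp_across_macros(x_demo, w_demo, n_cim=2)
# -----------------------------------------------------------------------------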
+ if(FLAG_SE_DP): + V_DP = K.sum(tf.stack(V_DP,axis=2),axis=2)/N_cim; + else: + V_BL = K.sum(tf.stack(V_BL,axis=2),axis=2)/N_cim; + V_BLB = K.sum(tf.stack(V_BLB,axis=2),axis=2)/N_cim; + V_DP = V_BL-V_BLB; + else: + # Compute analog dot-product + V_DP_bin = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,T_DP_conf,self.EN_NOISE,self.FLAG_QUANT); + # Reshape outputs and apply DTSE, whenever relevant + if(FLAG_SE_DP): + # Note: no support for PL + V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + V_DP = MBIT_weight(V_DP_bin,self.nb); + else: + V_BL_bin = K.reshape(V_DP_bin[0],(-1,self.units,self.nb)); + V_BLB_bin = K.reshape(V_DP_bin[1],(-1,self.units,self.nb)); + # Perform DTSE + VDD_DTSE = self.hardware.sramInfo.VDD_DTSE; + if(FLAG_PL): + # Retrieve actual DTSE params + C_int_dtse = self.hardware.sramInfo.C_int_dtse.data; + C_L_dtse = self.hardware.sramInfo.C_L_dtse.data; + # Apply DTSE + V_DP = DTSE_PL(V_DP_bin[0],V_DP_bin[1],C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb); + else: + V_DP = DTSE_ideal(V_DP_bin[0],V_DP_bin[1],VDD_DTSE,0,self.nb); + + # Add bias to PA (if used) + if self.use_bias: + if(self.FLAG_QUANT): + V_DP = K.bias_add(V_DP, self.bias) + else: + V_DP = K.bias_add(V_DP,self.bias); + # DBN: ADC quantization occurs at this point + if(self.FLAG_QUANT): + IAres = self.hardware.sramInfo.IAres + OAres = self.hardware.sramInfo.OAres + NB = self.hardware.sramInfo.NB.data + PAmax = (2**IAres-1)*NB + DRval = self.hardware.sramInfo.DR.data; + VDD = self.hardware.sramInfo.VDD.data; + #DO = V_DP; + DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); + # Return quantized output + return DO + # ABN: propagate the analog value + else: + return V_DP + + + def get_config(self): + config = {'H': self.H, + 'kernel_lr_multiplier': self.kernel_lr_multiplier, + 'bias_lr_multiplier': self.bias_lr_multiplier} + base_config = super(QuantizedDense, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class QuantizedConv2D(Conv2D): + '''Quantized Convolution2D layer + References: + "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] + ''' + def __init__(self, filters, m_T_DP=1, nRep=0, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', + bias_lr_multiplier=None, H=1., nb=16, padding_num=0, sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, **kwargs): + super(QuantizedConv2D, self).__init__(filters, **kwargs) + self.H = H + self.nb = nb + self.padding_num = padding_num + self.m_T_DP_init = m_T_DP + self.nRep = 2**nRep; + self.kernel_lr_multiplier = kernel_lr_multiplier + self.bias_lr_multiplier = bias_lr_multiplier + self.activity_regularizer =activity_regularizer + self.kernel_regularizer = kernel_regularizer + + self.sramInfo = sramInfo + self.hardware = None + self.Vt_noise = None + + self.EN_NOISE = EN_NOISE + self.FLAG_QUANT = FLAG_QUANT + self.FLAG_PL = FLAG_PL + self.IS_TRAINABLE_DP = IS_TRAINABLE_DP + + self.DTSE_LUT = None + + def build(self, input_shape): + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis] is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined. 
Found `None`.') + + # Replicate inputs along channel axis + # input_dim = self.nRep*input_shape[channel_axis] + input_dim = input_shape[channel_axis]; + kernel_shape = self.kernel_size + (input_dim, self.filters*self.nb) + + base = self.kernel_size[0] * self.kernel_size[1] + if self.H == 'Glorot': + nb_input = int(input_dim * base) + nb_output = int(self.filters * base) + self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) + #print('Glorot H: {}'.format(self.H)) + + if self.kernel_lr_multiplier == 'Glorot': + nb_input = int(input_dim * base) + nb_output = int(self.filters * base) + self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5/ (nb_input + nb_output))) + #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier)) + + self.kernel_constraint = Clip(-self.H, self.H) + self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) + #self.bias_initializer = initializers.RandomUniform(-self.H, self.H) + self.kernel = self.add_weight(shape=kernel_shape, + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) + + if self.use_bias: + self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] + self.bias = self.add_weight((self.filters*self.nb,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + + else: + self.lr_multipliers = [self.kernel_lr_multiplier] + self.bias = None + + # Train DP timing config or not + if self.IS_TRAINABLE_DP: + self.m_T_DP = self.add_weight(shape=(1,), + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_T_DP_init)), + regularizer = None, + constraint = None); + else: + self.m_T_DP = 1.; + + # Get DP electrical quantities for this layer + self.sramInfo.NB.data = base*input_dim; + # Generate hardware parameters + hardware = genHardware(self.sramInfo) + self.hardware = hardware + # Create V_th distribution + if(self.sramInfo.noise_at_inf): + if(self.sramInfo.IS_NUM): + self.Vt_noise = K.random_normal(shape=(K.int_shape(self.kernel)[-1],),mean=0.,stddev=1.); + else: + sig_Vth = self.hardware.sig_Vth + self.Vt_noise = K.random_normal(shape=K.int_shape(self.kernel),mean=0,stddev=sig_Vth) + # self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=0) + else: + if(self.sramInfo.IS_NUM): + self.Vt_noise = K.zeros_like(K.int_shape(self.kernel)[-1]); + else: + self.Vt_noise = K.zeros_like(K.int_shape(self.kernel)); + + # Recover DTSE best-fitting coefficients + self.DTSE_LUT = self.sramInfo.DTSE_LUT; + + # Set input spec. 
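# --- Editor's illustration (not part of the patch) --------------------------
# A small sketch recomputing the 'Glorot' clipping range H and the matching
# kernel learning-rate multiplier derived in build() above; the function name
# `glorot_clip_range` is illustrative only.
import numpy as np

def glorot_clip_range(kernel_size, c_in, filters):
    base = kernel_size[0] * kernel_size[1]
    nb_input, nb_output = c_in * base, filters * base
    H = np.float32(np.sqrt(1.5 / (nb_input + nb_output)))
    kernel_lr_multiplier = np.float32(1. / H)  # = 1/sqrt(1.5/(nb_in+nb_out))
    return H, kernel_lr_multiplier

# Example: 3x3 kernels, 64 input channels, 64 output filters.
H_demo, lr_mult_demo = glorot_clip_range((3, 3), 64, 64)
# -----------------------------------------------------------------------------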
+ self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) + self.built = True + + def call(self, inputs): + binary_kernel = binarize(self.kernel, H=self.H); + if self.data_format == 'channels_first': + binary_kernel = tf.tile(binary_kernel,(self.nRep,1,1,1)); + else: + binary_kernel = tf.tile(binary_kernel,(1,1,self.nRep,1)); + + # Retrieve architecture type (diff or se) and derive flag + archType = self.hardware.sramInfo.arch.name; + FLAG_SE_DP = (archType == '8T') or self.FLAG_QUANT; + + # Apply T_DP transform + # T_DP_conf = K.clip(round_through(self.m_T_DP),1,8)/self.m_T_DP_init; + T_DP_conf = K.clip(round_through(self.m_T_DP),0,8-1); + + # Replicate inputs along channel axis + if self.data_format == 'channels_first': + inputs = tf.tile(inputs,[1,self.nRep,1,1]); + else: + inputs = tf.tile(inputs,[1,1,1,self.nRep]); + + # Wrap correct CONV_op function + if(archType == '6T'): + CONV_op = CONV_op_diff; + elif(archType == '8T'): + CONV_op = CONV_op_se; + else: + raise NameError('Error: selected architecture (cell type) not supported during 2DCONV layer compute !\n'); + + inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier + inputs_qnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\ + * inverse_kernel_lr_multiplier + + # Compute analog-like column-wise CIM-SRAM equivalent conv2d + outputs_qnn_gradient = CONV_op( + self.hardware, + inputs_qnn_gradient, + binary_kernel, + self.Vt_noise, + T_DP_conf, + self.data_format, + self.padding_num, + self.EN_NOISE, + self.FLAG_QUANT) + + if(self.FLAG_QUANT): + V_DP_bin = (outputs_qnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient))\ + * self.kernel_lr_multiplier + else: + V_BL_bin = (outputs_qnn_gradient[0] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient[0]))\ + * self.kernel_lr_multiplier + V_BLB_bin = (outputs_qnn_gradient[1] - (1. 
- 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient[1]))\
+                                          * self.kernel_lr_multiplier
+
+        # Apply DTSE conversion, when relevant
+        if(FLAG_SE_DP):
+            V_DP_bin = K.reshape(V_DP_bin,(-1,self.filters,self.nb));
+            V_DP = MBIT_weight(V_DP_bin,self.nb);
+        else:
+            V_BL_bin  = K.reshape(V_BL_bin,(-1,self.filters,self.nb));
+            V_BLB_bin = K.reshape(V_BLB_bin,(-1,self.filters,self.nb));
+            # Perform DTSE
+            VDD_DTSE = self.hardware.sramInfo.VDD_DTSE;
+            # Post-layout model
+            if(self.FLAG_PL):
+                # Retrieve actual DTSE params
+                C_int_dtse = self.hardware.sramInfo.C_int_dtse.data;
+                C_L_dtse = self.hardware.sramInfo.C_L_dtse.data;
+                # Apply DTSE
+                V_DP = DTSE_PL(V_BL_bin,V_BLB_bin,C_int_dtse,C_L_dtse,self.DTSE_LUT,VDD_DTSE,self.nb);
+            # Ideal model (same argument order as in the FC layer)
+            else:
+                V_DP = DTSE_ideal(V_BL_bin,V_BLB_bin,VDD_DTSE,0,self.nb);
+
+        # DBN: apply ADC quantization
+        if(self.FLAG_QUANT):
+            IAres = self.hardware.sramInfo.IAres
+            OAres = self.hardware.sramInfo.OAres
+            NB = self.hardware.sramInfo.NB.data
+            PAmax = (2**IAres-1)*NB
+            DRval = self.hardware.sramInfo.DR.data;
+            VDD = self.hardware.sramInfo.VDD.data;
+            # Return digitized output
+            DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType);
+            return DO
+        # ABN: propagate analog value
+        else:
+            return V_DP
+
+
+    def get_config(self):
+        config = {'H': self.H,
+                  'kernel_lr_multiplier': self.kernel_lr_multiplier,
+                  'bias_lr_multiplier': self.bias_lr_multiplier}
+        base_config = super(QuantizedConv2D, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
+# Aliases
+
+QuantizedConvolution2D = QuantizedConv2D
diff --git a/layers/quantized_ops.py b/layers/quantized_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..980d741c8248572de84c4fd75ed985b44cc34336
--- /dev/null
+++ b/layers/quantized_ops.py
@@ -0,0 +1,247 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+import keras.backend as K
+import tensorflow as tf
+import numpy as np
+
+
+def round_through(x):
+    '''Element-wise rounding to the closest integer with full gradient propagation.
+    A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182)
+    '''
+    rounded = K.round(x)
+    rounded_through = x + K.stop_gradient(rounded - x)
+    return rounded_through
+
+
+def clip_through(x, min_val, max_val):
+    '''Element-wise clipping with full gradient propagation.
+    Analogous to round_through
+    '''
+    clipped = K.clip(x, min_val, max_val)
+    clipped_through = x + K.stop_gradient(clipped - x)
+    return clipped_through
+
+
+def _hard_sigmoid(x):
+    '''Hard sigmoid different from the more conventional form (see definition of K.hard_sigmoid).
+
+    # Reference:
+    - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830)
+
+    '''
+    return K.clip((x+1)/2, 0, 1)
+
+
+
+
+def quantize_old(W, nb = 16, clip_through=False):
+
+    '''The weights' binarization function,
+
+    # Reference:
+    - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al.
2016](http://arxiv.org/abs/1602.02830} + + ''' + + non_sign_bits = nb-1 + m = pow(2,non_sign_bits) + #W = tf.Print(W,[W],summarize=20) + if clip_through: + Wq = clip_through(round_through(W*m),-m,m-1)/m + else: + Wq = K.clip(round_through(W*m),-m,m-1)/m + #Wq = tf.Print(Wq,[Wq],summarize=20) + return Wq + + +def quantized_relu(W, nb=16): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + #non_sign_bits = nb-1 + #m = pow(2,non_sign_bits) + #Wq = K.clip(round_through(W*m),0,m-1)/m + + nb_bits = nb + Wq = K.clip(2. * (round_through(_hard_sigmoid(W) * pow(2, nb_bits)) / pow(2, nb_bits)) - 1., 0, + 1 - 1.0 / pow(2, nb_bits - 1)) + return Wq + + +def quantized_tanh(W, nb=16): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + non_sign_bits = nb-1 + m = pow(2,non_sign_bits) + #W = tf.Print(W,[W],summarize=20) + Wq = K.clip(round_through(W*m),-m,m-1)/m + #Wq = tf.Print(Wq,[Wq],summarize=20) + return Wq + +def quantized_leakyrelu(W, nb=16, alpha=0.1): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + if alpha != 0.: + negative_part = tf.nn.relu(-W) + W = tf.nn.relu(W) + if alpha != 0.: + alpha = tf.cast(tf.convert_to_tensor(alpha), W.dtype.base_dtype) + W -= alpha * negative_part + + non_sign_bits = nb-1 + m = pow(2,non_sign_bits) + #W = tf.Print(W,[W],summarize=20) + Wq = clip_through(round_through(W*m),-m,m-1)/m + #Wq = tf.Print(Wq,[Wq],summarize=20) + + return Wq + +def quantized_maxrelu(W, nb=16): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + non_sign_bits = nb-1 + max_ = tf.reduce_max((W)) + #max_ = tf.Print(max_,[max_]) + max__ = tf.pow(2.0,tf.ceil(tf.log(max_)/tf.log(tf.cast(tf.convert_to_tensor(2.0), W.dtype.base_dtype)))) + #max__ = tf.Print(max__,[max__]) + m = pow(2,non_sign_bits) + #W = tf.Print(W,[W],summarize=20) + Wq = max__*clip_through(round_through(W/max__*(m)),0,m-1)/(m) + #Wq = tf.Print(Wq,[Wq],summarize=20) + + return Wq + +def quantized_leakymaxrelu(W, nb=16, alpha=0.1): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 
2016](http://arxiv.org/abs/1602.02830} + + ''' + if alpha != 0.: + negative_part = tf.nn.relu(-W) + W = tf.nn.relu(W) + if alpha != 0.: + alpha = tf.cast(tf.convert_to_tensor(alpha), W.dtype.base_dtype) + W -= alpha * negative_part + + max_ = tf.reduce_max((W)) + #max_ = tf.Print(max_,[max_]) + max__ = tf.pow(2.0,tf.ceil(tf.log(max_)/tf.log(tf.cast(tf.convert_to_tensor(2.0), W.dtype.base_dtype)))) + #max__ = tf.Print(max__,[max__]) + + non_sign_bits = nb-1 + m = pow(2,non_sign_bits) + #W = tf.Print(W,[W],summarize=20) + Wq = max__* clip_through(round_through(W/max__*m),-m,m-1)/m + #Wq = tf.Print(Wq,[Wq],summarize=20) + + return Wq + + + +def _mean_abs(x, axis=None, keepdims=False): + return K.stop_gradient(K.mean(K.abs(x), axis=axis, keepdims=keepdims)) + + +def xnorize(W, H=1., axis=None, keepdims=False): + Wb = quantize(W, H) + Wa = _mean_abs(W, axis, keepdims) + + return Wa, Wb + +######################################################################################################## + +def quantize(W, nb = 16, clip_through=False): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + + m = pow(2,nb) + #W = tf.Print(W,[W],summarize=20) + if clip_through: + Wq = (2*round_through((m-1)*clip_through(0.5*W+0.5,0,1))-(nb/2))/(nb/2) + else: + Wq = (2/(m-1)*round_through((m-1)*K.clip(0.5*W+0.5,0,1))-1)/(m/2) + # Wq = K.clip(round_through(W*m),-m,m-1)/m +# Wq = tf.Print(Wq,[Wq],summarize=20) + return Wq + +def quantized_sigmoid(x, nb=16): + + '''The weights' binarization function, + + # Reference: + - [QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830} + + ''' + non_sign_bits = nb-1 + m = pow(2,non_sign_bits) + #W = tf.Print(W,[W],summarize=20) + xq = K.clip(round_through(m*x),-m,m-1)/m + #Wq = tf.Print(Wq,[Wq],summarize=20) + return xq + +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. 
+ A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.floor(x) + floored_through = x + K.stop_gradient(floored - x) + return floored_through + +def quantADC(x,maxMAC=16,OAres=1): + # We consider differential operators + if(pow(2,OAres)>=2*maxMAC+1): + x_q = x+maxMAC; + else: + x_q = floor_through((x+maxMAC)/(2*maxMAC+1)*pow(2,OAres)) + # Prevent overflow + K.switch(K.equal(x_q,2^OAres),(2^OAres-1)*K.ones_like(x_q),x_q) + return x_q + +# Numpy input quantization +def quant_input(x,IAres): + # Quantize between 0 and 2^IAres + m = pow(2,IAres)-1; + y = m*(x+1)/2; + return np.around(y,decimals=0); + +def my_quantized_relu(x,IAres): + # Quantize between 0 and 2^IAres-1 + y = K.clip(round_through(_hard_sigmoid(x) * pow(2, IAres)),0,pow(2,IAres)-1) + return y + diff --git a/models/ABN_current.py b/models/ABN_current.py new file mode 100644 index 0000000000000000000000000000000000000000..ccf85dfa7dc028b546704fb0f198db3f8e18840a --- /dev/null +++ b/models/ABN_current.py @@ -0,0 +1,344 @@ +########################################################## +######### HARDWARE MODEL FOR CURRENT-BASED ABN ########### +########################################################## +import sys +import numpy as np +import keras.backend as K +import tensorflow as tf +import tensorflow_probability as tfp + +# /// Hardcoded fitting - external beta/gamma /// +def ABN_current_model_hard_ext(hardware,V_DP,gamma_eq=1.,beta_eq=0.): + # // Retrieve transistor characteristics // + Vt = hardware.Vt_abn; + mu = hardware.mu; + e_ox = hardware.e_ox; t_ox = hardware.t_ox; + C_ox = e_ox/t_ox; beta_0 = mu*C_ox; + Vea = hardware.Vea; eta = hardware.eta; + n = hardware.n_body; Ut = hardware.Ut; + # Retrieve transistor dimensions + W = hardware.sramInfo.W_ABN.data; + L = hardware.sramInfo.L_ABN.data; + # Retrieve load and parasitic caps + C_ABN = hardware.sramInfo.C_ABN.data; + C_paras = 0; + C_tot = C_ABN + C_paras; + # Retrieve equivalent ABN duration + T_ABN = hardware.sramInfo.T_ABN.data; + T_slew_eq = 18e-12; + T_ABN = T_ABN + T_slew_eq; + # Sub-threshold current + I_S0 = 0.18e-6*W/L; + # // Empirical correction factors // + a1 = 0.9; a2 = 5; a3 = 0.65; + b1 = 0; + dV_mid = 0.08; + # // Charge-injection equations // + dV_rise = 2.097e-3+(2.098-2.037)/60.14*V_DP; + dV_fall = -0.2e-3+(-0.2+0.319)/60.14*V_DP; + Vini = hardware.sramInfo.VDD.data + dV_rise; + # // Integrated drain current equations // + Vov = V_DP - Vt; + # Weak inversion + # I_weak = Ut*K.log(1+(K.exp(Vini/Ut)-1)*K.exp(-(I_S0*T_ABN)/(Ut*C_tot)*K.exp(Vov/(n*Ut)))); + V_weak = Vini-(I_S0*T_ABN)/(Ut*C_tot)*K.exp(Vov/(n*Ut)); + # Moderate inversion + # I_mod = Ut*K.log(1+(K.exp(Vini/Ut)-1)*K.exp(-a3*(I_S0*T_ABN)/(Ut*C_tot)*K.exp(Vov/(n*Ut)))); + V_mod = Vini-(a3*I_S0*T_ABN)/(Ut*C_tot)*K.exp(Vov/(n*Ut)); + # Strong inversion + I_sat = a1*beta_0/2*W/L*Vov*Vov/(1+a2*Vov) + T_sat = (Vini-Vov)*C_tot/I_sat; + V_sat = Vini - T_ABN/C_tot*I_sat; + V_tri = Vov*K.exp(-a1*beta_0*W/L*K.abs(T_ABN-T_sat)/C_tot*K.pow(Vov,1.8)); + # // Choose result depending on the input and output // + V_strong = K.switch(K.greater_equal(V_sat,Vov),V_sat,V_tri); + V_isMid = K.switch(K.greater_equal(Vov,dV_mid),V_strong,V_mod); + V_out = K.switch(K.greater_equal(Vov,K.zeros_like(Vov)),V_isMid,V_weak); + # V_out = K.switch(K.greater_equal(V_sat,Vini-Vov),V_sat,V_sat); + # Return ABN output + V_ABN = V_out + dV_fall; + # Apply scaling and bias linearly + V_ABN = gamma_eq*V_ABN+beta_eq; + return V_ABN + +# /// Hardcoded fitting, gain only current /// +def 
ABN_current_model_hard(hardware,V_DP,gamma_eq=1.,beta_eq=0.): + # // Retrieve transistor characteristics // + Vt = hardware.Vt_abn; + mu = hardware.mu; + e_ox = hardware.e_ox; t_ox = hardware.t_ox; + C_ox = e_ox/t_ox; beta_0 = mu*C_ox; + Vea = hardware.Vea; eta = hardware.eta; + n = hardware.n_body; Ut = hardware.Ut; + VDD = hardware.sramInfo.VDD.data; + # Retrieve transistor dimensions + W = hardware.sramInfo.W_ABN.data; + L = hardware.sramInfo.L_ABN.data; + # Retrieve load and parasitic caps + C_ABN = hardware.sramInfo.C_ABN.data; + C_paras = 0; + C_tot = C_ABN + C_paras; + # Retrieve equivalent ABN duration + T_ABN = hardware.sramInfo.T_ABN.data; + T_slew_eq = 18e-12; + T_ABN = T_ABN + T_slew_eq; + # Sub-threshold current + I_S0 = 0.18e-6*W/L; + # // Empirical correction factors // + a1 = 0.9; a2 = 5; a3 = 0.65; + b1 = 0; + dV_mid = 0.08; + # // Normalize beta_eq // + # beta_eq = beta_eq/(gamma_eq*T_ABN/C_tot); + # // Charge-injection equations // + dV_rise = 2.097e-3+(2.098-2.037)/60.14*V_DP; + dV_fall = -0.2e-3+(-0.2+0.319)/60.14*V_DP; + Vini = hardware.sramInfo.VDD.data + dV_rise; + # // Integrated drain current equations // + Vov = V_DP - Vt + # Weak inversion + # I_weak = Ut*K.log(1+(K.exp(Vini/Ut)-1)*K.exp(-(I_S0*T_ABN)/(Ut*C_tot)*K.exp(Vov/(n*Ut)))); + V_weak = I_S0*K.exp(Vov/(n*Ut))*gamma_eq*T_ABN/C_tot+beta_eq; + # Moderate inversion + # I_mod = Ut*K.log(1+(K.exp(Vini/Ut)-1)*K.exp(-a3*(I_S0*T_ABN)/(Ut*C_tot)*K.exp(Vov/(n*Ut)))); + V_mod = I_S0*K.exp(a3*Vov/(n*Ut))*gamma_eq*T_ABN/C_tot+beta_eq; + # Strong inversion + I_sat = a1*beta_0/2*W/L*Vov*Vov/(1+a2*Vov) + T_sat = (Vini-Vov-beta_eq)*C_tot/(gamma_eq*I_sat); + V_sat = gamma_eq*T_ABN/C_tot*I_sat+beta_eq; + I_tri = a1*beta_0*W/L*K.pow(Vov,1.8); + V_tri = (Vini-Vov)*K.exp(-I_tri*gamma_eq*(T_ABN-T_sat)/C_tot+beta_eq); + # // Choose result depending on the input and output // + V_strong = K.switch(K.less_equal(V_sat,Vini-Vov),V_sat,V_sat); + V_isMid = K.switch(K.greater_equal(Vov,dV_mid),V_strong,V_mod); + V_out = K.switch(K.greater_equal(Vov,K.zeros_like(Vov)),V_isMid,V_weak); + # V_out = K.switch(K.greater_equal(V_sat,Vini-Vov),V_sat,V_sat); + # Return ABN output + V_ABN = V_out + dV_fall; + + return V_ABN; + +# /// Hardcoded fitting, quantized gain and offset /// +def ABN_current_model_hard_quant(hardware,V_DP,gamma_eq=K.constant(1.),beta_eq=K.constant(0.),gamma_range=None,ABNstates=None): + # // Retrieve transistor characteristics // + Vt = hardware.Vt_abn; + mu = hardware.mu; + e_ox = hardware.e_ox; t_ox = hardware.t_ox; + C_ox = e_ox/t_ox; beta_0 = mu*C_ox; + Vea = hardware.Vea; eta = hardware.eta; + n = hardware.n_body; Ut = hardware.Ut; + VDD = hardware.sramInfo.VDD.data; + # Retrieve transistor dimensions + W = hardware.sramInfo.W_ABN.data; + L = hardware.sramInfo.L_ABN.data; + # Retrieve load and parasitic caps + C_ABN = hardware.sramInfo.C_ABN.data; + C_paras = 0; + C_tot = C_ABN + C_paras; + # Retrieve equivalent ABN duration + T_ABN = hardware.sramInfo.T_ABN.data; + T_slew_eq = 18e-12; + T_ABN = T_ABN + T_slew_eq; + # Sub-threshold current + I_S0 = 0.2e-6*W/L; + # ABN resolution + Ns_gamma = ABNstates[0]; + Ns_beta = ABNstates[1]; + # Set V_beta range + Vmax_beta = VDD/4; # 100mV, before DTSE + + # // Empirical correction factors // + a1 = 0.9; a2 = 5; a3 = 0.65; + b1 = 0; + dV_mid = 0.08; + # // Charge-injection equations // + dV_rise = 2.097e-3+(2.098-2.037)/60.14*V_DP; + dV_fall = -0.2e-3+(-0.2+0.319)/60.14*V_DP; + Vini = VDD + dV_rise; + # // Quantize current gain and offset // + # Offset + dVt = beta_eq/gamma_eq; 
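# --- Editor's illustration (not part of the patch) --------------------------
# Standalone NumPy sketch of the gain/offset quantization carried out in the
# next few lines: the offset is expressed as an equivalent Vt shift snapped to
# a Vmax_beta/Ns_beta grid, and the gain is rounded to one of Ns_gamma integer
# configurations. The DTSE compression factor applied in the actual model is
# omitted here, and all names are illustrative only.
import numpy as np

def quantize_abn_params(gamma_eq, beta_eq, ns_gamma, ns_beta, vmax_beta):
    dvt = beta_eq / gamma_eq
    v_lsb = vmax_beta / ns_beta
    dvt_q = np.sign(dvt) * np.clip(np.floor(np.abs(dvt) / v_lsb) * v_lsb, 0, vmax_beta)
    gamma_q = np.clip(np.round(gamma_eq), 0, ns_gamma - 1)
    return gamma_q, dvt_q

# Example: 3-bit gain, 3-bit offset, 0.25 V offset range.
gamma_q_demo, dvt_q_demo = quantize_abn_params(2.7, 0.05, 8, 8, 0.25)
# -----------------------------------------------------------------------------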
+ dVt_abs = K.abs(dVt); Vlsb = Vmax_beta/Ns_beta; + dVt_abs = K.clip(floor_through(dVt_abs/Vlsb)*Vlsb,0,Vmax_beta); + dVt_abs = 1/2*dVt_abs; # V_beta compressed during DTSE + dVt = K.switch(K.greater_equal(dVt,K.zeros_like(dVt)),dVt_abs,-dVt_abs); + # Gain + gamma_eq = K.clip(round_through(gamma_eq),0,Ns_gamma-1); + + # // Integrated drain current equations // + Vov = V_DP - Vt; + alpha_I = a1*beta_0/2*W/L; + tau_ABN = gamma_eq*T_ABN/C_tot; # quantized value now + # Compute effective Vov + Vov_eff = Vov+dVt; + # Weak inversion + # I_weak = I_S0*K.exp(Vov_eff/(n*Ut))*(1-K.exp(-VDD/Ut)); + exp_Vov_weak = K.exp((Vov_eff-0.012)/(n*Ut)); + C_ini = (K.exp(Vini/Ut)-1); + t_weak_eq = I_S0/Ut*tau_ABN; + V_weak = Ut*K.log(1+C_ini*K.exp(-t_weak_eq*exp_Vov_weak)); + # Moderate inversion + isMod = K.greater_equal(Vov_eff,K.zeros_like(Vov_eff)); + # I_mod = I_S0*K.exp(a3*Vov_eff/(n*Ut))*(1-K.exp(-VDD/Ut)); + V_mod = Ut*K.log(1+C_ini*K.exp(-t_weak_eq*K.pow(exp_Vov_weak,a3))); + # Strong inversion (saturation) + isSat = K.greater_equal(Vov_eff,dV_mid*K.ones_like(Vov_eff)); + I_sat = alpha_I*(Vov_eff*Vov_eff/(1+a2*K.abs(Vov_eff))); + V_sat = Vini - I_sat*tau_ABN; + # Get ABN voltage corresponding to the operating region + V_ABN = K.switch(isSat,V_sat,K.switch(isMod,V_mod,V_weak)); + V_ABN = V_ABN + dV_fall; + + return V_ABN; + +# /// Idealized, linear version of the current /// +def ABN_current_model_simple(hardware,V_DP,gamma_eq=K.constant(1.),beta_eq=K.constant(0.),gamma_range=None,ABNstates=None): + # // Retrieve transistor characteristics // + Vt = hardware.Vt_abn; + mu = hardware.mu; + e_ox = hardware.e_ox; t_ox = hardware.t_ox; + C_ox = e_ox/t_ox; beta_0 = mu*C_ox; + Vea = hardware.Vea; eta = hardware.eta; + n = hardware.n_body; Ut = hardware.Ut; + VDD = hardware.sramInfo.VDD.data; + # Retrieve transistor dimensions + W = hardware.sramInfo.W_ABN.data; + L = hardware.sramInfo.L_ABN.data; + # Retrieve load and parasitic caps + C_ABN = hardware.sramInfo.C_ABN.data; + C_paras = 0; + C_tot = C_ABN + C_paras; + # Retrieve equivalent ABN duration + T_ABN = hardware.sramInfo.T_ABN.data; + T_slew_eq = 18e-12; + T_ABN = T_ABN + T_slew_eq; + # Sub-threshold current + I_S0 = 0.2e-6*W/L; + # ABN resolution + Ns_gamma = ABNstates[0]; + Ns_beta = ABNstates[1]; + # // Empirical correction factors // + a1 = 0.9; a2 = 5; a3 = 0.65; + b1 = 0; + # // Normalize beta_eq // + # beta_eq = beta_eq/(gamma_eq*T_ABN/C_tot); + # // Charge-injection equations // + Vini = hardware.sramInfo.VDD.data; + # // Quantize current gain and offset // + gamma_eq = K.clip(round_through(gamma_eq),0,Ns_gamma-1); + beta_eq = beta_eq; + # // Integrated drain current equations // + Vov = V_DP - Vt; + alpha_I = a1*beta_0/2*W/L; + tau_ABN = gamma_eq*T_ABN/C_tot; # quantized value now + # Compute Vt shift + dVt = beta_eq/tau_ABN/alpha_I; + # Ideal linear saturation current response + I_sat = alpha_I*(Vov+dVt)*(Vov+dVt); + V_sat = Vini-I_sat*tau_ABN; + V_ABN = K.switch(K.greater_equal(Vov+dVt,K.zeros_like(Vov)),V_sat,K.zeros_like(Vov)); + + + # Return ABN output + return V_ABN; + +# /// Simple Vt shift to get the actual input mean /// +def ABN_Vt_shift(hardware,V_DP): + # // Retrieve transistor characteristics // + Vt = hardware.mu_Vt_abn; + # // Compute Vt shift // + V_out = V_DP-Vt-0.03; + return V_out; + +# /// 2D-interpolated ABN /// +def ABN_current_interp(hardware,ABN_lookup,V_DP,gamma_eq=K.constant(1.),beta_eq=K.constant(0.),gamma_range=None,ABNstates=None): + # // Retrieve local variables // + # Resolution + Ns_gamma = ABNstates[0]; + Ns_beta 
= ABNstates[1]; + # Beta range + VDD = hardware.sramInfo.VDD.data; + Vmax_beta = VDD; + # // Quantize current gain and offset // + # Offset + dVt = beta_eq/gamma_eq; + dVt_abs = K.abs(dVt); Vlsb = Vmax_beta/Ns_beta; + dVt_abs = K.clip(floor_through(dVt_abs/Vlsb)*Vlsb,0,Vmax_beta); + dVt = K.switch(K.greater_equal(dVt,K.zeros_like(dVt)),dVt_abs,-dVt_abs); + # Gain + gamma_eq = K.clip(round_through(gamma_eq),1,Ns_gamma); + # // Apply offset during DTSE (i.e. prior to ABN unit) // + V_DP = V_DP + dVt; + # // 2D-interpolation to apply ABN // + x_int = tf.stack([gamma_eq*K.ones_like(V_DP),V_DP],axis=2); + V_ABN = tfp.math.batch_interp_regular_nd_grid( + x_int,x_ref_min=[1,0],x_ref_max=[Ns_gamma,VDD],y_ref=ABN_lookup,axis=-2); + + return V_ABN; + +# /// Make 2D-lookup of coefficients for bilinear interpolation /// +# /// Bilinear equation: f(x,y) = a0 + a1*x +a2*y+a3*x*y +def makeLookupABN(ABN_lookup,x1_max,N1,x2_max,N2): + # Deduce input vectors + x1_vec = np.linspace(1,x1_max,N1); + x2_vec = np.linspace(0,x2_max,N2); + # Fill-in lookup for model coefficients + linearLookup = np.zeros((N1-1,N2-1,4)); + for i in range(N1-1): + for j in range(N2-1): + # Compute common den + den = (x1_vec[i]-x1_vec[i+1])*(x2_vec[j]-x2_vec[j+1]); + # Compute coefficients + a0 = ABN_lookup[i,j]*x1_vec[i+1]*x2_vec[j+1]-ABN_lookup[i,j+1]*x1_vec[i+1]*x2_vec[j] \ + -ABN_lookup[i+1,j]*x1_vec[i]*x2_vec[j+1]+ABN_lookup[i+1,j+1]*x1_vec[i]*x2_vec[j]; + a0 = a0/den; + a1 = -ABN_lookup[i,j]*x2_vec[j+1]+ABN_lookup[i,j+1]*x2_vec[j] \ + +ABN_lookup[i+1,j]*x2_vec[j+1]-ABN_lookup[i+1,j+1]*x2_vec[j]; + a1 = a1/den; + a2 = -ABN_lookup[i,j]*x1_vec[i+1]+ABN_lookup[i,j+1]*x1_vec[i+1] \ + +ABN_lookup[i+1,j]*x1_vec[i]-ABN_lookup[i+1,j+1]*x1_vec[i]; + a2 = a2/den; + a3 = ABN_lookup[i,j]-ABN_lookup[i,j+1]-ABN_lookup[i+1,j]+ABN_lookup[i+1,j+1]; + a3 = a3/den; + # Fill lookup + linearLookup[i,j,::] = np.array([a0,a1,a2,a3]); + # Make numpy array into constant tensor + linearLookup = linearLookup.astype("float32"); + linearLookup = tf.constant(linearLookup); + # Return table + return linearLookup; +# /// Apply custom 2D interpolation /// +def doInterpABN(ABN_lookup,x1,x2,x1_max,N1,x2_max,N2): + # Possibly reshape x2 if CONV layer + x2_shape = tf.shape(x2); + Vlsb = x2_max/(N2-1); + x2 = floor_through(tf.reshape(x2/Vlsb,(-1,x2_shape[-1])))*Vlsb; + # Get indices + ind_x1 = K.clip(tf.math.floor(x1/x1_max*N1),0,(N1-1)-1); ind_x1 = K.cast(ind_x1,"int32"); + ind_x2 = K.clip(tf.math.floor(x2/x2_max*(N2-1)),0,(N2-1)-1); ind_x2 = K.cast(ind_x2,"int32"); + # Get corresponding coefficients + coef_vec = tf.gather_nd(ABN_lookup,tf.stack([ind_x1*K.ones_like(ind_x2),ind_x2],axis=2)); + # Perform interpolation + V_ABN = coef_vec[...,0]+coef_vec[...,1]*x1+coef_vec[...,2]*x2+coef_vec[...,3]*x1*x2; + + # Possibly reshape V_ABN to CONV format + V_ABN = tf.reshape(V_ABN,x2_shape); + # Return interpolated result + return V_ABN + +## ///////////////////// Internal functions ////////////////////// +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; + +def round_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. 
+ A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + rounded = K.round(x); + rounded_through = x + K.stop_gradient(rounded - x); + return rounded_through; diff --git a/models/ADC.py b/models/ADC.py new file mode 100644 index 0000000000000000000000000000000000000000..79a27269e15cd6e4101bb87b43105c96d5e56cfd --- /dev/null +++ b/models/ADC.py @@ -0,0 +1,128 @@ +############################################# +###### This file models ADC operations ###### +############################################# +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import tensorflow.math as math +import numpy as np + +from keras.layers import Layer + +def log2(x): + num = math.log(float(x)) + den = math.log(tf.constant(2,dtype=num.dtype)) + return (num/den) + +def floor_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; + +def ceil_through(x): + '''Element-wise rounding to the closest integer with full gradient propagation. + A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + ceiled = math.ceil(x); + ceiled_through = x + K.stop_gradient(ceiled - x); + return ceiled_through; + +def clip_ssb(x,H=1,b=1.0): + ''' S. Darabi 2020, SSb activation ''' + x = x/H; + sigmo = K.sigmoid(b*(x-H/2)) + x_ssb = H*(sigmo*(1+b*(x-H/2)*(1-sigmo))); + return x_ssb; + +# Uniform differential ADC quantization with specified offset +def quant_uni(x,maxVal,dynRange,VDD,OAres,offset,archType): + # Compute ADC thresholds (dynRange is single-ended val) + if(archType == "6T"): + digOut = K.clip(floor_through((x+dynRange+offset)/dynRange*pow(2,OAres-1)),0,pow(2,OAres)-1); + elif(archType == "8T"): + digOut = K.clip(floor_through((VDD-(x-offset))/dynRange*pow(2,OAres)),0,pow(2,OAres)-1); + else: + error("Cell type not supported during uniform-ADC conversion !"); + return digOut; + +# Calibrated ADC quantization, corresponding to the specified OAres and hardware parameters +def quant_cal(x,code_vec): + digOut = K.sum(K.greater_equal(x,code_vec),axis=-1); + return digOut; + +# Class object of a trainable ADC, with weights initialized as uniform quantization +class Quant_train(Layer): + # Init method + def __init__(self, sramInfo,ABN_INC_ADC, **kwargs): + # Output precision + OAres = sramInfo.IAres; + self.Nstates = pow(2,OAres); + # Hardware config + self.GND = sramInfo.GND.data; + self.VDD = sramInfo.VDD_ADC.data; + self.DR = sramInfo.DR.data; + self.arch = sramInfo.arch.name; + # ABN including ADC flag + self.ABN_INC_ADC = ABN_INC_ADC; + + super(Quant_train,self).__init__(**kwargs); + # Build method + def build(self,input_shape): + # Initializer + VDD = self.VDD; + Nstates = self.Nstates; + + # def adc_init(shape,dtype=None): + # return (-VDD + 2*K.arange(1,Nstates,dtype=dtype)/Nstates*VDD); + + # self.kernel = self.add_weight(shape = (Nstates-1,), + # name='kernel', + # initializer = 'zeros' + # ); + super(Quant_train,self).build(input_shape); + # Call method + def call(self,inputs): + GND = self.GND + VDD = self.VDD; # 2.4V + DR = self.DR; + Nstates = self.Nstates; + # Different conversions (ADCs) depending on the architecture + if(self.arch == '6T'): + # Compute differential value + V_in = inputs; + # V_in = inputs + (VDD/6)*K.random_normal(shape=tf.shape(inputs),mean=0.,stddev=1.,dtype='float32'); + # Quantize the output result + 
if(self.ABN_INC_ADC): + DO = K.clip(floor_through(V_in/(VDD)*Nstates),0,Nstates-1); + else: + DO = K.clip(floor_through(V_in/(VDD/2)*Nstates),0,Nstates-1); + # tf.print("V_ABN at ADC input",V_in[0]); + # tf.print("D_out",DO[0],summarize=32); + elif(self.arch == '8T'): + # Retrieve & clip single-ended result + # V_DP = K.clip(inputs,GND,VDD); + V_DP = inputs; + # tf.print("V_RBL_BN",V_DP[0],summarize=5); + # Define ADC thresholds, from baseline uniform + quant_lvl = K.arange(1,Nstates,dtype=K.dtype(V_DP))/Nstates*VDD; + # Compute the digital output iteratively + DO = K.zeros_like(V_DP) + for i in range(self.Nstates-1): + DO = DO + ceil_through(K.clip(V_DP-quant_lvl[i],-0.9,0.9)); + # Invert output code as V_DP(DP) is a decreasing function ! + DO = (Nstates-1)*K.ones_like(DO) - DO; + else: + raise NameError('Error: selected architecture (cell type) not supported during ADC quantization !\n'); + # Return the digitized output + return DO; + + # Other methods below + + def get_config(self): + config = {'Nstates':self.Nstates,'VDD':self.VDD}; + base_config = super(Quant_train,self).get_config(); + return dict(list(base_config.items()) + list(config.items())) + \ No newline at end of file diff --git a/models/Analog_DP.py b/models/Analog_DP.py new file mode 100644 index 0000000000000000000000000000000000000000..b860aec2eed461de0901af7008a956439683534b --- /dev/null +++ b/models/Analog_DP.py @@ -0,0 +1,226 @@ +############################################## +###### This file models different implementations ###### +###### of the analog DP in-memory computing operator ###### +############################################## +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import numpy as np +import math +from utils.linInterp import doInterpDP_2D + +########################### NUMERICAL, INTERPOLATION-BASED MODELS ############################### +def int_BL_num(hardware,IA,W_array,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE): + # Local variables + Nrows = hardware.sramInfo.Nrows.data; + IAres = hardware.sramInfo.IAres; + Ndp = (2**IAres-1)*Nrows; + T_DP_vec = hardware.sramInfo.T_DP_vec; + # Compute ideal BL/BLB results to get indexes + BL_ind = K.dot(IA,W_array); + y_shape = tf.shape(BL_ind); + # tf.print("BL index",BL_ind[0][0:8]); + # Perform 3D interpolation (--- 2D until we get 3D tables ---) + V_BL_nom_0 = doInterpDP_2D(BL_LUT[...,0],T_DP_conf,BL_ind[...,0::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom_1 = doInterpDP_2D(BL_LUT[...,1],T_DP_conf,BL_ind[...,1::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom_2 = doInterpDP_2D(BL_LUT[...,2],T_DP_conf,BL_ind[...,2::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom_3 = doInterpDP_2D(BL_LUT[...,3],T_DP_conf,BL_ind[...,3::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom = tf.stack([V_BL_nom_0,V_BL_nom_1,V_BL_nom_2,V_BL_nom_3],axis=-1); + # tf.print("V_BL",V_BL_nom[0][0:8]); + # Reshape into the right order + V_BL_nom = tf.reshape(V_BL_nom,y_shape); + + if(EN_NOISE): + sig_V_BL = doInterpDP_2D(sig_BL_LUT[...,0],T_DP_conf,BL_ind,T_DP_vec,Ndp,Nrows//8); + # Reshape into the right order + dist_train = K.random_normal(shape=tf.shape(V_BL_nom),mean=0.,stddev=1.,dtype='float32'); + sig_V_BL = sig_V_BL*dist_train; + # tf.print("sig V_BL",sig_V_BL[0]); + else: + sig_V_BL = K.zeros_like(V_BL_nom); + V_BL = V_BL_nom + sig_V_BL; + return V_BL; + +##################################### ANALYTICAL MODELS ######################################### +# This model supposes the following: +# - Access transistors always in saturation +# - OFF current negligeable +# - SR and 
IR droops neglected +# - Mismatch considered +# - Simple models for Early effect (DIBL not considered) + +# Voltage integration Model with DAC input +def int_BL_DAC(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): + # Retrieve resolution + IAres = hardware.sramInfo.IAres; Nstates = pow(2,IAres); + # Retrieve access transistor characteristics + mu_Vt = hardware.mu_Vth; + sig_Vt = hardware.sig_Vth; + mu = hardware.mu; + e_ox = hardware.e_ox; t_ox = hardware.t_ox; + C_ox = e_ox/t_ox; + Vea = hardware.Vea; eta = hardware.eta; + Ut = hardware.Ut; n_body = hardware.n_body; + # Retrieve transistor dimensions + W = hardware.sramInfo.W_acc.data; + L = hardware.sramInfo.L_acc.data; + # Retrieve total BL capacitance + C_BL = hardware.C_tot; + # Retrieve DC & IC voltages + VDD = hardware.sramInfo.VDD.data; + WLvec = hardware.V_WL0; + V0 = hardware.V_BL0; + V_Q0 = hardware.V_Q0; + # Retrieve DP duration + T_DP = T_DP_conf*hardware.sramInfo.T_DP.data; #print('FC time: '+str(T_DP)) + # Retrieve empirical correction factors + a1 = hardware.a1; #print('FC constant: '+str(a1)) + a2 = hardware.a2; + b1 = hardware.b1; + # Supposing WL calibration, get only max activity and corresponding V_WL + V_WL_max = WLvec[-1]; + Vov_max = V_WL_max-V_Q0-mu_Vt; + # Compute full and binary side-MAC operations + MAC_val = K.dot(IA,W_array); + # Define noise matrices + if(EN_NOISE): + # Get Binary Non + IA_bin = K.switch(K.equal(IA,0),IA,K.ones_like(IA)); + N_on = K.dot(IA_bin,W_array); + # Get number of ON devices per state + Non_list = []; + for i in range(Nstates): + # Get inputs of interest + IA_state = K.switch(K.equal(IA,i),IA,K.zeros_like(IA)); + # Generate equivalent MAC value in expanded current expression + MAC_eq = i*K.sqrt(K.clip(K.dot(IA_state,W_array),1e-10,None)); + Non_list.append(MAC_eq); + Non_vec = tf.stack(Non_list,axis=2); + # Vt deviation + sig_Vt_mat = K.random_normal(shape=tf.shape(MAC_val),mean=0.,stddev=hardware.sig_Vth,dtype='float32'); + sig_Vt_mat = K.in_train_phase(sig_Vt_mat,sig_Vt_inf*K.ones_like(sig_Vt_mat)); + else: + sig_Vt_mat = K.zeros_like(MAC_val); + # Different models for weak and strong (saturation assumed) inversion (no DIBL) + if(Vov_max >= 0): + # Compute equivalent ON conductance + if(EN_NOISE): + # Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1) + 2/sqrt(Nstates-1)*Vov_max*sig_Vt_mat*K.sqrt(K.clip(MAC_val,1e-10,None)) + (sig_Vt_mat*sig_Vt_mat)*N_on; + Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1) + 2/(Nstates-1)*Vov_max*K.sum(Non_vec,axis=-1)*sig_Vt_mat + (sig_Vt_mat*sig_Vt_mat)*N_on; + else: + Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1); + beta = mu*C_ox; + gamma = beta*W/L/C_BL/2*(a1*Geq_tot+a2*Geq_tot*Geq_tot); + # Compute BL integrated voltage + V_BL = V0*K.exp(-gamma/Vea*T_DP)-(Vea-V_Q0)*(1-K.exp(-gamma/Vea*T_DP))+b1; + # tf.print("FC one-sided DP value: ",K.transpose(MAC_val[1,:]),summarize=-1) + # tf.print("FC BL voltage: ",K.transpose(V_BL[1,:]),summarize=-1) + else: + # Get the expression of the equivelent ON-conductance + if(EN_NOISE): + Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1) + 2/(Nstates-1)*Vov_max*K.sum(Non_vec,axis=-1)*sig_Vt_mat + (sig_Vt_mat*sig_Vt_mat)*N_on; + else: + Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1); + beta = mu*C_ox; + gamma = beta*W/L/C_BL/2*(a1*Geq_tot+a2*Geq_tot*Geq_tot); + # Compute BL integrated voltage + V_BL = V0*K.exp(-gamma/Vea*T_DP)-(Vea-V_Q0)*(1-K.exp(-gamma/Vea*T_DP))+b1; + # Clip result, waiting for triode + V_BL = K.clip(V_BL,0,VDD); + return V_BL + + +# Voltage integration Model with PWM input +def 
int_BL_PWM(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): + # Retrieve resolution + IAres = hardware.sramInfo.IAres; Nstates = pow(2,IAres); + # Retrieve access transistor characteristics + mu_Vt = hardware.mu_Vth; + sig_Vt = hardware.sig_Vth; + mu = hardware.mu; + e_ox = hardware.e_ox; t_ox = hardware.t_ox; + C_ox = e_ox/t_ox; + Vea = hardware.Vea; eta = hardware.eta; + Ut = hardware.Ut; n_body = hardware.n_body; + # Retrieve number of rows + Nrows = hardware.sramInfo.Nrows.data; + NB = hardware.sramInfo.NB.data; + # Retrieve transistor dimensions + W = hardware.sramInfo.W_acc.data; + L = hardware.sramInfo.L_acc.data; + # Retrieve total BL capacitance + C_BL = hardware.C_tot; + # Retrieve DC & IC voltages + VDD = hardware.sramInfo.VDD.data; + WLvec = hardware.V_WL0; + V0 = hardware.V_BL0; + V_Q0 = hardware.V_Q0; + # Retrieve DP duration + T_DP = T_DP_conf*hardware.sramInfo.T_DP.data; #print('FC time: '+str(T_DP)) + # Retrieve empirical correction factors + a1 = hardware.a1; #print('FC constant: '+str(a1)) + a2 = hardware.a2; + a3 = hardware.a3; + a4 = hardware.a4; + b1 = hardware.b1; + # Supposing WL calibration, get only max activity and corresponding V_WL + V_WL_max = WLvec[-1]; + Vov_max = V_WL_max-V_Q0-mu_Vt; + # Compute full and binary side-MAC operations + MAC_val = K.dot(IA,W_array); + + # Define noise matrices + if(EN_NOISE): + # Vt deviation + if(Vov_max >= 0): + sig_Vt_mat = K.random_normal(shape=tf.shape(MAC_val),mean=0.,stddev=hardware.sig_Vth,dtype='float32'); + sig_Vt_mat = K.in_train_phase(sig_Vt_mat,sig_Vt_inf[0]); + # sig_Vt_mat = K.in_train_phase(sig_Vt_mat,K.zeros_like(sig_Vt_mat)); + else: + sig_Vt_mat = K.random_normal(shape=tf.shape(W_array),mean=0.,stddev=hardware.sig_Vth,dtype='float32'); + bern_matrix = tf.random.uniform(shape=tf.shape(W_array),maxval=1); + bern_matrix = tf.math.greater(bern_matrix,0.0); bern_matrix = tf.cast(bern_matrix,dtype='float32'); + + sig_Vt_mat = bern_matrix*sig_Vt_mat; + sig_Vt_mat = K.in_train_phase(sig_Vt_mat,sig_Vt_inf*K.ones_like(sig_Vt_mat)); + else: + sig_Vt_mat = K.zeros_like(MAC_val); + # Different models for weak and strong (saturation assumed) inversion (no DIBL) + if(Vov_max >= 0): + # Compute equivalent ON conductance + if(EN_NOISE): + Geq_tot = (Vov_max*Vov_max+sig_Vt_mat*sig_Vt_mat)*MAC_val/(Nstates-1) + 2*Vov_max*sig_Vt_mat*K.sqrt(K.clip(MAC_val/(Nstates-1),1e-10,None)); + else: + Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1); + beta = mu*C_ox; + gamma = beta*W/L/C_BL/2*(a1*Geq_tot+a2*Geq_tot*Geq_tot); + # Compute BL integrated voltage + V_BL = V0*K.exp(-gamma/Vea*T_DP)-(Vea-V_Q0)*(1-K.exp(-gamma/Vea*T_DP))+b1; + # tf.print("FC one-sided DP value: ",K.transpose(MAC_val[1,:]),summarize=-1) + # tf.print("FC BL voltage: ",K.transpose(V_BL[1]),summarize=5) + else: + # Compute equivalent ON conductance + I_S0 = 0.2e-4; +# I_S0 = mu*C_ox*W/L*(n_body-1)*(Ut*Ut); + alpha_I = I_S0/Ut*W/L; + tau_DP = T_DP/C_BL; + # Expression changes for noise-aware training + if(EN_NOISE): + IA_zero = K.switch(K.equal(IA,0),K.ones_like(IA),K.zeros_like(IA)); + # Get noisy current weights + exp_Vt = K.exp(sig_Vt_mat/(n_body*Ut))*W_array; + # Get BL voltage + V_BL = Ut*K.log(1+(K.exp(V0/Ut)-1)*K.exp(-a1*tau_DP*alpha_I/(Nstates-1)*(K.dot(IA+a4*IA*IA,exp_Vt)*K.exp(K.constant(a2*(Vov_max+a3)/(n_body*Ut),dtype="float32")) + + K.dot(IA_zero+a4*IA_zero*IA_zero,exp_Vt)*K.exp(K.constant(a2*(-mu_Vt+a3)/(n_body*Ut),dtype="float32")))+b1))-2.5e-4*V_WL_max*(NB-K.dot(IA_zero,K.ones_like(W_array))); + else: + # Get Number of leaky bit-0 bitcells (bit-1 
supposed non-leaky...) + IA_zero = K.switch(K.equal(IA,0),K.ones_like(IA),K.zeros_like(IA)); + Nrem = K.dot(IA_zero,W_array); + # Geq_tot = (Vov_max*Vov_max)*MAC_val/(Nstates-1); + V_BL = Ut*K.log(1+(K.exp(V0/Ut)-1)*K.exp(-a1*tau_DP*alpha_I/(Nstates-1)*((MAC_val+a4*MAC_val*MAC_val)*K.exp(K.constant(a2*(Vov_max+a3)/(n_body*Ut),dtype="float32")) + + (Nrem+a4*Nrem*Nrem)*K.exp(K.constant(a2*(-mu_Vt+a3)/(n_body*Ut),dtype="float32")))+b1))-2.5e-4*V_WL_max*(NB-K.dot(IA_zero,K.ones_like(W_array))); + # tf.print("MAC",MAC_val[0],summarize=5); + # tf.print("V_BL",V_BL[0],summarize=5); + # Clip result, waiting for triode + V_BL = K.clip(V_BL,0,VDD); + return V_BL diff --git a/models/CONV_current.py b/models/CONV_current.py new file mode 100644 index 0000000000000000000000000000000000000000..7136d20c2dc97cc7570fc27d8960712e2cdb1e10 --- /dev/null +++ b/models/CONV_current.py @@ -0,0 +1,275 @@ +############################################## +###### This file models the 6T CIM-SRAM ###### +###### stochastic MAC operation ###### +############################################## +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import numpy as np +from models.Analog_DP import int_BL_num, int_BL_DAC, int_BL_PWM + +########################### NUMERICAL, INTERPOLATION-BASED MODELS ############################### +# Convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_diff_num(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOISE=False,EN_QUANT=False): + # Check channel position and always put it second + if(data_format == 'channels_last'): + input_size = K.int_shape(IA)[1]; + n_channels_in = K.int_shape(IA)[-1]; + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + raise ValueError('Unknown data format: ' + str(data_format)) + # Retrieve LUTs + BL_LUT = hardware.sramInfo.BL_LUT; BLB_LUT = hardware.sramInfo.BLB_LUT; + sig_BL_LUT = hardware.sramInfo.sig_BL_LUT; + sig_BLB_LUT = hardware.sramInfo.sig_BLB_LUT; + # Derive all dimensions + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + # padding_size = int(tf.math.ceil((filter_size-1)/2)) + padding_size = padding; + inc_dim = padding_size-(kern_size[0]-1); + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size,padding_size); + # IA_t = im2col(IA,filter_size,"VALID"); + # Reshape IA_t to add the channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,(input_size+inc_dim)*(input_size+inc_dim),filter_size*filter_size*n_channels_in)); + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + + # Define pos and neg binary weight matrices + W_ones = K.ones_like(W); + W_BL = -(W-W_ones)/2; + W_BLB = (W+W_ones)/2; + # Perform differential computation + V_BL = int_BL_num(hardware,IA_t,W_BL,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE) + V_BLB = int_BL_num(hardware,IA_t,W_BLB,BLB_LUT,T_DP_conf,dist_inf,sig_BLB_LUT,EN_NOISE) + + # Select differential or SE output and reshape it + if(EN_QUANT): + V_DP = V_BL-V_BLB + # Reshape output + V_DP = K.permute_dimensions(V_DP,(0,2,1)) + V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): + V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) + return V_DP + else: + # 
Reshape outputs + V_BL = K.permute_dimensions(V_BL,(0,2,1)) + V_BLB = K.permute_dimensions(V_BLB,(0,2,1)) + + V_BL = K.reshape(V_BL,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + V_BLB = K.reshape(V_BLB,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + + if(data_format == 'channels_last'): + V_BL = K.permute_dimensions(V_BL,(0,2,3,1)) + V_BLB = K.permute_dimensions(V_BLB,(0,2,3,1)) + return (V_BL,V_BLB) + +# Single-ended convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_se_num(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOISE=False,EN_QUANT=False): + # Check channel position and always put it second + if(data_format == 'channels_last'): + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + raise ValueError('Unknown data format: ' + str(data_format)) + # Retrieve LUTs + BL_LUT = hardware.sramInfo.BL_LUT; BLB_LUT = hardware.sramInfo.BLB_LUT; + sig_BL_LUT = hardware.sramInfo.sig_BL_LUT; + sig_BLB_LUT = hardware.sramInfo.sig_BLB_LUT; + # Derive all dimensions + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + # padding_size = int(tf.math.ceil((filter_size-1)/2)) + padding_size = padding; + inc_dim = padding_size-(kern_size[0]-2); + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size,padding_size); + # Reshape IA_t to add the channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,(input_size+2*inc_dim)*(input_size+2*inc_dim),filter_size*filter_size*n_channels_in)); + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + # Compute parallel dot-products corresponding to zero-padded convolutions + W_ones = K.ones_like(W); + W_bin = (W+W_ones)/2; + # Perform differential computation + V_RBL = int_BL_num(hardware,IA_t,W_bin,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE) + # Reshape the always-se output + V_RBL = K.permute_dimensions(V_RBL,(0,2,1)) + V_RBL = K.reshape(V_RBL,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): + V_RBL = K.permute_dimensions(V_RBL,(0,2,3,1)) + return V_RBL + + +##################################### ANALYTICAL MODELS ######################################### +# Convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_diff_ana(hardware,IA,W,sig_Vt_inf,data_format,EN_NOISE,EN_QUANT): + # Check channel position and always put it second + if(data_format == 'channels_last'): + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + raise ValueError('Unknown data format: ' + str(data_format)) + + # Select operator model based on input type + if(inputType == "DAC"): + int_BL = int_BL_DAC; + elif(inputType == "PWM"): + int_BL = int_BL_PWM; + else: + raise NameError("Selected input type not supported !"); + + # Derive all dimensions + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + padding_size = int(tf.math.ceil((filter_size-1)/2)) + conv_size = input_size+padding_size + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size); + # Reshape IA_t to add the channels during the DP + IA_t = 
K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,input_size*input_size,filter_size*filter_size*n_channels_in)); + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + # Compute parallel dot-products corresponding to zero-padded convolutions + # --- Ideal --- + #V_DP = K.dot(IA_t,W) + # --- Actual --- + # Define pos and neg binary weight matrices + W_ones = K.ones_like(W); + W_BL = -(W-W_ones)/2; + W_BLB = (W+W_ones)/2; + # Perform differential computation + V_BL = int_BL(hardware,IA_t,W_BL,sig_Vt_inf,EN_NOISE) + V_BLB = int_BL(hardware,IA_t,W_BLB,sig_Vt_inf,EN_NOISE) + + # Select differential or SE output and reshape it + if(EN_QUANT): + V_DP = V_BL-V_BLB + # Reshape output + V_DP = K.permute_dimensions(V_DP,(0,2,1)) + V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size,input_size)) + if(data_format == 'channels_last'): + V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) + return V_DP; + else: + # Reshape outputs + V_BL = K.permute_dimensions(V_BL,(0,2,1)) + V_BLB = K.permute_dimensions(V_BLB,(0,2,1)) + + V_BL = K.reshape(V_BL,(-1,kern_size[-1],input_size,input_size)) + V_BLB = K.reshape(V_BLB,(-1,kern_size[-1],input_size,input_size)) + + if(data_format == 'channels_last'): + V_BL = K.permute_dimensions(V_BL,(0,2,3,1)) + V_BLB = K.permute_dimensions(V_BLB,(0,2,3,1)) + return (V_BL,V_BLB) + +# Single-ended convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_se_ana(hardware,IA,W,sig_Vt_inf,data_format,EN_NOISE,EN_QUANT): + # Check channel position and always put it second + if(data_format == 'channels_last'): + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + raise ValueError('Unknown data format: ' + str(data_format)) + + # Select operator model based on input type + if(inputType == "DAC"): + int_BL = int_BL_DAC; + elif(inputType == "PWM"): + int_BL = int_BL_PWM; + else: + raise NameError("Selected input type not supported !"); + + # Derive all dimensions + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + padding_size = int(tf.math.ceil((filter_size-1)/2)) + conv_size = input_size+padding_size + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size); + # Reshape IA_t to add the channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,input_size*input_size,filter_size*filter_size*n_channels_in)); + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + # Compute parallel dot-products corresponding to zero-padded convolutions + W_ones = K.ones_like(W); + W_bin = (W+W_ones)/2; + # Perform differential computation + V_RBL = int_BL(hardware,IA_t,W_bin,sig_Vt_inf,EN_NOISE) + # Reshape the always-se output + V_RBL = K.permute_dimensions(V_RBL,(0,2,1)) + V_RBL = K.reshape(V_RBL,(-1,kern_size[-1],input_size,input_size)) + if(data_format == 'channels_last'): + V_RBL = K.permute_dimensions(V_RBL,(0,2,3,1)) + return V_RBL + +############################### INTERNAL FUNCTIONS (IM2COL FACILITIES) #################################### + +# Transform multi-D input feature map into batchsize+2D matrix +def toeplitz(x,filter_size,padding_size): + # Get x dimensions (input feature map assumed to be have square 
2D dims) + x_dim = K.int_shape(x); + # Get filter dimensions + x_size = x_dim[-1]; + c_in = x_dim[1]; + inc_dim = (filter_size-2)-padding_size; + # Get output + conv_size = x_size+2*padding_size; # Static padding of the input vector + x_pad = K.spatial_2d_padding(x,padding=((padding_size,padding_size),(padding_size,padding_size)),data_format='channels_first') + # Create set of toeplitz matrices from padded matrix rows + x_col = tf.unstack(x_pad,axis=-1); + + #shift_list = [-shift_val for shift_val in range(filter_size)]; + x_shift = [] + for i in range(filter_size): + x_temp = K.permute_dimensions(tf.stack(x_col[i:i+(x_size-2*inc_dim)],axis=2),(0,1,3,2)) + x_shift.append(tf.unstack(x_temp,axis=2)) + # print(K.int_shape(x_temp)) + # print(K.int_shape(x_shift[i])) + + x_toep = []; + for i in range(conv_size): + x_temp = [] + for j in range(filter_size): + # print('conv size: '+str(i)) + # print('filt size: '+str(j)) + # print(K.int_shape(x_shift[j][i])); + + x_temp.append(x_shift[j][i]) + x_toep.append(K.permute_dimensions(tf.stack(x_temp,axis=2),(0,1,3,2))) +# print(K.eval(x_toep[i])) + # Create final input matrix from the Toeplitz submatrices + x_b = []; + for i in range(filter_size): + x_b.append(K.concatenate(x_toep[i:i+(x_size-2*inc_dim)],axis=2)) + #print(K.eval(x_b[i][1,1,:,:])) + # Final tensor with size (batch_size,c_in,output_dim,output_dim) + x_out = K.concatenate(x_b,axis=-1); + # Permute dimensions to perform the correct DP + return x_out; + +# Image to column transform +def im2col(x,filter_size,padding_type): + image_patches = tf.image.extract_patches(x, + [1, filter_size, filter_size, 1], + [1, 1, 1, 1], [1, 1, 1, 1], + padding=padding_type); + print(K.int_shape(image_patches)); + return image_patches; + diff --git a/models/CONV_current_num.py b/models/CONV_current_num.py new file mode 100644 index 0000000000000000000000000000000000000000..d31276906f13ef215dcbd6a91d9414dbc6f994ad --- /dev/null +++ b/models/CONV_current_num.py @@ -0,0 +1,221 @@ +############################################## +###### This file models the 6T CIM-SRAM ###### +###### stochastic MAC operation ###### +############################################## +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import numpy as np +from math import sqrt +from utils.linInterp import interp_1D + +# This model supposes the following includes post-layout LUTs of the analog DP operation + +# Convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_diff(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOISE=False,EN_QUANT=False): + # Check channel position and always put it second + if(data_format == 'channels_last'): + input_size = K.int_shape(IA)[1]; + n_channels_in = K.int_shape(IA)[-1]; + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + raise ValueError('Unknown data format: ' + str(data_format)) + # Retrieve LUTs + BL_LUT = hardware.sramInfo.BL_LUT; BLB_LUT = hardware.sramInfo.BLB_LUT; + sig_BL_LUT = hardware.sramInfo.sig_BL_LUT; + sig_BLB_LUT = hardware.sramInfo.sig_BLB_LUT; + # Derive all dimensions + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + # padding_size = int(tf.math.ceil((filter_size-1)/2)) + padding_size = padding; + inc_dim = padding_size-(kern_size[0]-1); + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size,padding_size); + # Reshape IA_t to add the 
channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,(input_size+inc_dim)*(input_size+inc_dim),filter_size*filter_size*n_channels_in)); +# if(padding_size == 0): +# padding_str = "VALID"; +# else: +# padding_str = "SAME"; +# IA_t = im2col(IA,filter_size,padding_str); + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + + # Define pos and neg binary weight matrices + W_ones = K.ones_like(W); + W_pos = (W+W_ones)/2; + W_neg = -(W-W_ones)/2; + # Perform differential computation + V_BL = int_BL(hardware,IA_t,W_pos,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE) + V_BLB = int_BL(hardware,IA_t,W_neg,BLB_LUT,T_DP_conf,dist_inf,sig_BLB_LUT,EN_NOISE) + + # Select differential or SE output and reshape it + if(EN_QUANT): + V_DP = V_BL-V_BLB + # Reshape output + V_DP = K.permute_dimensions(V_DP,(0,2,1)) + V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): + V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) + return V_DP + else: + # Reshape outputs + V_BL = K.permute_dimensions(V_BL,(0,2,1)) + V_BLB = K.permute_dimensions(V_BLB,(0,2,1)) + + V_BL = K.reshape(V_BL,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + V_BLB = K.reshape(V_BLB,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + + if(data_format == 'channels_last'): + V_BL = K.permute_dimensions(V_BL,(0,2,3,1)) + V_BLB = K.permute_dimensions(V_BLB,(0,2,3,1)) + return (V_BL,V_BLB) + +# Single-ended convolution operation with Toeplitz matrix, input channels on repeated weights +def CONV_op_se(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOISE=False,EN_QUANT=False): + # Check channel position and always put it second + if(data_format == 'channels_last'): + IA = K.permute_dimensions(IA,(0,3,1,2)); + elif(data_format != 'channels_first'): + raise ValueError('Unknown data format: ' + str(data_format)) + # Retrieve LUTs + BL_LUT = hardware.sramInfo.BL_LUT; BLB_LUT = hardware.sramInfo.BLB_LUT; + sig_BL_LUT = hardware.sramInfo.sig_BL_LUT; + sig_BLB_LUT = hardware.sramInfo.sig_BLB_LUT; + # Derive all dimensions + input_size = K.int_shape(IA)[-1]; + n_channels_in = K.int_shape(IA)[1]; + kern_size = K.int_shape(W); + filter_size = kern_size[0]; + # padding_size = int(tf.math.ceil((filter_size-1)/2)) + padding_size = padding; + inc_dim = padding_size-(kern_size[0]-2); + # Transform input into Toeplitz matrix for dot-product operation + IA_t = toeplitz(IA,filter_size,padding_size); + # Reshape IA_t to add the channels during the DP + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,(input_size+2*inc_dim)*(input_size+2*inc_dim),filter_size*filter_size*n_channels_in)); + # Reshape weights in (K^2*C_in,C_out) format + W = K.permute_dimensions(W,(3,2,0,1)) + W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) + W = K.permute_dimensions(W,(1,0)) + # Compute parallel dot-products corresponding to zero-padded convolutions + W_ones = K.ones_like(W); + W_bin = (W+W_ones)/2; + # Perform differential computation + V_RBL = int_BL(hardware,IA_t,W_bin,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE) + # Reshape the always-se output + V_RBL = K.permute_dimensions(V_RBL,(0,2,1)) + V_RBL = K.reshape(V_RBL,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): + V_RBL = K.permute_dimensions(V_RBL,(0,2,3,1)) + return V_RBL + +# Transform multi-D input 
feature map into batchsize+2D matrix +def toeplitz(x,filter_size,padding_size): + # Get x dimensions (input feature map assumed to be have square 2D dims) + x_dim = K.int_shape(x); + # Get filter dimensions + x_size = x_dim[-1]; + c_in = x_dim[1]; + inc_dim = (filter_size-2)-padding_size; + # Get output + conv_size = x_size+2*padding_size; # Static padding of the input vector + x_pad = K.spatial_2d_padding(x,padding=((padding_size,padding_size),(padding_size,padding_size)),data_format='channels_first') + # Create set of toeplitz matrices from padded matrix rows + x_col = tf.unstack(x_pad,axis=-1); + + #shift_list = [-shift_val for shift_val in range(filter_size)]; + x_shift = [] + for i in range(filter_size): + x_temp = K.permute_dimensions(tf.stack(x_col[i:i+(x_size-2*inc_dim)],axis=2),(0,1,3,2)) + x_shift.append(tf.unstack(x_temp,axis=2)) + # print(K.int_shape(x_temp)) + # print(K.int_shape(x_shift[i])) + + x_toep = []; + for i in range(conv_size): + x_temp = [] + for j in range(filter_size): + # print('conv size: '+str(i)) + # print('filt size: '+str(j)) + # print(K.int_shape(x_shift[j][i])); + + x_temp.append(x_shift[j][i]) + x_toep.append(K.permute_dimensions(tf.stack(x_temp,axis=2),(0,1,3,2))) +# print(K.eval(x_toep[i])) + # Create final input matrix from the Toeplitz submatrices + x_b = []; + for i in range(filter_size): + x_b.append(K.concatenate(x_toep[i:i+(x_size-2*inc_dim)],axis=2)) + #print(K.eval(x_b[i][1,1,:,:])) + # Final tensor with size (batch_size,c_in,output_dim,output_dim) + x_out = K.concatenate(x_b,axis=-1); + # Permute dimensions to perform the correct DP + return x_out; + +# Image to column transform +def im2col(x,filter_size,padding_type): + image_patches = tf.image.extract_patches(x, + [1, filter_size, filter_size, 1], + [1, 1, 1, 1], [1, 1, 1, 1], + padding=padding_type); + print(K.int_shape(image_patches)); + return image_patches; + + +# /// Interpolation-based analog DP /// +def int_BL(hardware,IA,W_array,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE): + # Local variables + Nrows = hardware.sramInfo.Nrows.data; + IAres = hardware.sramInfo.IAres; + Ndp = (2**IAres-1)*Nrows; + T_DP_vec = hardware.sramInfo.T_DP_vec; + # Compute ideal BL/BLB results to get indexes + BL_ind = K.dot(IA,W_array); + y_shape = tf.shape(BL_ind); + # tf.print("BL index",BL_ind[0]); + # Perform 3D interpolation (--- 2D until we get 3D tables ---) + V_BL_nom_0 = doInterpDP_2D(BL_LUT[...,0],T_DP_conf,BL_ind[...,0::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom_1 = doInterpDP_2D(BL_LUT[...,1],T_DP_conf,BL_ind[...,1::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom_2 = doInterpDP_2D(BL_LUT[...,2],T_DP_conf,BL_ind[...,2::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom_3 = doInterpDP_2D(BL_LUT[...,3],T_DP_conf,BL_ind[...,3::4],T_DP_vec,Ndp,Nrows//8); + V_BL_nom = tf.stack([V_BL_nom_0,V_BL_nom_1,V_BL_nom_2,V_BL_nom_3],axis=-1); + # tf.print("V_BL",V_BL_nom[0]); + # Reshape into the right order + V_BL_nom = tf.reshape(V_BL_nom,y_shape); + + if(EN_NOISE): + sig_V_BL = doInterpDP_2D(sig_BL_LUT[...,0],T_DP_conf,BL_ind,T_DP_vec,Ndp,Nrows//8); + # Reshape into the right order + dist_train = K.random_normal(shape=y_shape,mean=0.,stddev=1.,dtype='float32'); + sig_V_BL = sig_V_BL*K.in_train_phase(dist_train,dist_inf); + # tf.print("sig V_BL",sig_V_BL[0]); + else: + sig_V_BL = K.zeros_like(V_BL_nom); + V_BL = V_BL_nom + sig_V_BL; + return V_BL; + +# /// Apply custom 2D interpolation /// +def doInterpDP_2D(LUT,x1,x2,x1_vec,x2_max,N2): + # Possibly reshape x2 if CONV layer + x2_shape = tf.shape(x2); + x2 = 
tf.reshape(x2,(-1,x2_shape[-1])); + # Get indices + ind_x1 = K.cast(tf.math.floor(x1),"int32"); + ind_x2 = K.clip(tf.math.floor(x2/x2_max*N2),0,N2); ind_x2 = K.cast(ind_x2,"int32"); + # Get interpolated value for time param + x1 = interp_1D(x1_vec,x1); + # Get corresponding coefficients + coef_vec = tf.gather_nd(LUT,tf.stack([ind_x1*K.ones_like(ind_x2),ind_x2],axis=2)); + # Perform interpolation + f_int = coef_vec[...,0]+coef_vec[...,1]*x1+coef_vec[...,2]*x2+coef_vec[...,3]*x1*x2; + # Reshape result back, if needed + f_int = tf.reshape(f_int,x2_shape); + # Return interpolated result + return f_int; + diff --git a/models/DTSE.py b/models/DTSE.py new file mode 100644 index 0000000000000000000000000000000000000000..16ea55f37ebf0f95ffc4f58246a52c5e5dfe26ba --- /dev/null +++ b/models/DTSE.py @@ -0,0 +1,73 @@ +############################################# +######### HARDWARE MODEL FOR DTSE ########### +############################################# +import sys +import numpy as np +import keras.backend as K +import tensorflow as tf + +import math + +## /// Simple DP binary-weighting /// +def MBIT_weight(V_DP,Wres): + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL)); + V_DP = weightsVec*V_DP/K.sum(weightsVec); + # Return binary-weighted value + return V_DP; + +def DTSE_ideal(V_BL,V_BLB,VDD,V_beta,Wres): + # Perform single-bit DTSE + V_DP = (VDD+V_beta)/2 + (V_BL-V_BLB)/2; + # Weight DTSE as should be + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL)); + V_DP = weightsVec*V_DP/K.sum(weightsVec); + # Return single-ended value + return V_DP + +## /// Parasitic-aware DTSE (can be use instead of ideal model) /// +def DTSE_paras(V_BL,V_BLB,VDD,V_beta,C_int_BL,C_int_BLB,C_L,Cp1,Cp2,Cp3,offset,Wres): + # Reshape V_BL and V_BLB + V_BL = K.reshape(V_BL,(-1,Wres)); + V_BLB = K.reshape(V_BLB,(-1,Wres)); + # Binary-weighted integration caps + weightsVec = K.cast(K.pow(2,K.arange(0,Wres)),dtype=K.dtype(V_BL)); + C_int_BL_vec = weightsVec*C_int_BL; + C_int_BLB_vec = weightsVec*C_int_BLB; + + # Parasitic-aware DTSE model + V_DP = 1/(C_int_BLB+C_int_BL+C_L+Cp1+4*Cp2+Cp3)*((C_int_BLB+Cp1)*(VDD+V_beta) + + (C_int_BL+C_L+2*Cp2+Cp3)*V_BL - C_int_BLB*V_BLB) + offset; + # Return DP result + return V_DP; + +## /// Post-layout DTSE model /// +def DTSE_PL(V_BL,V_BLB,C_int,C_L,coef_vec,VDD,Wres): + # Retrieve useful values + p_dtse = coef_vec[int(math.log2(Wres))]; + # Apply correct DTSE transform depending upon weights resolution + if(Wres == 1): + V_DP_0 = 1/(2*8*C_int+C_L[0]+p_dtse[1][0]+4*p_dtse[2][0]+p_dtse[3][0])*((8*C_int+p_dtse[1][0])*(VDD+0.0) + (8*C_int+2*p_dtse[2][0]+p_dtse[3][0])*V_BLB[...,0::4] - 8*C_int*V_BL[...,0::4] + C_L[0]*VDD)+p_dtse[0][0]; + V_DP_1 = 1/(2*8*C_int+C_L[1]+p_dtse[1][1]+4*p_dtse[2][1]+p_dtse[3][1])*((8*C_int+p_dtse[1][1])*(VDD+0.0) + (8*C_int+2*p_dtse[2][1]+p_dtse[3][1])*V_BLB[...,1::4] - 8*C_int*V_BL[...,1::4] + C_L[1]*VDD)+p_dtse[0][1]; + V_DP_2 = 1/(2*8*C_int+C_L[2]+p_dtse[1][2]+4*p_dtse[2][2]+p_dtse[3][2])*((8*C_int+p_dtse[1][2])*(VDD+0.0) + (8*C_int+2*p_dtse[2][2]+p_dtse[3][2])*V_BLB[...,2::4] - 8*C_int*V_BL[...,2::4] + C_L[2]*VDD)+p_dtse[0][2]; + V_DP_3 = 1/(2*8*C_int+C_L[3]+p_dtse[1][3]+4*p_dtse[2][3]+p_dtse[3][3])*((8*C_int+p_dtse[1][3])*(VDD+0.0) + (8*C_int+2*p_dtse[2][3]+p_dtse[3][3])*V_BLB[...,3::4] - 8*C_int*V_BL[...,3::4] + C_L[3]*VDD)+p_dtse[0][3]; + V_DP = tf.stack([V_DP_0,V_DP_1,V_DP_2,V_DP_3],axis=-1); + V_DP = tf.reshape(V_DP,tf.shape(V_BL)); + elif(Wres == 2): + V_DP_0 = 
1/(2*(8+4)*C_int+(C_L[0]+C_L[1])+(p_dtse[1][0]+p_dtse[1][1])+4*(p_dtse[2][0]+p_dtse[2][1])+(p_dtse[3][0]+p_dtse[3][1])) \ + *(((8+4)*C_int+p_dtse[1][0]+p_dtse[1][1])*(VDD+0.0) + (8*C_int+2*p_dtse[2][1]+p_dtse[3][1])*V_BLB[...,1::4]+(4*C_int+2*p_dtse[2][0]+p_dtse[3][0])*V_BLB[...,0::4] \ + - (8*V_BL[...,1::4]+4*V_BL[...,0::4])*C_int + (C_L[0]+C_L[1])*VDD)+(p_dtse[0][0]+p_dtse[0][1]); + V_DP_1 = 1/(2*(8+4)*C_int+(C_L[2]+C_L[3])+(p_dtse[1][2]+p_dtse[1][3])+4*(p_dtse[2][2]+p_dtse[2][3])+(p_dtse[3][2]+p_dtse[3][3])) \ + *(((8+4)*C_int+p_dtse[1][2]+p_dtse[1][3])*(VDD+0.0) + (8*C_int+2*p_dtse[2][2]+p_dtse[3][2])*V_BLB[...,2::4]+(4*C_int+2*p_dtse[2][3]+p_dtse[3][3])*V_BLB[...,3::4] \ + - (8*V_BL[...,3::4]+4*V_BL[...,2::4])*C_int + (C_L[2]+C_L[3])*VDD)+(p_dtse[0][2]+p_dtse[0][3]); + V_DP = tf.stack([V_DP_0,V_DP_1],axis=-1); + V_DP = tf.reshape(V_DP,tf.shape(V_BL)); + elif(Wres == 4): + V_DP = 1/(2*(8+4+2+1)*C_int+(C_L[0]+C_L[1]+C_L[2]+C_L[3])+(p_dtse[1][0]+p_dtse[1][1]+p_dtse[1][2]+p_dtse[1][3])+4*(p_dtse[2][0]+p_dtse[2][1]+p_dtse[2][2]+p_dtse[2][3])+(p_dtse[3][0]+p_dtse[3][1]+p_dtse[3][2]+p_dtse[3][3])) \ + *(((8+4+2+1)*C_int+(p_dtse[1][0]+p_dtse[1][1]+p_dtse[1][2]+p_dtse[1][3]))*(VDD+0.0) + (8*C_int+2*p_dtse[2][3]+p_dtse[3][3])*V_BLB[...,3::4]+ (4*C_int+2*p_dtse[2][2]+p_dtse[3][2])*V_BLB[...,2::4] \ + + (2*C_int+2*p_dtse[2][1]+p_dtse[3][1])*V_BLB[...,1::4] + (1*C_int+2*p_dtse[2][0]+p_dtse[3][0])*V_BLB[...,0::4] - (8*V_BL[...,3::4]+4*V_BL[...,2::4]+2*V_BL[...,1::4]+1*V_BL[...,0::4])*C_int \ + + (C_L[0]+C_L[1]+C_L[2]+C_L[3])*VDD)+(p_dtse[0][0]+p_dtse[0][1]+p_dtse[0][2]+p_dtse[0][3]); + else: + error("Selected weight precision {}b not supported during PL DTSE transform !".format(Wres)); + # Return DP result + return V_DP; + \ No newline at end of file diff --git a/models/MAC_current.py b/models/MAC_current.py new file mode 100644 index 0000000000000000000000000000000000000000..8c9281a3a903452fb8eac2dfcba05a059528adbf --- /dev/null +++ b/models/MAC_current.py @@ -0,0 +1,91 @@ +############################################## +###### This file models the 6T CIM-SRAM ###### +###### stochastic MAC operation ###### +############################################## +from __future__ import absolute_import +import keras.backend as K +import tensorflow as tf +import numpy as np +from models.Analog_DP import int_BL_num, int_BL_DAC, int_BL_PWM + +########################### NUMERICAL, INTERPOLATION-BASED MODELS ############################### +def MAC_op_diff_num(hardware,IA,W,dist_inf,T_DP_conf,EN_NOISE,EN_QUANT): + # Retrieve LUTs + BL_LUT = hardware.sramInfo.BL_LUT; BLB_LUT = hardware.sramInfo.BLB_LUT; + sig_BL_LUT = hardware.sramInfo.sig_BL_LUT; + sig_BLB_LUT = hardware.sramInfo.sig_BLB_LUT; + # Extract +1/-1 contributions (W in {-1,1}) --> Put opposite V_WL to 0 so that they contribute not + W_BL = -(W-K.ones_like(W))/2; + W_BLB = (W+K.ones_like(W))/2; + # Get actual values from LUTs using indexes + V_BL = int_BL_num(hardware,IA,W_BL,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE); + V_BLB = int_BL_num(hardware,IA,W_BLB,BLB_LUT,T_DP_conf,dist_inf,sig_BLB_LUT,EN_NOISE); + # debug + # tf.print("DP conf",T_DP_conf); + # Select differental or SE output + if(EN_QUANT): + Vmac = V_BL-V_BLB; + return Vmac; + else: + return (V_BL,V_BLB); + +def MAC_op_se_num(hardware,IA,W,T_DP_conf,dist_inf,EN_NOISE,EN_QUANT): + # Retrieve LUTs + BL_LUT = hardware.sramInfo.BL_LUT; + sig_BL_LUT = hardware.sramInfo.sig_BL_LUT; + # Extract weights + W_bin = (W+K.ones_like(W_ones))/2; + # Compute single-ended result + V_RBL = 
int_BL_num(hardware,IA,W_bin,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE);
+    # Return result
+    return V_RBL;
+
+##################################### ANALYTICAL MODELS #########################################
+# Differential MAC operation
+def MAC_op_diff_ana(hardware,IA,W,sig_Vt_inf,T_DP_conf,EN_NOISE,EN_QUANT):
+    # Get parameters
+    inputType = hardware.sramInfo.inputType.name;
+    # Get positive and negative weights
+    W_ones = K.ones_like(W);
+    W_BL  = -(W-W_ones)/2;
+    W_BLB = (W+W_ones)/2;
+    # Select operator model based on input type
+    if(inputType == "DAC"):
+        int_BL = int_BL_DAC;
+    elif(inputType == "PWM"):
+        int_BL = int_BL_PWM;
+    else:
+        raise NameError("Selected input type not supported !");
+    # Compute differential result, adding noise during training only
+    V_BL  = int_BL(hardware,IA,W_BL,sig_Vt_inf,T_DP_conf,EN_NOISE);
+    V_BLB = int_BL(hardware,IA,W_BLB,sig_Vt_inf,T_DP_conf,EN_NOISE);
+
+    # Select differential or SE output
+    if(EN_QUANT):
+        Vmac = V_BL-V_BLB;
+        return Vmac;
+    else:
+        return (V_BL,V_BLB);
+
+# Single-ended MAC operation
+def MAC_op_se_ana(hardware,IA,W,sig_Vt_inf,T_DP_conf,EN_NOISE,EN_QUANT):
+    # Get parameters
+    inputType = hardware.sramInfo.inputType.name;
+    # Get binary (0/1) weights
+    W_ones = K.ones_like(W);
+    W_bin = (W+W_ones)/2;
+    # Select operator model based on input type
+    if(inputType == "DAC"):
+        int_BL = int_BL_DAC;
+    elif(inputType == "PWM"):
+        int_BL = int_BL_PWM;
+    else:
+        raise NameError("Selected input type not supported !");
+    # Compute single-ended result, adding noise during training only
+    V_RBL = int_BL(hardware,IA,W_bin,sig_Vt_inf,T_DP_conf,EN_NOISE);
+    # Output is always single-ended
+    return V_RBL;
+
+######################## Internal functions #############################
+
+
+
+
diff --git a/models/MAC_current_num.py b/models/MAC_current_num.py
new file mode 100644
index 0000000000000000000000000000000000000000..54c831129d7357b00c122e1d70115b837c535ec3
--- /dev/null
+++ b/models/MAC_current_num.py
@@ -0,0 +1,45 @@
+##############################################
+###### This file models the 6T CIM-SRAM ######
+######    stochastic MAC operation      ######
+##############################################
+from __future__ import absolute_import
+import keras.backend as K
+import tensorflow as tf
+import numpy as np
+
+# Infer the MAC result directly from numerical hardware extraction
+# NOTE: int_BL refers to the interpolation-based bit-line model (cf. CONV_current_num.py)
+# and is assumed to be imported or defined before these functions are used.
+
+# Accumulate currents in the acc_type manner
+# V_WL and V_th are tensors
+# Perform equivalent 6T-based current-MAC NIs operation
+def MAC_op_diff(hardware,IA,W,dist_inf,T_DP_conf,EN_NOISE,EN_QUANT):
+    # Retrieve LUTs
+    BL_LUT = hardware.sramInfo.BL_LUT; BLB_LUT = hardware.sramInfo.BLB_LUT;
+    sig_BL_LUT = hardware.sramInfo.sig_BL_LUT;
+    sig_BLB_LUT = hardware.sramInfo.sig_BLB_LUT;
+    # Extract +1/-1 contributions (W in {-1,1}) --> set the opposite V_WL to 0 so that it does not contribute
+    W_pos = (W+K.ones_like(W))/2;
+    W_neg = -(W-K.ones_like(W))/2;
+    # Get actual values from LUTs using indexes
+    V_BL = int_BL(hardware,IA,W_pos,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE);
+    V_BLB = int_BL(hardware,IA,W_neg,BLB_LUT,T_DP_conf,dist_inf,sig_BLB_LUT,EN_NOISE);
+    # debug
+    # tf.print("DP conf",T_DP_conf);
+    # Select differential or SE output
+    if(EN_QUANT):
+        Vmac = V_BL-V_BLB;
+        return Vmac;
+    else:
+        return (V_BL,V_BLB);
+
+def MAC_op_se(hardware,IA,W,T_DP_conf,dist_inf,EN_NOISE,EN_QUANT):
+    # Retrieve LUTs
+    BL_LUT = hardware.sramInfo.BL_LUT;
+    sig_BL_LUT = hardware.sramInfo.sig_BL_LUT;
+    # Extract binary (0/1) weights
+    W_bin = (W+K.ones_like(W))/2;
+    # Compute single-ended result
+    V_RBL = 
int_BL(hardware,IA,W_bin,BL_LUT,T_DP_conf,dist_inf,sig_BL_LUT,EN_NOISE); + # Return result + return V_RBL; + diff --git a/models/makeModel.py b/models/makeModel.py new file mode 100644 index 0000000000000000000000000000000000000000..92821e278ab55917d84e4ca74ce3fc3c2f0b2b8b --- /dev/null +++ b/models/makeModel.py @@ -0,0 +1,352 @@ +###################################################### +###### This file creates the specified NN model ###### +###### Add desired NN models below ###### +###################################################### +import numpy as np +from keras.models import Sequential, Model +from keras.layers import Reshape, MaxPooling2D, Dropout, Flatten, concatenate, Activation + +def make_model(model_type,cf,Conv_,Conv,Dens_,Dens,Act,Quant,BatchNormalization,Dens_FP,BatchNormalization_FP,Conv_FP_): + model = Sequential() + if(model_type == 'TEST_MLP'): + model.add(Dens_(512,cf.dim,cf.channels)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.dim*cf.dim*cf.channels)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Dens(cf.classes)) + #model.add(Quant((pow(2,cf.abits)-1)*512)) + model.add(BatchNormalization()) + model.add(Activation('softmax')) + + # Copy of three stage MLP for ABN tests + elif(model_type == 'MLP_three_stage_abn'): + print('MLP_three_stage ABN (post-DP) toplogy selected...\n') + #model.add(Dropout(0.5)) + model.add(Dens_(512,cf.dim,cf.channels,6.)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.dim*cf.dim*cf.channels)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + + model.add(Dropout(0.)) + model.add(Dens(256,4.)) + #model.add(Quant((pow(2,cf.abits)-1)*512)) + model.add(BatchNormalization(512,2)) + model.add(Act()) + + model.add(Dropout(0.)) + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + #model.add(Quant((pow(2,cf.abits)-1)*256)) + model.add(Activation('softmax')) + + # Embedded three stage abn + elif(model_type == 'MLP_three_stage_abn_emb'): + print('MLP_three_stage ABN (embedded) toplogy selected...\n') + #model.add(Dropout(0.5)) + model.add(Dens_(512,cf.dim,cf.channels,1)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.dim*cf.dim*cf.channels)) + # model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,2)) + model.add(Act()) + + model.add(Dropout(0.1)) + model.add(Dens(256,1)) + #model.add(Quant((pow(2,cf.abits)-1)*512)) + # model.add(BatchNormalization(512,2)) + model.add(Act()) + + model.add(Dropout(0.15)) + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + #model.add(Quant((pow(2,cf.abits)-1)*256)) + model.add(Activation('softmax')) + + # Test with ideal ABN + elif(model_type == 'MLP_three_stage_dbn'): + print('MLP_three_stage DBN toplogy selected...\n') + #model.add(Dropout(0.5)) + model.add(Dens_(512,cf.dim,cf.channels,6)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.dim*cf.dim*cf.channels)) + model.add(BatchNormalization_FP()) + model.add(Act()) + + model.add(Dropout(0.1)) + model.add(Dens(256,4)) + #model.add(Quant((pow(2,cf.abits)-1)*512)) + model.add(BatchNormalization_FP()) + model.add(Act()) + + model.add(Dropout(0.15)) + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + #model.add(Quant((pow(2,cf.abits)-1)*256)) + model.add(Activation('softmax')) + + # A C_in-256-64-10 FC network + elif(model_type == 'MLP_256_64_10'): + print('MLP_three_stage toplogy selected...\n') + + model.add(Dens_(256,cf.dim,cf.channels,6.)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + + model.add(Dropout(0.05)) + model.add(Dens(64,2.)) + 
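+        # The two arguments given to BatchNormalization() in these topologies are
+        # (NB, m_sigma): NB appears to be the fan-in (dot-product depth) of the
+        # preceding quantized Dense layer and m_sigma a sigma scaling factor;
+        # both are forwarded to the Analog_BN layer built in models/model_IMC.py.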
model.add(BatchNormalization(256,2)) + model.add(Act()) + + model.add(Dropout(0.1)) + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) + + elif(model_type == 'MLP_128_64_10'): + print('MLP_three_stage toplogy selected...\n') + + model.add(Dens_(128,cf.dim,cf.channels,6.)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + + model.add(Dropout(0.05)) + model.add(Dens(64,2.)) + model.add(BatchNormalization(128,2)) + model.add(Act()) + + model.add(Dropout(0.1)) + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) + + # MLP with hidden layers of size 512 + elif(model_type == 'MLP_512'): + print('MLP_512 toplogy selected...\n') + #model.add(Dropout(0.25)) + model.add(Dens_(cf.dim*cf.dim*cf.channels,cf.dim,cf.channels)) + model.add(Quant((0))) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels)) + model.add(Act()) + + model.add(Dropout(0.2)) + model.add(Dens(512)) + model.add(Quant((0))) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels)) + model.add(Act()) + + model.add(Dropout(0.2)) + model.add(Dens(512)) + model.add(Quant((0))) + model.add(BatchNormalization(512)) + model.add(Act()) + + model.add(Dropout(0.2)) + model.add(Dens(256)) + model.add(Quant((0))) + model.add(BatchNormalization(512)) + model.add(Act()) + + model.add(Dropout(0.1)) + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) + + # Custom 2Conv-2FC model + elif(model_type == '2C2D'): + print('16C-64C-512F-128F network selected...\n') + + model.add(Conv_(cf.kern_size, 16, cf.dim, cf.channels, 6, 3)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 64, 2, 1)) + model.add(BatchNormalization((cf.dim-2)*(cf.dim-2)*cf.channels,4)) + model.add(Act()) + #model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Flatten()) + + model.add(Dropout(0.15)) + model.add(Dens(512,1)) + model.add(BatchNormalization((cf.dim-2)*(cf.dim-2)*cf.channels,4)) + model.add(Act()) + + model.add(Dropout(0.1)) + model.add(Dens(128,2)) + model.add(BatchNormalization((cf.dim-2)*(cf.dim-2)*cf.channels,4)) + model.add(Act()) + + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) + # BinaryNet model + elif(model_type == 'BinaryNet'): + print('BinaryNet network selected...\n') + + model.add(Conv_(cf.kern_size, 64,cf.dim,cf.channels)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*cf.channels)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Conv(cf.kern_size, 64)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*64)) + model.add(BatchNormalization()) + model.add(Act()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 128)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*64/4)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Conv(cf.kern_size, 128)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*128)) + model.add(BatchNormalization()) + model.add(Act()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 256)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*128/4)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Conv(cf.kern_size, 256)) + #model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*256)) + model.add(BatchNormalization()) + model.add(Act()) + 
model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 512)) + # model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*256/4)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Conv(cf.kern_size, 512)) + # model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*512)) + model.add(BatchNormalization()) + model.add(Act()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + # Dense Layers + model.add(Flatten()) + model.add(Dens(1024)) + # model.add(Quant((pow(2,cf.abits)-1)*(cf.dim/2)*(cf.dim/2)*512)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Dens(1024)) + # model.add(Quant((pow(2,cf.abits)-1)*1024)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Dens(cf.classes)) + # model.add(Quant((pow(2,cf.abits)-1)*1024)) + model.add(BatchNormalization()) + model.add(Activation('softmax')) + + # VGG-16 variant (Jia et al., JSSC 2020) + elif(model_type == 'Jia_2020'): + print("VGG-16 variant (Jia et al., JSSC'20) selected..."); + + model.add(Conv_(cf.kern_size, 128,cf.dim,cf.channels)) + model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*cf.channels)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Conv(cf.kern_size, 128)) + model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*128)) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(BatchNormalization(momentum=0.1, epsilon=0.0001)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 256)) + model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*256/4)) + model.add(BatchNormalization(momentum=0.1, epsilon=0.0001)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 256)) + model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*256)) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(BatchNormalization(momentum=0.1, epsilon=0.0001)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 256)) + model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*256/4)) + model.add(BatchNormalization(momentum=0.1, epsilon=0.0001)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 256)) + model.add(Quant((pow(2,cf.abits)-1)*cf.kern_size*cf.kern_size*256)) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(BatchNormalization(momentum=0.1, epsilon=0.0001)) + model.add(Act()) + + model.add(Flatten()) + model.add(Dens(1024)) + model.add(Quant((pow(2,cf.abits)-1)*(cf.dim/2)*(cf.dim/2)*256)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Dens(1024)) + model.add(Quant((pow(2,cf.abits)-1)*1024)) + model.add(BatchNormalization()) + model.add(Act()) + + model.add(Dens(cf.classes)) + model.add(Quant((pow(2,cf.abits)-1)*1024)) + model.add(BatchNormalization()) + model.add(Activation('softmax')) + + # Raise error on unsupported model type + else: + raise NameError('Network topology not supported, please select a valid one !\n') + + model.summary() + return model + +### Return the input size of all (unique) layers if a model ### +def getModelSize(modelType,cf): + if(modelType == 'TEST_MLP'): + first_dim = cf.dim*cf.dim*cf.channels; + model_size = np.array([first_dim]); + elif(modelType == 'MLP_512'): + model_size = np.array([first_dim,512]); + elif(modelType == '2C2D'): + first_size = cf.kern_size*cf.kern_size*cf.channels; + model_size = np.array([first_size, + cf.kern*cf.kern*64, + cf.dim/2*cf.dim/2*64, + 512]); + elif(modelType == 'BinaryNet'): + first_size = cf.kern_size*cf.kern_size*cf.channels; + model_size = np.array([first_size, + cf.kern_size*cf.kern_size*64, + cf.kern_size*cf.kern_size*64/4, + 
cf.kern_size*cf.kern_size*128, + cf.kern_size*cf.kern_size*128/4, + cf.kern_size*cf.kern_size*256, + cf.kern_size*cf.kern_size*256/4, + cf.kern_size*cf.kern_size*512, + cf.dim/2*cf.dim/2*512, + 1024, + 1024]); + else: + raise NameError('Network topology not supported, please select a valid one !\n'); + + return model_size; + +### Return a 1D-array with the position indexes of the FC/CONV layers in the selected sequential CNN ### +def getIndexOut(modelType): + if(modelType == 'TEST_MLP'): + indX_out = np.array([0,3]); + elif(modelType == 'MLP_three_stage_dbn'): + indX_out = np.array([0,4,8]); + elif(modelType == 'MLP_512'): + indX_out = np.array([0,4,8,12,16]); + elif(modelType == '2C2D'): + indX_out = np.array([0,3,8,12]); + elif(modelType == 'Valavi_2019_MNIST'): + indX_out = np.array([0,3,7,10,16]); + else: + raise NameError('Network topology not supported, please implement it or select a valid one !\n'); + + return indX_out; + \ No newline at end of file diff --git a/models/model_IMC.py b/models/model_IMC.py index d9026db8d773940591242544e75bfb60c986a049..0dabd287de5ac31495935e9eac979e365ad36656 100644 --- a/models/model_IMC.py +++ b/models/model_IMC.py @@ -5,16 +5,11 @@ from keras.layers.advanced_activations import LeakyReLU from keras.regularizers import l2 import numpy as np -from layers.custom_regu import Reg_abn_out, Reg_l2_p -#from layers.analog_BN_current_model import Analog_BN -from layers.analog_BN_current_interp_PL import Analog_BN -from layers.binary_layers_IMC import BinaryConv2D,BinaryDense +from layers.analog_BN_current_model import Analog_BN as Analog_BN_ideal +from layers.analog_BN_current_interp_PL import Analog_BN as Analog_BN_nonideal from layers.quantized_layers_IMC import QuantizedConv2D,QuantizedDense -from layers.quantized_layers_IMC_ABN import QuantizedDenseABN +#from layers.quantized_layers_IMC_ABN import QuantizedDenseABN from layers.quantized_ops import my_quantized_relu as quantize_op -from layers.binary_ops import binary_tanh as binary_tanh_op -from layers.binary_ops import binary_sigmoid as binary_sigmoid_op -from layers.binary_ops import binary_sigmoid_abn, binary_sigmoid_p, binary_tanh, binary_tanh_p from models.ADC import quant_uni,Quant_train from models.makeModel import make_model # Hardware parameters generation @@ -23,13 +18,18 @@ from utils.config_hardware_model import genHardware from copy import deepcopy -def build_model(cf,model_type,sramInfo,EN_NOISE,EN_QUANT,ABN_INC_ADC): +def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): # Useful build variables IAres = sramInfo.IAres; Wres = sramInfo.Wres; OAres = sramInfo.OAres; - dynRange = sramInfo.VDD.data-0.108-0.04; # To be updated --> incorporate quantization directly inside IMC layer, with an EN flag - H = 1. 
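+    # FLAGS is expected as the ordered list [FLAG_PL, FLAG_QUANT, IDEAL_ABN, ABN_INC_ADC];
+    # train_cim_qnn.py builds it as [FLAG_PL, not(ANALOG_BN), IDEAL_ABN, ABN_INC_ADC],
+    # i.e. FLAG_QUANT is the complement of the analog-BN enable.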
+ dynRange = sramInfo.VDD.data; + H = 1.; + # Retrieve flags + FLAG_PL = FLAGS[0]; + FLAG_QUANT = FLAGS[1]; + IDEAL_ABN = FLAGS[2]; + ABN_INC_ADC = FLAGS[3]; print('###################################################') print('########### BUILDING CIM-SRAM NETWORK #############') @@ -42,26 +42,26 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,EN_QUANT,ABN_INC_ADC): if cf.network_type =='float': Conv_ = lambda s, f, i, c: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', - kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv = lambda s, f: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', - kernel_regularizer=l2(cf.kernel_regularizer),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + kernel_regularizer=l2(cf.kernel_regularizer),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) Act = lambda: LeakyReLU() Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,OAres=OAres,offset=0.5*dynRange/n)) Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n: Dense(n,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + Dens = lambda n: Dense(n,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) - Dens_ = lambda n,i,c: Dense(n,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + Dens_ = lambda n,i,c: Dense(n,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) elif cf.network_type=='qnn': Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - if(EN_QUANT): + kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) + if(FLAG_QUANT): # Act = lambda: LeakyReLU() Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) else: @@ -73,20 +73,20 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,EN_QUANT,ABN_INC_ADC): Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + Dens = lambda n,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Dens_ = lambda n,i,c,m: 
QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + Dens_ = lambda n,i,c,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) elif cf.network_type=='full-qnn': Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv_FP_ = lambda s, f, i, c: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False) - if(EN_QUANT): + if(FLAG_QUANT): Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) else: # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) @@ -98,84 +98,44 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,EN_QUANT,ABN_INC_ADC): Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - - Dens_ = lambda n,i,c,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - elif cf.network_type=='full-qnn-embedded': - Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', - activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', - activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - if(EN_QUANT): - Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) - else: - # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) - Act = lambda: Quant_train(sramInfo) - # Act = lambda: Activation(lambda x: quant_uni(x,maxVal=0,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0,archType=sramInfo.arch.name)); - - # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.5*dynRange/n,archType=sramInfo.arch.name)) - Quant = lambda n: Activation(lambda x: 
quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.,archType=sramInfo.arch.name)) - - Dens_FP = lambda n: Dense(n,use_bias=False) - - Dens = lambda n,m: QuantizedDenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT,m_sigma=4) - - Dens_ = lambda n,i,c,m: QuantizedDenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT,m_sigma=4) - elif cf.network_type=='bnn': - Conv_ = lambda s, f,i,c: BinaryConv2D(kernel_size=(s, s), H=1, filters=f, strides=(1, 1), padding='same', - activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - Conv = lambda s, f: BinaryConv2D(kernel_size=(s, s), H=1, filters=f, strides=(1, 1), padding='same', - activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - Act = lambda: LeakyReLU() - - # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,OAres=OAres,offset=0.5*dynRange/n)) - Quant = lambda p: Activation('linear'); - - Dens_FP = lambda n: Dense(n,use_bias=False) - - Dens = lambda n: BinaryDense(n,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - - Dens_ = lambda n,i,c: BinaryDense(n,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - elif cf.network_type=='full-bnn': - Conv_ = lambda s, f,i,c: BinaryConv2D(kernel_size=(s, s), H=1, filters=f, strides=(1, 1), padding='same', - activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - Conv = lambda s, f: BinaryConv2D(kernel_size=(s, s), H=1, filters=f, strides=(1, 1), padding='same', - activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - if(EN_QUANT): - Act = lambda: Activation(lambda x: binary_sigmoid(x)) - else: - # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) - Act = lambda: Quant_train(sramInfo) - # Act = lambda: Activation(lambda x: quant_uni(x,maxVal=0,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0,archType=sramInfo.arch.name)); - # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,OAres=OAres,offset=0.5*dynRange/n)); - Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.,archType=sramInfo.arch.name)) - - Dens_FP = lambda n: Dense(n,use_bias=False) - - Dens = lambda n: BinaryDense(n,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) - - Dens_ = lambda n,i,c: BinaryDense(n,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_QUANT=EN_QUANT) + Dens = lambda n,m: QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + + Dens_ = lambda n,i,c,m: 
QuantizedDense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) +# elif cf.network_type=='full-qnn-embedded': +# Conv_ = lambda s,f,i,c,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', +# activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), +# kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) +# Conv = lambda s,f,m,k: QuantizedConv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', +# activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), +# kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) +# if(FLAG_QUANT): +# Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) +# else: +# # Act = lambda: Activation(lambda x: binary_sigmoid_abn(x,sramInfo.VDD.data)) +# Act = lambda: Quant_train(sramInfo) +# # Act = lambda: Activation(lambda x: quant_uni(x,maxVal=0,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0,archType=sramInfo.arch.name)); +# +# # Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.5*dynRange/n,archType=sramInfo.arch.name)) +# Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,VDD=sramInfo.VDD.data,OAres=OAres,offset=0.,archType=sramInfo.arch.name)) +# +# Dens_FP = lambda n: Dense(n,use_bias=False) +# +# Dens = lambda n,m: QuantizedDenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,m_sigma=4) +# +# Dens_ = lambda n,i,c,m: QuantizedDenseABN(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,m_sigma=4) else: print('wrong network type, the supported network types in this repo are float, qnn, full-qnn, bnn and full-bnn') - if(EN_QUANT): + if(FLAG_QUANT): BatchNorm = lambda: BatchNormalization(momentum=0.1,epsilon=1e-5) else: - if(cf.network_type == 'full-qnn-embedded'): - BatchNorm = lambda n,m: Activation('linear'); +# if(cf.network_type == 'full-qnn-embedded'): +# BatchNorm = lambda n,m: Activation('linear'); +# elif(IDEAL_ABN): + if(IDEAL_ABN): + BatchNorm = lambda n,m: Analog_BN_ideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m); else: - BatchNorm = lambda n,m: Analog_BN(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m,EN_NOISE=EN_NOISE - # center=False,scale=False, - # gamma_regularizer=l2(0.001),beta_regularizer=l2(0.001)) - # activity_regularizer=Reg_abn_out(1e-5,sramInfo.VDD.data)) - # activity_regularizer=Reg_l2_p(0.,0.5) - ); + BatchNorm = lambda n,m: Analog_BN_nonideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m,EN_NOISE=EN_NOISE); BatchNorm_FP = lambda: BatchNormalization(momentum=0.1,epsilon=1e-5) diff --git a/train_cim_qnn.py b/train_cim_qnn.py new file mode 100644 index 0000000000000000000000000000000000000000..871695488041a5c5e4284135a767c58dad5ee725 --- /dev/null +++ b/train_cim_qnn.py @@ -0,0 +1,270 @@ +from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, LearningRateScheduler +from tensorflow.keras.optimizers import SGD, Adam +from 
keras.losses import squared_hinge, categorical_crossentropy +from keras.models import Model +import tensorflow as tf +import keras.backend as K +import numpy as np + +from models.model_IMC import build_model, load_weights +from utils.config_utils import Config +from utils.load_data import load_dataset + +from utils.config_hardware_model import SramInfo +from config.config_cim_cnn_param import* + + +# // Override configuration // +override = {} +override_dir = {} + +for s in override: + s_s = s.split("=") + k = s_s[0].strip() + v = "=".join(s_s[1:]).strip() + override_dir[k]=v +override = override_dir +# Create config object +cf = Config(config_path,cmd_args = override) + + +############################ INTERNAL FUNCTIONS ############################## + +### Generate model ### +def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): + # Retrieve output files + SAVE_EN = FLAGS[0]; + EN_NOISE = FLAGS[1]; + ANALOG_BN = FLAGS[2]; + IS_FL_MLP = FLAGS[3]; + IDEAL_ABN = FLAGS[4] + ABN_INC_ADC = FLAGS[5]; + FLAG_PL = FLAGS[6] + # Weights file + w_file = data_files[1]; + # Retrieve resolution(s) + IAres = sramInfo.IAres; + Wres = sramInfo.Wres; + OAres = sramInfo.OAres; + # Construct the network + print('Construct the Network(s)\n') + + # // Create ideal model // + model = build_model(cf,network_struct,sramInfo,EN_NOISE,[FLAG_PL,not(ANALOG_BN),IDEAL_ABN,ABN_INC_ADC]) + + print('Loading data\n') + train_data, val_data = load_dataset(cf.dataset_name) + + if(IS_FL_MLP): + x_train = train_data[0].reshape(train_data[0].shape[0],train_data[0].shape[1]*train_data[0].shape[2]) + x_test = val_data[0].reshape(val_data[0].shape[0],val_data[0].shape[1]*val_data[0].shape[2]) + train_data = (x_train,train_data[1]) + val_data = (x_test,val_data[1]) + + # learning rate schedule + def scheduler(epoch): + if epoch == cf.decay_at_epoch: + index = cf.decay_at_epoch.index(epoch) + factor = cf.factor_at_epoch[index] + lr = K.get_value(model.optimizer.lr) + IT = train_data[0].shape[0]/cf.batch_size + current_lr = lr * (1./(1.+cf.decay*epoch*IT)) + K.set_value(model.optimizer.lr,current_lr*factor) + print('\nEpoch {} updates LR: LR = LR * {} = {}\n'.format(epoch+1,factor, K.get_value(model.optimizer.lr))) + return K.get_value(model.optimizer.lr) + + lr_decay = LearningRateScheduler(scheduler) + + + #sgd = SGD(lr=cf.lr, decay=cf.decay, momentum=0.9, nesterov=True) + adam= Adam(lr=cf.lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=cf.decay) + + # Perform training and validation on ideal model + print('Compiling the network\n') + model.compile(loss=categorical_crossentropy,optimizer=adam,metrics=['accuracy']) + if cf.finetune: + print('Load previous weights\n') + model.load_weights(w_file) + else: + print('No weights preloaded, training from scratch selected\n') + # Return created model + return model; + +### Numpy input quantization ### +def quant_input(x,IAres): + # Quantize between 0 and 2^IAres + m = pow(2,IAres)-1; + y = m*(x+1)/2; + return np.around(y,decimals=0); + +### Input pre-processing ### +def process_input(dataset,IS_FL_MLP,precisions): + # Get input resolution + IAres = precisions[0]; + # Get training and testing sets + print('loading data\n') + train_data, test_data = load_dataset(dataset) + # Reshape for first layer FC + if(IS_FL_MLP): + x_train = train_data[0].reshape(train_data[0].shape[0],train_data[0].shape[1]*train_data[0].shape[2]) + x_test = test_data[0].reshape(test_data[0].shape[0],test_data[0].shape[1]*test_data[0].shape[2]) + train_data = (x_train,train_data[1]) + test_data = 
(x_test,test_data[1]) + # Quantize inputs + x_train = quant_input(train_data[0],IAres); + x_test = quant_input(test_data[0],IAres); + train_data = (x_train,train_data[1]) + test_data = (x_test,test_data[1]) + return(train_data,test_data); + +### Train and evaluate model ### +def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): + # // Local variables // + # Retrieve resolution(s) + IAres = precisions[0] + Wres = precisions[1] + OAres = precisions[2] + # Retrieve inputs + train_data = input_data[0]; + test_data = input_data[1]; + # Retrieve output files + acc_file = data_files[0]; + w_file = data_files[1]; + in_file = data_files[2]; + out_file = data_files[3]; + inference_file = data_files[4]; + + # // Iterative training // + # BN weights storage + weightsTensorVec = []; + # Average on numerous trainings + acc_iter = []; acc_max = 0; + best_model = None; + for s in range(Niter): + # // Create callbacks // + print('Setting up the network and creating callbacks\n') + early_stop = EarlyStopping(monitor='loss', min_delta=0.001, patience=10, mode='min', verbose=1) + checkpoint = ModelCheckpoint(w_file, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max', save_freq='epoch') + tensorboard = TensorBoard(log_dir='./logs/' + dataset_name+network_type+network_struct, histogram_freq=0, write_graph=True, write_images=False) + + # // Learning rate schedule // + def scheduler(epoch): + if epoch == decay_at_epoch: + index = decay_at_epoch.index(epoch) + factor = factor_at_epoch[index] + lr = K.get_value(model.optimizer.lr) + IT = train_data[0].shape[0]/batch_size + current_lr = lr * (1./(1.+decay*epoch*IT)) + K.set_value(model.optimizer.lr,current_lr*factor) + print('\nEpoch {} updates LR: LR = LR * {} = {}\n'.format(epoch+1,factor, K.get_value(model.optimizer.lr))) + return K.get_value(model.optimizer.lr) + # Create LR scheduler using this custom scheduling + lr_decay = LearningRateScheduler(scheduler) + + # // Train the model // + print('### Training the network ###\n') + history = model.fit(train_data[0],train_data[1], + batch_size = batch_size, + epochs = epochs, + verbose = progress_logging, + callbacks = [checkpoint, tensorboard,lr_decay], + validation_split = 0.15, + workers = 4, + use_multiprocessing = True + ); + + # Test model + print('### Training done ! Evaluating on test data... 
###') + history_eval = model.evaluate(test_data[0],test_data[1], + batch_size = batch_size, + verbose = progress_logging + ) + # Get model weights and retrieve those of BN layers + weights_temp = model.get_weights(); + weightsTensorVec.append(weights_temp); + + # Get outputs of each layer_outputs + Nlayers = len(model.layers); + data_out = []; + for i in range(Nlayers): + partial_model = Model(model.input,model.layers[i].output); + data_out.append(partial_model(test_data[0],training=False)); + + # Print accuracy for this iteration + acc_train = history.history['accuracy'][-1] + acc_val = history.history['val_accuracy'][-1] + acc_test = history_eval[-1] + print('Evaluate done for iter #'+str(s)) + print(f'Training/Validation/Test accuracy: {100*acc_train:.2f}%/{100*acc_val:.2f}%/{100*acc_test:.2f}%') + # Store accuracy + acc_iter.append(np.array([acc_train,acc_val,acc_test])); + # Keep best model to extract weights and outputs from + if(acc_test > acc_max): + acc_max = acc_test; + best_model = model; + # Compute accuracy mean over the iterations to obain a robust result + acc_iter = np.stack(acc_iter,axis=1); + acc_mean = np.sum(acc_iter,axis=1)/Niter + print('###################################################################') + print('IMC model - average {}b-IA, {}b-W, {}b-OA test accuracy: {:.5f}'.format(IAres,Wres,OAres,acc_mean[2])) + print('###################################################################') + + # // Save results from best model across all iterations // + if(SAVE_EN): + fileID = open(acc_file,'w') + fileID.write("acc_train,acc_val,acc_test\n"); + for i in range(Niter): + fileID.write("{:.5f},{:.5f},{:.5f}\n".format(acc_iter[0,i],acc_iter[1,i],acc_iter[2,i])); + fileID.write("{:.5f},{:.5f},{:.5f}\n".format(acc_mean[0],acc_mean[1],acc_mean[2])); + fileID.close() + # Save inputs + with open(in_file,"w") as f: + np.savetxt(f,np.reshape(test_data[0][0:Nimg_save],(-1,1)),fmt='%d'); + # Save outputs + Nlayers = len(best_model.layers); indL = 0; + for i in range(Nlayers): + # Get desired layer outputs + partial_model = Model(best_model.input,best_model.layers[i].output); + data_out = partial_model(test_data[0][0:Nimg_save],training=False); + # Write outputs to file, if ADC output only + #if(i==6 or i==7 or i==8): + # print(data_out) + if(i==2 or i==6 or i==9): + out_file_temp = out_file+"_layer_{}.txt".format(indL); + indL = indL+1; + with open(out_file_temp,"w") as f: + np.savetxt(f,np.reshape(data_out,(-1,1)),fmt='%f'); + # Save inference result + with open(inference_file,"w") as f: + indResult = np.argmax(test_data[1][0:Nimg_save],axis=-1); + np.savetxt(f,np.reshape(indResult,(-1,1)),fmt='%d'); + # Save weights + best_model.save_weights(w_file); + + return; + +########################## IN/OUT FILES ############################# +# Fill output files name in and concat +acc_file = path_to_out+acc_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); +w_file = path_to_out+w_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); +in_file = path_to_out+in_file_template.format(dataset_name,IAres); +out_file = path_to_out+out_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); +inference_file = path_to_out+inference_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); +data_files = [acc_file,w_file,in_file,out_file,inference_file]; + +########################## 
GENERATE CIM-QNN MODEL ######################### +# Concatenate flags +FLAGS = [SAVE_EN,EN_NOISE,ANALOG_BN,IS_FL_MLP,IDEAL_ABN,ABN_INC_ADC,FLAG_PL]; +# Generate hardware information +sramInfo = SramInfo(arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,[IS_EMBEDDED,ABN_INC_ADC]); +sramInfo.simulator = simulator; +# Create model and check if successful +model = generate_model(data_files,cf,network_struct,sramInfo,FLAGS); + +########################## TRAIN & TEST ON PRE-DEFINED MODEL ######################### +# Concat precision info +precisions = [IAres,Wres,OAres]; +# // Pre-process input // +input_data = process_input(dataset_name,IS_FL_MLP,precisions); +# // Train and eval // +train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN); diff --git a/utils/config_hardware_model.py b/utils/config_hardware_model.py new file mode 100644 index 0000000000000000000000000000000000000000..f568d3bedcf5733de53d17f31ed4d3827abbdaf7 --- /dev/null +++ b/utils/config_hardware_model.py @@ -0,0 +1,496 @@ +######################################################## +######### DEFINE HARDWARE-DESCRIBING CLASSES ########### +######################################################## +import sys +import numpy as np +import tensorflow as tf +from utils.linInterp import makeLookup2D, makeLookup3D + + +class SramInfo: + def __init__(self,arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,CONF_FLAGS): + # // Retrieve config flags // + IS_EMBEDDED = CONF_FLAGS[0]; + ABN_INC_ADC = CONF_FLAGS[1]; + + # // Software options // + # Simulator + self.simulator = "eldo"; + # LUT files location + dir_LUT = './LUTs'; + # Noisy foward path in training + self.noise_at_inf = True; + # T_DP fixed by hardware + self.is_const_T_DP = SpiceObj(name="is_const_T_DP",data=True); + # Analog DP type: model or numerical + self.IS_NUM = True; + + # // Architecture information // + # IN/WEIGHT/OUT Resolution + self.IAres = IAres; # [bits] + self.Wres = Wres; # [bits] + self.OAres = OAres; # [bits] + # ABN resolution + self.r_gamma = r_gamma # [bits] + self.r_beta = r_beta # [bits] + # SRAM architecture & techno information (required by ELDO) + self.arch = SpiceObj(name=arch) # String ('6T','8T',...) + self.tech = SpiceObj(name=tech); # String ('ST65nmBulk',...) + self.typeT = SpiceObj(name=typeT); # String ('RVT','LVT',...) + # Input conversion type + self.inputType = SpiceObj(name="PWM"); # String ('DAC','PWM',...) 
+ # Size information + if(IS_EMBEDDED): + self.Nrows = SpiceObj('Nrows',Nrows+2*(2**r_beta)); # [bits] + else: + self.Nrows = SpiceObj('Nrows',Nrows); # [bits] + self.NB = SpiceObj('NB',0); # [bits] + # Supply information + self.VDD = SpiceObj('VDD_VAL',VDD); # [V] + self.GND = SpiceObj('GND_VAL',0); + self.BBN = SpiceObj('BBN_VAL',BBN); + self.BBP = SpiceObj('BBP_VAL',BBP); + + # // Analog DP // + # --- Hardware data --- + # Supply voltage on BL/BLB + self.VDD_BL = SpiceObj('VDD_BL',VDD/2); + # Timing information + # self.T_DP_vec = 1e-9*np.array([1.7,2.6,3.5,4.4,5.3,6.2,7.1,8.0]); self.T_DP_vec = self.T_DP_vec.astype("float32"); + self.T_DP_vec = np.arange(8); self.T_DP_vec = self.T_DP_vec.astype("float32"); + self.T_DP = SpiceObj('T_DP',0.8e-9); # [s] + if(IS_EMBEDDED): + self.T_DP.data = (Nrows+2*(2**r_beta))/Nrows*self.T_DP.data; + self.T_rise = SpiceObj('T_rise',25e-12) # [s] + self.T_fall = SpiceObj('T_fall',25e-12) # [s] + self.T_start = SpiceObj('T_start',0.1e-9) # [s] + # DR & activities + self.DR = SpiceObj('DR',0.95*self.VDD_BL.data) # [V] + self.act_BL = SpiceObj('act_BL',1) + if(tech == 'ST65nmBulk_LP'): + if(arch == '6T'): + if(self.typeT.name == 'LVT'): + act_WL_map = np.array([[0.6,0.7,0.8,0.9,1.0,1.1,1.2], + [0.470,0.507,0.522,0.531,0.531,0.552,0.537]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + elif(self.typeT.name == 'RVT'): + act_WL_map = np.array([[0.6,0.7,0.8,0.9,1.0,1.1,1.2], + [0.539,0.563,0.575,0.575,0.580,0.573,0.563]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + elif(self.typeT.name == 'HVT'): + act_WL_map = np.array([[0.6,0.7,0.8,0.9,1.0,1.1,1.2], + [0.564,0.564,0.564,0.564,0.564,0.564,0.564]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + else: + raise NameError('Error: selected transistor type does not exist in 65ST LP technology'); + elif(arch == '8T'): + act_WL_val = np.array[1.0]; + else: + raise NameError('Error: selected architecture (cell type) not supported !\n') + elif(tech == 'ST65nmBulk_GP'): + if(arch == '6T'): + if(self.typeT.name == 'LVT'): + act_WL_map = np.array([[0.5,0.6,0.7,0.8,0.9,1.0], + [0.459,0.459,0.459,0.459,0.459,0.459]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + elif(self.typeT.name == 'RVT'): + act_WL_map = np.array([[0.5,0.6,0.7,0.8,0.9,1.0], + [0.503,0.503,0.503,0.503,0.503,0.503]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + elif(self.typeT.name == 'HVT'): + act_WL_map = np.array([[0.5,0.6,0.7,0.8,0.9,1.0], + [0.534,0.534,0.534,0.534,0.534,0.534]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + else: + raise NameError('Error: selected transistor type does not exist in 65ST LP technology'); + elif(arch == '8T'): + act_WL_val = np.array[1.0]; + else: + raise NameError('Error: selected architecture (cell type) not supported !\n') + elif(tech == 'ST28nmFDSOI'): + if(arch == '6T'): + if(self.typeT.name == 'RVT'): + act_WL_map = np.array([[0.45,0.5,0.55,0.6,0.65,0.7,0.75,0.8,0.85,0.9], + [0.49,0.47,0.47,0.48,0.5,0.51,0.52,0.525,0.525,0.525]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + else: + raise NameError('Error: selected transistor type does not exist in 65ST LP technology'); + elif(arch == '8T'): + act_WL_val = np.array[1.0]; + else: + raise NameError('Error: selected architecture (cell type) not supported !\n') + elif(tech == 'ST130nmBulk'): + if(arch == '6T'): + if(self.typeT.name == 'RVT'): + act_WL_map = np.array([[0.6,0.7,0.8,0.9,1.0,1.1,1.2], + [0.49,0.497,0.494,0.49,0.48,0.465,0.465]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + else: + 
raise NameError('Error: selected transistor type does not exist in 65ST LP technology'); + elif(arch == '8T'): + act_WL_val = np.array[1.0]; + else: + raise NameError('Error: selected architecture (cell type) not supported !\n') + elif(tech == 'UMC180nmBulk'): + if(arch == '6T'): + if(self.typeT.name == 'RVT'): + act_WL_map = np.array([[0.9,1.0,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8], + [0.542,0.528,0.515,0.502,0.491,0.482,0.473,0.466,0.461,0.457]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + else: + raise NameError('Error: selected transistor type does not exist in 65ST LP technology'); + elif(tech == 'GF22nmFDX'): + if(arch == '6T'): + if(self.typeT.name == 'LVT'): + act_WL_map = np.array([[0.4,0.5,0.6,0.7,0.8], + [0.52,0.52,0.52,0.52,0.52]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + elif(self.typeT.name == 'RVT'): + act_WL_map = np.array([[0.65,0.8], + [0.5,0.5]]); + act_WL_val = act_WL_map[1][act_WL_map[0] == VDD]; + else: + raise NameError('Error: selected transistor type does not exist in 65ST LP technology'); + elif(arch == '8T'): + act_WL_val = np.array[1.0]; + else: + raise NameError('Error: selected architecture (cell type) not supported !\n') + self.act_WL = SpiceObj('act_WL',act_WL_val) + # Effective bit-0 voltage level (at BL = VDD) + self.V_Q0 = SpiceObj('V_Q0',0.0064); + # Parasitic cap/cell + if(tech == 'ST65nmBulk_LP'): + self.C_LBL = SpiceObj('C_LBL',50e-15/256) # [F] + elif(tech == 'ST65nmBulk_GP'): + self.C_LBL = SpiceObj('C_LBL',50e-15/256) # [F] + elif(tech == 'ST28nmFDSOI'): + self.C_LBL = SpiceObj('C_LBL',25e-15/256) # [F] + elif(tech == 'ST130nmBulk_GP'): + self.C_LBL = SpiceObj('C_LBL',90e-15/256) # [F] + elif(tech == 'UMC180nmBulk'): + self.C_LBL = SpiceObj('C_LBL',120e-15/256) # [F] + elif(tech == 'GF22nmFDX'): + self.C_LBL = SpiceObj('C_LBL',20e-15/256) # [F] + else: + raise NameError('Selected technology not supported !') + # Transistor sizes + if(tech == 'ST65nmBulk_LP'): + W_inv = 135e-9; # [m] + W_acc = 135e-9; # [m] + W_rbf = 135e-9; # [m] + L_inv = 60e-9; # [m] + L_acc = 60e-9; # [m] + L_rbf = 60e-9; # [m] + elif(tech == 'ST65nmBulk_GP'): + W_inv = 135e-9; + W_acc = 135e-9; + W_rbf = 135e-9; + L_inv = 60e-9; + L_acc = 60e-9; + L_rbf = 60e-9; + elif(tech == 'ST28nmFDSOI'): + W_inv = 80e-9; + W_acc = 80e-9; + W_rbf = 80e-9; + L_inv = 30e-9; + L_acc = 30e-9; + L_rbf = 30e-9; + elif(tech == 'ST130nmBulk_GP'): + W_inv = 150e-9; + W_acc = 150e-9; + W_rbf = 150e-9; + L_inv = 130e-9; + L_acc = 130e-9; + L_rbf = 130e-9; + elif(tech == 'UMC180nmBulk'): + W_inv = 240e-9; + W_acc = 240e-9; + W_rbf = 240e-9; + L_inv = 180e-9; + L_acc = 180e-9; + L_rbf = 180e-9; + elif(tech == 'GF22nmFDX'): + W_inv = 80e-9; + W_acc = 80e-9; + W_rbf = 80e-9; + L_inv = 20e-9; + L_acc = 20e-9; + L_rbf = 20e-9; + else: + raise NameError('Selected technology not supported !') + self.W_inv = SpiceObj('W_inv',W_inv) + self.W_acc = SpiceObj('W_acc',W_acc) + self.W_rbf = SpiceObj('W_rbf',W_rbf) + self.L_inv = SpiceObj('L_inv',L_inv) + self.L_acc = SpiceObj('L_acc',L_acc) + self.L_rbf = SpiceObj('L_rbf',L_rbf) + # Temperature + self.tempCelsius = SpiceObj(data=25); # [Celsius] + # Multipliticty parameter + self.multON = SpiceObj(name='multON'); + # Bitcell data parameter + self.storedWeight = SpiceObj(name='VQ'); + # --- BL/BLB post-layout LUT --- + # - Nominal result - + path_dir = dir_LUT+'/DP/6T_BL_1152cells_LUT_PL_int_base.txt'; + #path_dir = dir_LUT+'<insert_DP_TF_filename>.txt'; + BL_vec = np.arange(0,(2**IAres-1)*Nrows+1,(2**IAres-1)*8); + Ndp = Nrows//8+1; + # Get & 
reshape data + data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); + # temp_LUT = np.reshape(data_temp,(np.size(self.T_DP_vec),Ndp,Ndp,4)); temp_LUT = np.swapaxes(temp_LUT,0,2); + temp_LUT = np.reshape(data_temp,(np.size(self.T_DP_vec),Ndp,4)); + temp_LUT = temp_LUT.astype("float32"); temp_LUT = np.flip(temp_LUT,axis=-1); + # Make 3D lookup of linear interpolations +# DP_LUT_0 = makeLookup3D(np.squeeze(temp_LUT[...,0]),BL_vec,BL_vec,self.T_DP_vec); +# DP_LUT_1 = makeLookup3D(np.squeeze(temp_LUT[...,1]),BL_vec,BL_vec,self.T_DP_vec); +# DP_LUT_2 = makeLookup3D(np.squeeze(temp_LUT[...,2]),BL_vec,BL_vec,self.T_DP_vec); +# DP_LUT_3 = makeLookup3D(np.squeeze(temp_LUT[...,3]),BL_vec,BL_vec,self.T_DP_vec); +# self.DP_LUT = tf.stack([DP_LUT_0,DP_LUT_1,DP_LUT_2,DP_LUT_3],axis=3); + BL_LUT_0 = makeLookup2D(np.squeeze(temp_LUT[...,0]),self.T_DP_vec,BL_vec); BLB_LUT_0 = BL_LUT_0 + BL_LUT_1 = makeLookup2D(np.squeeze(temp_LUT[...,1]),self.T_DP_vec,BL_vec); BLB_LUT_1 = BL_LUT_1 + BL_LUT_2 = makeLookup2D(np.squeeze(temp_LUT[...,2]),self.T_DP_vec,BL_vec); BLB_LUT_2 = BL_LUT_2 + BL_LUT_3 = makeLookup2D(np.squeeze(temp_LUT[...,3]),self.T_DP_vec,BL_vec); BLB_LUT_3 = BL_LUT_3 + self.BL_LUT = tf.stack([BL_LUT_0,BL_LUT_1,BL_LUT_2,BL_LUT_3],axis=3); + self.BLB_LUT = tf.stack([BLB_LUT_0,BLB_LUT_1,BLB_LUT_2,BLB_LUT_3],axis=3); + + # - Deviation per DP result - + path_dir = dir_LUT + '/DP/6T_BL_1152cells_LUT_PL_int_base_dev.txt'; + #path_dir = dir_LUT+'/DP/<insert_DP_dev_filename>.txt' + # Get & reshape data + data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); + # temp_LUT = np.reshape(data_temp,(np.size(self.T_DP_vec),Ndp,Ndp,4)); temp_LUT = np.swapaxes(temp_LUT,0,2); + temp_LUT = np.reshape(data_temp,(np.size(self.T_DP_vec),Ndp,4)); + temp_LUT = temp_LUT.astype("float32"); temp_LUT = np.flip(temp_LUT,axis=-1); +# # Make 3D lookup of linear interpolations + sig_BL_LUT_0 = makeLookup2D(np.squeeze(temp_LUT[...,0]),self.T_DP_vec,BL_vec); sig_BLB_LUT_0 = sig_BL_LUT_0 + sig_BL_LUT_1 = makeLookup2D(np.squeeze(temp_LUT[...,1]),self.T_DP_vec,BL_vec); sig_BLB_LUT_1 = sig_BL_LUT_1 + sig_BL_LUT_2 = makeLookup2D(np.squeeze(temp_LUT[...,2]),self.T_DP_vec,BL_vec); sig_BLB_LUT_2 = sig_BL_LUT_2 + sig_BL_LUT_3 = makeLookup2D(np.squeeze(temp_LUT[...,3]),self.T_DP_vec,BL_vec); sig_BLB_LUT_3 = sig_BL_LUT_3 + self.sig_BL_LUT = tf.stack([sig_BL_LUT_0,sig_BL_LUT_1,sig_BL_LUT_2,sig_BL_LUT_3],axis=3); + self.sig_BLB_LUT = tf.stack([sig_BLB_LUT_0,sig_BLB_LUT_1,sig_BLB_LUT_2,sig_BLB_LUT_3],axis=3); +# self.sig_BL_LUT = None; +# self.sig_BLB_LUT = None; + + # // DTSE information // + # --- Hardware data --- + self.VDD_DTSE = VDD/2; + self.C_int_dtse = SpiceObj('C_int_dtse',1e-15); # LSB cap, hence 8f/8 = 1f + self.C_L_dtse = SpiceObj('C_L_dtse',1e-15*np.array([7.74812,7.886,8.169,7.981])+1.5e-15); + # --- DTSE post-layout LUT --- + path_dir = dir_LUT+'/DTSE/fitCoef_DTSE.txt'; + #path_dir = dir_LUT+'<path_to_DTSE_filename>.txt'; + data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); + self.DTSE_LUT = np.reshape(data_temp.astype("float32"),(3,4,4)); + self.DTSE_LUT = np.swapaxes(self.DTSE_LUT,1,2); + + # // ABN information // + # --- Hardware data --- + # Supply voltage + self.VDD_ABN = VDD/2; + self.Vmax_beta_g = 0.05; + self.Vmin_beta_g = -0.04; + self.Vmax_beta_l = 0.1/2.75; + self.Vmin_beta_l = 0.0125/2.75; + + # Timing + self.T_ABN = SpiceObj(name='T_ABN',data=0.1e-9); + # Load capacitance + self.C_ABN = SpiceObj(name='C_ABN',data=4*2.12e-15); + # Output 
parasitic capacitance + self.C_paras_ABN = SpiceObj(name='C_paras',data=0); + # Transistor sizing + self.W_ABN = SpiceObj(name='W_ABN',data=300e-9); + self.L_ABN = SpiceObj(name='L_ABN',data=100e-9); + self.W_PRE = SpiceObj(name='W_PRE',data=200e-9); + self.L_PRE = SpiceObj(name='L_PRE',data=40e-9); + # --- ABN post-layout LUT --- + Nabn = 401; + path_dir = dir_LUT+'/ABN/mean_TF_ABN_PL_int.txt'; + #path_dir = dir_LUT+'<path_to_ABN_TF_filename>.txt'; + # Get & reshape data + data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); + temp_LUT = np.reshape(data_temp,(2**r_gamma,Nabn,8)); + if(ABN_INC_ADC): + temp_LUT = temp_LUT[...,4:8]; + else: + temp_LUT = temp_LUT[...,0:4]; + temp_LUT = temp_LUT.astype("float32"); temp_LUT = np.flip(temp_LUT,axis=-1); + # Make 2D lookup of linear interpolations + ABN_LUT_0 = makeLookup2D(np.squeeze(temp_LUT[...,0]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + ABN_LUT_1 = makeLookup2D(np.squeeze(temp_LUT[...,1]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + ABN_LUT_2 = makeLookup2D(np.squeeze(temp_LUT[...,2]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + ABN_LUT_3 = makeLookup2D(np.squeeze(temp_LUT[...,3]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + self.ABN_LUT = tf.stack([ABN_LUT_0,ABN_LUT_1,ABN_LUT_2,ABN_LUT_3],axis=3); + # Get the DP voltages corresponding to ABN half-range output + indLookUp = np.argmin(np.abs(temp_LUT - self.VDD_ABN/2),axis=1); + V_DP_half_LUT = indLookUp*(self.VDD_ABN-0)/Nabn; + self.V_DP_half_LUT = V_DP_half_LUT.astype("float32"); + # ABN mismatch + path_dir = dir_LUT+'/ABN/sig_TF_ABN_PL_int.txt'; + #path_dir = dir_LUT+'<path_to_ABN_dev_filename>.txt'; + data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); + temp_LUT = np.reshape(data_temp,(2**r_gamma,Nabn,8)); + if(ABN_INC_ADC): + temp_LUT = temp_LUT[...,4:8]; + else: + temp_LUT = temp_LUT[...,0:4]; + temp_LUT = temp_LUT.astype("float32"); temp_LUT = np.flip(temp_LUT,axis=-1); + # Make 2D lookup of linear interpolations + sig_ABN_LUT_0 = makeLookup2D(np.squeeze(temp_LUT[...,0]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + sig_ABN_LUT_1 = makeLookup2D(np.squeeze(temp_LUT[...,1]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + sig_ABN_LUT_2 = makeLookup2D(np.squeeze(temp_LUT[...,2]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + sig_ABN_LUT_3 = makeLookup2D(np.squeeze(temp_LUT[...,3]),np.arange(0,2**r_gamma),np.linspace(0,self.VDD_DTSE,Nabn)); + self.sig_ABN_LUT = tf.stack([sig_ABN_LUT_0,sig_ABN_LUT_1,sig_ABN_LUT_2,sig_ABN_LUT_3],axis=3); + + # --- Get actual BN gain value achieved during ABN --- + path_dir = dir_LUT+'/ABN/devGainABN.txt'; + #path_dir = dir_LUT+'<path_to_ABN_dev_gain_filename>.txt' + data_temp = np.genfromtxt(path_dir,delimiter=" ",skip_header=1,skip_footer=0); + self.gainABN_LUT = data_temp.astype("float32"); + + # // ADC information // + self.VDD_ADC = SpiceObj('VDD_ADC_VAL',VDD); + + # // Resistive ladder information + self.Nconf_beta_ladder = 5; + + +class SpiceObj: + def __init__(self,name=None,data=None): + self.name = name; + self.data = data; + +class Hardware: + def __init__(self,mu,e_ox,t_ox,Ut,eta,Vea,n_body,mu_Vth,sig_Vth,mu_Vt_abn,sig_Vt_abn,C_tot,V_BL0,V_WL0,V_Q0,sramInfo): + # Process parameters + self.mu = mu; # [Vm/s^2] + self.e_ox = e_ox; # [N/C^2] + self.t_ox = t_ox; # [m] + self.Ut = Ut; # [V] + self.Vea = Vea; # [V] + self.eta = eta; # [/] + self.n_body = n_body; # [/] + self.mu_Vth = 
mu_Vth; # [V] + self.sig_Vth = sig_Vth; # [V] + self.mu_Vt_abn = mu_Vt_abn; # [V] + self.sig_Vt_abn = sig_Vt_abn; #[V] + # Size and parasitics + self.C_tot = C_tot # [F] + # Operation parameters + self.V_BL0 = V_BL0; # [V] + self.V_WL0 = V_WL0; # [V], 1D-vector + self.V_Q0 = V_Q0; # [V] + # Spice-required design space parameters + self.sramInfo = sramInfo + # Initialize hardware curve-fitting parameters + self.a1 = None; + self.a2 = None; + self.a3 = None; + self.a4 = None; + self.b1 = None; + +def genHardware(sramInfo): + # Physical constants + e_0 = 8.85e-12; # Absolute electric permittivity [N/C^2] + Ut = 0.0257; # Thermal voltage [kB*T/q] + # Internal information required + IAres = sramInfo.IAres; + r_beta = sramInfo.r_beta; Ns_beta = 2**r_beta; + Nrows = sramInfo.Nrows.data; + techName = sramInfo.tech.name; + flavorName = sramInfo.typeT.name; + + # TECHNO TYPE + if(techName == '65nm_LP'): + if(flavorName == 'RVT'): + # Techno parameters + mu = 0.031508; + e_ox = 3.9*e_0; + t_ox = 2.3926e-9; + Vea = 2.32; + eta = 999; + n_body = 1.2; + Vdsat = 0.108; + mu_Vth = 0.523; + sig_Vth = 10*0.035; + Vt_abn = mu_Vth; + sig_Vt_abn = sig_Vth; + # Total BL capitalize + C_int = 20.5e-15/256; + C_par = sramInfo.C_LBL.data; + C_tot = Nrows*(C_int+C_par); + # Precharge & Non-linear mapping + V_WL0 = sramInfo.act_WL.data*sramInfo.VDD.data; + V_BL0 = sramInfo.act_BL.data*sramInfo.VDD.data; + V_Q0 = sramInfo.V_Q0.data; + elif(flavorName == 'LVT'): + # Techno parameters + mu = 0.031508; + e_ox = 3.9*e_0; + t_ox = 2.3926e-9; + Vea = 2.033; + eta = 999; + n_body = 1.2; + mu_Vth = 0.40875; + sig_Vth = 0.0336; + Vt_abn = mu_Vth; + sig_Vt_abn = sig_Vth; + # Total BL capitalize (not really techno-relevant, depends on Nrows --> should be cap/cell) + C_int = 20.5e-15/256; + C_par = sramInfo.C_LBL.data; + C_tot = Nrows*(C_int+C_par); + # Precharge & Non-linear mapping + V_WL0 = sramInfo.act_WL.data*sramInfo.VDD.data; + V_BL0 = sramInfo.act_BL.data*sramInfo.VDD.data; + V_Q0 = sramInfo.V_Q0.data; + else: + raise NameError('Selected flavor not supported !\n') + elif(techName == 'GF22nmFDX'): + if(flavorName == 'LVT'): + # Techno parameters + mu = 0.03125; + e_ox = 3.9*e_0; + t_ox = 2.3926e-9; + Vea = 2.033; + eta = 999; + mu_Vth = 0.325; + sig_Vth = 0.035; + Vt_abn = 0.1915-0.03; + sig_Vt_abn = 0.015; + n_body = 1.15; + # Total BL capitalize (not really techno-relevant, depends on Nrows --> should be cap/cell) + C_int = 10e-15/256; + C_par = sramInfo.C_LBL.data; + C_tot = Nrows*(C_int+C_par); + # Precharge & Non-linear mapping + V_WL0 = sramInfo.act_WL.data*sramInfo.VDD.data; + V_BL0 = sramInfo.act_BL.data*sramInfo.VDD_BL.data; + V_Q0 = sramInfo.V_Q0.data; + elif(flavorName == 'RVT'): + # Techno parameters + mu = 0.03125; + e_ox = 3.9*e_0; + t_ox = 2.3926e-9; + Vea = 2.033; + eta = 999; + mu_Vth = 0.495; + sig_Vth = 1*0.018; + Vt_abn = 0.32; + sig_Vt_abn = 0.015; + n_body = 1.15; + # Total BL capitalize (not really techno-relevant, depends on Nrows --> should be cap/cell) + C_int = 10e-15/256; + C_par = sramInfo.C_LBL.data; + C_tot = Nrows*(C_int+C_par); + # Precharge & Non-linear mapping + V_WL0 = sramInfo.act_WL.data*sramInfo.VDD.data; + V_BL0 = sramInfo.act_BL.data*sramInfo.VDD_BL.data; + V_Q0 = sramInfo.V_Q0.data; + else: + raise NameError('Selected flavor not supported !\n') + else: + raise NameError('Selected techno not supported, please select a valid one !\n'); + + technStruct = Hardware(mu,e_ox,t_ox,Ut,eta,Vea,n_body,mu_Vth,sig_Vth,Vt_abn,sig_Vt_abn,C_tot,V_BL0,V_WL0,V_Q0,sramInfo); + return technStruct; 
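[Editor's note] For reference, a minimal usage sketch of the two pieces above (SramInfo and genHardware). The constructor arguments below are illustrative assumptions taken from the supported branches (6T bitcell, GF22nmFDX, LVT flavour, VDD = 0.8 V); in the training script they come from config/config_cim_cnn_param.py, and Nrows / r_gamma / r_beta must match the dimensions of the post-layout LUT files under ./LUTs, which SramInfo reads at construction time.

# A minimal sketch, assuming the ./LUTs files shipped with the repo are present
# and sized for the values below (hypothetical configuration, not the project default).
from utils.config_hardware_model import SramInfo, genHardware

# 6T / GF22nmFDX / LVT at VDD = 0.8 V, 4b inputs, 1b weights, 4b outputs,
# 5b ABN gain and offset, 1152 rows, no embedded ABN, ABN model including the ADC.
sramInfo = SramInfo('6T', 'GF22nmFDX', 'LVT', 0.8, 0.0, 0.8,
                    4, 1, 4, 5, 5, 1152, [0, 1])

# Derive the physical model consumed by the analog layers.
hw = genHardware(sramInfo)
print(hw.C_tot)    # total bit-line capacitance [F], i.e. Nrows*(C_int + C_par)
print(hw.V_WL0)    # activity-scaled word-line voltage [V]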
diff --git a/utils/config_hardware_num.py b/utils/config_hardware_num.py new file mode 100644 index 0000000000000000000000000000000000000000..946711d1f8ff0a30d90ded4d50c479978c886798 --- /dev/null +++ b/utils/config_hardware_num.py @@ -0,0 +1,83 @@ +######################################################## +######### DEFINE HARDWARE-DESCRIBING CLASSES ########### +######################################################## +import numpy as np + +class SpiceInfo: + def __init__(self,name=None,data=None): + self.name = name; # String + self.data = data; # Numeric + +class SramInfo: + def __init__(self, + # PVT + tempCelsius = 25, + # Architecture + arch = '6T', + Nrows = 256, + NB = 256, + # Technology + tech = 'ST65nmBulk_LP', + flavor = 'RVT', + # Transistors + W_inv = 135e-9, + L_inv = 60e-9, + W_acc = 135e-9, + L_acc = 60e-9, + # Supply voltages + VDD = 1.2, + GND = 0, + BBN = 0, + BBP = 1.2, + # Parasitics + C_LBL = 50e-15/256, + # Dynamic range + DR = 0.95*1.2, + # Activities + act_BL = 1, + act_WL = np.array([0,0.563]), + # Timing information + t_start = 0.5e-9, + t_rise = 1e-12, + t_fall = 1e-12, + T_read = 100e-9, + Tsimu = 500e-9, + # Resolution + IAres = 1, + OAres = 1): + # PVT + self.tempCelsius = SpiceInfo('T',tempCelsius); + # Architecture + self.arch = arch; # String ('6T','8T',...) + self.Nrows = SpiceInfo('Nrows',Nrows); # [cells] + self.NB = SpiceInfo('NB',NB); # [cells] + self.multON = SpiceInfo('multON',NB); # [cells] + # Technology + self.tech = SpiceInfo(tech,None); # String ('ST65nmBulk',...) + self.flavor = SpiceInfo(flavor,None); # String ('RVT','LVT',...) + # Transistors + self.W_inv = SpiceInfo('W_inv',W_inv); # [m] + self.L_inv = SpiceInfo('L_inv',L_inv); # [m] + self.W_acc = SpiceInfo('W_acc',W_acc); # [m] + self.L_acc = SpiceInfo('L_acc',L_acc); # [m] + # Supply voltages + self.VDD = SpiceInfo('VDD_VAL',VDD); # [V] + self.GND = SpiceInfo('GND_VAL',GND); # [V] + self.BBN = SpiceInfo('BBN_VAL',BBN); # [V] + self.BBP = SpiceInfo('BBP_VAL',BBP); # [V] + # Parasitics + self.C_LBL = SpiceInfo('C_LBL',C_LBL); # [F/cell] + # DR & &activities + self.DR = SpiceInfo('DR',DR); # [V] + self.act_BL = SpiceInfo('act_BL',act_BL); # [V/V] + self.act_WL = SpiceInfo('act_WL',act_WL); # [V/V] + # Timing information + self.t_start = SpiceInfo('t_start',t_start); # [s] + self.t_rise = SpiceInfo('t_rise',t_rise); # [s] + self.t_fall = SpiceInfo('t_fall',t_fall); # [s] + self.T_read = SpiceInfo('T_read',T_read); # [s] + self.Tsimu = SpiceInfo('Tsimu',Tsimu); # [s] + # Resolution + self.IAres = IAres; # [bits] + self.OAres = OAres; # [bits] + \ No newline at end of file diff --git a/utils/config_utils.py b/utils/config_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..24633d65064acdc176f9dc488a0a7f9437549864 --- /dev/null +++ b/utils/config_utils.py @@ -0,0 +1,84 @@ + +import warnings + +def import_from(mdl, name): + mdl = __import__(mdl, fromlist=[name]) + return getattr(mdl, name) + + +#required, default (if not required), type, subtype*(if previous type is list or tuple) +parameter_specs = { + 'cpu' :[True, None, bool], + 'epochs' :[True, None, str], + 'network_type' :[True, None, str], + 'finetune' :[True, None, bool], + 'out_wght_path' :[True, None, str], + 'decay' :[True, None, float], + 'lr' :[True, None, float], + 'decay_at_epoch' :[True, None, list, int], + 'factor_at_epoch' :[True, None, list, float], + 'progress_logging' :[True, None, bool], + 'batch_size' :[True, None, int], + 'kernel_lr_multiplier' :[True, None, float], + 'tensorboard_name' :[True, 
None, str], + 'kernel_regularizer' :[True, None, float], + 'activity_regularizer' :[True, None, float], + 'kern_size' :[False,None,int], + 'dataset_name' :[False, None, int], + 'dim' :[False, None, int], + 'channels' :[False, None, int], + 'classes' :[False, None, int], + } + +def parse_param(param, value): + #todo: support complex types ( (nested) lists/tuples...) + if isinstance(value, parameter_specs[param][2]): + return value + elif not parameter_specs[param][0]: # if not required, check if None + if value in ['None', 'none', '']: + return None + + return parameter_specs[param][2](value) + +class Config: + def __init__(self, cfg, cmd_args = {}): + try: + + for k in parameter_specs: + self.proces_param(k, cfg, cmd_args) + + except ImportError: + print('The configfile you provided ({}) cannot be imported, please verify.'.format(cfg)) + exit(1) + + + self.postprocess() + + def proces_param(self, param, cfg, cmd_args): + if param in cmd_args : + setattr(self, param.lower(), parse_param(param, cmd_args[param])) + elif param.lower() in cmd_args: + setattr(self, param.lower(), parse_param(param, cmd_args[param.lower()])) + else: + try: + setattr(self, param.lower(),import_from('config.{}'.format(cfg), param)) + except AttributeError: + if parameter_specs[param][0]: #if required + raise + else: + setattr(self, param.lower(), parameter_specs[param][1]) + + + def postprocess(self): + #if hasattr(self, 'bits') and self.bits is not None: + # if self.abits is None: + # self.abits=self.bits + # warnings.warn('specialized bits to abits') + # if self.wbits is None: + # self.wbits = self.bits + # warnings.warn('specialized bits to wbits') + #del self.bits #to make sure it is not further used + if hasattr(self, 'class'): + self.clss=getattr(self,'class') + self.out_wght_path = './weights/{}_{}.hdf5'.format(self.dataset_name,self.network_type) + self.tensorboard_name = '{}_{}.hdf5'.format(self.dataset_name,self.network_type) \ No newline at end of file diff --git a/utils/linInterp.py b/utils/linInterp.py new file mode 100644 index 0000000000000000000000000000000000000000..304387469456bc18433bf3caab8f61e3445b79fe --- /dev/null +++ b/utils/linInterp.py @@ -0,0 +1,121 @@ +import numpy as np +import tensorflow as tf +import keras.backend as K + + +# /// Make 2D-lookup of coefficients for bilinear interpolation /// +# /// Bilinear equation: f(x,y) = a0 + a1*x + a2*y + a3*x*y +def makeLookup2D(fVal,x1_vec,x2_vec): + # Vector sizes + N1 = np.size(x1_vec); + N2 = np.size(x2_vec); + # Fill-in lookup for model coefficients + linearLookup = np.zeros((N1-1,N2-1,4)); + for i in range(N1-1): + for j in range(N2-1): + # Compute common den + den = (x1_vec[i]-x1_vec[i+1])*(x2_vec[j]-x2_vec[j+1]); + # Compute coefficients + a0 = fVal[i,j]*x1_vec[i+1]*x2_vec[j+1]-fVal[i,j+1]*x1_vec[i+1]*x2_vec[j] \ + -fVal[i+1,j]*x1_vec[i]*x2_vec[j+1]+fVal[i+1,j+1]*x1_vec[i]*x2_vec[j]; + a0 = a0/den; + a1 = -fVal[i,j]*x2_vec[j+1]+fVal[i,j+1]*x2_vec[j] \ + +fVal[i+1,j]*x2_vec[j+1]-fVal[i+1,j+1]*x2_vec[j]; + a1 = a1/den; + a2 = -fVal[i,j]*x1_vec[i+1]+fVal[i,j+1]*x1_vec[i+1] \ + +fVal[i+1,j]*x1_vec[i]-fVal[i+1,j+1]*x1_vec[i]; + a2 = a2/den; + a3 = fVal[i,j]-fVal[i,j+1]-fVal[i+1,j]+fVal[i+1,j+1]; + a3 = a3/den; + # Fill lookup + linearLookup[i,j,::] = np.array([a0,a1,a2,a3]); + # Make numpy array into constant tensor + linearLookup = linearLookup.astype("float32"); + linearLookup = tf.constant(linearLookup); + # Return table + return linearLookup; + +# /// Make 3D-lookup of coefficients for trilinear interpolation /// +# /// Bilinear equation: 
f(x,y,z) = a0 + a1*x + a2*y+ a3*z + a4*x*y + a5*x*z + a6*y*z + a7*x*y*z +def makeLookup3D(fVal,x1_vec,x2_vec,x3_vec): + # Vector sizes + N1 = np.size(x1_vec); + N2 = np.size(x2_vec); + N3 = np.size(x3_vec); + # Vector shifted by 1 + x1_vec_p = np.concatenate((x1_vec[1::],[0])); + x2_vec_p = np.concatenate((x2_vec[1::],[0])); + # Matrix shifted by 1 in the different directions + #fVal_000 = fVal; + # Fill-in lookup for model coefficients + linearLookup = np.zeros((N3-1,N1-1,N2-1,8)); + for k in range(N3-1): + for i in range(N1-1): + for j in range(N2-1): + # Compute common den + den = (x1_vec[i]-x1_vec[i+1])*(x2_vec[j]-x2_vec[j+1])*(x3_vec[k]-x3_vec[k+1]); + # Compute coefficients + a0 = -fVal[i,j,k]*x1_vec[i+1]*x2_vec[j+1]*x3_vec[k+1]+fVal[i,j,k+1]*x1_vec[i+1]*x2_vec[j+1]*x3_vec[k]+fVal[i,j+1,k]*x1_vec[i+1]*x2_vec[j]*x3_vec[k+1]-fVal[i,j+1,k+1]*x1_vec[i+1]*x2_vec[j]*x3_vec[k] \ + + fVal[i+1,j,k]*x1_vec[i]*x2_vec[j+1]*x3_vec[k+1]-fVal[i+1,j,k+1]*x1_vec[i]*x2_vec[j+1]*x3_vec[k]-fVal[i+1,j+1,k]*x1_vec[i]*x2_vec[j]*x3_vec[k+1]+fVal[i+1,j+1,k+1]*x1_vec[i]*x2_vec[j]*x3_vec[k]; + a0 = a0/den; + + a1 = fVal[i,j,k]*x2_vec[j+1]*x3_vec[k+1]-fVal[i,j,k+1]*x2_vec[j+1]*x3_vec[k]-fVal[i,j+1,k]*x2_vec[j]*x3_vec[k+1]+fVal[i,j+1,k+1]*x2_vec[j]*x3_vec[k] \ + - fVal[i+1,j,k]*x2_vec[j+1]*x3_vec[k+1]+fVal[i+1,j,k+1]*x2_vec[j+1]*x3_vec[k]+fVal[i+1,j+1,k]*x2_vec[j]*x3_vec[k+1]-fVal[i+1,j+1,k+1]*x2_vec[j]*x3_vec[k]; + a1 = a1/den; + + a2 = fVal[i,j,k]*x1_vec[i+1]*x3_vec[k+1]-fVal[i,j,k+1]*x1_vec[i+1]*x3_vec[k]-fVal[i,j+1,k]*x1_vec[i+1]*x3_vec[k+1]+fVal[i,j+1,k+1]*x1_vec[i+1]*x3_vec[k] \ + - fVal[i+1,j,k]*x1_vec[i]*x3_vec[k+1]+fVal[i+1,j,k+1]*x1_vec[i]*x3_vec[k]+fVal[i+1,j+1,k]*x1_vec[i]*x3_vec[k+1]-fVal[i+1,j+1,k+1]*x1_vec[i]*x3_vec[k]; + a2 = a2/den; + + a3 = fVal[i,j,k]*x1_vec[i+1]*x2_vec[j+1]-fVal[i,j,k+1]*x1_vec[i+1]*x2_vec[j+1]-fVal[i,j+1,k]*x1_vec[i+1]*x2_vec[j]+fVal[i,j+1,k+1]*x1_vec[i+1]*x2_vec[j] \ + - fVal[i+1,j,k]*x1_vec[i]*x2_vec[j+1]+fVal[i+1,j,k+1]*x1_vec[i]*x2_vec[j+1]+fVal[i+1,j+1,k]*x1_vec[i]*x2_vec[j]-fVal[i+1,j+1,k+1]*x1_vec[i]*x2_vec[j]; + a3 = a3/den; + + a4 = -fVal[i,j,k]*x3_vec[k+1]+fVal[i,j,k+1]*x3_vec[k]+fVal[i,j+1,k]*x3_vec[k+1]-fVal[i,j+1,k+1]*x3_vec[k]+fVal[i+1,j,k]*x3_vec[k+1]-fVal[i+1,j,k+1]*x3_vec[k]-fVal[i+1,j+1,k]*x3_vec[k+1]+fVal[i+1,j+1,k+1]*x3_vec[k]; + a4 = a4/den; + + a5 = -fVal[i,j,k]*x2_vec[j+1]+fVal[i,j,k+1]*x2_vec[j+1]+fVal[i,j+1,k]*x2_vec[j]-fVal[i,j+1,k+1]*x2_vec[j]+fVal[i+1,j,k]*x2_vec[j+1]-fVal[i+1,j,k+1]*x2_vec[j+1]-fVal[i+1,j+1,k]*x2_vec[j]+fVal[i+1,j+1,k+1]*x2_vec[j]; + a5 = a5/den; + + a6 = -fVal[i,j,k]*x1_vec[i+1]+fVal[i,j,k+1]*x1_vec[i+1]+fVal[i,j+1,k]*x1_vec[i+1]-fVal[i,j+1,k+1]*x1_vec[i+1]+fVal[i+1,j,k]*x1_vec[i]-fVal[i+1,j,k+1]*x1_vec[i]-fVal[i+1,j+1,k]*x1_vec[i]+fVal[i+1,j+1,k+1]*x1_vec[i]; + a6 = a6/den; + + a7 = fVal[i,j,k]-fVal[i,j,k+1]-fVal[i,j+1,k]+fVal[i,j+1,k+1]-fVal[i+1,j,k]+fVal[i+1,j,k+1]+fVal[i+1,j+1,k]-fVal[i+1,j+1,k+1]; + a7 = a7/den; + + # Store into coefficients LUT + linearLookup[k,i,j,::] = np.array([a0,a1,a2,a3,a4,a5,a6,a7]); + + # Make numpy array into constant tensor + linearLookup = linearLookup.astype("float32"); + linearLookup = tf.constant(linearLookup); + # Return table + return linearLookup; + +# /// 1D interpolcation from a LUT /// +def interp_1D(LUT,x): + # Retrieve ind_x + x0 = tf.math.floor(x); + ind_x = K.cast(x0,"int32"); + # Perform interpolation + y1 = tf.gather(LUT,ind_x+1); y0 = tf.gather(LUT,ind_x); + y = y0 + (x-x0)*(y1-y0); + return y; + +# /// 2D interpolation from a LUT - specific to numerical analog DP /// +def 
doInterpDP_2D(LUT,x1,x2,x1_vec,x2_max,N2): + # Possibly reshape x2 if CONV layer + x2_shape = tf.shape(x2); + x2 = tf.reshape(x2,(-1,x2_shape[-1])); + # Get indices + ind_x1 = K.cast(tf.math.floor(x1),"int32"); + ind_x2 = K.clip(tf.math.floor(x2/x2_max*N2),0,N2); ind_x2 = K.cast(ind_x2,"int32"); + # Get corresponding coefficients + coef_vec = tf.gather_nd(LUT,tf.stack([ind_x1*K.ones_like(ind_x2),ind_x2],axis=2)); + # Perform interpolation + f_int = coef_vec[::,::,0]+coef_vec[::,::,1]*x1+coef_vec[::,::,2]*x2+coef_vec[::,::,3]*x1*x2; + # Reshape result back, if needed + f_int = tf.reshape(f_int,x2_shape); + # Return interpolated result + return f_int + \ No newline at end of file diff --git a/utils/load_data.py b/utils/load_data.py index ca76fafe5a493e2a5ce716e2ed1389542faa3624..080556bc2e2315a5594ecc83c00d5a117a36b4fa 100644 --- a/utils/load_data.py +++ b/utils/load_data.py @@ -66,7 +66,7 @@ def load_dataset(dataset): #train_set = mnist(which_set="train", start=0, stop=train_set_size) #valid_set = mnist(which_set="train", start=train_set_size, stop=60000) #test_set = mnist(which_set="test") - path_to_file = '../my_datasets/mnist.npz' + path_to_file = './my_datasets/mnist.npz' (train_set_X,train_set_Y),(valid_set_X,valid_set_Y) = my_mnist.load_data(path_to_file) train_set_X = np.transpose(np.reshape(np.subtract(np.multiply(2. / 255., train_set_X), 1.), (-1, 1, 28, 28)),(0,2,3,1))
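[Editor's note] The interpolation helpers above store, for every grid cell, the four coefficients of the bilinear model f(x,y) ≈ a0 + a1*x + a2*y + a3*x*y. A small self-contained check of makeLookup2D is sketched below; the grid values are hypothetical (not taken from the repository's LUT files) and simply verify that the stored coefficients reproduce the tabulated value at a cell corner.

# A minimal sketch, assuming a TF2 environment where utils.linInterp imports cleanly.
import numpy as np
from utils.linInterp import makeLookup2D

# Hypothetical 3x3 grid of sampled values f(x1, x2) = x1*x2 + 1 (any smooth surface works).
x1_vec = np.array([0.0, 1.0, 2.0], dtype="float32")
x2_vec = np.array([0.0, 2.0, 4.0], dtype="float32")
fVal   = np.outer(x1_vec, x2_vec) + 1.0

lut = makeLookup2D(fVal, x1_vec, x2_vec)   # shape (2, 2, 4): one (a0,a1,a2,a3) set per cell

# Evaluate the bilinear model of cell (0,0) at its upper corner (x1_vec[1], x2_vec[1]);
# it should reproduce the tabulated value fVal[1,1] exactly.
a0, a1, a2, a3 = lut[0, 0].numpy()
f_hat = a0 + a1*x1_vec[1] + a2*x2_vec[1] + a3*x1_vec[1]*x2_vec[1]
print(np.isclose(f_hat, fVal[1, 1]))       # expected: True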