diff --git a/layers/binary_layers_IMC.py b/layers/binary_layers_IMC.py new file mode 100644 index 0000000000000000000000000000000000000000..1bb4f77b3800508803bcd98f550254ef6b46ad95 --- /dev/null +++ b/layers/binary_layers_IMC.py @@ -0,0 +1,415 @@ +# -*- coding: utf-8 -*- +import numpy as np +import math + +from keras import backend as K +import tensorflow as tf + +from keras.layers import InputSpec, Layer, Dense, Conv2D +from keras import constraints +from keras import initializers +# Binarization functions +from layers.binary_ops import binarize, binarize_exp, binarize_ssb +from layers.binary_ops import binary_sigmoid_p +# Analog MAC operator +from models.MAC_current import MAC_op_se_ana as MAC_op_se +from models.MAC_current import MAC_op_diff_ana as MAC_op_diff +from models.CONV_current import CONV_op_se_ana as CONV_op_se +from models.CONV_current import CONV_op_diff_ana as CONV_op_diff +# ADC model +from models.ADC import quant_uni +# Hardware parameters generation +from utils.config_hardware_model import genHardware +# Temporary dir +import tempfile +import sys +import subprocess +import time +# Modeling files +import os +scriptpath = "../lib_modelcim/" +sys.path.append(os.path.abspath(scriptpath)); +from preProc_wrapper import preProcSat as getHardwareData +from fit_spice import DP_fit + +class Clip(constraints.Constraint): + def __init__(self, min_value, max_value=None): + self.min_value = min_value + self.max_value = max_value + if not self.max_value: + self.max_value = -self.min_value + if self.min_value > self.max_value: + self.min_value, self.max_value = self.max_value, self.min_value + + def __call__(self, p): + return K.clip(p, self.min_value, self.max_value) + + def get_config(self): + return {"name": self.__call__.__name__, + "min_value": self.min_value, + "max_value": self.max_value} + + +class BinaryDense(Dense): + ''' Binarized Dense layer + References: + "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] + ''' + def __init__(self, units, H=1.,sramInfo=None, EN_NOISE=0, EN_QUANT=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): + super(BinaryDense, self).__init__(units, **kwargs) + self.H = H + self.kernel_lr_multiplier = kernel_lr_multiplier + self.bias_lr_multiplier = bias_lr_multiplier + + self.EN_NOISE = EN_NOISE + self.EN_QUANT = EN_QUANT + + self.sramInfo = sramInfo + self.hardware = None + self.Vt_noise = None + self.input_dim = None + + super(BinaryDense, self).__init__(units, **kwargs) + + def build(self, input_shape): + assert len(input_shape) >= 2 + input_dim = input_shape[1] + self.input_dim = input_dim; + + if self.H == 'Glorot': + self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units))) + #print('Glorot H: {}'.format(self.H)) + if self.kernel_lr_multiplier == 'Glorot': + self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units))) + #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier)) + + # Retrieve architecture type (diff or se) and derive flag + archType = self.sramInfo.arch.name; + # if(archType == '6T'): + self.kernel_constraint = Clip(-self.H, self.H) + self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) + # elif(archType == '8T'): + # self.kernel_constraint = Clip(0, self.H) + # self.kernel_initializer = initializers.RandomUniform(0, self.H) + # else: + # error('Unsupported cell type during binary weights initialization !'); + + self.kernel = self.add_weight(shape=(input_dim, self.units), + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) + + if self.use_bias: + self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] + self.bias = self.add_weight(shape=(self.output_dim,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + else: + self.lr_multipliers = [self.kernel_lr_multiplier] + self.bias = None + + # Get DP electrical quantities for this layer + Nrows = self.sramInfo.Nrows.data + N_cim = int(math.ceil((input_dim-1)/Nrows)); + self.sramInfo.NB.data = int(input_dim/N_cim); + print(f'######## FC layer with {self.sramInfo.NB.data} cells/op supplied at {self.sramInfo.VDD.data:.2f}V ######## ') + path_dir = '/export/home/adkneip/Documents/PhD/ELDO/IMC_PYTHON/CURRENT_MAC/'+self.sramInfo.arch.name+'_CELL/' + ################################################# USE TEMPORARY SIM DIRECTORY ##################################################### + with tempfile.TemporaryDirectory(dir=path_dir,prefix='SimFolder_') as path_to_file: + #print(path_to_file) + # Copy .cir files into temporary simu folder -- '*' sumbol bugs for some reason + if(self.sramInfo.simulator == "eldo"): + file_table = np.array(['MAC_DC.cir','MAC_NL.cir','MAC_satCal.cir','MAC_time.cir','MAC_train_MC.cir']); + elif(self.sramInfo.simulator == "spectre"): + file_table = np.array(['MAC_DC.scs','MAC_satCal.scs','MAC_time.scs', + 'MAC_DC.mdl','MAC_satCal.mdl','MAC_time.mdl']); + else: + sys.exit('Error: selected simulator not supported !\n'); + for file_temp in file_table: + commandLine = ['cp',path_dir+'RefFolder/'+file_temp,path_to_file+'/']; + proc = subprocess.run(commandLine); + if(proc.returncode != 0): + sys.exit('Error: could not copy reference files into temporary sim folder !\n'); + # Create temporary data file + commandLine = ['mkdir',path_to_file+'/data']; + proc = subprocess.run(commandLine); + if(proc.returncode != 0): + sys.exit('Error: could not copy reference files into temporary sim folder !\n'); + # Perform Spice simulations + self.sramInfo = getHardwareData(path_to_file,self.sramInfo) + # time.sleep(300); # For debug + ################################################################################################################################### + # Generate hardware parameters + hardware = genHardware(self.sramInfo) + # Compute the appropriate curve-fitting factors + # hardware.a1 = 1; hardware.a2 = 1; hardware.b1 = 1; + # self.hardware = hardware + print(f'######## Performing three-parametric best curve-fitting ######## ') + self.hardware = DP_fit(path_dir,'early',hardware) + # Create V_th distribution + mu_Vth = self.hardware.mu_Vth + sig_Vth = self.hardware.sig_Vth + # self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=sig_Vth) + self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=0) + + # Perform build + self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) + self.built = True + + def call(self, inputs): + # Binarize weights + W_bin = binarize(self.kernel, H=self.H); + # Check if a single CIM-SRAM is sufficient, or ideal charge-share of their analog outputs + Nrows = self.hardware.sramInfo.Nrows.data + N_cim = int(math.ceil((self.input_dim-1)/Nrows)); + # Retrieve architecture type (diff or se) and derive flag + archType = self.hardware.sramInfo.arch.name; + IS_SE_OUT = (archType == '8T') or self.EN_QUANT; + # Wrap correct MAC_op function + if(archType == '6T'): + MAC_op = MAC_op_diff; + elif(archType == '8T'): + MAC_op = MAC_op_se; + else: + raise NameError('Error: selected architecture (cell type) not supported during FC layer compute !\n'); + # Emulate 6T-based CIM-SRAM analog MAC operation, possibly with parallel macros + if(N_cim > 1): + # Separate inputs and weights in sub-matrices + inputs = tf.unstack(K.reshape(inputs,(-1,int(self.input_dim/N_cim),N_cim)),axis=-1) + W_bin = K.permute_dimensions(K.reshape(K.permute_dimensions(W_bin,(1,0)),(-1,int(self.input_dim/N_cim),N_cim)),(1,2,0)) + W_bin = tf.unstack(W_bin,axis=1) + # Perform CIM-SRAM operations over all sub-matrices (i.e. different CIM-SRAMs) + V_DP = []; + for i in range(N_cim): + V_DP.append(MAC_op(self.hardware,inputs[i],W_bin[i],self.Vt_noise,self.EN_NOISE,self.EN_QUANT)) + # Combine the result as if ideal charge-sharing (--> could implement actual charge-sharing !) + if(IS_SE_OUT): + V_DP = K.sum(tf.stack(V_DP,axis=2),axis=2)/N_cim; + else: + V_BL = K.sum(tf.stack(V_DP[0],axis=2),axis=2)/N_cim; + V_BLB = K.sum(tf.stack(V_DP[1],axis=2),axis=2)/N_cim; + else: + if(IS_SE_OUT): + V_DP = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,self.EN_NOISE,self.EN_QUANT); + else: + (V_BL,V_BLB) = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,self.EN_NOISE,self.EN_QUANT); + # Add bias to PA + if self.use_bias: + if(IS_SE_OUT): + V_DP = K.bias_add(V_DP, self.bias) + else: + V_BL = K.bias_add(V_BL,self.bias) + V_BLB = K.bias_add(V_BLB,self.bias) + + # Quantify the PA to get the digitized OA + IAres = self.hardware.sramInfo.IAres; + OAres = self.hardware.sramInfo.OAres; + NB = self.hardware.sramInfo.NB.data; + PAmax = (2**IAres-1)*NB; + DRval = self.hardware.sramInfo.DR.data; + VDD = self.hardware.sramInfo.VDD.data; + if(self.EN_QUANT): + DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); + # Return quantized output + return DO + elif(archType == '8T'): + return V_DP + else: + # Return unquantized differential output + return K.concatenate([V_BL[np.newaxis,...],V_BLB[np.newaxis,...]],axis=0) + + def get_config(self): + config = {'H': self.H, + 'kernel_lr_multiplier': self.kernel_lr_multiplier, + 'bias_lr_multiplier': self.bias_lr_multiplier} + base_config = super(BinaryDense, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class BinaryConv2D(Conv2D): + '''Binarized Convolution2D layer + References: + "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] + ''' + def __init__(self, filters, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', + bias_lr_multiplier=None, H=1.,sramInfo=None, EN_NOISE=0, EN_QUANT=1, **kwargs): + super(BinaryConv2D, self).__init__(filters, **kwargs) + self.H = H + self.kernel_lr_multiplier = kernel_lr_multiplier + self.bias_lr_multiplier = bias_lr_multiplier + self.activity_regularizer = activity_regularizer + self.kernel_regularizer = kernel_regularizer + + self.sramInfo = sramInfo + self.hardware = None + self.Vt_noise = None + + self.EN_NOISE = EN_NOISE + self.EN_QUANT = EN_QUANT + + def build(self, input_shape): + if self.data_format == 'channels_first': + channel_axis = 1 + else: + channel_axis = -1 + if input_shape[channel_axis] is None: + raise ValueError('The channel dimension of the inputs ' + 'should be defined. Found `None`.') + + input_dim = input_shape[channel_axis] + kernel_shape = self.kernel_size + (input_dim, self.filters) + #kernel_shape = self.kernel_size + (self.filters,) + + base = self.kernel_size[0] * self.kernel_size[1] + if self.H == 'Glorot': + nb_input = int(input_dim * base) + nb_output = int(self.filters * base) + self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output))) + #print('Glorot H: {}'.format(self.H)) + + if self.kernel_lr_multiplier == 'Glorot': + nb_input = int(input_dim * base) + nb_output = int(self.filters * base) + self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5/ (nb_input + nb_output))) + #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier)) + + self.kernel_constraint = Clip(-self.H, self.H) + self.kernel_initializer = initializers.RandomUniform(-self.H, self.H) + #self.bias_initializer = initializers.RandomUniform(-self.H, self.H) + self.kernel = self.add_weight(shape=kernel_shape, + initializer=self.kernel_initializer, + name='kernel', + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint) +# print(K.int_shape(self.kernel)) + + if self.use_bias: + self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier] + self.bias = self.add_weight((self.filters,), + initializer=self.bias_initializer, + name='bias', + regularizer=self.bias_regularizer, + constraint=self.bias_constraint) + + else: + self.lr_multipliers = [self.kernel_lr_multiplier] + self.bias = None + + # Get DP electrical quantities for this layer + self.sramInfo.NB.data = base*input_dim; + print(f'######## 2D-CONV layer with {self.sramInfo.NB.data} cells/op supplied at {self.sramInfo.VDD.data:.2f}V ######## ') + path_dir = '/export/home/adkneip/Documents/PhD/ELDO/IMC_PYTHON/CURRENT_MAC/'+self.sramInfo.arch.name+'_CELL/' + ################################################# USE TEMPORARY SIM DIRECTORY ##################################################### + with tempfile.TemporaryDirectory(dir=path_dir,prefix='SimFolder_') as path_to_file: + #print(path_to_file) + # Copy .cir files into temporary simu folder -- '*' sumbol bugs for some reason + if(self.sramInfo.simulator == "eldo"): + file_table = np.array(['MAC_DC.cir','MAC_NL.cir','MAC_satCal.cir','MAC_time.cir','MAC_train_MC.cir']); + elif(self.sramInfo.simulator == "spectre"): + file_table = np.array(['MAC_DC.scs','MAC_satCal.scs','MAC_time.scs', + 'MAC_DC.mdl','MAC_satCal.mdl','MAC_time.mdl']); + else: + sys.exit('Error: selected simulator not supported !\n'); + for file_temp in file_table: + commandLine = ['cp',path_dir+'RefFolder/'+file_temp,path_to_file+'/']; + proc = subprocess.run(commandLine); + if(proc.returncode != 0): + sys.exit('Error: could not copy reference files into temporary sim folder !\n'); + # Create temporary data file + commandLine = ['mkdir',path_to_file+'/data']; + proc = subprocess.run(commandLine); + if(proc.returncode != 0): + sys.exit('Error: could not copy reference files into temporary sim folder !\n'); + # Perform Spice simulations + self.sramInfo = getHardwareData(path_to_file,self.sramInfo) + ################################################################################################################################### + # Generate hardware parameters + hardware = genHardware(self.sramInfo) + # Compute the appropriate curve-fitting factors + # hardware.a1 = 1; hardware.a2 = 1; hardware.b1 = 1; + # self.hardware = hardware + print(f'######## Performing three-parametric best curve-fitting ######## ') + self.hardware = DP_fit(path_dir,'early',hardware) + # Create V_th distribution + sig_Vth = self.hardware.sig_Vth + #self.Vt_noise = K.random_normal(shape=(input_dim,),mean=0,stddev=sig_Vth) + self.Vt_noise = K.random_normal(shape=(input_dim,),mean=0,stddev=0) + + + # Set input spec. + self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) + self.built = True + + def call(self, inputs): + binary_kernel = binarize(self.kernel, H=self.H) + # Retrieve architecture type (diff or se) and derive flag + archType = self.hardware.sramInfo.arch.name; + IS_SE_OUT = (archType == '8T') or self.EN_QUANT; + # Wrap correct CONV_op function + if(archType == '6T'): + CONV_op = CONV_op_diff; + elif(archType == '8T'): + CONV_op = CONV_op_se; + else: + raise NameError('Error: selected architecture (cell type) not supported during 2DCONV layer compute !\n'); + + inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier + inputs_bnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\ + * inverse_kernel_lr_multiplier + + outputs_bnn_gradient = CONV_op( + self.hardware, + inputs_bnn_gradient, + binary_kernel, + self.Vt_noise, + self.data_format, + self.EN_NOISE, + self.EN_QUANT) + + if(IS_SE_OUT): + V_DP = (outputs_bnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient))\ + * self.kernel_lr_multiplier + else: + V_BL = (outputs_bnn_gradient[0] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient[0]))\ + * self.kernel_lr_multiplier + V_BLB = (outputs_bnn_gradient[1] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient[1]))\ + * self.kernel_lr_multiplier + + if self.use_bias: + if(IS_SE_OUT): + V_DP = K.bias_add(V_DP,self.bias,data_format=self.data_format); + else: + V_BL = K.bias_add(V_BL,self.bias,data_format=self.data_format); + V_BLB = K.bias_add(V_BLB,self.bias,data_format=self.data_format); + + # Quantify the PA to get the digitized OA + IAres = self.hardware.sramInfo.IAres + OAres = self.hardware.sramInfo.OAres + NB = self.hardware.sramInfo.NB.data + PAmax = (2**IAres-1)*NB + DRval = self.hardware.sramInfo.DR.data; + VDD = self.hardware.sramInfo.VDD.data; + if(self.EN_QUANT): + DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType); + # Return digitized output + return DO + elif(archType == '8T'): + return V_DP + else: + # Return unquantized differential output + return (V_BL,V_BLB) + + def get_config(self): + config = {'H': self.H, + 'kernel_lr_multiplier': self.kernel_lr_multiplier, + 'bias_lr_multiplier': self.bias_lr_multiplier} + base_config = super(BinaryConv2D, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +# Aliases + +BinaryConvolution2D = BinaryConv2D