Upload New File

d3808ebe · Adrian Kneip · ceea735c · d3808ebe
--- a/layers/binary_layers_IMC.py
+++ b/layers/binary_layers_IMC.py
+# -*- coding: utf-8 -*-
+import numpy as np
+import math
+from keras import backend as K
+import tensorflow as tf
+from keras.layers import InputSpec, Layer, Dense, Conv2D
+from keras import constraints
+from keras import initializers
+# Binarization functions
+from layers.binary_ops import binarize, binarize_exp, binarize_ssb
+from layers.binary_ops import binary_sigmoid_p
+# Analog MAC operator
+from models.MAC_current import MAC_op_se_ana as MAC_op_se
+from models.MAC_current import MAC_op_diff_ana as MAC_op_diff
+from models.CONV_current import CONV_op_se_ana as CONV_op_se
+from models.CONV_current import CONV_op_diff_ana as CONV_op_diff
+# ADC model
+from models.ADC import quant_uni
+# Hardware parameters generation
+from utils.config_hardware_model import genHardware
+# Temporary dir
+import tempfile
+import sys
+import subprocess
+import time
+# Modeling files
+import os
+scriptpath = "../lib_modelcim/"
+sys.path.append(os.path.abspath(scriptpath));
+from preProc_wrapper import preProcSat as getHardwareData
+from fit_spice import DP_fit
+class Clip(constraints.Constraint):
+    def __init__(self, min_value, max_value=None):
+        self.min_value = min_value
+        self.max_value = max_value
+        if not self.max_value:
+            self.max_value = -self.min_value
+        if self.min_value > self.max_value:
+            self.min_value, self.max_value = self.max_value, self.min_value
+    def __call__(self, p):
+        return K.clip(p, self.min_value, self.max_value)
+    def get_config(self):
+        return {"name": self.__call__.__name__,
+                "min_value": self.min_value,
+                "max_value": self.max_value}
+class BinaryDense(Dense):
+    ''' Binarized Dense layer
+    References:
+    "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830]
+    '''
+    def __init__(self, units, H=1.,sramInfo=None, EN_NOISE=0, EN_QUANT=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs):
+        super(BinaryDense, self).__init__(units, **kwargs)
+        self.H = H
+        self.kernel_lr_multiplier = kernel_lr_multiplier
+        self.bias_lr_multiplier = bias_lr_multiplier
+        self.EN_NOISE = EN_NOISE
+        self.EN_QUANT = EN_QUANT
+        self.sramInfo = sramInfo
+        self.hardware = None
+        self.Vt_noise = None
+        self.input_dim = None
+        super(BinaryDense, self).__init__(units, **kwargs)
+    def build(self, input_shape):
+        assert len(input_shape) >= 2
+        input_dim = input_shape[1]
+        self.input_dim = input_dim;
+        if self.H == 'Glorot':
+            self.H = np.float32(np.sqrt(1.5 / (input_dim + self.units)))
+            #print('Glorot H: {}'.format(self.H))
+        if self.kernel_lr_multiplier == 'Glorot':
+            self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5 / (input_dim + self.units)))
+            #print('Glorot learning rate multiplier: {}'.format(self.kernel_lr_multiplier))
+        # Retrieve architecture type (diff or se) and derive flag
+        archType = self.sramInfo.arch.name;
+       # if(archType == '6T'):
+        self.kernel_constraint = Clip(-self.H, self.H)
+        self.kernel_initializer = initializers.RandomUniform(-self.H, self.H)
+       # elif(archType == '8T'):
+            # self.kernel_constraint = Clip(0, self.H)
+            # self.kernel_initializer = initializers.RandomUniform(0, self.H)    
+        # else:
+            # error('Unsupported cell type during binary weights initialization !');
+        self.kernel = self.add_weight(shape=(input_dim, self.units),
+                                     initializer=self.kernel_initializer,
+                                     name='kernel',
+                                     regularizer=self.kernel_regularizer,
+                                     constraint=self.kernel_constraint)
+        if self.use_bias:
+            self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier]
+            self.bias = self.add_weight(shape=(self.output_dim,),
+                                     initializer=self.bias_initializer,
+                                     name='bias',
+                                     regularizer=self.bias_regularizer,
+                                     constraint=self.bias_constraint)
+        else:
+            self.lr_multipliers = [self.kernel_lr_multiplier]
+            self.bias = None
+        # Get DP electrical quantities for this layer
+        Nrows = self.sramInfo.Nrows.data
+        N_cim = int(math.ceil((input_dim-1)/Nrows));
+        self.sramInfo.NB.data = int(input_dim/N_cim);
+        print(f'######## FC layer with {self.sramInfo.NB.data} cells/op supplied at {self.sramInfo.VDD.data:.2f}V ######## ')
+        path_dir = '/export/home/adkneip/Documents/PhD/ELDO/IMC_PYTHON/CURRENT_MAC/'+self.sramInfo.arch.name+'_CELL/'
+        ################################################# USE TEMPORARY SIM DIRECTORY #####################################################
+        with tempfile.TemporaryDirectory(dir=path_dir,prefix='SimFolder_') as path_to_file:
+            #print(path_to_file)
+            # Copy .cir files into temporary simu folder -- '*' sumbol bugs for some reason
+            if(self.sramInfo.simulator == "eldo"):
+                file_table = np.array(['MAC_DC.cir','MAC_NL.cir','MAC_satCal.cir','MAC_time.cir','MAC_train_MC.cir']);
+            elif(self.sramInfo.simulator == "spectre"):
+                file_table = np.array(['MAC_DC.scs','MAC_satCal.scs','MAC_time.scs',
+                                        'MAC_DC.mdl','MAC_satCal.mdl','MAC_time.mdl']);
+            else:
+                sys.exit('Error: selected simulator not supported !\n');
+            for file_temp in file_table:
+                commandLine = ['cp',path_dir+'RefFolder/'+file_temp,path_to_file+'/'];
+                proc = subprocess.run(commandLine);
+                if(proc.returncode != 0):
+                    sys.exit('Error: could not copy reference files into temporary sim folder !\n');
+            # Create temporary data file
+            commandLine = ['mkdir',path_to_file+'/data'];
+            proc = subprocess.run(commandLine);
+            if(proc.returncode != 0):
+                sys.exit('Error: could not copy reference files into temporary sim folder !\n');
+            # Perform Spice simulations
+            self.sramInfo = getHardwareData(path_to_file,self.sramInfo)
+       #     time.sleep(300); # For debug
+       ###################################################################################################################################
+       # Generate hardware parameters
+        hardware = genHardware(self.sramInfo)
+        # Compute the appropriate curve-fitting factors
+        # hardware.a1 = 1; hardware.a2 = 1; hardware.b1 = 1;
+        # self.hardware = hardware
+        print(f'######## Performing three-parametric best curve-fitting ######## ') 
+        self.hardware = DP_fit(path_dir,'early',hardware)        
+        # Create V_th distribution
+        mu_Vth = self.hardware.mu_Vth
+        sig_Vth = self.hardware.sig_Vth
+        # self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=sig_Vth)
+        self.Vt_noise = K.random_normal(shape=(self.units,),mean=0,stddev=0)
+        # Perform build
+        self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim})
+        self.built = True
+    def call(self, inputs):
+        # Binarize weights
+        W_bin = binarize(self.kernel, H=self.H);
+        # Check if a single CIM-SRAM is sufficient, or ideal charge-share of their analog outputs
+        Nrows = self.hardware.sramInfo.Nrows.data
+        N_cim = int(math.ceil((self.input_dim-1)/Nrows));
+        # Retrieve architecture type (diff or se) and derive flag
+        archType = self.hardware.sramInfo.arch.name;
+        IS_SE_OUT = (archType == '8T') or  self.EN_QUANT;
+        # Wrap correct MAC_op function
+        if(archType == '6T'):
+            MAC_op = MAC_op_diff;
+        elif(archType == '8T'):
+            MAC_op = MAC_op_se;
+        else:
+            raise NameError('Error: selected architecture (cell type) not supported during FC layer compute !\n');
+        # Emulate 6T-based CIM-SRAM analog MAC operation, possibly with parallel macros
+        if(N_cim > 1):
+            # Separate inputs and weights in sub-matrices
+            inputs = tf.unstack(K.reshape(inputs,(-1,int(self.input_dim/N_cim),N_cim)),axis=-1)
+            W_bin = K.permute_dimensions(K.reshape(K.permute_dimensions(W_bin,(1,0)),(-1,int(self.input_dim/N_cim),N_cim)),(1,2,0))
+            W_bin = tf.unstack(W_bin,axis=1)
+            # Perform CIM-SRAM operations over all sub-matrices (i.e. different CIM-SRAMs)
+            V_DP = [];
+            for i in range(N_cim):
+                V_DP.append(MAC_op(self.hardware,inputs[i],W_bin[i],self.Vt_noise,self.EN_NOISE,self.EN_QUANT))
+            # Combine the result as if ideal charge-sharing (--> could implement actual charge-sharing !)
+            if(IS_SE_OUT):
+                V_DP = K.sum(tf.stack(V_DP,axis=2),axis=2)/N_cim;
+            else:
+                V_BL  = K.sum(tf.stack(V_DP[0],axis=2),axis=2)/N_cim;
+                V_BLB = K.sum(tf.stack(V_DP[1],axis=2),axis=2)/N_cim;
+        else:
+            if(IS_SE_OUT):
+                V_DP = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,self.EN_NOISE,self.EN_QUANT);
+            else:
+                (V_BL,V_BLB) = MAC_op(self.hardware,inputs,W_bin,self.Vt_noise,self.EN_NOISE,self.EN_QUANT);
+        # Add bias to PA
+        if self.use_bias:
+            if(IS_SE_OUT):
+                V_DP = K.bias_add(V_DP, self.bias)
+            else:
+                V_BL  = K.bias_add(V_BL,self.bias)
+                V_BLB = K.bias_add(V_BLB,self.bias)
+        # Quantify the PA to get the digitized OA
+        IAres = self.hardware.sramInfo.IAres;
+        OAres = self.hardware.sramInfo.OAres;
+        NB = self.hardware.sramInfo.NB.data;
+        PAmax = (2**IAres-1)*NB;
+        DRval = self.hardware.sramInfo.DR.data;
+        VDD = self.hardware.sramInfo.VDD.data;
+        if(self.EN_QUANT):
+            DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType);
+            # Return quantized output
+            return DO
+        elif(archType == '8T'):
+            return V_DP
+        else:
+            # Return unquantized differential output
+            return K.concatenate([V_BL[np.newaxis,...],V_BLB[np.newaxis,...]],axis=0)
+    def get_config(self):
+        config = {'H': self.H,
+                  'kernel_lr_multiplier': self.kernel_lr_multiplier,
+                  'bias_lr_multiplier': self.bias_lr_multiplier}
+        base_config = super(BinaryDense, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+class BinaryConv2D(Conv2D):
+    '''Binarized Convolution2D layer
+    References: 
+    "BinaryNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830]
+    '''
+    def __init__(self, filters, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot',
+                 bias_lr_multiplier=None, H=1.,sramInfo=None, EN_NOISE=0, EN_QUANT=1, **kwargs):
+        super(BinaryConv2D, self).__init__(filters, **kwargs)
+        self.H = H
+        self.kernel_lr_multiplier = kernel_lr_multiplier
+        self.bias_lr_multiplier = bias_lr_multiplier
+        self.activity_regularizer = activity_regularizer
+        self.kernel_regularizer = kernel_regularizer
+        self.sramInfo = sramInfo
+        self.hardware = None
+        self.Vt_noise = None
+        self.EN_NOISE = EN_NOISE
+        self.EN_QUANT = EN_QUANT
+    def build(self, input_shape):
+        if self.data_format == 'channels_first':
+            channel_axis = 1
+        else:
+            channel_axis = -1 
+        if input_shape[channel_axis] is None:
+                raise ValueError('The channel dimension of the inputs '
+                                 'should be defined. Found `None`.')
+        input_dim = input_shape[channel_axis]
+        kernel_shape = self.kernel_size + (input_dim, self.filters)
+        #kernel_shape = self.kernel_size + (self.filters,)
+        base = self.kernel_size[0] * self.kernel_size[1]
+        if self.H == 'Glorot':
+            nb_input = int(input_dim * base)
+            nb_output = int(self.filters * base)
+            self.H = np.float32(np.sqrt(1.5 / (nb_input + nb_output)))
+            #print('Glorot H: {}'.format(self.H))
+        if self.kernel_lr_multiplier == 'Glorot':
+            nb_input = int(input_dim * base)
+            nb_output = int(self.filters * base)
+            self.kernel_lr_multiplier = np.float32(1. / np.sqrt(1.5/ (nb_input + nb_output)))
+            #print('Glorot learning rate multiplier: {}'.format(self.lr_multiplier))
+        self.kernel_constraint = Clip(-self.H, self.H)
+        self.kernel_initializer = initializers.RandomUniform(-self.H, self.H)
+        #self.bias_initializer = initializers.RandomUniform(-self.H, self.H)
+        self.kernel = self.add_weight(shape=kernel_shape,
+                                 initializer=self.kernel_initializer,
+                                 name='kernel',
+                                 regularizer=self.kernel_regularizer,
+                                 constraint=self.kernel_constraint)
+#        print(K.int_shape(self.kernel))
+        if self.use_bias:
+            self.lr_multipliers = [self.kernel_lr_multiplier, self.bias_lr_multiplier]
+            self.bias = self.add_weight((self.filters,),
+                                     initializer=self.bias_initializer,
+                                     name='bias',
+                                     regularizer=self.bias_regularizer,
+                                     constraint=self.bias_constraint)
+        else:
+            self.lr_multipliers = [self.kernel_lr_multiplier]
+            self.bias = None
+        # Get DP electrical quantities for this layer
+        self.sramInfo.NB.data = base*input_dim;
+        print(f'######## 2D-CONV layer with {self.sramInfo.NB.data} cells/op supplied at {self.sramInfo.VDD.data:.2f}V ######## ')
+        path_dir = '/export/home/adkneip/Documents/PhD/ELDO/IMC_PYTHON/CURRENT_MAC/'+self.sramInfo.arch.name+'_CELL/'
+        ################################################# USE TEMPORARY SIM DIRECTORY #####################################################
+        with tempfile.TemporaryDirectory(dir=path_dir,prefix='SimFolder_') as path_to_file:
+            #print(path_to_file)
+            # Copy .cir files into temporary simu folder -- '*' sumbol bugs for some reason
+            if(self.sramInfo.simulator == "eldo"):
+                file_table = np.array(['MAC_DC.cir','MAC_NL.cir','MAC_satCal.cir','MAC_time.cir','MAC_train_MC.cir']);
+            elif(self.sramInfo.simulator == "spectre"):
+                file_table = np.array(['MAC_DC.scs','MAC_satCal.scs','MAC_time.scs',
+                                        'MAC_DC.mdl','MAC_satCal.mdl','MAC_time.mdl']);
+            else:
+                sys.exit('Error: selected simulator not supported !\n');
+            for file_temp in file_table:
+                commandLine = ['cp',path_dir+'RefFolder/'+file_temp,path_to_file+'/'];
+                proc = subprocess.run(commandLine);
+                if(proc.returncode != 0):
+                    sys.exit('Error: could not copy reference files into temporary sim folder !\n');
+            # Create temporary data file
+            commandLine = ['mkdir',path_to_file+'/data'];
+            proc = subprocess.run(commandLine);
+            if(proc.returncode != 0):
+                sys.exit('Error: could not copy reference files into temporary sim folder !\n');
+                # Perform Spice simulations
+            self.sramInfo = getHardwareData(path_to_file,self.sramInfo)
+        ###################################################################################################################################
+        # Generate hardware parameters
+        hardware = genHardware(self.sramInfo)
+        # Compute the appropriate curve-fitting factors
+        # hardware.a1 = 1; hardware.a2 = 1; hardware.b1 = 1;
+        # self.hardware = hardware
+        print(f'######## Performing three-parametric best curve-fitting ######## ') 
+        self.hardware = DP_fit(path_dir,'early',hardware)        
+        # Create V_th distribution
+        sig_Vth = self.hardware.sig_Vth
+        #self.Vt_noise = K.random_normal(shape=(input_dim,),mean=0,stddev=sig_Vth)
+        self.Vt_noise = K.random_normal(shape=(input_dim,),mean=0,stddev=0)
+        # Set input spec.
+        self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
+        self.built = True
+    def call(self, inputs):
+        binary_kernel = binarize(self.kernel, H=self.H)
+        # Retrieve architecture type (diff or se) and derive flag
+        archType = self.hardware.sramInfo.arch.name;
+        IS_SE_OUT = (archType == '8T') or  self.EN_QUANT;
+        # Wrap correct CONV_op function
+        if(archType == '6T'):
+            CONV_op = CONV_op_diff;
+        elif(archType == '8T'):
+            CONV_op = CONV_op_se;
+        else:
+            raise NameError('Error: selected architecture (cell type) not supported during 2DCONV layer compute !\n');
+        inverse_kernel_lr_multiplier = 1./self.kernel_lr_multiplier
+        inputs_bnn_gradient = (inputs - (1. - 1./inverse_kernel_lr_multiplier) * K.stop_gradient(inputs))\
+                  * inverse_kernel_lr_multiplier
+        outputs_bnn_gradient = CONV_op(
+            self.hardware,
+            inputs_bnn_gradient,
+            binary_kernel,
+            self.Vt_noise,
+            self.data_format,
+            self.EN_NOISE,
+            self.EN_QUANT)
+        if(IS_SE_OUT):
+            V_DP = (outputs_bnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient))\
+                    * self.kernel_lr_multiplier
+        else:
+            V_BL  = (outputs_bnn_gradient[0] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient[0]))\
+                    * self.kernel_lr_multiplier
+            V_BLB = (outputs_bnn_gradient[1] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_bnn_gradient[1]))\
+                    * self.kernel_lr_multiplier
+        if self.use_bias:
+            if(IS_SE_OUT):
+                V_DP = K.bias_add(V_DP,self.bias,data_format=self.data_format);
+            else:
+                V_BL  = K.bias_add(V_BL,self.bias,data_format=self.data_format);
+                V_BLB = K.bias_add(V_BLB,self.bias,data_format=self.data_format);
+        # Quantify the PA to get the digitized OA
+        IAres = self.hardware.sramInfo.IAres
+        OAres = self.hardware.sramInfo.OAres
+        NB = self.hardware.sramInfo.NB.data
+        PAmax = (2**IAres-1)*NB
+        DRval = self.hardware.sramInfo.DR.data;
+        VDD = self.hardware.sramInfo.VDD.data;
+        if(self.EN_QUANT):
+            DO = quant_uni(V_DP,PAmax,DRval,VDD,OAres,0.5*DRval/PAmax,archType);
+            # Return digitized output
+            return DO
+        elif(archType == '8T'):
+            return V_DP
+        else:
+            # Return unquantized differential output
+            return (V_BL,V_BLB)
+    def get_config(self):
+        config = {'H': self.H,
+                  'kernel_lr_multiplier': self.kernel_lr_multiplier,
+                  'bias_lr_multiplier': self.bias_lr_multiplier}
+        base_config = super(BinaryConv2D, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+# Aliases
+BinaryConvolution2D = BinaryConv2D