diff --git a/.gitignore b/.gitignore index 6573e3881aa7628fa2ee30efaa5bf9f2357ce1a9..7f510e31af02be016b49816cb6425623764cde80 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,12 @@ logs/ # Python cached files */__pycache__/ + +# Chip config text files +chip_files/abn/* +chip_files/fp_bn/* +chip_files/fp_weights/* +chip_files/inputs/* +chip_files/outputs/* +chip_files/weights/* + diff --git a/config/config_cim_cnn_param.py b/config/config_cim_cnn_param.py index 4fdd7cfd631a390a74e900f123ae40aac6e58e9f..84ee05dfc7c63f41b46940ad3300715d4390d9f2 100644 --- a/config/config_cim_cnn_param.py +++ b/config/config_cim_cnn_param.py @@ -11,8 +11,8 @@ classes=10 # // Network structure // network_type = "full-qnn"; # network_struct = "1C1D" -network_struct = "MLP_three_stage_abn" -OP_TYPE = "FC"; +network_struct = "LeNet-5" +OP_TYPE = "CONV-2D"; C_IN_VEC = [1024,128]; C_OUT_VEC = [128,64]; Nl_fp = 1; @@ -23,7 +23,7 @@ kern_size = 3 kernel_regularizer=0. activity_regularizer=0. # // Training iterations & savings // -Niter = 1; +Niter = 10; Nimg_save = 128; ##################################### @@ -31,9 +31,9 @@ Nimg_save = 128; ##################################### # Main hyper-params epochs = 30 -batch_size = 128*10 +batch_size = 128 # batch_size = 128 -lr = 0.005 +lr = 0.001 decay = 0.000025 # Decay & lr factors decay_at_epoch = [15, 75, 150 ] @@ -58,15 +58,20 @@ Vmax_beta = 0.05; BBN = 0; BBP = 0; # CIM-SRAM I/O RESOLUTION -IAres = 4; +IAres = 1; Wres = 1; -OAres = 4; +OAres = 1; # ABN resolution (if enabled) r_gamma = 5; r_beta = 5; # MAXIMUM INPUT VECTOR SIZE for ALL layers Nrows = 1152; Ncols = 512; +# Timing configuration (! these should be updated with the latest configuration setup) +T_DP = 0x3; +T_PRE = 0x3; +T_MBIT = 0x3; +T_ADC = 0x3; ######################################## ########## Simulation flags ############ @@ -75,6 +80,8 @@ Ncols = 512; simulator = "spectre" # Enable noisy training EN_NOISE = 0; +# Enable dynamic-range scaling (charge-domain) +EN_SCALE = 1; # Enable analog BN ANALOG_BN = 1; # Embedded ABN @@ -98,23 +105,26 @@ path_to_model = "./saved_models/"; model_template = "models/model_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}ABN_{}noise"; # Training output files path_to_out = "./saved_models/"; -acc_file_template = "accuracy/acc_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise.txt"; -w_file_template = "weights/weights_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise.hdf5"; -in_file_template = "inputs/in_IMC_{}_IA{}b.txt"; -out_file_template = "outputs/out_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise"; -inference_file_template = "outputs/inference_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise.txt"; +acc_file_template = "accuracy/acc_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise.txt"; +w_file_template = "weights/weights_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise.hdf5"; +in_file_template = "inputs/in_IMC_{}_{}_{}_IA{}b.txt"; +out_file_template = "outputs/out_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise"; +inference_file_template = "outputs/inference_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise.txt"; # On-chip inference files path_to_chip = "./chip_files/"; -chip_in_template = "inputs/in_calcim_{}_{}_IA{}b.txt"; -chip_out_template = "outputs/out_calcim_{}_{}_IA{}bW{}bOA{}b_noise{}"; -chip_inference_template = "outputs/inference_calcim_{}_{}_IA{}bW{}bOA{}b_noise{}.txt"; -chip_w_template = "weights/weights_calcim_{}_{}_IA{}bW{}bOA{}b_noise{}";
-chip_gamma_template = "abn/gamma_calcim_{}_{}_IA{}bW{}bOA{}b_noise{}"; -chip_beta_template = "abn/beta_calcim_{}_{}_IA{}bW{}bOA{}b_noise{}"; -chip_w_FP_template = "fp_weights/weights_fp_{}_{}_IA{}bW{}bOA{}b_noise{}"; -chip_gamma_FP_template = "fp_bn/gamma_fp_{}_{}_IA{}bW{}bOA{}b_noise{}"; -chip_beta_FP_template = "fp_bn/beta_fp_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_in_template = "inputs/in_cimu_{}_{}_{}_{}_IA{}b.txt"; +chip_out_template = "outputs/out_cimu_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_inference_template = "outputs/inference_cimu_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}.txt"; +chip_w_template = "weights/weights_cimu_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_gamma_template = "abn/gamma_cimu_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_beta_template = "abn/beta_cimu_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_w_FP_template = "fp_weights/weights_fp_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_gamma_FP_template = "fp_bn/gamma_fp_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; +chip_beta_FP_template = "fp_bn/beta_fp_{}_{}_{}_{}_IA{}bW{}bOA{}b_noise{}"; + +# FPGA files +path_to_fpga = "./chip_files/fpga/" fS_beta_fp = 128; fS_gamma_fp = 64; diff --git a/config/config_sweep_param.py b/config/config_sweep_param.py index 2f6aabad45afea6e4b04edb78514784d527886b9..129e93c2298b5ce1b3014b284284454fdd454005 100644 --- a/config/config_sweep_param.py +++ b/config/config_sweep_param.py @@ -3,16 +3,18 @@ ### Dataset & Neural net information ### ######################################## # // Dataset // -config_path = "config_cim_cnn_param" +config_path = "config_sweep_param" dataset_name = "MNIST"; dim=28 channels=1 classes=10 # // Network structure // network_type = "full-qnn"; -# network_struct = "1C1D" -network_struct = "MLP_three_stage_abn" -OP_TYPE = "FC"; +network_struct = "Jia_2020_reduced" +# network_struct = "MLP_three_stage_abn" +OP_TYPE = "CONV-2D"; +# OP_TYPE = "FC"; + C_IN_VEC = [1024,128]; C_OUT_VEC = [128,64]; Nl_fp = 1; @@ -23,17 +25,17 @@ kern_size = 3 kernel_regularizer=0. activity_regularizer=0. # // Training iterations & savings // -Niter = 1; +Niter = 5; Nimg_save = 128; ##################################### ########## Hyperparameters ########## ##################################### # Main hyper-params -epochs = 3 -batch_size = 128*10 +epochs = 30 +batch_size = 32*1 # batch_size = 128 -lr = 0.005 +lr = 0.001 decay = 0.000025 # Decay & lr factors decay_at_epoch = [15, 75, 150 ] @@ -58,12 +60,12 @@ Vmax_beta = 0.1; BBN = 0; BBP = 0; # CIM-SRAM I/O RESOLUTION -IAres = 4; +IAres = 1; Wres = 1; OAres = IAres; # ABN resolution (if enabled) r_gamma = 5; -r_beta = 8; +r_beta = 5; # MAXIMUM INPUT VECTOR SIZE for ALL layers Nrows = 1152; Ncols = 512; @@ -71,8 +73,9 @@ Ncols = 512; ####################################################################### ######### Sweep vectors (comment out related HW info above !) 
######### ####################################################################### -IAres_vec = [1,2,4]; -r_gamma_vec = [1,8]; +IAres_vec = [1]; +# r_gamma_vec = [1,2,3,4,5,6,7,8]; +r_gamma_vec = [1,2,3,4]; ######################################## ########## Simulation flags ############ @@ -81,6 +84,8 @@ r_gamma_vec = [1,8]; simulator = "spectre" # Enable noisy training EN_NOISE = 0; +# Enable dynamic-range scaling (charge-domain) +EN_SCALE = 0; # Enable analog BN ANALOG_BN = 1; # Embedded ABN @@ -104,11 +109,11 @@ path_to_model = "./saved_models/"; model_template = "models/model_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}ABN_{}noise"; # Training output files path_to_out = "./saved_models/"; -acc_file_template = "accuracy/acc_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise.txt"; -w_file_template = "weights/weights_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise.hdf5"; -in_file_template = "inputs/in_IMC_{}_IA{}b.txt"; -out_file_template = "outputs/out_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise"; -inference_file_template = "outputs/inference_IMC_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}ABN_{}noise.txt"; +acc_file_template = "accuracy/acc_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise.txt"; +w_file_template = "weights/weights_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise.hdf5"; +in_file_template = "inputs/in_IMC_{}_{}_{}_IA{}b.txt"; +out_file_template = "outputs/out_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise"; +inference_file_template = "outputs/inference_IMC_{}_{}_{}_{}_IA{}bW{}bOA{}b_{}b{}bABN_{}iter_{}SCALE_{}ABN_{}noise.txt"; # On-chip inference files path_to_chip = "./chip_files/"; diff --git a/layers/analog_BN_charge_model.py b/layers/analog_BN_charge_model.py index 42d291e1b3a9588a7067ade6bd1af45965fe52bb..32012c99d0c72229c185bfcd77b7105188e28722 100644 --- a/layers/analog_BN_charge_model.py +++ b/layers/analog_BN_charge_model.py @@ -60,7 +60,7 @@ class Analog_BN(Layer): self.beta_constraint = constraints.get(beta_constraint) self.gamma_constraint = constraints.get(gamma_constraint) self.hardware = hardware; - self.m_sigma = m_sigma; + self.m_sigma_init = m_sigma; # /// Build layer /// def build(self,input_shape): @@ -109,6 +109,12 @@ class Analog_BN(Layer): else: self.moving_mean_DP = K.variable(0.0) self.moving_variance_DP = K.variable(1.0) + + # Dummy value to match PL layer + self.m_sigma = self.add_weight(shape = (1,), + name = 'm_sigma', + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_sigma_init)), + trainable=False); super(Analog_BN, self).build(input_shape) @@ -312,12 +318,14 @@ def norm_ABN_in_train(V_DP,beta=0.0,gamma=1.0,renorm=True,axis=-1,epsilon=1e-5,m # Compute mean and variance of each batch when desired if(renorm): + # Reshape V_DP if needed, in case of a CONV2D operation + Ncols = K.int_shape(V_DP)[-1]; + V_DP_flat = tf.reshape(V_DP,(-1,Ncols)); # Model transfer function - V_out = V_DP-VDD/2; - + V_out = V_DP_flat-VDD/2; # Get mean and variance mean_DP = K.mean(V_out,axis=0); - variance_DP = K.var(V_DP,axis=0); + variance_DP = K.var(V_DP_flat,axis=0); else: mean_DP = K.constant(0.0); variance_DP = K.constant(1.0); diff --git a/layers/analog_BN_current_model.py b/layers/analog_BN_current_model.py index ba0df01961b28363cc59e4060a9d93662f3658f8..67241b4db2e32ac2be07424dc215a25d62af33fe 100644 --- a/layers/analog_BN_current_model.py +++ b/layers/analog_BN_current_model.py @@ -63,7 +63,7 @@ class Analog_BN(Layer): self.gamma_range = 4*math.sqrt(NB)
self.ABNstates = (2**hardware.sramInfo.r_gamma,2**hardware.sramInfo.r_beta) self.IS_DIFF = (hardware.sramInfo.arch.name == '6T'); # Update with other arch types - self.m_sigma = m_sigma; + self.m_sigma_init = m_sigma; # /// Build layer /// def build(self,input_shape): @@ -116,7 +116,7 @@ class Analog_BN(Layer): # Dummy value to match PL layer self.m_sigma = self.add_weight(shape = (1,), name = 'm_sigma', - initializer = initializers.get(tf.keras.initializers.Constant(value=0.)), + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_sigma_init)), trainable=False); diff --git a/layers/quantized_layers_IMC.py b/layers/quantized_layers_IMC.py index 1267e4e2c7b04658146f357dbd992dab7c0ae9dd..9ac41ba694bfc6d8a801cb3af2bc07bb56fa5e5d 100644 --- a/layers/quantized_layers_IMC.py +++ b/layers/quantized_layers_IMC.py @@ -268,7 +268,7 @@ class CIM_charge_dense(Dense): Based on: "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] ''' - def __init__(self, units, H=1., nb=16, m_T_DP=1., sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): + def __init__(self, units, H=1., nb=16, m_T_DP=1., sramInfo=None, EN_NOISE=0, EN_SCALE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, **kwargs): super(CIM_charge_dense, self).__init__(units, **kwargs) self.H = H self.nb = nb @@ -280,6 +280,7 @@ class CIM_charge_dense(Dense): self.FLAG_QUANT = FLAG_QUANT self.FLAG_PL = FLAG_PL self.IS_TRAINABLE_DP = IS_TRAINABLE_DP + self.EN_SCALE = EN_SCALE self.sramInfo = sramInfo self.hardware = None @@ -389,7 +390,7 @@ class CIM_charge_dense(Dense): V_DP = []; V_BL = []; V_BLB = []; for i in range(N_cim): # Compute analog dot-product - V_DP_bin = MAC_op(self.hardware,inputs[i],W_bin[i],self.MoM_noise,T_DP_conf,self.EN_NOISE); + V_DP_bin = MAC_op(self.hardware,inputs[i],W_bin[i],self.MoM_noise,T_DP_conf,self.EN_NOISE,self.EN_SCALE); # Reshape outputs and apply spatial mbit weighting, whenever relevant if(FLAG_SE_DP): V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); @@ -409,7 +410,7 @@ class CIM_charge_dense(Dense): # Single CIM-SRAM tile sufficient else: # Compute analog dot-product - V_DP_bin = MAC_op(self.hardware,inputs,W_bin,self.MoM_noise,T_DP_conf,self.EN_NOISE); + V_DP_bin = MAC_op(self.hardware,inputs,W_bin,self.MoM_noise,T_DP_conf,self.EN_NOISE,self.EN_SCALE); # Reshape outputs and apply DTSE, whenever relevant if(FLAG_SE_DP): V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); @@ -464,7 +465,7 @@ class CIM_current_conv2D(Conv2D): ''' def __init__(self, filters, m_T_DP=1, nRep=0, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', bias_lr_multiplier=None, H=1., nb=16, padding_num=0, sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, **kwargs): - super(CIM_current_Conv2D, self).__init__(filters, **kwargs) + super(CIM_current_conv2D, self).__init__(filters, **kwargs) self.H = H self.nb = nb self.padding_num = padding_num @@ -617,19 +618,21 @@ class CIM_current_conv2D(Conv2D): if(self.FLAG_QUANT): V_DP_bin = (outputs_qnn_gradient - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient))\ * self.kernel_lr_multiplier + output_shape = K.int_shape(V_DP_bin[1]); else: V_BL_bin = (outputs_qnn_gradient[0] - (1. 
- 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient[0]))\ * self.kernel_lr_multiplier V_BLB_bin = (outputs_qnn_gradient[1] - (1. - 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient[1]))\ * self.kernel_lr_multiplier + output_shape = K.int_shape(V_DP_bin[1]); # Apply DTSE conversion, when relevant if(FLAG_SE_DP): - V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + V_DP_bin = K.reshape(V_DP_bin,(-1,output_shape[0],output_shape[1],self.filters,self.nb)); V_DP = MBIT_weight(V_DP_bin,self.nb); else: - V_BL_bin = K.reshape(V_BL_bin,(-1,self.units,self.nb)); - V_BLB_bin = K.reshape(V_BLB_bin,(-1,self.units,self.nb)); + V_BL_bin = K.reshape(V_BL_bin,(-1,output_shape[0],output_shape[1],self.filters,self.nb)); + V_BLB_bin = K.reshape(V_BLB_bin,(-1,output_shape[0],output_shape[1],self.filters,self.nb)); # Post-layout model if(FLAG_PL): # Retrieve actual DTSE params @@ -672,8 +675,8 @@ class CIM_charge_conv2D(Conv2D): "QuantizedNet: Training Deep Neural Networks with Weights and Activations Constrained to +1 or -1" [http://arxiv.org/abs/1602.02830] ''' def __init__(self, filters, m_T_DP=1, nRep=0, kernel_regularizer=None,activity_regularizer=None, kernel_lr_multiplier='Glorot', - bias_lr_multiplier=None, H=1., nb=16, padding_num=0, sramInfo=None, EN_NOISE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, **kwargs): - super(CIM_current_Conv2D, self).__init__(filters, **kwargs) + bias_lr_multiplier=None, H=1., nb=16, padding_num=0, sramInfo=None, EN_NOISE=0, EN_SCALE=0, FLAG_QUANT=0, FLAG_PL=0, IS_TRAINABLE_DP=1, **kwargs): + super(CIM_charge_conv2D, self).__init__(filters, **kwargs) self.H = H self.nb = nb self.padding_num = padding_num @@ -689,6 +692,7 @@ class CIM_charge_conv2D(Conv2D): self.MoM_noise = None self.EN_NOISE = EN_NOISE + self.EN_SCALE = EN_SCALE self.FLAG_QUANT = FLAG_QUANT self.FLAG_PL = FLAG_PL self.IS_TRAINABLE_DP = IS_TRAINABLE_DP @@ -742,10 +746,13 @@ class CIM_charge_conv2D(Conv2D): # Train DP timing config or not if self.IS_TRAINABLE_DP: - self.m_T_DP = self.add_weight(shape=(1,), - initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_T_DP_init)), - regularizer = None, - constraint = None); + if(self.sramInfo.cim_type == 'current'): + self.m_T_DP = self.add_weight(shape=(1,), + initializer = initializers.get(tf.keras.initializers.Constant(value=self.m_T_DP_init)), + regularizer = None, + constraint = None); + else: + self.m_T_DP = 1; else: self.m_T_DP = 1.; @@ -809,22 +816,23 @@ class CIM_charge_conv2D(Conv2D): self.hardware, inputs_qnn_gradient, binary_kernel, - self.Vt_noise, - T_DP_conf, + self.MoM_noise, self.data_format, self.padding_num, - self.EN_NOISE) + self.EN_NOISE + ,self.EN_SCALE) if(FLAG_SE_DP): V_DP_bin = (outputs_qnn_gradient - (1. 
- 1./self.kernel_lr_multiplier) * K.stop_gradient(outputs_qnn_gradient))\ - * self.kernel_lr_multiplier + * self.kernel_lr_multiplier + output_shape = K.int_shape(V_DP_bin[0]) else: raise NameError('Differential charge-domain operations not supported !'); # Apply MBIT conversion, when relevant if(FLAG_SE_DP): - V_DP_bin = K.reshape(V_DP_bin,(-1,self.units,self.nb)); + V_DP_bin = K.reshape(V_DP_bin,(-1,output_shape[0],output_shape[1],self.filters,self.nb)); # Perform mbit-weight accumulation - if(FLAG_PL): + if(self.FLAG_PL): V_DP = MBIT_W_num(V_DP_bin,self.sramInfo,self.MBIT_LUT); else: V_DP = MBIT_W_ideal(V_DP_bin,self.sramInfo); @@ -851,5 +859,5 @@ class CIM_charge_conv2D(Conv2D): config = {'H': self.H, 'kernel_lr_multiplier': self.kernel_lr_multiplier, 'bias_lr_multiplier': self.bias_lr_multiplier} - base_config = super(CIM_charge_Conv2D, self).get_config() + base_config = super(CIM_charge_conv2D, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/models/Analog_DP.py b/models/Analog_DP.py index 0c69eaaf4fdf147e29c8c2c1ada8db2f5e2df29c..196cbc901ea54b957c70299521e12ca0ea46ae1a 100644 --- a/models/Analog_DP.py +++ b/models/Analog_DP.py @@ -242,7 +242,7 @@ def int_BL_PWM(hardware,IA,W_array,sig_Vt_inf,T_DP_conf,EN_NOISE): return V_BL # Charge-based DP model -def int_DP_cap(hardware,IA,W,Nunit,sig_MoM,EN_NOISE): +def int_DP_cap(hardware,IA,W,Nunit,sig_MoM,EN_NOISE,EN_SCALE): # Retrieve parameters VDDL = hardware.sramInfo.VDD.data/2; IAres = hardware.sramInfo.IAres; @@ -253,8 +253,10 @@ def int_DP_cap(hardware,IA,W,Nunit,sig_MoM,EN_NOISE): C_adc = hardware.sramInfo.C_adc; #25.2f C_mbit = hardware.sramInfo.C_mbit; #14.2f # Get total amortization cap - Cp = (Nunit*C_unit)*Cc+Nunit/(Nrows/C_unit)*C_array+C_adc+C_mbit; - + if(EN_SCALE): + Cp = (Nunit*C_unit)*Cc+Nunit/(Nrows/C_unit)*C_array+C_adc+C_mbit; + else: + Cp = Nrows*Cc+C_array+C_adc+C_mbit; # Perform DP operation dim_IA = K.int_shape(IA); IA = tf.linalg.matrix_transpose(IA); @@ -265,8 +267,9 @@ def int_DP_cap(hardware,IA,W,Nunit,sig_MoM,EN_NOISE): V_MBIT = MBIT_IN_actual(V_DP,hardware.sramInfo); # Debugs - #tf.print("V_DP",V_DP[0:8,0]); - #tf.print("V_MBIT",V_MBIT[0:8,0]); + # tf.print("IA",IA[0:8,0]); + # tf.print("V_DP",V_DP[0:8,0]); + # tf.print("V_MBIT",V_MBIT[0:8,0]); # Return mbit accumulated DP return V_MBIT; diff --git a/models/CONV_charge.py b/models/CONV_charge.py index acb80ac7ecccd5efa33d4bbee94b710422b95268..0f3ea6577f4586553786a1db409d6e7eb3508c2f 100644 --- a/models/CONV_charge.py +++ b/models/CONV_charge.py @@ -43,7 +43,11 @@ def CONV_op_se_num(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOI # Reshape IA_t to add the channels during the DP IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,(input_size+inc_dim)*(input_size+inc_dim),filter_size*filter_size*n_channels_in)); # One-hot input vector for bit serial processing - IA_t = tf.math.floormod(tf.bitwise.right_shift(tf.expand_dims(IA_t,-1), tf.range(IAres)), 2);# Mode-dependent input values + #IA_t = tf.math.floormod(tf.bitwise.right_shift(tf.expand_dims(IA_t,-1), tf.range(IAres)), 2);# Mode-dependent input values + expVec = K.cast(K.arange(IAres),dtype=K.dtype(IA)); + IA_t = tf.repeat(tf.expand_dims(IA_t,axis=-1),IAres,axis=-1); + IA_t = tf.math.floormod(floor_through(IA_t/K.pow(2.,expVec)),2.); + IA_t = K.reverse(IA_t,axes=-1); if(sparse_mode): IA_t = IA_t; else: @@ -60,14 +64,16 @@ def CONV_op_se_num(hardware,IA,W,dist_inf,T_DP_conf,data_format,padding=0,EN_NOI # Reshape output V_DP = 
K.permute_dimensions(V_DP,(0,2,1)) V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + if(data_format == 'channels_last'): V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) - + return V_DP; + ##################################### ANALYTICAL MODELS ######################################### # Convolution operation with Toeplitz matrix, input channels on repeated weights -def CONV_op_se_ana(hardware,IA,W,sig_MoM_inf,data_format,EN_NOISE,EN_QUANT): +def CONV_op_se_ana(hardware,IA,W,sig_MoM_inf,data_format,padding_size,EN_NOISE,EN_SCALE): # Check channel position and always put it second if(data_format == 'channels_last'): input_size = K.int_shape(IA)[1]; @@ -79,21 +85,26 @@ def CONV_op_se_ana(hardware,IA,W,sig_MoM_inf,data_format,EN_NOISE,EN_QUANT): # Retrieve physical quantities & config - C_unit = hardware.sramInfo.C_unit.data; + IAres = hardware.sramInfo.IAres; + C_unit = hardware.sramInfo.C_unit; sparse_mode = (hardware.sramInfo.input_mode == 'sparse'); # Derive all dimensions kern_size = K.int_shape(W); filter_size = kern_size[0]; + inc_dim = (filter_size-2)-padding_size; Nunit = np.ceil(filter_size*filter_size*n_channels_in//C_unit); - padding_size = int(tf.math.ceil((filter_size-1)/2)) - conv_size = input_size+padding_size + #padding_size = int(tf.math.ceil((filter_size-1)/2)) + conv_size = input_size-2*inc_dim; # Transform input into Toeplitz matrix for dot-product operation - IA_t = toeplitz(IA,filter_size); + IA_t = toeplitz(IA,filter_size,padding_size); # Reshape IA_t to add the channels during the DP - IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,input_size*input_size,filter_size*filter_size*n_channels_in)); + IA_t = K.reshape(K.permute_dimensions(IA_t,(0,2,3,1)),(-1,conv_size*conv_size,filter_size*filter_size*n_channels_in)); # One-hot input vector for bit serial processing - IA_t = tf.unpackbits(IA_t); + expVec = K.cast(K.arange(IAres),dtype=K.dtype(IA)); + IA_t = tf.repeat(tf.expand_dims(IA_t,axis=-1),IAres,axis=-1); + IA_t = tf.math.floormod(floor_through(IA_t/K.pow(2.,expVec)),2.); + IA_t = K.reverse(IA_t,axes=-1); # Mode-dependent input values if(sparse_mode): IA_t = IA_t; else: @@ -105,13 +116,16 @@ W = K.reshape(W,(kern_size[-1],filter_size*filter_size*n_channels_in)) W = K.permute_dimensions(W,(1,0)) # Compute parallel dot-products corresponding to zero-padded convolutions - V_DP = int_DP_cap(hardware,IA_t,W,Nunit,sig_MoM_inf,EN_NOISE) + V_DP = int_DP_cap(hardware,IA_t,W,Nunit,sig_MoM_inf,EN_NOISE,EN_SCALE) # Reshape output V_DP = K.permute_dimensions(V_DP,(0,2,1)) - V_DP = K.reshape(V_DP,(-1,kern_size[-1],input_size+inc_dim,input_size+inc_dim)) + V_DP = K.reshape(V_DP,(-1,kern_size[-1],conv_size,conv_size)) + if(data_format == 'channels_last'): V_DP = K.permute_dimensions(V_DP,(0,2,3,1)) + return V_DP; + ############################### INTERNAL FUNCTIONS (IM2COL FACILITIES) #################################### @@ -168,3 +182,10 @@ def im2col(x,filter_size,padding_type): print(K.int_shape(image_patches)); return image_patches; +def floor_through(x): + '''Element-wise floor with full gradient propagation (straight-through estimator).
+ A trick from [Sergey Ioffe](http://stackoverflow.com/a/36480182) + ''' + floored = tf.math.floor(x); + floored_through = x + K.stop_gradient(floored - x); + return floored_through; diff --git a/models/MAC_charge.py b/models/MAC_charge.py index 86a3c79f24d4576b10e520e88175d38f1eb610d1..32465dbbfb44625870cb6af669d05c2b8d5548a1 100644 --- a/models/MAC_charge.py +++ b/models/MAC_charge.py @@ -46,7 +46,7 @@ def MAC_op_se_num(hardware,IA,W,dist_inf,T_DP_conf,EN_NOISE): ##################################### ANALYTICAL MODELS ######################################### # Differential MAC operation -def MAC_op_se_ana(hardware,IA,W,sig_MoM_inf,T_DP_conf,EN_NOISE): +def MAC_op_se_ana(hardware,IA,W,sig_MoM_inf,T_DP_conf,EN_NOISE,EN_SCALE): # Get parameters IAres = hardware.sramInfo.IAres; C_unit = hardware.sramInfo.C_unit; @@ -80,7 +80,7 @@ def MAC_op_se_ana(hardware,IA,W,sig_MoM_inf,T_DP_conf,EN_NOISE): else: IA = 2*IA-1; # Get actual values from LUTs using indexes - V_DP = int_DP_cap(hardware,IA,W,Nunit,sig_MoM_inf,EN_NOISE); + V_DP = int_DP_cap(hardware,IA,W,Nunit,sig_MoM_inf,EN_NOISE,EN_SCALE); # Return result return V_DP diff --git a/models/makeModel.py b/models/makeModel.py index b0acac26c8d749df79425134953acaf1f767166d..beda85759e9deff04a33821952ccad44d9a778ca 100644 --- a/models/makeModel.py +++ b/models/makeModel.py @@ -177,6 +177,37 @@ def make_model(model_type,cf,Conv_,Conv,Dens_,Dens,Act,Quant,BatchNormalization, model.add(Dens_FP(cf.classes)) model.add(BatchNormalization_FP()) model.add(Activation('softmax')) + + # LeNet-5 adapted + elif(model_type == 'LeNet-5'): + model.add(Conv_(cf.kern_size, 8, cf.dim, cf.channels, 4, 0)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + model.add(Dropout(0.3)) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 16, 2, 0)) + model.add(BatchNormalization((cf.dim-2)*(cf.dim-2)*cf.channels,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Flatten()) + +# model.add(Dens_FP(120)) +# model.add(BatchNormalization_FP()) +# model.add(Dropout(0.3)) +# model.add(Activation('relu')) + + model.add(Dens_FP(84)) + model.add(BatchNormalization_FP()) + model.add(Dropout(0.2)) + model.add(Activation('relu')) + + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) + # BinaryNet model elif(model_type == 'BinaryNet'): print('BinaryNet network selected...\n') @@ -294,6 +325,126 @@ def make_model(model_type,cf,Conv_,Conv,Dens_,Dens,Act,Quant,BatchNormalization, model.add(Quant((pow(2,cf.abits)-1)*1024)) model.add(BatchNormalization()) model.add(Activation('softmax')) + + # Reduced version of Jia's network + elif(model_type == 'Jia_2020_reduced'): + model.add(Conv_(cf.kern_size, 16,cf.dim,cf.channels,6,0)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 16,6,0)) + model.add(BatchNormalization((cf.dim-2)*(cf.dim-2)*cf.channels,4)) + model.add(Act()) + # model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 64,4,0)) + model.add(BatchNormalization((cf.dim-4)/2*(cf.dim-4)/2*cf.channels,4)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 64,4,0)) + model.add(BatchNormalization(4,4)) + model.add(Act()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 128,2,0)) + model.add(BatchNormalization(4,4)) + model.add(Act()) + + model.add(Conv(cf.kern_size, 128,2,0)) + #model.add(MaxPooling2D(pool_size=(2, 2))) + 
model.add(BatchNormalization(4,4)) + model.add(Act()) + + model.add(Flatten()) + model.add(Dropout(0.4)) + #model.add(Dens(512,4.)) + #model.add(BatchNormalization(4,4)) + model.add(Dens_FP(4096)) + model.add(BatchNormalization_FP()) + model.add(Activation('relu')) + + model.add(Dropout(0.4)) + #model.add(Dens(128,4.)) + #model.add(BatchNormalization(4,4)) + model.add(Dens_FP(4096)) + model.add(BatchNormalization_FP()) + model.add(Activation('relu')) + + model.add(Dens_FP(cf.classes)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) + + # VGG-16 network + elif(model_type == 'VGG-16'): + print('VGG-16 network topology selected...') + + model.add(Conv_(cf.kern_size, 64,cf.dim,cf.channels,6,0)) + model.add(BatchNormalization(cf.dim*cf.dim*cf.channels,4)) + model.add(Act()) + model.add(Dropout(0.3)) + + model.add(Conv(cf.kern_size, 64,6,0)) + model.add(BatchNormalization(32*32*cf.channels,4)) + model.add(Act()) + # model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 128,6,0)) + model.add(BatchNormalization(16*16*64,4)) + model.add(Act()) + model.add(Dropout(0.4)) + + model.add(Conv(cf.kern_size, 128,6,0)) + model.add(BatchNormalization(16*16*128,4)) + model.add(Act()) + # model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 256,6,0)) + model.add(BatchNormalization(8*8*128,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(Conv(cf.kern_size, 256,6,0)) + model.add(BatchNormalization(8*8*256,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(Conv(cf.kern_size, 256,6,0)) + model.add(BatchNormalization(8*8*256,4)) + model.add(Act()) + # model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 512,6,0)) + model.add(BatchNormalization(4*4*256,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(Conv(cf.kern_size, 512,6,0)) + model.add(BatchNormalization(4*4*512,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(Conv(cf.kern_size, 512,6,0)) + model.add(BatchNormalization(4*4*512,4)) + model.add(Act()) + # model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv(cf.kern_size, 512,6,0)) + model.add(BatchNormalization(2*2*256,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(Conv(cf.kern_size, 512,6,0)) + model.add(BatchNormalization(2*2*512,4)) + model.add(Act()) + model.add(Dropout(0.4)) + model.add(Conv(cf.kern_size, 512,6,0)) + model.add(BatchNormalization(2*2*512,4)) + model.add(Act()) + # model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Flatten()) + model.add(Dropout(0.5)) + model.add(Dens_FP(512)) + model.add(BatchNormalization_FP()) + model.add(Activation('relu')) + model.add(Dens_FP(10)) + model.add(BatchNormalization_FP()) + model.add(Activation('softmax')) # Raise error on unsupported model type else: diff --git a/models/model_IMC.py b/models/model_IMC.py index d675cb482a345f887d04f7def8d838aad72b9282..9b5e8b07e1f6f3a86fcb7388120ff345b8906f50 100644 --- a/models/model_IMC.py +++ b/models/model_IMC.py @@ -34,6 +34,7 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): FLAG_QUANT = FLAGS[1]; IDEAL_ABN = FLAGS[2]; ABN_INC_ADC = FLAGS[3]; + EN_SCALE = FLAGS[4]; # Select the correct DP layer model based on the CIM type if(sramInfo.cim_type == 'current'): @@ -56,26 +57,28 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): return quantize_op(x=x,IAres=IAres) if cf.network_type =='float': - Conv_ = lambda s, f, i, c: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', - 
kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Conv = lambda s, f: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', - kernel_regularizer=l2(cf.kernel_regularizer),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) + Conv_ = lambda s, f, i, c, m, k: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', + kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False) + Conv = lambda s, f, m, k: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', + kernel_regularizer=l2(cf.kernel_regularizer),use_bias=False) + Conv_FP_ = lambda s, f, i, c: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', + kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False) Act = lambda: LeakyReLU() Quant = lambda n: Activation(lambda x: quant_uni(x,maxVal=n,dynRange=dynRange,OAres=OAres,offset=0.5*dynRange/n)) Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n: Dense(n,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) + Dens = lambda n: Dense(n,use_bias=False) - Dens_ = lambda n,i,c: Dense(n,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) + Dens_ = lambda n,i,c: Dense(n,use_bias=False,activation='linear',input_shape=(i*i*c,)) elif cf.network_type=='qnn': Conv_ = lambda s,f,i,c,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv = lambda s,f,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT) + kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT) if(FLAG_QUANT): Act = lambda: Activation(lambda x: quant_relu(x,IAres=IAres)) else: @@ -90,16 +93,16 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens = lambda n,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Dens_ = lambda n,i,c,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens_ = lambda n,i,c,m: 
CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) elif cf.network_type=='full-qnn': Conv_ = lambda s,f,i,c,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + kernel_lr_multiplier=cf.kernel_lr_multiplier,input_shape = (i,i,c),use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv = lambda s,f,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), - kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + kernel_lr_multiplier=cf.kernel_lr_multiplier,use_bias=False,sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) Conv_FP_ = lambda s, f, i, c: Conv2D(kernel_size=(s, s), filters=f, strides=(1, 1), padding='same', activation='linear', kernel_regularizer=l2(cf.kernel_regularizer),input_shape = (i,i,c),use_bias=False) @@ -118,9 +121,9 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): Dens_FP = lambda n: Dense(n,use_bias=False) - Dens = lambda n,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens = lambda n,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) - Dens_ = lambda n,i,c,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) + Dens_ = lambda n,i,c,m: CIM_Dense(n,nb=Wres,m_T_DP=m,use_bias=False,activation='linear',input_shape=(i*i*c,),sramInfo=deepcopy(sramInfo),EN_NOISE=EN_NOISE,EN_SCALE=EN_SCALE,FLAG_QUANT=FLAG_QUANT,FLAG_PL=FLAG_PL) # elif cf.network_type=='full-qnn-embedded': # Conv_ = lambda s,f,i,c,m,k: CIM_Conv2D(kernel_size=(s, s), H=1, m_T_DP=m, nRep=k, nb=Wres, filters=f, strides=(1, 1), padding='same', # activation='linear', kernel_regularizer=l2(cf.kernel_regularizer), @@ -152,10 +155,9 @@ def build_model(cf,model_type,sramInfo,EN_NOISE,FLAGS): if(FLAG_QUANT): BatchNorm = lambda: BatchNormalization(momentum=0.1,epsilon=1e-5) else: -# if(cf.network_type == 'full-qnn-embedded'): -# BatchNorm = lambda n,m: Activation('linear'); -# elif(IDEAL_ABN): - if(IDEAL_ABN): + if(cf.network_type == 'float'): + BatchNorm = lambda n,m: BatchNormalization(momentum=0.1,epsilon=1e-5) + elif(IDEAL_ABN): if(sramInfo.cim_type == 'current'): BatchNorm = lambda n,m: Analog_BN_current_ideal(momentum=0.1,epsilon=1e-5,renorm=True,hardware=genHardware(sramInfo),NB=n,m_sigma=m); elif(sramInfo.cim_type == 'charge'): diff --git a/train_cim_qnn.py b/train_cim_qnn.py index 8b531aa82c289c94bd97b34ae7c3d6180f212f38..80ccf90f77e88ff0bf40e8b4b67efd71233f185c 100644 --- a/train_cim_qnn.py +++ b/train_cim_qnn.py @@ -45,7 +45,8 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): IS_FL_MLP = FLAGS[3]; 
IDEAL_ABN = FLAGS[4] ABN_INC_ADC = FLAGS[5]; - FLAG_PL = FLAGS[6] + FLAG_PL = FLAGS[6]; + EN_SCALE = FLAGS[7]; # Weights file w_file = data_files[1]; # Retrieve resolution(s) @@ -56,7 +57,7 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): print('Construct the Network(s)\n') # // Create ideal model // - model = build_model(cf,network_struct,sramInfo,EN_NOISE,[FLAG_PL,not(ANALOG_BN),IDEAL_ABN,ABN_INC_ADC]) + model = build_model(cf,network_struct,sramInfo,EN_NOISE,[FLAG_PL,not(ANALOG_BN),IDEAL_ABN,ABN_INC_ADC,EN_SCALE]) print('Loading data\n') train_data, val_data = load_dataset(cf.dataset_name) @@ -99,7 +100,6 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): ### Numpy input quantization ### def quant_input(x,IAres): # Quantize between 0 and 2^IAres - print(x) m = pow(2,IAres); y = m*(x+1)/2; return K.clip(np.floor(y),0,m-1); @@ -188,13 +188,6 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): # Get model weights and retrieve those of BN layers weights_temp = model.get_weights(); weightsTensorVec.append(weights_temp); - - # Get outputs of each layer_outputs - Nlayers = len(model.layers); - data_out = []; - for i in range(Nlayers): - partial_model = Model(model.input,model.layers[i].output); - data_out.append(partial_model(test_data[0],training=False)); # Print accuracy for this iteration acc_train = history.history['accuracy'][-1] @@ -226,20 +219,20 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): # Save inputs with open(in_file,"w") as f: np.savetxt(f,np.reshape(test_data[0][0:Nimg_save],(-1,1)),fmt='%d'); - # Save outputs - Nlayers = len(best_model.layers); indL = 0; - for i in range(Nlayers): - # Get desired layer outputs - partial_model = Model(best_model.input,best_model.layers[i].output); - data_out = partial_model(test_data[0][0:Nimg_save],training=False); - # Write outputs to file, if ADC output only - #if(i==6 or i==7 or i==8): - # print(data_out) - if(i==2 or i==6 or i==9): - out_file_temp = out_file+"_layer_{}.txt".format(indL); - indL = indL+1; - with open(out_file_temp,"w") as f: - np.savetxt(f,np.reshape(data_out,(-1,1)),fmt='%f'); +# # Save outputs +# Nlayers = len(best_model.layers); indL = 0; +# for i in range(Nlayers): +# # Get desired layer outputs +# partial_model = Model(best_model.input,best_model.layers[i].output); +# data_out = partial_model(test_data[0][0:Nimg_save],training=False); +# # Write outputs to file, if ADC output only +# #if(i==6 or i==7 or i==8): +# # print(data_out) +# if(i==2 or i==6 or i==9): +# out_file_temp = out_file+"_layer_{}.txt".format(indL); +# indL = indL+1; +# with open(out_file_temp,"w") as f: +# np.savetxt(f,np.reshape(data_out,(-1,1)),fmt='%f'); # Save inference result with open(inference_file,"w") as f: indResult = np.argmax(test_data[1][0:Nimg_save],axis=-1); @@ -253,16 +246,16 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): ########################## IN/OUT FILES ############################# # Fill output files name in and concat -acc_file = path_to_out+acc_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); -w_file = path_to_out+w_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); -in_file = path_to_out+in_file_template.format(dataset_name,IAres); -out_file = path_to_out+out_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); -inference_file = 
path_to_out+inference_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); +acc_file = path_to_out+acc_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); +w_file = path_to_out+w_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); +in_file = path_to_out+in_file_template.format(dataset_name,cim_type,arch,IAres); +out_file = path_to_out+out_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); +inference_file = path_to_out+inference_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); data_files = [acc_file,w_file,in_file,out_file,inference_file]; ########################## GENERATE CIM-QNN MODEL ######################### # Concatenate flags -FLAGS = [SAVE_EN,EN_NOISE,ANALOG_BN,IS_FL_MLP,IDEAL_ABN,ABN_INC_ADC,FLAG_PL]; +FLAGS = [SAVE_EN,EN_NOISE,ANALOG_BN,IS_FL_MLP,IDEAL_ABN,ABN_INC_ADC,FLAG_PL,EN_SCALE]; # Generate hardware information sramInfo = SramInfo(arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,[IS_EMBEDDED,ABN_INC_ADC]); sramInfo.simulator = simulator; diff --git a/train_param.py b/train_param.py index c3384608a93d57cc51aa6b0d677fad1501349235..f15e70bbc3b809ddd66c11b376537fa9c8bb742b 100644 --- a/train_param.py +++ b/train_param.py @@ -6,6 +6,8 @@ import tensorflow as tf import keras.backend as K import numpy as np +from copy import deepcopy + from models.model_IMC import build_model, load_weights from utils.config_utils import Config from utils.load_data import load_dataset @@ -45,7 +47,8 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): IS_FL_MLP = FLAGS[3]; IDEAL_ABN = FLAGS[4] ABN_INC_ADC = FLAGS[5]; - FLAG_PL = FLAGS[6] + FLAG_PL = FLAGS[6]; + EN_SCALE = FLAGS[7]; # Weights file w_file = data_files[1]; # Retrieve resolution(s) @@ -56,7 +59,7 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): print('Construct the Network(s)\n') # // Create ideal model // - model = build_model(cf,network_struct,sramInfo,EN_NOISE,[FLAG_PL,not(ANALOG_BN),IDEAL_ABN,ABN_INC_ADC]) + model = build_model(cf,network_struct,sramInfo,EN_NOISE,[FLAG_PL,not(ANALOG_BN),IDEAL_ABN,ABN_INC_ADC,EN_SCALE]) print('Loading data\n') train_data, val_data = load_dataset(cf.dataset_name) @@ -99,7 +102,6 @@ def generate_model(data_files,cf,network_struct,sramInfo,FLAGS): ### Numpy input quantization ### def quant_input(x,IAres): # Quantize between 0 and 2^IAres - print(x) m = pow(2,IAres); y = m*(x+1)/2; return K.clip(np.floor(y),0,m-1); @@ -124,7 +126,7 @@ def process_input(dataset,IS_FL_MLP,precisions): return(train_data,test_data); ### Train and evaluate model ### -def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): +def train_eval_model(data_files,model_template,precisions,input_data,Niter,SAVE_EN): # // Local variables // # Retrieve resolution(s) IAres = precisions[0] @@ -147,6 +149,8 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): acc_iter = []; acc_max = 0; best_model = None; for s in range(Niter): + # // Make a copy of the template model, resetting the weights for each new training + model = deepcopy(model_template); # // Create callbacks // print('Setting up the network and creating callbacks\n') early_stop = EarlyStopping(monitor='loss', 
min_delta=0.001, patience=10, mode='min', verbose=1) @@ -173,7 +177,7 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): batch_size = batch_size, epochs = epochs, verbose = progress_logging, - callbacks = [checkpoint, tensorboard,lr_decay], + callbacks = [early_stop,checkpoint,tensorboard,lr_decay], validation_split = 0.15, workers = 4, use_multiprocessing = True @@ -188,13 +192,6 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): # Get model weights and retrieve those of BN layers weights_temp = model.get_weights(); weightsTensorVec.append(weights_temp); - - # Get outputs of each layer_outputs - Nlayers = len(model.layers); - data_out = []; - for i in range(Nlayers): - partial_model = Model(model.input,model.layers[i].output); - data_out.append(partial_model(test_data[0],training=False)); # Print accuracy for this iteration acc_train = history.history['accuracy'][-1] @@ -224,22 +221,22 @@ def train_eval_model(data_files,model,precisions,input_data,Niter,SAVE_EN): fileID.write("{:.5f},{:.5f},{:.5f}\n".format(acc_mean[0],acc_mean[1],acc_mean[2])); fileID.close() # Save inputs - with open(in_file,"w") as f: - np.savetxt(f,np.reshape(test_data[0][0:Nimg_save],(-1,1)),fmt='%d'); - # Save outputs - Nlayers = len(best_model.layers); indL = 0; - for i in range(Nlayers): - # Get desired layer outputs - partial_model = Model(best_model.input,best_model.layers[i].output); - data_out = partial_model(test_data[0][0:Nimg_save],training=False); - # Write outputs to file, if ADC output only - #if(i==6 or i==7 or i==8): - # print(data_out) - if(i==2 or i==6 or i==9): - out_file_temp = out_file+"_layer_{}.txt".format(indL); - indL = indL+1; - with open(out_file_temp,"w") as f: - np.savetxt(f,np.reshape(data_out,(-1,1)),fmt='%f'); +# with open(in_file,"w") as f: +# np.savetxt(f,np.reshape(test_data[0][0:Nimg_save],(-1,1)),fmt='%d'); +# # Save outputs +# Nlayers = len(best_model.layers); indL = 0; +# for i in range(Nlayers): +# # Get desired layer outputs +# partial_model = Model(best_model.input,best_model.layers[i].output); +# data_out = partial_model(test_data[0][0:Nimg_save],training=False); +# # Write outputs to file, if ADC output only +# #if(i==6 or i==7 or i==8): +# # print(data_out) +# if(i==2 or i==6 or i==9): +# out_file_temp = out_file+"_layer_{}.txt".format(indL); +# indL = indL+1; +# with open(out_file_temp,"w") as f: +# np.savetxt(f,np.reshape(data_out,(-1,1)),fmt='%f'); # Save inference result with open(inference_file,"w") as f: indResult = np.argmax(test_data[1][0:Nimg_save],axis=-1); @@ -256,16 +253,16 @@ for r_gamma in r_gamma_vec: OAres = IAres; ########################## IN/OUT FILES ############################# # Fill output files name in and concat - acc_file = path_to_out+acc_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); - w_file = path_to_out+w_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); - in_file = path_to_out+in_file_template.format(dataset_name,IAres); - out_file = path_to_out+out_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); - inference_file = path_to_out+inference_file_template.format(dataset_name,network_struct,IAres,Wres,OAres,r_gamma,r_beta,Niter,ANALOG_BN,EN_NOISE); + acc_file = path_to_out+acc_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); + w_file = 
path_to_out+w_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); + in_file = path_to_out+in_file_template.format(dataset_name,cim_type,arch,IAres); + out_file = path_to_out+out_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); + inference_file = path_to_out+inference_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE); data_files = [acc_file,w_file,in_file,out_file,inference_file]; ########################## GENERATE CIM-QNN MODEL ######################### # Concatenate flags - FLAGS = [SAVE_EN,EN_NOISE,ANALOG_BN,IS_FL_MLP,IDEAL_ABN,ABN_INC_ADC,FLAG_PL]; + FLAGS = [SAVE_EN,EN_NOISE,ANALOG_BN,IS_FL_MLP,IDEAL_ABN,ABN_INC_ADC,FLAG_PL,EN_SCALE]; # Generate hardware information sramInfo = SramInfo(arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,[IS_EMBEDDED,ABN_INC_ADC]); sramInfo.simulator = simulator; diff --git a/utils/config_hardware_model.py b/utils/config_hardware_model.py index 8eb93a42dc9b32b098eb4379e3c53eb50c693ebf..0b37968f46b11110cbe85498a67b16bc84b3c6fa 100644 --- a/utils/config_hardware_model.py +++ b/utils/config_hardware_model.py @@ -394,6 +394,8 @@ class SramInfo_charge: # Size information self.Nrows = SpiceObj('Nrows',Nrows); self.C_unit = 36; + # Range of expected DP + self.NB = SpiceObj('NB',0); # [bits] # Supply information self.VDD = SpiceObj('VDD_VAL',VDD); # [V] self.GND = SpiceObj('GND_VAL',0); @@ -470,7 +472,8 @@ class SramInfo_charge: # // ABN information // # --- Hardware data --- # Supply voltage - self.Vmax_beta = 0.02; + # self.Vmax_beta = 0.02; + self.Vmax_beta = 0.05; # Timing self.T_ABN = SpiceObj(name='T_ADC',data=5e-9); # Load capacitance diff --git a/utils/load_data.py b/utils/load_data.py index 080556bc2e2315a5594ecc83c00d5a117a36b4fa..f37d083e50a40cd21233639eaefb5cc31b55066b 100644 --- a/utils/load_data.py +++ b/utils/load_data.py @@ -66,7 +66,7 @@ def load_dataset(dataset): #train_set = mnist(which_set="train", start=0, stop=train_set_size) #valid_set = mnist(which_set="train", start=train_set_size, stop=60000) #test_set = mnist(which_set="test") - path_to_file = './my_datasets/mnist.npz' + path_to_file = '../../cim_qnn_training/my_datasets/mnist.npz' (train_set_X,train_set_Y),(valid_set_X,valid_set_Y) = my_mnist.load_data(path_to_file) train_set_X = np.transpose(np.reshape(np.subtract(np.multiply(2. / 255., train_set_X), 1.), (-1, 1, 28, 28)),(0,2,3,1))
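# --- Illustration, not part of the patch ---
# A minimal, standalone sketch of the bit-serial input decomposition introduced above in
# CONV_op_se_num / CONV_op_se_ana: floor_through() floors in the forward pass while keeping
# an identity gradient, so the decomposition stays differentiable (unlike the tf.bitwise ops
# it replaces). The IAres value and activation values below are illustrative only.
import tensorflow as tf
import keras.backend as K

def floor_through(x):
    # Floor in the forward pass, identity gradient in the backward pass
    floored = tf.math.floor(x)
    return x + K.stop_gradient(floored - x)

IAres = 4
IA = tf.constant([[5., 9.]])                           # integer-valued activations in [0, 2**IAres - 1]
expVec = K.cast(K.arange(IAres), dtype=K.dtype(IA))    # exponents 0 .. IAres-1
IA_bits = tf.repeat(tf.expand_dims(IA, axis=-1), IAres, axis=-1)
IA_bits = tf.math.floormod(floor_through(IA_bits / K.pow(2., expVec)), 2.)  # LSB first
IA_bits = K.reverse(IA_bits, axes=-1)                  # MSB first: 5 -> [0,1,0,1], 9 -> [1,0,0,1]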