Skip to content
Extraits de code Groupes Projets
Valider 4cfc683d rédigé par Adrian Kneip's avatar Adrian Kneip
Parcourir les fichiers

Add software to chip mapping scripts

parent f2093131
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
/*
*-----------------------------------
* Header file for CIM-QNN parameters
*-----------------------------------
*/
#define N_ROWS 1152
#define N_COLS 512
// Input img size
uint8_t H_IMG = 28;
uint8_t W_IMG = 28;
// Networks channels
uint16_t C_IN[2] = {9,72}
uint16_t C_OUT[2] = {8,16}
uint8_t C_IN_LOG[2] = {3,6}
uint8_t C_OUT_LOG[2] = {3,4}
// Computing precision
uint8_t R_IN = 1; uint8_t R_IN_LOG = 0;
uint8_t R_W = 1; uint8_t R_W_LOG = 0;
uint8_t R_OUT = 1; uint8_t R_OUT_LOG = 0;
uint8_t R_BETA = 5;
uint8_t R_GAMMA = 5;
// Timing configuration
uint8_t T_DP_CONF = 3;
uint8_t T_PRE_CONF = 3;
uint8_t T_MBIT_CONF = 3;
uint8_t T_ADC_CONF = 3;
uint8_t Nimg = 128;
// ABN CIM gain
uint8_t GAMMA[2] = {61,53};
// ABN FP parameters
uint32_t GAMMA_FP[2] = {61,53};
uint32_t BETA_FP[2][47] = {
{0xfffffffffffff01a,0xc79,0xffffffffffffe8e0,0xfffffffffffffef2,0xfffffffffffff91b,0xfffffffffffff6a9,0x5b8,0xfffffffffffffd0e,0xfffffffffffff4a3,0xfffffffffffff245,0xffffffffffffeea5,0xf04,0xfffffffffffff672,0xfffffffffffffc89,0xfffffffffffff74a,0xfffffffffffff9b4,0xfffffffffffffb62,0xfffffffffffff673,0xfffffffffffff130,0xe1b,0x1124,0xe06,0xd1e,0xffffffffffffec4f,0xffffffffffffff7f,0xfffffffffffffda8,0xfffffffffffff50f,0xffffffffffffee80,0x242,0x132,0xfffffffffffff64c,0xfffffffffffffd11,0xfffffffffffff21f,0xfffffffffffff268,0xfffffffffffff6a8,0xfffffffffffff757,0xfffffffffffff75d,0xe49,0xffffffffffffee5f,0xfffffffffffff54b,0xfffffffffffffbee,0xfffffffffffff851,0xfffffffffffff4c7,0xfffffffffffff9a8,0xfffffffffffffe62,0xfffffffffffffd99,0x46e},
{0xfffffffffffffc13,0xfffffffffffffa72,0xfffffffffffff122,0x34b,0xfffffffffffff97b,0x12b,0xfffffffffffff1a1,0xfffffffffffffa05,0xfffffffffffff4e5,0xb27,0xfffffffffffffeb0,0xffffffffffffedc9,0xffffffffffffefe7,0xfffffffffffffec8,0x2be,0xfffffffffffff819,0xffffffffffffff2f,0xfffffffffffff27a,0xfffffffffffffcf0,0xfffffffffffff16a,0xfffffffffffff967,0xfffffffffffff092,0x3b1,0x8,0xfffffffffffff1ce,0xfffffffffffff1cc,0x38a,0xffffffffffffeea1,0xfffffffffffff7c0,0xfffffffffffff189,0x79e,0xfffffffffffffbe9,0xfffffffffffffb22,0xfffffffffffff608,0x306,0xffffffffffffe93f,0xfffffffffffff4ed,0xfffffffffffffeb4,0xfffffffffffffe4e,0xfffffffffffffee9,0xfffffffffffffeb2,0xfffffffffffffe8d,0xffffffffffffff12,0xfffffffffffffe67,0xfffffffffffffe8e,0xfffffffffffffed8,0xfffffffffffffec4},
};
#####################################################################################
################ Write C header file dedicated to the CNN params ####################
#####################################################################################
import sys, os
import math
import numpy as np
def create_C_header(filename,network_info,cim_dim,D_VEC,P_VEC,TIME_CONF,GAMMA_VEC,BETA_FP_VEC,GAMMA_FP_VEC):
    """Write the C header (.h) file holding all CIM-QNN mapping parameters.

    Parameters
    ----------
    filename     : destination header file path.
    network_info : (Nlayers_cim, Nlayers_fp, Nimg).
    cim_dim      : (N_ROWS, N_COLS[, ...]) CIM macro dimensions.
    D_VEC        : (H_IMG, W_IMG, C_IN, C_OUT) with per-layer channel lists.
    P_VEC        : (R_IN, R_W, R_OUT, R_BETA, R_GAMMA) bit precisions.
    TIME_CONF    : (T_DP, T_PRE, T_MBIT, T_ADC) timing configurations.
    GAMMA_VEC    : ABN gain codes of the CIM layers (one per CIM layer).
    BETA_FP_VEC  : full-precision BN offsets, reshaped here to (Nlayers_fp, -1).
    GAMMA_FP_VEC : full-precision BN gains (one per FP layer).

    Fixes vs previous revision:
      * the four channel-array declarations are now ';'-terminated (they
        previously emitted invalid C, e.g. ``uint16_t C_IN[2] = {9,72}``);
      * GAMMA is written from GAMMA_VEC (it previously duplicated
        GAMMA_FP_VEC, visible in earlier generated headers);
      * BETA_FP rows are terminated based on Nlayers_fp (not R_BETA) and
        values are masked to 32 bits to match the uint32_t declaration
        (inputs may arrive as sign-extended 64-bit integers).
    """
    # // Retrieve variables //
    # CNN network info
    Nlayers_cim = network_info[0]
    Nlayers_fp = network_info[1]
    Nimg = network_info[2]
    # CIM dims
    N_ROWS = cim_dim[0]
    N_COLS = cim_dim[1]
    # Channels
    H_IMG = D_VEC[0]
    W_IMG = D_VEC[1]
    C_IN = D_VEC[2]
    C_OUT = D_VEC[3]
    # Precisions
    R_IN, R_W, R_OUT, R_BETA, R_GAMMA = P_VEC[0], P_VEC[1], P_VEC[2], P_VEC[3], P_VEC[4]
    # Timings
    T_DP, T_PRE, T_MBIT, T_ADC = TIME_CONF[0], TIME_CONF[1], TIME_CONF[2], TIME_CONF[3]
    # // Reshape FP beta-offsets to one row per FP layer //
    BETA_FP_VEC = np.reshape(BETA_FP_VEC, (Nlayers_fp, -1))
    Nbeta_fp = np.shape(BETA_FP_VEC)[-1]
    # // Write header file ('with' closes the handle even on error) //
    with open(filename, 'w') as fileID:
        # Banner
        fileID.write('/*\n')
        fileID.write(' *-----------------------------------\n')
        fileID.write(' * Header file for CIM-QNN parameters\n')
        fileID.write(' *-----------------------------------\n')
        fileID.write('*/\n')
        fileID.write('\n')
        # Pre-processor statements
        fileID.write(f'#define N_ROWS {N_ROWS}\n')
        fileID.write(f'#define N_COLS {N_COLS}\n')
        fileID.write('\n')
        # Input img size
        fileID.write('// Input img size\n')
        fileID.write(f'uint8_t H_IMG = {H_IMG};\n')
        fileID.write(f'uint8_t W_IMG = {W_IMG};\n')
        # Layers & channels (declarations must end with ';' to be valid C)
        fileID.write('// Networks channels\n')
        fileID.write(f'uint16_t C_IN[{Nlayers_cim}] = {{' + ','.join(str(c) for c in C_IN) + '};\n')
        fileID.write(f'uint16_t C_OUT[{Nlayers_cim}] = {{' + ','.join(str(c) for c in C_OUT) + '};\n')
        fileID.write(f'uint8_t C_IN_LOG[{Nlayers_cim}] = {{' + ','.join(str(int(math.log2(c))) for c in C_IN) + '};\n')
        fileID.write(f'uint8_t C_OUT_LOG[{Nlayers_cim}] = {{' + ','.join(str(int(math.log2(c))) for c in C_OUT) + '};\n')
        # Precision
        fileID.write('// Computing precision\n')
        fileID.write(f'uint8_t R_IN = {R_IN}; uint8_t R_IN_LOG = {int(math.log2(R_IN))};\n')
        fileID.write(f'uint8_t R_W = {R_W}; uint8_t R_W_LOG = {int(math.log2(R_W))};\n')
        fileID.write(f'uint8_t R_OUT = {R_OUT}; uint8_t R_OUT_LOG = {int(math.log2(R_OUT))};\n')
        fileID.write(f'uint8_t R_BETA = {R_BETA};\n')
        fileID.write(f'uint8_t R_GAMMA = {R_GAMMA};\n')
        fileID.write('\n')
        # Timing configs
        fileID.write('// Timing configuration\n')
        fileID.write(f'uint8_t T_DP_CONF = {T_DP};\n')
        fileID.write(f'uint8_t T_PRE_CONF = {T_PRE};\n')
        fileID.write(f'uint8_t T_MBIT_CONF = {T_MBIT};\n')
        fileID.write(f'uint8_t T_ADC_CONF = {T_ADC};\n')
        fileID.write('\n')
        # Number of samples
        fileID.write(f'uint8_t Nimg = {Nimg};\n')
        fileID.write('\n')
        # ABN CIM gains — from GAMMA_VEC (previously wrote GAMMA_FP_VEC by mistake)
        fileID.write('// ABN CIM gain \n')
        fileID.write(f'uint8_t GAMMA[{Nlayers_cim}] = {{' + ','.join(str(GAMMA_VEC[i]) for i in range(Nlayers_cim)) + '};\n')
        fileID.write('\n')
        # ABN FP parameters
        fileID.write('// ABN FP parameters\n')
        fileID.write(f'uint32_t GAMMA_FP[{Nlayers_fp}] = {{' + ','.join(str(GAMMA_FP_VEC[i]) for i in range(Nlayers_fp)) + '};\n')
        fileID.write('\n')
        # Offset values, masked to the declared 32-bit width.
        fileID.write(f'uint32_t BETA_FP[{Nlayers_fp}][{Nbeta_fp}] = {{\n')
        for i in range(Nlayers_fp):
            row = ','.join(hex(int(BETA_FP_VEC[i][j]) & 0xFFFFFFFF) for j in range(Nbeta_fp))
            # Only the last row omits the trailing comma (the old code
            # compared i against R_BETA-1 and never closed the table cleanly).
            fileID.write('{' + row + ('}\n' if i == Nlayers_fp - 1 else '},\n'))
        fileID.write('};\n')
        fileID.write('\n')
    return
\ No newline at end of file
################################################################################
################ Create FPGA files for on-chip test mapping ####################
################################################################################
import sys, os
import math
import numpy as np
from chip_files.make_mif_fpga import make_mif_fpga
def create_fpga_files(filename_vec,network_info,cim_dim,D_VEC,P_VEC,dataTensors):
    """Emit the FPGA memory-initialization (.mif) files for on-chip testing.

    filename_vec : destinations for (inputs, CIM weights, CIM beta, FP
                   weights, inference results[, per-layer outputs]).
    network_info : (Nlayers_cim, Nlayers_fp, ...).
    cim_dim      : (N_ROWS, N_COLS, ...) CIM macro dimensions.
    D_VEC        : (H, W, C_IN, C_OUT) with per-layer channel lists.
    P_VEC        : (R_IN, R_W, R_OUT, R_BETA, R_GAMMA) bit precisions.
    dataTensors  : data matching filename_vec; the sixth entry (outputs)
                   is optional.
    """
    # -- Unpack configuration --
    Nlayers_cim = network_info[0]
    Nlayers_fp = network_info[1]
    N_ROWS, N_COLS = cim_dim[0], cim_dim[1]
    C_IN, C_OUT = D_VEC[2], D_VEC[3]
    R_IN, R_W, R_OUT, R_BETA, R_GAMMA = (P_VEC[0], P_VEC[1], P_VEC[2],
                                         P_VEC[3], P_VEC[4])
    # -- Unpack data tensors and their destination files --
    has_outputs = len(dataTensors) > 5
    data_in = dataTensors[0]
    data_w_cim = dataTensors[1]
    data_b_cim = dataTensors[2]
    data_w_fp = dataTensors[3]
    data_inf = dataTensors[4]
    filename_in = filename_vec[0]
    filename_w_cim = filename_vec[1]
    filename_b_cim = filename_vec[2]
    filename_w_fp = filename_vec[3]
    filename_inf = filename_vec[4]
    if has_outputs:
        data_out = dataTensors[5]
        filename_out = filename_vec[5]
    # Internal variables
    Ndata = np.shape(data_in)[0]
    # -- Pack each layer's beta offsets, bit-plane by bit-plane, into the
    #    column-wise 32-bit words expected by the CIM macro --
    word_weights = 2**np.arange(32)
    packed_betas = []
    for layer in range(Nlayers_cim):
        # One uint8 offset per output column -> LSB-first bit planes.
        planes = np.expand_dims(data_b_cim[layer].astype("uint8"), axis=-1)
        planes = np.flip(np.unpackbits(planes, axis=-1), axis=-1)
        # Rows become bit positions, columns become output channels.
        planes = np.swapaxes(planes, 0, 1)
        # Duplicate every offset bit over its R_W weight columns.
        planes = np.repeat(planes, R_W, axis=-1)
        if R_W*C_OUT[layer] < 32:
            planes = np.pad(planes, ((0, 0), (0, 32-R_W*C_OUT[layer])))
        # Keep the R_BETA significant planes and fold each 32-bit slice.
        packed = np.dot(np.reshape(planes[:R_BETA, ...], (-1, 32)), word_weights)
        packed_betas.append(packed)
    data_b_cim = np.stack(packed_betas)
    # -- Write the FPGA test files --
    make_mif_fpga(filename_in, data_in, 32)
    make_mif_fpga(filename_w_cim, data_w_cim, 32)
    make_mif_fpga(filename_b_cim, data_b_cim, 32)
    make_mif_fpga(filename_w_fp, data_w_fp, 32)
    make_mif_fpga(filename_inf, data_inf, 8)
    if has_outputs:
        make_mif_fpga(filename_out, data_out, 32)
    return
\ No newline at end of file
DEPTH = 10;
WIDTH = 32;
ADDRESS_RADIX = DEC;
DATA_RADIX = HEX;
CONTENT
BEGIN
0 : 0x13;
1 : 0x13;
2 : 0x9;
3 : 0xbe;
4 : 0x3e;
5 : 0xadce;
6 : 0x1f26;
7 : 0x2ff6;
8 : 0x7d41;
9 : 0x2d00;
END;
DEPTH = 128;
WIDTH = 8;
ADDRESS_RADIX = DEC;
DATA_RADIX = HEX;
CONTENT
BEGIN
0 : 0x7;
1 : 0x2;
2 : 0x1;
3 : 0x0;
4 : 0x4;
5 : 0x1;
6 : 0x4;
7 : 0x9;
8 : 0x5;
9 : 0x9;
10 : 0x0;
11 : 0x6;
12 : 0x9;
13 : 0x0;
14 : 0x1;
15 : 0x5;
16 : 0x9;
17 : 0x7;
18 : 0x3;
19 : 0x4;
20 : 0x9;
21 : 0x6;
22 : 0x6;
23 : 0x5;
24 : 0x4;
25 : 0x0;
26 : 0x7;
27 : 0x4;
28 : 0x0;
29 : 0x1;
30 : 0x3;
31 : 0x1;
32 : 0x3;
33 : 0x4;
34 : 0x7;
35 : 0x2;
36 : 0x7;
37 : 0x1;
38 : 0x2;
39 : 0x1;
40 : 0x1;
41 : 0x7;
42 : 0x4;
43 : 0x2;
44 : 0x3;
45 : 0x5;
46 : 0x1;
47 : 0x2;
48 : 0x4;
49 : 0x4;
50 : 0x6;
51 : 0x3;
52 : 0x5;
53 : 0x5;
54 : 0x6;
55 : 0x0;
56 : 0x4;
57 : 0x1;
58 : 0x9;
59 : 0x5;
60 : 0x7;
61 : 0x8;
62 : 0x9;
63 : 0x3;
64 : 0x7;
65 : 0x4;
66 : 0x6;
67 : 0x4;
68 : 0x3;
69 : 0x0;
70 : 0x7;
71 : 0x0;
72 : 0x2;
73 : 0x9;
74 : 0x1;
75 : 0x7;
76 : 0x3;
77 : 0x2;
78 : 0x9;
79 : 0x7;
80 : 0x7;
81 : 0x6;
82 : 0x2;
83 : 0x7;
84 : 0x8;
85 : 0x4;
86 : 0x7;
87 : 0x3;
88 : 0x6;
89 : 0x1;
90 : 0x3;
91 : 0x6;
92 : 0x9;
93 : 0x3;
94 : 0x1;
95 : 0x4;
96 : 0x1;
97 : 0x7;
98 : 0x6;
99 : 0x9;
100 : 0x6;
101 : 0x0;
102 : 0x5;
103 : 0x4;
104 : 0x9;
105 : 0x9;
106 : 0x2;
107 : 0x1;
108 : 0x9;
109 : 0x4;
110 : 0x8;
111 : 0x7;
112 : 0x3;
113 : 0x9;
114 : 0x7;
115 : 0x4;
116 : 0x4;
117 : 0x4;
118 : 0x9;
119 : 0x2;
120 : 0x5;
121 : 0x4;
122 : 0x7;
123 : 0x6;
124 : 0x7;
125 : 0x9;
126 : 0x0;
127 : 0x5;
END;
Ce diff est replié.
Ce diff est replié.
Ce diff est replié.
############################################################################
################ Write input arrays to MIF format files ####################
############################################################################
import sys, os
import math
import numpy as np
def make_mif_fpga(filename, dataTensor, bitwidth):
    """Write *dataTensor* to *filename* in (Altera/Intel-style) MIF format.

    filename   : destination .mif file path.
    dataTensor : array-like; flattened so each element becomes one memory word.
    bitwidth   : word width written to the WIDTH field (8 or 32 in this flow).

    Fix vs previous revision: the file is opened with a context manager, so
    the handle is closed even if a write raises (it previously leaked).
    NOTE(review): values are written with Python's '0x' prefix although
    DATA_RADIX = HEX — confirm the downstream MIF reader accepts the prefix
    (the existing generated files already carry it, so behavior is kept).
    """
    # // Internal variables: one memory word per tensor element.
    Ndepth = np.size(dataTensor)
    dataTensor = np.reshape(dataTensor, (-1,))
    # // Write file
    with open(filename, 'w') as fileID:
        # Dimensions
        fileID.write(f'DEPTH = {Ndepth};\n')
        fileID.write(f'WIDTH = {bitwidth};\n')
        fileID.write('\n')
        fileID.write('ADDRESS_RADIX = DEC;\n')
        fileID.write('DATA_RADIX = HEX;\n')
        fileID.write('\n')
        fileID.write('CONTENT\n')
        fileID.write('BEGIN\n')
        # Data: "<address> : <hex word>;" per line.
        for i in range(Ndepth):
            fileID.write(f'{i} : {hex(dataTensor[i])};\n')
        fileID.write('END;\n')
##############################################################################
################ This file sets the tbench for the CIM up ####################
##############################################################################
import os, sys
import subprocess
import numpy as np
from create_C_header import create_header
from genRandomData import genRandomData
### Local variables ###
# Flat configuration script: builds a random single-layer test case and
# writes the C header + input file for the CIM test bench.
# // Hardware parameters
# CIM size
Nrows = 1152;
Ncols = 256;
# Image dimensions
Himg = 28;
Wimg = Himg;
# Target layer size
C_in = 1024;
C_out = 32;
# Target precision
r_in = 1;
r_w = 1;
r_out = 8;
# Timing config
T_DP = 2;
T_PRE = 2;
T_MBIT = 2;
T_ADC = 2;
# ABN vectors
r_beta = 5;
r_gamma = 8;
# NOTE(review): np.arange(1,r_gamma) yields r_gamma-1 values (1..7) — confirm
# whether the endpoint should be r_gamma+1.
GAMMA_VEC = np.arange(1,r_gamma);
BETA_VEC = 2**np.arange(r_beta)-1;
# // Target data distributions & data
dist_in = 'normal';
dist_w = 'uniform';
Ndata = 50;
# // Path to output file
filename_h = "./outputs/cim_config.h";
filename_o = "./outputs/TF_DP_in.txt";
### Create data distributions ###
cim_info = (Nrows,Ncols,r_in,C_in,r_out,C_out);
dataVec = genRandomData(cim_info,dist_in,dist_w,Ndata);
### Generate test files ###
filename_vec = (filename_h,filename_o);
cim_dim = (Nrows,Ncols);
D_VEC = (Himg,Wimg,C_in,C_out);
P_VEC = (r_in,r_w,r_out,r_beta,r_gamma);
T_VEC = (T_DP,T_PRE,T_MBIT,T_ADC);
# NOTE(review): 'create_header' (imported from create_C_header) is called with
# 8 arguments here, while the create_C_header() defined in chip_files takes 9 —
# verify the imported module really exposes this signature.
create_header(filename_vec,cim_dim,D_VEC,P_VEC,T_VEC,BETA_VEC,GAMMA_VEC,dataVec);
# // C header has been created, C file is ready for execution //
print("/// Done writing setup files, ready for flashing ///");
###############################################################################################################################
###################### Map CIM-QNN inputs/weights/outputs from Python to SystemVerilog/Hardware ###############################
###############################################################################################################################
import sys,os
import h5py
import numpy as np
import tensorflow as tf
from keras.models import load_model
from ctypes import c_uint32, c_uint64
from config.config_cim_cnn_param import*
from layers.binary_ops import binarize as binarize
from utils.config_hardware_model import SramInfo_charge as SramInfo
from chip_files.create_C_header import create_C_header
from chip_files.create_fpga_files import create_fpga_files
#################################################
########## Local variables definition ###########
#################################################
# NOTE(review): dim, IAres, Wres, OAres, r_beta, r_gamma, arch, tech, typeT,
# VDD, BBN, BBP, Nrows, C_IN_VEC, ... come from the star-import of
# config.config_cim_cnn_param — confirm against that module.
# Img dimension
H = dim;
# Computing precision
R_IN = IAres;
R_W = Wres;
# Output resolution reuses the input-activation resolution.
R_OUT = IAres;
R_BETA = r_beta;
R_GAMMA = r_gamma;
# Network length
Nlayers = len(C_IN_VEC);
# Flags for test files generation
OUT_EN = 0; # 1: output files per layer exist ; 0: they do not, prevent storage and comparison
# Create CIMU structure
sramInfo = SramInfo(arch,tech,typeT,VDD,BBN,BBP,IAres,Wres,OAres,r_gamma,r_beta,Nrows,[0,0]);
# Small constant guarding against division by zero in the ABN gain computation.
epsilon = 1e-8;
###################################################
########## Get files to map from config ###########
###################################################
# Source files produced by the training flow; path_to_out and the *_template
# strings come from the config star-import.
in_file = path_to_out+in_file_template.format(dataset_name,cim_type,arch,IAres);
out_file = path_to_out+out_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE);
w_file = path_to_out+w_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE);
inference_file = path_to_out+inference_file_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,r_gamma,r_beta,Niter,EN_SCALE,ANALOG_BN,EN_NOISE);
#################################################
########## Get files to store outputs ###########
#################################################
# Destination text files (hex dumps) for the chip-mapped data, one per
# data category.
file_out_inputs = path_to_chip+chip_in_template.format(dataset_name,network_struct,cim_type,arch,IAres);
file_out_outputs = path_to_chip+chip_out_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_inference = path_to_chip+chip_inference_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_weights = path_to_chip+chip_w_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_gamma = path_to_chip+chip_gamma_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_beta = path_to_chip+chip_beta_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_weights_FP = path_to_chip+chip_w_FP_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_gamma_FP = path_to_chip+chip_gamma_FP_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
file_out_beta_FP = path_to_chip+chip_beta_FP_template.format(dataset_name,network_struct,cim_type,arch,IAres,Wres,OAres,EN_NOISE);
####################################################
########## Define files for FPGA storage ###########
####################################################
# Memory-initialization (.mif) files consumed by the FPGA test bench.
file_fpga_inputs = path_to_fpga+'inputs.mif';
file_fpga_weights_cim = path_to_fpga+'weights_cim.mif';
file_fpga_beta = path_to_fpga+'beta_cim.mif';
file_fpga_weights_FP = path_to_fpga+'weights_FP.mif';
file_fpga_inf_res = path_to_fpga+'inf_results.mif';
file_fpga_outputs = path_to_fpga+'outputs.mif';
##########################################
########## Post-process data #############
##########################################
# // Transform inputs sub-set into 32b words for SRAM encoding //
C_IN = C_IN_VEC[0];
with open(in_file,"r") as f:
    # Get inputs (one flattened image per row; Nimg_save comes from config)
    inputs = np.genfromtxt(f,delimiter=" ");
    inputs = np.reshape(inputs,(Nimg_save,-1));
# Reshape depending upon the operation type
if(OP_TYPE == "FC"):
    # Pad each flattened image up to the first-layer input-channel count.
    inputs = np.pad(inputs,((0,0),(0,C_IN_VEC[0]-np.shape(inputs)[-1])),mode="constant");
    inputs = inputs.flatten();
    if(C_IN*R_IN < 32):
        # NOTE(review): this branch reshapes ALL images into a single C_IN
        # vector — it only holds for Nimg_save == 1; confirm intent.
        img_temp = np.reshape(inputs,(C_IN))[np.newaxis,:];
        int_img = np.dot(img_temp,2**np.arange(C_IN));
    else:
        # Pack 32/R_IN samples of R_IN bits each into one 32b word.
        img_temp = np.reshape(inputs,(-1,32//R_IN));
        int_img = np.dot(img_temp,2**np.arange(0,32,R_IN));
elif(OP_TYPE == "CONV-1D"):
    img_temp = np.reshape(inputs,(-1,32//R_IN));
    int_img = np.dot(img_temp,2**np.arange(0,32,R_IN));
elif(OP_TYPE == "CONV-2D"):
    img_temp = np.reshape(inputs,(-1,32//R_IN));
    int_img = np.dot(img_temp,2**np.arange(0,32,R_IN));
else:
    print("Warning: operation mode not supported")
# // Transform outputs sub-set into 32b words for SRAM encoding //
if(OUT_EN):
    outputs_list = []; outputs_list_test = [];
    for i in range(Nlayers):
        C_OUT = C_OUT_VEC[i];
        # Get outputs (only ADC outputs)
        with open(out_file+"_layer_{}.txt".format(i),"r") as f:
            outputs = np.genfromtxt(f,delimiter=" ");
        # Store raw outputs for FP test
        outputs_list_test.append(np.int32(outputs));
        # Reshaping depending upon operation type
        if(OP_TYPE == "FC"):
            if(R_OUT*C_OUT <= 32):
                int_dout = np.array([np.dot(outputs,2**(R_OUT*np.arange(0,(R_OUT*C_OUT),R_OUT)))]);
            else:
                int_dout = np.dot(np.reshape(outputs,(-1,32//R_OUT)),2**np.arange(0,32,R_OUT));
        elif(OP_TYPE == "CONV-1D"):
            int_dout = np.dot(np.reshape(outputs,(-1,32//R_OUT)),2**(R_OUT*np.arange(32//R_OUT)));
        elif(OP_TYPE == "CONV-2D"):
            # Pad with zeros when necessary to fit memory size
            # (feature map after layer i is (H-2*(i+1)) x (H-2*(i+1)) —
            # presumably 3x3 valid convolutions; confirm against the model).
            Npads = (R_OUT*C_OUT*(H-2*(i+1))*(H-2*(i+1)))%32;
            Npads = 0 if (Npads == 0) else (32//R_OUT-Npads);
            # Swap rows and columns
            data_out = np.reshape(outputs,(-1,H-2*(i+1),H-2*(i+1),C_OUT));
            data_out = np.swapaxes(data_out,1,2);
            data_out = data_out.reshape(-1);
            data_out = np.pad(data_out,(0,Npads),mode="constant");
            # Encode into 32b words
            int_dout = np.dot(np.reshape(data_out,(-1,32//R_OUT)),2**(R_OUT*np.arange(32//R_OUT)));
        else:
            print("Warning: operation type not supported !")
        # Add result to outputs list
        outputs_list.append(int_dout.astype("uint64"));
    # Full-precision tail layers (Nl_fp comes from config).
    for i in range(Nl_fp):
        # Get outputs
        with open(out_file+"_layer_{}.txt".format(Nlayers+i),"r") as f:
            outputs = np.genfromtxt(f,delimiter=" ");
        # Transform into fixed-point int32 (scales fS_beta_fp/fS_gamma_fp from config)
        outputs = np.int32(np.round(outputs*(2**16-1)*(2**15)/fS_beta_fp/fS_gamma_fp));
        # outputs = outputs*(2**15)*(2**15)/fS_beta_fp/fS_gamma_fp;
        # Append result
        outputs_list.append(outputs);
# // Read inference (classification) results from the training flow //
with open(inference_file,"r") as f:
    inf_results = np.genfromtxt(f,delimiter=" ");
# // Get weights for each layer and quantize them //
weights_list = []; weights_FP_list = [];
gamma_list = []; beta_list = [];
gamma_FP_list = []; beta_FP_list = [];
c_in_vec = []; c_out_vec = [];
Nlayers_cim = 0; Nlayers_fp = 0;
with h5py.File(w_file,"r") as f:
    # List all groups
    list_of_keys = list(f.keys())
    # print(list_of_keys)
    for key in list_of_keys:
        # // Different cases depending upon the layer type (key) //
        # CIM-QNN layer
        if(('cim_charge_conv2d' in key) or ('cim_charge_dense' in key)):
            dataset = f[key][key];
            # NOTE(review): assumes the kernel is the first dataset of the
            # group (h5py lists keys alphabetically) — confirm for this model.
            local_keys = list(dataset.keys());
            w_data = dataset[local_keys[0]][()];
            # Binarize weights, mapped from {-1,+1} to {0,1}
            w_data = tf.cast((binarize(w_data,H=1.)+np.ones_like(w_data))/2,dtype="int32");
            # Get weights shape (detect FC or CONV)
            w_shape = tf.shape(w_data);
            if(len(w_shape)>1):
                # Collapse all leading dims onto the rows (CONV kernels -> 2D)
                w_data = tf.reshape(w_data,(-1,w_shape[-1]));
                w_shape = tf.shape(w_data);
            # Pad with zeros to reach the full array size
            w_data = np.pad(w_data,((0,Nrows-w_shape[0]),(0,Ncols-w_shape[1])));
            # Store layer dimensions
            c_in_vec.append(w_shape[-2]); c_out_vec.append(w_shape[-1]);
            Nlayers_cim += 1;
            # Flatten weights in 32b words
            int_weights = np.dot(np.reshape(w_data,(-1,32)),2**np.arange(32));
            # Store weights for the output file
            weights_list.append(int_weights);
        # Full-precision dense or conv layer
        elif(('dense' in key) or ('conv' in key)):
            dataset = f[key][key];
            local_keys = list(dataset.keys());
            w_data = dataset[local_keys[0]][()];
            # Transform floating-point weights into full-precision signed int32
            w_data = np.round(w_data*(2**15)/fS_beta_fp);
            w_data = np.int32(w_data);
            # w_data = w_data*(2**15)/fS_beta_fp;
            # Store weights
            weights_FP_list.append(np.reshape(w_data,(-1,1)));
            # Count one more FP layer
            Nlayers_fp += 1;
        # Analog BN
        elif('analog_bn' in key):
            dataset = f[key][key];
            # NOTE(review): relies on alphabetical dataset ordering
            # (beta, gamma, moving_mean, moving_variance) — confirm.
            local_keys = list(dataset.keys());
            beta = dataset[local_keys[0]][()];
            gamma = dataset[local_keys[1]][()];
            #m_sigma = dataset[local_keys[2]][()]; # to be corrected with updated training, if necessary
            m_sigma = 1;
            mov_mean = dataset[local_keys[2]][()];
            mov_var = dataset[local_keys[3]][()];
            # // Retrieve hardware parameters //
            Vmax_beta = sramInfo.Vmax_beta;
            Vlsb_beta = Vmax_beta/2**(r_beta-1);
            # // Equivalent gain computation //
            # Target variance
            sigma_goal = VDD/m_sigma; var_goal = sigma_goal*sigma_goal;
            # Get custom renorm factors (single gain for all columns)
            mov_variance_DP_t = np.mean(mov_var)/var_goal;
            sigma_DP_t = np.sqrt(mov_variance_DP_t);
            # Get equivalent coefficients (epsilon guards the division)
            gamma_eq = gamma/(sigma_DP_t + epsilon);
            # Get gamma encoding (power-of-two gain code)
            gamma_code = np.round(np.log2(gamma_eq));
            # // Equivalent offset computation //
            beta_eq = beta/gamma_eq - mov_mean;
            # Get beta encoding (in beta-DAC LSBs)
            beta_code = np.round(beta_eq/Vlsb_beta);
            print(beta_code)
            # // Append gamma & beta configs (uint8, encoding done during C mapping)
            gamma_list.append(gamma_code.astype("uint8"));
            beta_list.append(beta_code.astype("uint8"));
        # Full-precision BN
        elif('batch_normalization' in key):
            dataset = f[key][key];
            local_keys = list(dataset.keys());
            beta = dataset[local_keys[0]][()];
            gamma = dataset[local_keys[1]][()];
            mov_mean = dataset[local_keys[2]][()];
            mov_var = dataset[local_keys[3]][()];
            # Get equivalent coefficients
            mov_sig = np.sqrt(mov_var);
            gamma_eq = gamma/mov_sig;
            beta_eq = beta/gamma_eq - mov_mean;
            # Transform floating-point result into full-precision signed int32
            beta_eq = np.int32(np.round(beta_eq/fS_beta_fp*(2**15)));
            gamma_eq = np.int32(np.round(gamma_eq/fS_gamma_fp*(2**16-1)));
            # beta_eq = beta_eq*(2**15)/fS_beta_fp;
            # gamma_eq = gamma_eq*(2**15)/fS_gamma_fp;
            # Store results
            gamma_FP_list.append(np.reshape(gamma_eq,(-1,1)));
            beta_FP_list.append(np.reshape(beta_eq,(-1,1)));
        # NOTHING TO DO FOR ACTIVATIONS/REGU LAYERS
# // Verify full-precision conversion gives expected output (converted to 64b) //
# print(np.shape(outputs_list_test[-1])); print(np.shape(weights_FP_list[0]));
# print(np.shape(gamma_FP_list[0])); print(np.shape(beta_FP_list[0]));
# print(np.shape(weights_FP_list[-1]))
# print(np.reshape(weights_FP_list[-1],(-1,classes)))
print("\n");
#print("--- Operands ---");
#print(np.reshape(outputs_list_test[-1],(Nimg_save,-1)));
#print(np.reshape(weights_FP_list[0],(C_OUT_VEC[-1],10)));
#print(outputs_list[-1]);
###################################################
########## Test FP output equivalence #############
###################################################
# Recompute the final FP layer (MAC + BN) from the stored last-CIM-layer
# outputs and compare against the FP results saved during training.
if(OUT_EN):
    print("--- Computing FP MAC equivalence ---");
    temp_mac = np.squeeze(np.dot(np.reshape(outputs_list_test[-1],(Nimg_save,-1)),np.reshape(weights_FP_list[0],(-1,classes))));
    actual_mac = np.int32(np.round(np.squeeze(gamma_FP_list[-1])*(temp_mac+np.squeeze(beta_FP_list[-1]))));
    # actual_mac = np.uint64(np.squeeze(np.dot(np.reshape(outputs_list_test[-1],(Nimg_save,-1)),np.reshape(weights_FP_list[0],(C_OUT_VEC[-1],10)))));
    # actual_mac = np.squeeze(gamma_FP_list[0])*(np.squeeze(np.dot(np.reshape(outputs_list_test[-1],(Nimg_save,-1)),np.reshape(weights_FP_list[0],(C_OUT_VEC[-1],10))))+np.squeeze(beta_FP_list[0]));
    # expected_mac = np.uint64(np.round(outputs_list[-1]));
    # expected_mac = np.uint64(outputs_list[-1]*(2**31)/fS_beta_fp);
    expected_mac = outputs_list[-1];
    expected_mac = np.reshape(expected_mac,(Nimg_save,-1));
    #print("--- Operands ---");
    #print(np.reshape(outputs_list_test[-1],(Nimg_save,-1)));
    #print(np.reshape(weights_FP_list[0],(C_OUT_VEC[-1],10)));
    #print(outputs_list[-1]);
    # print(np.int32(actual_mac)); print(np.int32(expected_mac));
    print(actual_mac); print(expected_mac);
    print(outputs_list_test[-1]);
    # Detailed computation below: element-by-element debug re-run of the
    # same MAC+BN, printing intermediate dot-product values for image 0.
    # NOTE(review): the hard-coded 10 presumably equals 'classes' — confirm.
    in_FP = np.reshape(outputs_list_test[-1],(Nimg_save,-1));
    w_FP = np.reshape(weights_FP_list[0],(C_OUT_VEC[-1],10));
    gamma_FP = gamma_FP_list[0]; beta_FP = beta_FP_list[0];
    mac_val = np.zeros((Nimg_save,10),dtype="int32");
    for m in range(Nimg_save):
        # Perform MAC operations
        for i in range(C_OUT_VEC[-1]):
            # Fetch input
            inputs = in_FP[m][i];
            for j in range(10):
                # Fetch weight
                weights = w_FP[i][j];
                # MAC operation
                mac_val[m][j] = mac_val[m][j] + inputs*weights;
                #if(m==0 and (i==0 or i==1)):
                if(m==0 and i<8 and j==0):
                    print('Input {} is {}'.format(j,inputs));
                    print('Weight {} is {}'.format(j,weights));
                    print('DP {} at iter {} is {}'.format(j,i,mac_val[m][j]));
        # Print final DP value
        for j in range(10):
            if(m==0):
                print('DP result {} is {}'.format(j,mac_val[m][j]));
        # Perform BN operations
        for j in range(10):
            mac_val[m][j] = gamma_FP[j]*(mac_val[m][j]+beta_FP[j]);
            if(m==0):
                print('BN result {} is {}'.format(j,mac_val[m][j]));
    # Count the words whose relative error exceeds 0.1%.
    count_error = 0;
    for i in range(Nimg_save):
        for j in range(np.shape(actual_mac)[-1]):
            perc_error = 100*np.abs(np.int32(actual_mac[i,j]-expected_mac[i,j])/np.int32(expected_mac[i,j]));
            if(perc_error>1e-1):
                error_val = actual_mac[i,j]^expected_mac[i,j];
                count_error+=1;
                #print("Error for FP computation {}: {} ({:3f}%) !".format(i,hex(error_val),perc_error));
                print("Error for FP computation {}: {} instead of {} ({:3f}%) !".format(Nimg_save*i+j,hex(actual_mac[i,j]),hex(expected_mac[i,j]),perc_error));
    if(count_error == 0):
        print('All results are correct to 0.1%, congrats !');
    else:
        print('There were {} word errors found !'.format(count_error));
else:
    print('Warning: output results not available after training, FP comparison bypassed');
####################################################
########## Store results to text files #############
####################################################
# Inputs
data_in = int_img.astype("uint32");
np.savetxt(file_out_inputs,data_in,fmt='%x');
# Outputs
if(OUT_EN):
    cim_outputs = np.concatenate(outputs_list,axis=None).astype("uint64");
    for i in range(len(outputs_list)):
        # NOTE(review): both branches below are identical — the i<Nlayers
        # split looks like a leftover for layer-type-specific formatting.
        if(i<Nlayers):
            np.savetxt(file_out_outputs+'_layer_{}.txt'.format(i),outputs_list[i].astype("uint64"),fmt='%x');
        else:
            np.savetxt(file_out_outputs+'_layer_{}.txt'.format(i),outputs_list[i].astype("uint64"),fmt='%x');
# Inference results
np.savetxt(file_out_inference,np.array([inf_results]).astype("uint64"),fmt='%x');
# CIM weights
weights_cim = np.concatenate(weights_list,axis=None).astype("uint64");
np.savetxt(file_out_weights+'.txt',weights_cim,fmt='%x');
# Gamma file
gamma_cim = np.concatenate(gamma_list,axis=None);
np.savetxt(file_out_gamma+'.txt',gamma_cim,fmt='%x');
# Beta file
beta_cim = np.concatenate(beta_list,axis=None);
np.savetxt(file_out_beta+'.txt',beta_cim,fmt='%x');
# FP FC/CONV weights
weights_fp = np.concatenate(weights_FP_list,axis=None).astype("uint64");
np.savetxt(file_out_weights_FP+'.txt',weights_fp,fmt='%x');
# FP BN weights
gamma_fp = np.concatenate(gamma_FP_list,axis=None).astype("uint64");
beta_fp = np.concatenate(beta_FP_list,axis=None).astype("uint64");
np.savetxt(file_out_gamma_FP+'.txt',gamma_fp,fmt='%x');
np.savetxt(file_out_beta_FP+'.txt',beta_fp,fmt='%x');
######################################################################
########## Generate final test files for on-chip testing #############
######################################################################
# // Parameters folding //
# Filenames
filename_c = path_to_chip+'./cim_config.h';
filename_fpga = [file_fpga_inputs,file_fpga_weights_cim,file_fpga_beta,file_fpga_weights_FP,file_fpga_inf_res,file_fpga_outputs];
# CNN info
network_info = (Nlayers_cim,Nlayers_fp,Nimg_save);
# CIM dimensions
cim_dim = (Nrows,Ncols,Nimg_save);
# Precision/Channels/Timing
D_VEC = (dim,dim,c_in_vec,c_out_vec);
P_VEC = (R_IN,R_W,R_OUT,R_BETA,R_GAMMA);
T_VEC = (T_DP,T_PRE,T_MBIT,T_ADC);
# Data for FPGA (the optional 6th entry is only present when OUT_EN is set)
data_fpga = [data_in,weights_cim,beta_list,weights_fp,inf_results.astype("int32")];
if(OUT_EN):
    data_fpga.append(cim_outputs);
# // Generate C header file with hardware params //
create_C_header(filename_c,network_info,cim_dim,D_VEC,P_VEC,T_VEC,gamma_cim,beta_fp,gamma_fp);
# // Generate off-chip FPGA memory files //
create_fpga_files(filename_fpga,network_info,cim_dim,D_VEC,P_VEC,data_fpga);
print('///////////////////////////////////////////////////////');
print('//////////////// FILES CONVERSION DONE ////////////////');
print('///////////////////////////////////////////////////////');
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous pour commenter