Skip to content
Extraits de code Groupes Projets
sw_to_chip.py 18 ko
Newer Older
  • Learn to ignore specific revisions
  • ###############################################################################################################################
    ###################### Map CIM-QNN inputs/weights/outputs from Python to SystemVerilog/Hardware ###############################
    ###############################################################################################################################
    import sys,os
    import h5py
    import numpy as np
    import tensorflow as tf
    from keras.models import load_model
    
    from ctypes import c_uint32, c_uint64
    
    from config.config_cim_cnn_param import*
    from layers.binary_ops import binarize as binarize
    
    from utils.config_hardware_model import SramInfo_charge as SramInfo
    
    from chip_files.create_C_header import create_C_header
    from chip_files.create_fpga_files import create_fpga_files
    
    #################################################
    ########## Local variables definition ###########
    #################################################
    # Image dimension (taken from the configuration)
    H = dim
    # Computing precision of inputs, weights, outputs and BN parameters
    # NOTE(review): R_OUT mirrors IAres rather than OAres -- presumably because a
    # layer's output is the next layer's input; confirm.
    R_IN, R_W, R_OUT = IAres, Wres, IAres
    R_BETA, R_GAMMA = r_beta, r_gamma
    # Network length, i.e. number of entries in the input-channels vector
    Nlayers = len(C_IN_VEC)
    # Flag for test files generation:
    #   1 -> output files per layer exist and are stored/compared
    #   0 -> they do not; storage and comparison are skipped
    OUT_EN = 0

    # Small constant added to the BN standard deviation before dividing by it
    epsilon = 1e-8
    # Create CIMU structure
    sramInfo = SramInfo(
        arch, tech, typeT, VDD, BBN, BBP,
        IAres, Wres, OAres, r_gamma, r_beta,
        Nrows, [0, 0],
    )
    
    ###################################################
    ########## Get files to map from config ###########
    ###################################################
    # Format arguments shared by every template that describes a trained network
    _train_args = (
        dataset_name, network_struct, cim_type, arch,
        IAres, Wres, OAres, r_gamma, r_beta,
        Niter, EN_SCALE, ANALOG_BN, EN_NOISE,
    )
    # Input samples only depend on the dataset and on the input precision
    in_file = path_to_out + in_file_template.format(dataset_name, cim_type, arch, IAres)
    # Per-layer outputs, weights and inference results of the trained network
    out_file = path_to_out + out_file_template.format(*_train_args)
    w_file = path_to_out + w_file_template.format(*_train_args)
    inference_file = path_to_out + inference_file_template.format(*_train_args)
    
    #################################################
    ########## Get files to store outputs ###########
    #################################################
    # Format arguments shared by all chip-side templates except the inputs one
    _chip_args = (dataset_name, network_struct, cim_type, arch, IAres, Wres, OAres, EN_NOISE)
    # Inputs file
    file_out_inputs = path_to_chip + chip_in_template.format(dataset_name, network_struct, cim_type, arch, IAres)
    # Layer outputs and inference results
    file_out_outputs = path_to_chip + chip_out_template.format(*_chip_args)
    file_out_inference = path_to_chip + chip_inference_template.format(*_chip_args)
    # CIM layers: weights and BN parameters
    file_out_weights = path_to_chip + chip_w_template.format(*_chip_args)
    file_out_gamma = path_to_chip + chip_gamma_template.format(*_chip_args)
    file_out_beta = path_to_chip + chip_beta_template.format(*_chip_args)
    # Full-precision (FP) layers: weights and BN parameters
    file_out_weights_FP = path_to_chip + chip_w_FP_template.format(*_chip_args)
    file_out_gamma_FP = path_to_chip + chip_gamma_FP_template.format(*_chip_args)
    file_out_beta_FP = path_to_chip + chip_beta_FP_template.format(*_chip_args)
    
    ####################################################
    ########## Define files for FPGA storage ###########
    ####################################################
    # One memory-initialization file per data set, all located in path_to_fpga
    (
        file_fpga_inputs,
        file_fpga_weights_cim,
        file_fpga_beta,
        file_fpga_weights_FP,
        file_fpga_inf_res,
        file_fpga_outputs,
    ) = (
        path_to_fpga + mif_name
        for mif_name in (
            'inputs.mif',
            'weights_cim.mif',
            'beta_cim.mif',
            'weights_FP.mif',
            'inf_results.mif',
            'outputs.mif',
        )
    )
    
    ##########################################
    ########## Post-process data #############
    ##########################################
    # // Transform inputs sub-set into 32b words for SRAM encoding //
    # Number of input channels of the first layer
    C_IN = C_IN_VEC[0]
    with open(in_file, "r") as f:
      # Get inputs (space-separated text)
      inputs = np.genfromtxt(f, delimiter=" ")
    # One row per saved image
    inputs = np.reshape(inputs, (Nimg_save, -1))
    # Pack the R_IN-bit values into integer words, first value in the LSBs.
    # The packing depends upon the operation type.
    if OP_TYPE == "FC":
      # Zero-pad every image up to the channel count of the first layer
      inputs = np.pad(inputs, ((0, 0), (0, C_IN - np.shape(inputs)[-1])), mode="constant")
      inputs = inputs.flatten()
      if C_IN*R_IN < 32:
        # An image does not fill a word: emit one partial word per image.
        # Values are R_IN bits apart, consistently with the full-word case below.
        img_temp = np.reshape(inputs, (-1, C_IN))
        int_img = np.dot(img_temp, 2**(R_IN*np.arange(C_IN)))
      else:
        img_temp = np.reshape(inputs, (-1, 32//R_IN))
        int_img = np.dot(img_temp, 2**np.arange(0, 32, R_IN))
    elif OP_TYPE in ("CONV-1D", "CONV-2D"):
      # Both convolution flavours use the same plain full-word packing
      img_temp = np.reshape(inputs, (-1, 32//R_IN))
      int_img = np.dot(img_temp, 2**np.arange(0, 32, R_IN))
    else:
      # int_img cannot be built: fail here with a clear message instead of a
      # NameError further down when the inputs are written to file.
      raise ValueError("Operation type '{}' not supported (expected FC, CONV-1D or CONV-2D)".format(OP_TYPE))
        
    # // Transform outputs sub-set into 32b words for SRAM encoding //
    if(OUT_EN):
      outputs_list = []; outputs_list_test = [];
      # Number of R_OUT-bit values stored in one 32b word, and the weight of
      # each of them inside the word (first value in the LSBs)
      vals_per_word = 32//R_OUT
      word_weights = 2**(R_OUT*np.arange(vals_per_word))
      for i in range(Nlayers):
        C_OUT = C_OUT_VEC[i]
        # Get outputs (only ADC outputs)
        with open(out_file+"_layer_{}.txt".format(i),"r") as f:
          outputs = np.genfromtxt(f,delimiter=" ")
        # Store raw outputs for FP test
        outputs_list_test.append(np.int32(outputs))
        # Reshaping depending upon operation type
        if OP_TYPE == "FC":
          if R_OUT*C_OUT <= 32:
            # All channels fit in one word: value k sits at bit offset R_OUT*k
            # (the exponent must be scaled by R_OUT only once)
            int_dout = np.array([np.dot(outputs, 2**(R_OUT*np.arange(C_OUT)))])
          else:
            int_dout = np.dot(np.reshape(outputs,(-1,vals_per_word)), word_weights)
        elif OP_TYPE == "CONV-1D":
          int_dout = np.dot(np.reshape(outputs,(-1,vals_per_word)), word_weights)
        elif OP_TYPE == "CONV-2D":
          # Feature-map side at this layer (shrinks by 2 per layer)
          W_OUT = H-2*(i+1)
          # Swap rows and columns
          data_out = np.reshape(outputs,(-1,W_OUT,W_OUT,C_OUT))
          data_out = np.swapaxes(data_out,1,2)
          data_out = data_out.reshape(-1)
          # Pad with zeros when necessary to fit memory size: the pad length is
          # counted in values (not bits) so that the data fills whole words
          Npads = (-data_out.size) % vals_per_word
          data_out = np.pad(data_out,(0,Npads),mode="constant")
          # Encode into 32b words
          int_dout = np.dot(np.reshape(data_out,(-1,vals_per_word)), word_weights)
        else:
          # Without this, int_dout would be undefined below
          raise ValueError("Operation type '{}' not supported (expected FC, CONV-1D or CONV-2D)".format(OP_TYPE))
        # Add result to outputs list
        outputs_list.append(int_dout.astype("uint64"))

      # Full-precision layers are stored after the CIM layers
      for i in range(Nl_fp):
        # Get outputs
        with open(out_file+"_layer_{}.txt".format(Nlayers+i),"r") as f:
          outputs = np.genfromtxt(f,delimiter=" ")
        # Transform into fixed-point int32, using the same scale factors as the
        # FP BN gamma (2**16-1)/fS_gamma_fp and the FP weights/beta 2**15/fS_beta_fp
        outputs = np.int32(np.round(outputs*(2**16-1)*(2**15)/fS_beta_fp/fS_gamma_fp))
        # Append result
        outputs_list.append(outputs)
      
    # // Load the inference results from the file selected by the configuration //
    # (they are written to their destination further down, with the other files)
    with open(inference_file, mode="r") as inference_fh:
      inf_results = np.genfromtxt(inference_fh, delimiter=" ")
    
    # // Get weights for each layer and quantize them //
    # Walk through the groups of the HDF5 weight file. The layer type is inferred
    # from substrings of the group name, and each type fills its own list(s):
    #   - CIM layers            -> weights_list (packed words), c_in_vec, c_out_vec
    #   - full-precision layers -> weights_FP_list (int32 column vectors)
    #   - analog BN             -> gamma_list, beta_list (uint8 codes)
    #   - full-precision BN     -> gamma_FP_list, beta_FP_list (int32 column vectors)
    weights_list = []; weights_FP_list = [];
    gamma_list = []; beta_list = [];
    gamma_FP_list = []; beta_FP_list = [];
    c_in_vec = []; c_out_vec = [];
    Nlayers_cim = 0;  Nlayers_fp = 0;
    with h5py.File(w_file,"r") as f:
      # List all groups
      list_of_keys = list(f.keys())
      # print(list_of_keys)
      for key in list_of_keys:
        # // Different cases depending upon the layer type (key) //
        # CIM-QNN layer (must be tested before the generic 'dense'/'conv' case,
        # whose substrings also match these names)
        if(('cim_charge_conv2d' in key) or ('cim_charge_dense' in key)):
          # Parameters are stored in a sub-group named like the group itself
          dataset = f[key][key];
          local_keys = list(dataset.keys());
          # First entry is read as the weights -- presumably the kernel; confirm
          w_data = dataset[local_keys[0]][()];
          # Binarize weights, then map them through (x+1)/2 and cast to int32
          # (presumably {-1,+1} -> {0,1}; confirm against layers.binary_ops)
          w_data = tf.cast((binarize(w_data,H=1.)+np.ones_like(w_data))/2,dtype="int32");
          # Get weights shape (detect FC or CONV)
          w_shape = tf.shape(w_data);
          if(len(w_shape)>1):
            # Collapse all leading dimensions, keeping the last one as columns
            w_data    = tf.reshape(w_data,(-1,w_shape[-1]));
            w_shape   = tf.shape(w_data);
          # Pad with zeros to reach the full array size
          w_data = np.pad(w_data,((0,Nrows-w_shape[0]),(0,Ncols-w_shape[1])));
          # Store layer dimensions (rows and columns of the un-padded 2D weights)
          c_in_vec.append(w_shape[-2]); c_out_vec.append(w_shape[-1]); 
          Nlayers_cim += 1;
          
          # Flatten weights in 32b words (32 one-bit weights per word, first in LSB)
          int_weights = np.dot(np.reshape(w_data,(-1,32)),2**np.arange(32));
          # Store the packed weights of this layer
          weights_list.append(int_weights);
        # Full-precision dense or conv layer
        elif(('dense' in key) or ('conv' in key)):
          dataset = f[key][key];
          local_keys = list(dataset.keys());
          # First entry is read as the weights -- presumably the kernel; confirm
          w_data = dataset[local_keys[0]][()];
          # Transform floating-point weights into full-precision signed int32
          # (scaled by 2**15/fS_beta_fp, then rounded)
          w_data = np.round(w_data*(2**15)/fS_beta_fp);
          w_data = np.int32(w_data);
          # w_data = w_data*(2**15)/fS_beta_fp;
          # Store weights as a column vector
          weights_FP_list.append(np.reshape(w_data,(-1,1)));
          # Count one more FP layer
          Nlayers_fp += 1;
        # Analog BN
        elif('analog_bn' in key):
          dataset = f[key][key];
          local_keys = list(dataset.keys());
          # Parameters are read by position in the key list; the order
          # beta, gamma, moving mean, moving variance is assumed -- confirm
          beta  = dataset[local_keys[0]][()];
          gamma = dataset[local_keys[1]][()];
          #m_sigma  = dataset[local_keys[2]][()]; # to be corrected with updated training, if necessary
          m_sigma = 1;
          mov_mean = dataset[local_keys[2]][()];
          mov_var  = dataset[local_keys[3]][()];
          
          # // Retrieve hardware parameters //
          Vmax_beta = sramInfo.Vmax_beta;
          # Offset step: Vmax_beta split into 2**(r_beta-1) levels
          Vlsb_beta = Vmax_beta/2**(r_beta-1);
          
          # // Equivalent gain computation //
          # Target variance
          sigma_goal = VDD/m_sigma; var_goal = sigma_goal*sigma_goal;
          # Get custom renorm factors (single gain for all columns)
          mov_variance_DP_t = np.mean(mov_var)/var_goal;
          sigma_DP_t = np.sqrt(mov_variance_DP_t); 
          # Get equivalent coefficients (epsilon guards against a zero deviation)
          gamma_eq = gamma/(sigma_DP_t + epsilon);
          # Get gamma encoding: the gain is stored as its rounded base-2 logarithm
          gamma_code = np.round(np.log2(gamma_eq));
          
          # // Equivalent offset computation //
          # Offset applied before the gain: gamma_eq*(x + beta_eq)
          beta_eq = beta/gamma_eq - mov_mean;
          # Get beta encoding (number of Vlsb_beta steps)
          beta_code = np.round(beta_eq/Vlsb_beta);
          print(beta_code)
          
          # // Append gamma & beta configs (uint8, encoding done during C mapping) 
          # NOTE(review): both codes can be negative before the cast; confirm that
          # astype("uint8") yields what the C-side encoding expects in that case
          gamma_list.append(gamma_code.astype("uint8"));
          beta_list.append(beta_code.astype("uint8"));
    
          
        # Full-precision BN
        elif('batch_normalization' in key):
          dataset = f[key][key];
          local_keys = list(dataset.keys());
          # Same positional read as for the analog BN (order assumed -- confirm)
          beta  = dataset[local_keys[0]][()];
          gamma = dataset[local_keys[1]][()];
          mov_mean = dataset[local_keys[2]][()];
          mov_var  = dataset[local_keys[3]][()];
          # Get equivalent coefficients, such that BN(x) = gamma_eq*(x + beta_eq)
          mov_sig = np.sqrt(mov_var);
          gamma_eq = gamma/mov_sig;
          beta_eq  = beta/gamma_eq - mov_mean;
          # Transform floating-point result into full-precision signed int32
          # (beta scaled by 2**15/fS_beta_fp, gamma by (2**16-1)/fS_gamma_fp)
          beta_eq = np.int32(np.round(beta_eq/fS_beta_fp*(2**15)));
          gamma_eq = np.int32(np.round(gamma_eq/fS_gamma_fp*(2**16-1)));
          # beta_eq = beta_eq*(2**15)/fS_beta_fp;
          # gamma_eq = gamma_eq*(2**15)/fS_gamma_fp;
          
          # Store results as column vectors
          gamma_FP_list.append(np.reshape(gamma_eq,(-1,1)));
          beta_FP_list.append(np.reshape(beta_eq,(-1,1)));
       # NOTHING TO DO FOR ACTIVATIONS/REGU LAYERS
       
    # // Verify full-precision conversion gives expected output (converted to 64b) //
    # print(np.shape(outputs_list_test[-1])); print(np.shape(weights_FP_list[0]));
    # print(np.shape(gamma_FP_list[0])); print(np.shape(beta_FP_list[0]));
    # print(np.shape(weights_FP_list[-1]))
    # print(np.reshape(weights_FP_list[-1],(-1,classes)))
    print("\n");

    ###################################################
    ########## Test FP output equivalence #############
    ###################################################
    # Recompute the first full-precision layer (MAC followed by BN) from the
    # integer operands, and compare it against the expected outputs read from
    # the per-layer output files. Only possible when those files exist (OUT_EN).
    if(OUT_EN):
      print("--- Computing FP MAC equivalence ---");
      # Operands: last CIM layer outputs (one row per image) and FP weights
      # (one column per class)
      in_FP = np.reshape(outputs_list_test[-1],(Nimg_save,-1));
      w_FP  = np.reshape(weights_FP_list[0],(-1,classes));
      n_in  = np.shape(w_FP)[0];

      # // Vectorized computation //
      temp_mac = np.dot(in_FP,w_FP);
      actual_mac = np.int32(np.round(np.squeeze(gamma_FP_list[-1])*(temp_mac+np.squeeze(beta_FP_list[-1]))));
      # Keep one row per image, so that [i,j] indexing also works for one image
      actual_mac = np.reshape(actual_mac,(Nimg_save,-1));
      expected_mac = np.reshape(outputs_list[-1],(Nimg_save,-1));

      print(actual_mac); print(expected_mac);
      print(outputs_list_test[-1]);

      # // Detailed computation (debug trace of the first image only) //
      # NOTE(review): this trace uses the BN parameters of the first FP BN layer
      # whereas the vectorized result above uses the last one; they only
      # coincide when there is a single FP BN layer -- confirm which is intended.
      gamma_FP = np.reshape(gamma_FP_list[0],-1); beta_FP = np.reshape(beta_FP_list[0],-1);
      mac_val = np.zeros((Nimg_save,classes),dtype="int32");
      for m in range(Nimg_save):
        # Perform MAC operations
        for i in range(n_in):
          # Fetch input
          in_val = in_FP[m][i];
          for j in range(classes):
            # Fetch weight
            w_val = w_FP[i][j];
            # MAC operation
            mac_val[m][j] = mac_val[m][j] + in_val*w_val;
            if(m==0 and i<8 and j==0):
              print('Input {} is {}'.format(j,in_val));
              print('Weight {} is {}'.format(j,w_val));
              print('DP {} at iter {} is {}'.format(j,i,mac_val[m][j]));
        # Print final DP value
        for j in range(classes):
          if(m==0):
            print('DP result {} is {}'.format(j,mac_val[m][j]));
        # Perform BN operations
        for j in range(classes):
          mac_val[m][j] = gamma_FP[j]*(mac_val[m][j]+beta_FP[j]);
          if(m==0):
            print('BN result {} is {}'.format(j,mac_val[m][j]));

      # // Compare actual and expected results, with a 0.1% tolerance //
      count_error = 0;
      n_out = np.shape(actual_mac)[-1];
      for i in range(Nimg_save):
        for j in range(n_out):
          act_val = actual_mac[i,j]; exp_val = expected_mac[i,j];
          if(exp_val == 0):
            # Relative error is undefined: any non-zero result is an error
            perc_error = 0.0 if (act_val == 0) else np.inf;
          else:
            perc_error = 100*np.abs(np.int32(act_val-exp_val)/np.int32(exp_val));
          if(perc_error>1e-1):
            count_error+=1;
            # Report the flat (row-major) index of the faulty word
            print("Error for FP computation {}: {} instead of {} ({:3f}%) !".format(n_out*i+j,hex(act_val),hex(exp_val),perc_error));


      if(count_error == 0):
        print('All results are correct to 0.1%, congrats !');
      else:
        print('There were {} word errors found !'.format(count_error));

    else:
      print('Warning: output results not available after training, FP comparison bypassed');
    
    
    ####################################################
    ########## Store results to text files #############
    ####################################################
    # Inputs: packed words written as hexadecimal text
    data_in = int_img.astype("uint32")
    np.savetxt(file_out_inputs, data_in, fmt='%x')
    # Outputs: one file per layer, plus a flat copy of all layers kept for the
    # FPGA files
    if OUT_EN:
      cim_outputs = np.concatenate(outputs_list, axis=None).astype("uint64")
      for layer_idx, layer_out in enumerate(outputs_list):
        layer_file = file_out_outputs + '_layer_{}.txt'.format(layer_idx)
        np.savetxt(layer_file, layer_out.astype("uint64"), fmt='%x')

    # Inference results, written as a single row
    inf_row = np.array([inf_results]).astype("uint64")
    np.savetxt(file_out_inference, inf_row, fmt='%x')

    # CIM layers: packed weights, then gamma and beta codes
    weights_cim = np.concatenate(weights_list, axis=None).astype("uint64")
    np.savetxt(file_out_weights + '.txt', weights_cim, fmt='%x')
    gamma_cim = np.concatenate(gamma_list, axis=None)
    np.savetxt(file_out_gamma + '.txt', gamma_cim, fmt='%x')
    beta_cim = np.concatenate(beta_list, axis=None)
    np.savetxt(file_out_beta + '.txt', beta_cim, fmt='%x')

    # FP layers: FC/CONV weights, then BN gamma and beta (all converted to 64b)
    weights_fp = np.concatenate(weights_FP_list, axis=None).astype("uint64")
    np.savetxt(file_out_weights_FP + '.txt', weights_fp, fmt='%x')
    gamma_fp = np.concatenate(gamma_FP_list, axis=None).astype("uint64")
    beta_fp = np.concatenate(beta_FP_list, axis=None).astype("uint64")
    np.savetxt(file_out_gamma_FP + '.txt', gamma_fp, fmt='%x')
    np.savetxt(file_out_beta_FP + '.txt', beta_fp, fmt='%x')
      
    ######################################################################
    ########## Generate final test files for on-chip testing #############
    ######################################################################
    # // Parameters folding //
    # Destination files: C header and FPGA memory files
    filename_c = path_to_chip + './cim_config.h'
    filename_fpga = [
        file_fpga_inputs,
        file_fpga_weights_cim,
        file_fpga_beta,
        file_fpga_weights_FP,
        file_fpga_inf_res,
        file_fpga_outputs,
    ]
    # CNN info: number of CIM layers, of FP layers and of saved images
    network_info = (Nlayers_cim, Nlayers_fp, Nimg_save)
    # CIM dimensions
    cim_dim = (Nrows, Ncols, Nimg_save)
    # Dimensions/channels, precisions and timings
    D_VEC = (dim, dim, c_in_vec, c_out_vec)
    P_VEC = (R_IN, R_W, R_OUT, R_BETA, R_GAMMA)
    T_VEC = (T_DP, T_PRE, T_MBIT, T_ADC)
    # Data for FPGA; the layer outputs are only available when OUT_EN is set
    data_fpga = [data_in, weights_cim, beta_list, weights_fp, inf_results.astype("int32")]
    data_fpga += [cim_outputs] if OUT_EN else []

    # // Generate C header file with hardware params //
    create_C_header(
        filename_c, network_info, cim_dim,
        D_VEC, P_VEC, T_VEC,
        gamma_cim, beta_fp, gamma_fp,
    )
    # // Generate off-chip FPGA memory files //
    create_fpga_files(
        filename_fpga, network_info, cim_dim,
        D_VEC, P_VEC, data_fpga,
    )
    
     
    # Final banner signalling that every file has been generated
    for banner_line in (
        '///////////////////////////////////////////////////////',
        '//////////////// FILES CONVERSION DONE ////////////////',
        '///////////////////////////////////////////////////////',
    ):
      print(banner_line)