2010年7月30日 星期五

BIST

在現今的 SoC 系統驗證上, 通常都會加入 BIST (Built-In Self-Test) 的電路, 除了能加快 Debug 的速度, 也可以搭配 Error correction 的機制來做到自我校正的功能. BIST 最常用在 Memory testing 上, 因為 Memory 的排列方式具有規則性 (分成 bank, page ... location, 存取方式為 read/write), 可以很簡單地用 Fault table 做查表, 把 Fault location 的位置 mapping 到其他的記憶體位置. 也可以加入 Row / Column Detection 把 Fault 的值做 update. 底下我們用最簡單的 Fault Table 來實現. sample code bist.cpp

#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <boost/crc.hpp>
#include <sstream>
#include <string>

#define MEMDEEP 10

// Debug tracing switch: build with -DDEBUG to make CheckErrTable() print every
// fault-table entry it visits. The value is chosen at definition time because a
// bare assignment ("Debug = true;") is not allowed at namespace scope.
#ifdef  DEBUG
bool Debug = true;
#else
bool Debug = false;
#endif
 
using namespace std;

// One simulated memory cell together with the integrity data kept alongside it.
struct MEM {
 unsigned int Addr;   // address given to the cell by ProWriteMEM()
 unsigned int Val;    // stored data word
 unsigned int CkSum;  // ProCheckSum(Val), filled in by ProWriteMEM()
 unsigned int Crc32;  // GetCrc32() over Val's decimal text, filled in by ProWriteMEM()
 unsigned int Status; // never written or read by the code in this file
};

// One fault-table entry: an address and the address it is redirected to.
struct ErrTable{
 unsigned int Org_Addr; // original address looked up by CheckErrTable()
 unsigned int Nw_Addr;  // replacement address reported when Org_Addr matches
};
 
// Forward declarations for helpers defined further down in this file.
void  ProWriteMEM(struct MEM *Ptr,unsigned int Addr,unsigned int Val);
void  ProReadMEM(struct MEM *Ptr,unsigned int Addr,unsigned int *Val);
// NOTE(review): no definition of ProCheckErrTable() appears in this file; the
// lookup routine that is actually defined and called is CheckErrTable().
void  ProCheckErrTable();
unsigned ProCheckSum(unsigned int x);

//=====================================
// Count the number of 1 bits in x
//=====================================
// Partial sums are widened step by step: 2-bit fields, 4-bit fields, bytes,
// 16-bit halves and finally the whole word. Only the low six bits of the last
// sum are returned.
unsigned int ProCheckSum(unsigned x) {
    const unsigned pairs   = x - ((x >> 1) & 0x55555555);
    const unsigned nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    const unsigned bytes   = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
    const unsigned halves  = bytes + (bytes >> 8);
    const unsigned total   = halves + (halves >> 16);
    return total & 0x0000003F;
}

//=====================================
// CRC32 Check
//=====================================
// Returns the checksum that boost::crc_32_type produces over the bytes of
// my_string. The argument is taken by const reference so that calling this
// function no longer copies the string.
unsigned int GetCrc32(const std::string &my_string) {
    boost::crc_32_type result;
    result.process_bytes(my_string.data(), my_string.length());
    return result.checksum();
}

// Store Val in the cell Ptr under address Addr and refresh the cell's CkSum and
// Crc32 fields. The CRC is computed over the decimal text of Val, not over its
// raw bytes.
void ProWriteMEM(struct MEM *Ptr,unsigned int Addr,unsigned int Val){
  std::stringstream text;
  text << Val;
  const std::string digits = text.str();

  Ptr->Addr  = Addr;
  Ptr->Val   = Val;
  Ptr->CkSum = ProCheckSum(Val);
  Ptr->Crc32 = GetCrc32(digits);
}

// Copy the data word held in the cell Ptr into *Val.
// Addr is accepted for symmetry with ProWriteMEM() but is not consulted.
void ProReadMEM(struct MEM *Ptr,unsigned int Addr,unsigned int *Val){
   (void)Addr;
   const unsigned int stored = Ptr->Val;
   *Val = stored;
}

// Fill one fault-table entry: Org is the original address, Nw the address it is
// redirected to.
void ProWriteErrTable(struct ErrTable *Ptr,unsigned int Org,unsigned int Nw){
   struct ErrTable &entry = *Ptr;
   entry.Org_Addr = Org;
   entry.Nw_Addr  = Nw;
}

void ProReadErrTable(struct ErrTable *Ptr,unsigned int *Org,unsigned int *Nw){
   *Org = Ptr->Org_Addr;
   *Nw  = Ptr->Nw_Addr;
}

// Search the MEMDEEP-entry fault table for the address Org.
//
// Ptr : array of MEMDEEP pointers to fault-table entries
// Org : address to look up
// Nw  : receives the redirected address when a match is found; left untouched
//       otherwise
//
// Returns 0 for the first entry whose Org_Addr equals Org and whose Nw_Addr
// differs from it, or -1 when no such entry exists. The result is now
// well-defined even if the loop body never runs.
int CheckErrTable(struct ErrTable *Ptr[],unsigned int Org,unsigned int *Nw){
  unsigned int Org_Addr = 0;
  unsigned int Nw_Addr  = 0;

  for(int i=0; i<MEMDEEP; i++){
      ProReadErrTable(Ptr[i],&Org_Addr,&Nw_Addr);
      if( Debug ){
          printf("Table ID:: %d, Org_Addr ::%x, Nw_Addr ::%x\n",i,Org_Addr,Nw_Addr);
      }

      // An entry that maps an address onto itself does not count as a remap.
      if( Org == Org_Addr && Org_Addr != Nw_Addr ){
          *Nw = Nw_Addr;
          return 0;
      }
  }
  return -1;
}





// Demo driver for the fault-table BIST model.
//
// Allocates MEMDEEP memory cells and MEMDEEP fault-table entries, writes five
// regular cells, registers two faulty addresses together with their spare
// cells, then looks up address 0x8 and prints the spare cell it maps to.
// Returns 0 on success and 1 if an allocation fails.
int main(int argc,char *argv[]){
   (void)argc;
   (void)argv;

   struct MEM      *MEMPtr[MEMDEEP];
   struct ErrTable *ErrPtr[MEMDEEP];
   int i;

   // Start from NULL so the cleanup loop below is safe after a partial failure.
   for(i=0; i<MEMDEEP; i++){
       MEMPtr[i] = NULL;
       ErrPtr[i] = NULL;
   }

   // calloc zero-fills every entry: the cells and table rows that are never
   // written below are still read later (by CheckErrTable() and by the search
   // loop), so they must not hold indeterminate values.
   for(i=0; i<MEMDEEP; i++){
       MEMPtr[i] = (struct MEM      *) calloc( 1, sizeof(struct MEM     ) );
       ErrPtr[i] = (struct ErrTable *) calloc( 1, sizeof(struct ErrTable) );
       if( MEMPtr[i] == NULL || ErrPtr[i] == NULL ){
           fprintf(stderr,"BIST: memory allocation failed\n");
           for(i=0; i<MEMDEEP; i++){
               free(MEMPtr[i]);
               free(ErrPtr[i]);
           }
           return 1;
       }
   }

   // Initial contents of the regular memory cells
   ProWriteMEM(MEMPtr[0],0x00000000,0);
   ProWriteMEM(MEMPtr[1],0x00000004,1);
   ProWriteMEM(MEMPtr[2],0x00000008,2);
   ProWriteMEM(MEMPtr[3],0x0000000c,3);
   ProWriteMEM(MEMPtr[4],0x00000010,4);

   // Register the faulty locations in the fault table and fill their spares
   ProWriteErrTable(ErrPtr[0],0x00000008,0x00000018);
   ProWriteMEM(MEMPtr[5],0x00000018,2);

   ProWriteErrTable(ErrPtr[1],0x0000000c,0x0000001c);
   ProWriteMEM(MEMPtr[6],0x0000001c,3);

   // Check whether the address is listed as faulty
   unsigned int Org_Addr = 0x00000008;
   unsigned int Nw_Addr  = 0;
   unsigned int rst      = 0;

   int c = CheckErrTable(ErrPtr,Org_Addr,&Nw_Addr);
   cout <<c<<endl;
   if( c != -1 ){
       // Spare cells live in the upper half of the cell array.
       for(i=MEMDEEP>>1; i<MEMDEEP; i++){
           if( MEMPtr[i]->Addr == Nw_Addr ){
               ProReadMEM(MEMPtr[i],Nw_Addr,&rst);
               printf("Find Fault @ %x -> %x :: data %u\n",Org_Addr,Nw_Addr,rst);
           }
       }
   }

   // Release every entry, not just the first one of each array.
   for(i=0; i<MEMDEEP; i++){
       free(MEMPtr[i]);
       free(ErrPtr[i]);
   }
   return 0;
}
compile
g++ -I/${boost_lib} -o bist bist.cpp
Refs: CRC32 Boost c++ Check number of 1's counts c++ int2string code download here

2010年7月23日 星期五

Bus communication with thread emulator @c

底下我們利用 "pthread" 的方式模擬 Bus 的 communication, 因為還是在 High Level Architecture view, 所以在Protocol 部份我們用 Package 來傳輸, 可節省實際在設計上的複雜度,也可以加速驗證的流程. sample part @ c
/*
 * Bus-master thread body.
 *
 * Runs the master state machine IDLE -> REQ -> NONSEQ -> SEQ -> RD_DON/WT_DON
 * -> IDLE. Each pass through IDLE randomly picks a write (AMBA_ON) or a read
 * (AMBA_OFF). The thread ends itself with pthread_exit() once COT equals
 * DON_COT.
 *
 * t : thread id handed over by pthread_create(); not used.
 *
 * Changes relative to the earlier version:
 *  - the test vector is pushed into AHB2APB_fifo BEFORE master2slave is
 *    signalled, inside one critical section, so a woken waiter cannot find the
 *    FIFO still empty and miss the wakeup;
 *  - the read-done state waits for and drains APB2AHB_fifo without dropping
 *    the mutex in between;
 *  - COT is compared with DON_COT while holding count_mutex, the same lock
 *    that guards its increments.
 */
void *AHB2APB_MASTER_DO(void *t){
    int i;
    int done;

    (void)t;
    srand ( time(NULL) );

    for(;;){
        AHB2APB_MASTER_MESSAGE_INFO(MS_CUR_ST);

        switch(MS_CUR_ST){
            case BG_MS_IDLE     :
                WT_SEL = ( rand()%2 == 0)? AMBA_ON : AMBA_OFF;
                MS_NXT_ST = BG_MS_REQ;
                break;

            case BG_MS_REQ      :
                MS_NXT_ST = BG_MS_NONSEQ;
                break;

            case BG_MS_NONSEQ   :
                pthread_mutex_lock(&count_mutex);
                /* Queue the vector only while the FIFO still has room. */
                if( AHB2APB_fifo.FIFO_INX != FIFO_DEP ){
                    if( WT_SEL == AMBA_ON ){
                        /* write case */
                        AHB2APB_fifo.VEC_LIST[AHB2APB_fifo.FIFO_INX++] = &vecptr_1;
                    }else if( WT_SEL == AMBA_OFF ){
                        /* read case */
                        AHB2APB_fifo.VEC_LIST[AHB2APB_fifo.FIFO_INX++] = &vecptr_2;
                    }
                }
                /* Signalled whether or not a vector was queued, as before. */
                pthread_cond_signal(&master2slave);
                MS_NXT_ST = BG_MS_SEQ;
                pthread_mutex_unlock(&count_mutex);
                break;

            case BG_MS_SEQ      :
                MS_NXT_ST = ( WT_SEL == AMBA_ON )? BG_MS_WT_DON : BG_MS_RD_DON;
                break;

            case BG_MS_RD_DON   :
                pthread_mutex_lock(&count_mutex);
                /* Block until the return FIFO holds at least one vector. */
                while( APB2AHB_fifo.FIFO_INX ==0 ) {
                    pthread_cond_wait(&slave2master, &count_mutex);
                }
                for(i=0; i< APB2AHB_fifo.FIFO_INX; i++){
                    AHB2APB_TESTVEC_MESSAGE_INFO(APB2AHB_fifo.VEC_LIST[i]);
                }
                APB2AHB_fifo.FIFO_INX =0;
                MS_NXT_ST = BG_MS_IDLE;
                COT++;
                pthread_mutex_unlock(&count_mutex);
                break;

            case BG_MS_WT_DON   :
                pthread_mutex_lock(&count_mutex);
                MS_NXT_ST = BG_MS_IDLE;
                COT++;
                pthread_mutex_unlock(&count_mutex);
                break;
        }

        MS_CUR_ST = MS_NXT_ST;

        pthread_mutex_lock(&count_mutex);
        done = ( COT == DON_COT );
        pthread_mutex_unlock(&count_mutex);

        if( done ){ pthread_exit(NULL); }
    }
}


/*
 * Test-bench entry point: builds the two test vectors, clears both FIFOs,
 * starts the slave and master threads as joinable threads and waits for both
 * to finish.
 *
 * Returns 1 if either thread cannot be created; returning from main() ends the
 * process, including a thread that was already started.
 *
 * NOTE(review): slave2master is waited on by the master thread but is not
 * initialised or destroyed here, unlike master2slave. Confirm that it is
 * statically initialised where it is defined; otherwise it needs a matching
 * pthread_cond_init()/pthread_cond_destroy() pair in this function.
 */
int main(int argc,char *argv[]){
  long t1=1, t2=2;
  pthread_t threads[2];
  pthread_attr_t attr;

  (void)argc;
  (void)argv;

  /*=======================
   *    Pattern Gen
   *=======================*/
  AHB2APB_TESTVEC_GEN_1(&vecptr_1);
  /* AHB2APB_TESTVEC_MESSAGE_INFO(&vecptr_1); */

  AHB2APB_TESTVEC_GEN_2(&vecptr_2);
  /* AHB2APB_TESTVEC_MESSAGE_INFO(&vecptr_2); */

  /*=======================
   * Reset FIFO
   *=======================*/
  AHB2APB_fifo.FIFO_INX =0;
  APB2AHB_fifo.FIFO_INX =0;

  /* AHB2APB_fifo.VEC_LIST[0] = &vecptr_1; */
  /* AHB2APB_TESTVEC_MESSAGE_INFO(AHB2APB_fifo.VEC_LIST[0]); */

  pthread_mutex_init(&count_mutex, NULL);
  pthread_cond_init (&master2slave, NULL);

  pthread_attr_init(&attr);
  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);

  /* A failed pthread_create() leaves the thread handle unusable, so never
   * fall through to pthread_join() in that case. */
  if( pthread_create(&threads[0], &attr, AHB2APB_SLAVE_DO,(void *)t1) != 0 ){
      return 1;
  }
  if( pthread_create(&threads[1], &attr, AHB2APB_MASTER_DO,(void *)t2) != 0 ){
      return 1;
  }

  pthread_join(threads[0], NULL);
  pthread_join(threads[1], NULL);

  pthread_attr_destroy(&attr);
  pthread_cond_destroy(&master2slave);

  pthread_mutex_destroy(&count_mutex);
  pthread_exit(NULL);
}
code download here compile % gcc -o tt bridge.c -lpthread Refs: https://computing.llnl.gov/tutorials/pthreads/ http://www.yolinux.com/TUTORIALS/LinuxTutorialPosixThreads.html

iPad report ...

有興趣的人可以下載由 Gartner 所提供的 iPad report. iPad Architecture overview from Gartner.

2010年7月19日 星期一

System Bus Bridge Emulator @ SystemC

Hi all, how do IPs in different clock domains communicate over a bus architecture? I think a bridge is a good answer. What is a "bridge", and what are its benefits? You can look at ARM (AHB 2 APB BUS Bridge) or X86 (PCI 2 PCI BUS Bridge)... to understand why we use one and how it works. Ref: AHB to APB Bridge (AHB2APB) Here we model a system bus bridge emulator in SystemC code. Be aware that it only supports the High Speed bus 2 Low Speed bus direction; if you want both directions supported, you can rewrite it for the Low 2 High case and rebuild it. This "IP" supports the features below. Features 1. Read/Write Buffers (FIFO): the FIFOs hold transactions while they wait for the bus to be granted, which gives higher bus utilization. 2. Parallel tasks for High/Low Speed Bus transactions: the Master and the Slave each have their own state machine, and the communication between them is handled by a Latch Design. Ref: 3. Read-back preload: among the Read/Write transactions, the Read transaction is the critical case. Why? Because the data is not yet present in the Bridge, so we have to request it from the Slave on the other side and keep it in our Bridge Buffer until a new Read Request hits it. 4. Write through: like a Cache design, it can store commonly used data and map all the Slaves directly, so it reduces the number of requests and increases the bus performance. Ref: Cycle Chart http://spreadsheets.google.com/pub?key=0An5jh2lwIpeKdE9nQnJCME1UdnNicl9oYTJZNHpVN3c&hl=en&output=html source code download here.

2010年7月7日 星期三

DVFS emulator

DVFS(Dynamic voltage frequency scaling)利用動態的調整"電壓","頻率"的方式達到low power的效果.當然也可以搭配"Power Management"來做到 Power Saving 的功效.原理和方法請參考底下的Refs. Refs: DVFS Voltage Scaling and DVFS 動態電壓與頻率調節在降低功耗中的作用 power management 4 Linux power monitor part1 peak power Constrains : 1.底下假設每個Model都各有不同的Time and Power,且先後順序也已知.
          // CPU task: id 0, sequence-list entry 0, three (power, time) work types.
          TsCPUPtr->Set_cTsID(0);
          TsCPUPtr->Set_cTsSeq(0);           // sequence-list entry 0
          TsCPUPtr->Set_cTsStr("CPU_TASK");
          TsCPUPtr->Set_cTs_WORK_1(20,10);  // work type 1: power 20 units (W), time 10 units (s)
          TsCPUPtr->Set_cTs_WORK_2(10,20);  // work type 2: power 10 units (W), time 20 units (s)
          TsCPUPtr->Set_cTs_WORK_3(5,40);   // work type 3: power 5 units (W), time 40 units (s)

          // DMA task: id 1, sequence-list entry 1, same (power, time) argument order.
          TsDMAPtr->Set_cTsID(1);
          TsDMAPtr->Set_cTsSeq(1);
          TsDMAPtr->Set_cTsStr("DMA_TASK");
          TsDMAPtr->Set_cTs_WORK_1(30,20);
          TsDMAPtr->Set_cTs_WORK_2(15,40);
          TsDMAPtr->Set_cTs_WORK_3(10,60);
2. 假設每組Model都各有3組不同的Work Type.在時間先後順序的constrain下,我們可以找到所有的Solution Spaces...且滿足我們的Time constrain@ (50unit)
   ( CPU_WORK_1_Time + DMA_WORK_1_Time <= Time_Constrain )? Pass : fail     ( CPU_WORK_1_Time + DMA_WORK_2_Time <= Time_Constrain )? Pass : fail     ( CPU_WORK_1_Time + DMA_WORK_3_Time <= Time_Constrain )? Pass : fail     ( CPU_WORK_1_Time + DMA_WORK_1_Time + DMA_WORK_2_Time <= Time_Constrain )? Pass : fail 
3.透過Time constrain的方式可以找到符合假設的解空間.最後再透過Power的比較找出最小的total Power解.
   ( CPU_WORK_1_Time(10) + DMA_WORK_1_Time(20) <= Time_Constrain(50) )? Pass : fail     ( CPU_WORK_1_Power(20) + DMA_WORK_1_Power(30) ) = 50      ( CPU_WORK_1_Time(10) + DMA_WORK_2_Time(40) <= Time_Constrain(50) )? Pass : fail     ( CPU_WORK_1_Power(20) + DMA_WORK_2_Power(15) ) = 35 
Results This Model only supports @ CPU to DMA time windows CPU to BUS if you want another time windows, Ref "SetModel_SolSpaceAns()" Power And Time Set , Ref "SetModel_TASKPowerTime()" Timing Sequence Space solutions , Ref "SetModel_WorkSolSpaceWt()" Timing Sequence Space Solutions 4 C(m,n) case 3 2 1 3 2 3 1 2 1 3 2 1 Final Answers Sol CPU @ Time Seq ::1 Sol DMA @ Time Seq ::2 ========================== Sol CPU @ Time Seq ::2 Sol DMA @ Time Seq ::1 ========================== Sol CPU @ Time Seq ::2 Sol DMA @ Time Seq ::2 ========================== Sol CPU @ Time Seq ::5 Sol DMA @ Time Seq ::2 ========================== code download here...

2010年7月2日 星期五

AHB Platform emulator @ SystemC

Hi all, we have updated our AHB virtual platform SystemC code. In this new version we add an "arbiter": a simple arbitration scheme that selects which IP is granted the bus according to its priority. We also fix some bugs from our previous release. You can check the update list below to update your platform, or re-download it here. Update list: 1. Response Error handling: Retry, SPLIT .... are supported, which can release the deadlock case between Master request and Arbiter arbitration. 2. Slave to Master Decoder: decodes the "Read" data path back to the Master and determines which Master should release its bus ownership. 3. Master to Slave Decoder: decodes the "Write" data path to the Slave and performs the Slave selection. 4. Arbiter detection (fixed priority): selects which Master owns the bus for the current transfer. Ref : AMBA 4 ARM AHB Master emulator @ SystemC AHB Slave emulator @ SystemC AMBA 3.0 AXI Protocol the new AMBA protocol.

Image Scalar 4 ESL Flow HW c++ code

延續之前的 Post.

ESL Design Flow

Image Scalar 4 ESL Flow Golden Model

Image Scalar 4 ESL Flow HW Algorithm

底下用 c++ code 來驗證我們的 HW Algorithm, 當然你可以自行插入一些 INFO ("Power Monitor", "Communication Monitor ") HW_scalar.cpp



#include <iostream>

#define LEN 4 

#define FM_BUF_DEP 3 


// States of the scaler control FSM. HW_SCALAR() chooses the successor of each
// state and PRO_STATUS_DO() runs the matching PRO_ST_* handler.
enum STATUS {
  ST_IDLE      =0, // reset all indices and compute BOUND_COT
  ST_PRE_RRDBUF0    =1, // read from RD_EXT_MEM into RD_FM_BUF_0
  ST_PRE_RRDBUF1_DRDBUF0toWTBUF0  =2, // read from RD_EXT_MEM into RD_FM_BUF_1 and compute WT_FM_BUF_0 from RD_FM_BUF_0
  ST_REC_WWTBUF0_DRDBUF01toWTBUF1 =3, // write WT_FM_BUF_0 to WT_EXT_MEM and compute WT_FM_BUF_1 from RD_FM_BUF_0/RD_FM_BUF_1
  ST_REC_SRDBUF1to0_SWTBUF1to0  =4, // shift RD_FM_BUF_1 into RD_FM_BUF_0 and WT_FM_BUF_1 into WT_FM_BUF_0
  ST_REC_RRDBUF1     =5, // read from RD_EXT_MEM into RD_FM_BUF_1
  ST_REC_WWTBUF0_DRDBUF0toWTBUF1  =6, // write WT_FM_BUF_0 to WT_EXT_MEM and compute WT_FM_BUF_1 from RD_FM_BUF_0
  ST_REC_SWTBUF1to0   =7, // shift WT_FM_BUF_1 into WT_FM_BUF_0
  ST_END_WWTBUF0_DRDBUF0toWTBUF1 =8, // same work as state 6; entered once RD_EXT_MEM_INX_i == LEN
  ST_END_SWTBUF1to0   =9, // shift WT_FM_BUF_1 into WT_FM_BUF_0
  ST_END_WWTBUF0   =10, // write WT_FM_BUF_0 to WT_EXT_MEM
  ST_END_WWTBUF0_BND   =11, // write WT_FM_BUF_0 again, move the column window, reset the row indices
  ST_DONE    =12, // terminal state; the HW_SCALAR() loop stops here
};

// Burst-length codes. Not referenced by any other code in this file.
enum BURST {
  BURST_1 =0,
  BURST_4 =1,
  BURST_8 =2,
  BURST_16 =3,
};

using namespace std;


// One handler per FSM state (see enum STATUS), followed by the dispatcher
// PRO_STATUS_DO() and the FSM driver HW_SCALAR().
void PRO_ST_IDLE();    
void PRO_ST_PRE_RRDBUF0();    
void PRO_ST_PRE_RRDBUF1_DRDBUF0toWTBUF0();  
void PRO_ST_REC_WWTBUF0_DRDBUF01toWTBUF1(); 
void PRO_ST_REC_SRDBUF1to0_SWTBUF1to0();  
void PRO_ST_REC_RRDBUF1();    
void PRO_ST_REC_WWTBUF0_DRDBUF0toWTBUF1();  
void PRO_ST_REC_SWTBUF1to0();   
void PRO_ST_END_WWTBUF0_DRDBUF0toWTBUF1();  
void PRO_ST_END_SWTBUF1to0();   
void PRO_ST_END_WWTBUF0();    
void PRO_ST_END_WWTBUF0_BND();   
void PRO_ST_DONE();    
void PRO_STATUS_DO();
void HW_SCALAR();


// Source image, LEN x LEN, addressed as LEN*row + column.
int RD_EXT_MEM[LEN*LEN];
// Scaled image, (2*LEN) x (2*LEN), addressed as 2*LEN*row + column.
int WT_EXT_MEM[(2*LEN)*(2*LEN)];


// Two input line buffers, each holding FM_BUF_DEP pixels of one source row.
int RD_FM_BUF_0[FM_BUF_DEP];
int RD_FM_BUF_1[FM_BUF_DEP];
// Not referenced by any other code in this file.
int BD_FM_BUF[LEN];

// Two output line buffers, each holding 2*FM_BUF_DEP pixels of one output row.
int WT_FM_BUF_0[2*FM_BUF_DEP];
int WT_FM_BUF_1[2*FM_BUF_DEP];

// Column-window bookkeeping: BOUND_COT is computed in PRO_ST_IDLE() and
// decremented in PRO_ST_END_WWTBUF0_BND(); BOUND_COT_INX_j is the first source
// column of the current window.
int BOUND_COT;
int BOUND_COT_INX_j;

// Row (i) and in-window column (j) cursors for reads from RD_EXT_MEM.
int RD_EXT_MEM_INX_i;
int RD_EXT_MEM_INX_j;

// Row (i) and in-window column (j) cursors for writes to WT_EXT_MEM.
int WT_EXT_MEM_INX_i;
int WT_EXT_MEM_INX_j;

// Current and next FSM state (values of enum STATUS).
int cur_st;
int nxt_st;


// IDLE handler: clear every cursor, then count how many further column windows
// are needed after the first one to cover a LEN-pixel row.
void PRO_ST_IDLE(){
    RD_EXT_MEM_INX_i = 0;
    RD_EXT_MEM_INX_j = 0;
    WT_EXT_MEM_INX_i = 0;
    WT_EXT_MEM_INX_j = 0;
    BOUND_COT_INX_j  = 0;
    BOUND_COT        = 0;

    // The first window spans FM_BUF_DEP columns; each later one is counted as
    // FM_BUF_DEP-1 additional columns.
    for(int covered = FM_BUF_DEP; covered < LEN; covered += FM_BUF_DEP-1){
        ++BOUND_COT;
    }
}

// Fill the source image with the ramp 0, 1, 2, ... and echo every
// "index : value" pair to stdout.
void INI_RD_EXT_MEM(){
    int pixel = 0;
    for(int idx = 0; idx < LEN*LEN; ++idx, ++pixel){
        std::cout << idx << " : " << pixel << std::endl;
        RD_EXT_MEM[idx] = pixel;
    }
}


// DONE handler: dump the whole scaled image as "index : value" lines.
void PRO_ST_DONE(){
    const int total = (2*LEN)*(2*LEN);
    int idx = 0;
    while( idx < total ){
        std::cout << idx << " : " << WT_EXT_MEM[idx] << std::endl;
        ++idx;
    }
}

// END/BND handler: write the last output line of the current column window,
// then move on to the next window and rewind all row/column cursors.
void PRO_ST_END_WWTBUF0_BND(){
    // Write WT_FM_BUF_0 into the current output row of the current window.
    const int base = 2*LEN*WT_EXT_MEM_INX_i + 2*BOUND_COT_INX_j + WT_EXT_MEM_INX_j;
    for(int k = 0; k < 2*FM_BUF_DEP; ++k){
        WT_EXT_MEM[base + k] = WT_FM_BUF_0[k];
    }

    // With exactly one window left, place it so that it ends on the last
    // column; otherwise step the window start by FM_BUF_DEP-1 columns.
    if( BOUND_COT != 1 ){
        BOUND_COT_INX_j += FM_BUF_DEP-1;
    }else{
        BOUND_COT_INX_j = LEN - FM_BUF_DEP;
    }
    --BOUND_COT;

    // Start the next window from the top row again.
    RD_EXT_MEM_INX_i = 0;
    RD_EXT_MEM_INX_j = 0;
    WT_EXT_MEM_INX_i = 0;
    WT_EXT_MEM_INX_j = 0;
}


// END handler: write WT_FM_BUF_0 as the next output line and advance the
// output row cursor.
void PRO_ST_END_WWTBUF0(){
    const int base = 2*LEN*WT_EXT_MEM_INX_i + 2*BOUND_COT_INX_j + WT_EXT_MEM_INX_j;
    int k = 0;
    while( k < 2*FM_BUF_DEP ){
        WT_EXT_MEM[base + k] = WT_FM_BUF_0[k];
        ++k;
    }

    ++WT_EXT_MEM_INX_i;
    WT_EXT_MEM_INX_j = 0;
}

// END handler: shift the pending output line (WT_FM_BUF_1) into WT_FM_BUF_0.
void PRO_ST_END_SWTBUF1to0(){
    int remaining = 2*FM_BUF_DEP;
    while( remaining-- > 0 ){
        WT_FM_BUF_0[remaining] = WT_FM_BUF_1[remaining];
    }
}

// END handler: write WT_FM_BUF_0 as the next output line, then build
// WT_FM_BUF_1 from RD_FM_BUF_0 alone (horizontal interpolation only).
void PRO_ST_END_WWTBUF0_DRDBUF0toWTBUF1(){
    // Write
    const int base = 2*LEN*WT_EXT_MEM_INX_i + 2*BOUND_COT_INX_j + WT_EXT_MEM_INX_j;
    for(int k = 0; k < 2*FM_BUF_DEP; ++k){
        WT_EXT_MEM[base + k] = WT_FM_BUF_0[k];
    }
    ++WT_EXT_MEM_INX_i;
    WT_EXT_MEM_INX_j = 0;

    // Compute: even slots copy a pixel, odd slots hold the floor-average of
    // the pixel and its right-hand neighbour.
    const int last = FM_BUF_DEP-1;
    for(int c = 0; c < last; ++c){
        const int left  = RD_FM_BUF_0[c];
        const int right = RD_FM_BUF_0[c+1];
        WT_FM_BUF_1[2*c]   = left;
        WT_FM_BUF_1[2*c+1] = (left + right) >>1;
    }
    // The last pixel has no right-hand neighbour, so it fills both slots.
    WT_FM_BUF_1[2*last+1] = RD_FM_BUF_0[last];
    WT_FM_BUF_1[2*last]   = RD_FM_BUF_0[last];
}


// REC handler: shift the pending output line (WT_FM_BUF_1) into WT_FM_BUF_0.
void PRO_ST_REC_SWTBUF1to0(){
    const int *src = WT_FM_BUF_1;
    int       *dst = WT_FM_BUF_0;
    for(int n = 2*FM_BUF_DEP; n > 0; --n){
        *dst++ = *src++;
    }
}


// REC handler: write WT_FM_BUF_0 as the next output line, then build
// WT_FM_BUF_1 from RD_FM_BUF_0 alone (horizontal interpolation only).
void PRO_ST_REC_WWTBUF0_DRDBUF0toWTBUF1(){
    // Write
    const int base = 2*LEN*WT_EXT_MEM_INX_i + 2*BOUND_COT_INX_j + WT_EXT_MEM_INX_j;
    int k = 0;
    while( k < 2*FM_BUF_DEP ){
        WT_EXT_MEM[base + k] = WT_FM_BUF_0[k];
        ++k;
    }
    ++WT_EXT_MEM_INX_i;
    WT_EXT_MEM_INX_j = 0;

    // Compute: each source pixel is copied to an even slot and averaged
    // (rounding down) with its right-hand neighbour for the odd slot.
    const int last = FM_BUF_DEP-1;
    for(int c = 0; c != last; ++c){
        const int here = RD_FM_BUF_0[c];
        const int next = RD_FM_BUF_0[c+1];
        WT_FM_BUF_1[2*c]   = here;
        WT_FM_BUF_1[2*c+1] = (here + next) >>1;
    }
    // Edge pixel: duplicated into the final two slots.
    WT_FM_BUF_1[2*last+1] = RD_FM_BUF_0[last];
    WT_FM_BUF_1[2*last]   = RD_FM_BUF_0[last];
}


// REC handler: load the next source row of the current column window into
// RD_FM_BUF_1 and advance the source row cursor.
void PRO_ST_REC_RRDBUF1(){
    const int base = LEN*RD_EXT_MEM_INX_i + BOUND_COT_INX_j + RD_EXT_MEM_INX_j;
    for(int k = 0; k < FM_BUF_DEP; ++k){
        RD_FM_BUF_1[k] = RD_EXT_MEM[base + k];
    }

    ++RD_EXT_MEM_INX_i;
    RD_EXT_MEM_INX_j = 0;
}

// REC handler: shift both pipelines by one line, RD_FM_BUF_1 into RD_FM_BUF_0
// and WT_FM_BUF_1 into WT_FM_BUF_0.
void PRO_ST_REC_SRDBUF1to0_SWTBUF1to0(){
    for(int n = 0; n < 2*FM_BUF_DEP; ++n){
        // The input buffers are only FM_BUF_DEP entries long.
        if( n < FM_BUF_DEP ){
            RD_FM_BUF_0[n] = RD_FM_BUF_1[n];
        }
        WT_FM_BUF_0[n] = WT_FM_BUF_1[n];
    }
}

// REC handler: write WT_FM_BUF_0 as the next output line, then build the
// in-between output line WT_FM_BUF_1 from the two source rows held in
// RD_FM_BUF_0 and RD_FM_BUF_1.
void PRO_ST_REC_WWTBUF0_DRDBUF01toWTBUF1(){
    // Write
    const int base = 2*LEN*WT_EXT_MEM_INX_i + 2*BOUND_COT_INX_j + WT_EXT_MEM_INX_j;
    for(int k = 0; k < 2*FM_BUF_DEP; ++k){
        WT_EXT_MEM[base + k] = WT_FM_BUF_0[k];
    }
    ++WT_EXT_MEM_INX_i;
    WT_EXT_MEM_INX_j = 0;

    // Compute: even slots average the two rows vertically, odd slots average
    // the 2x2 neighbourhood (both rounding down).
    const int last = FM_BUF_DEP-1;
    for(int c = 0; c < last; ++c){
        const int upper      = RD_FM_BUF_0[c];
        const int lower      = RD_FM_BUF_1[c];
        const int upperRight = RD_FM_BUF_0[c+1];
        const int lowerRight = RD_FM_BUF_1[c+1];
        WT_FM_BUF_1[2*c]   = (upper + lower) >>1;
        WT_FM_BUF_1[2*c+1] = (upper + lower + upperRight + lowerRight)>>2;
    }

    // Last column: no right-hand neighbour, so both slots get the vertical average.
    WT_FM_BUF_1[2*last+1] = (RD_FM_BUF_0[last] + RD_FM_BUF_1[last]) >>1;
    WT_FM_BUF_1[2*last]   = (RD_FM_BUF_0[last] + RD_FM_BUF_1[last]) >>1;
}

// PRE handler: load the second source row of the window into RD_FM_BUF_1 and
// build the first output line WT_FM_BUF_0 from RD_FM_BUF_0.
void PRO_ST_PRE_RRDBUF1_DRDBUF0toWTBUF0(){
    // Read
    const int base = LEN*RD_EXT_MEM_INX_i + BOUND_COT_INX_j + RD_EXT_MEM_INX_j;
    for(int k = 0; k < FM_BUF_DEP; ++k){
        RD_FM_BUF_1[k] = RD_EXT_MEM[base + k];
    }
    ++RD_EXT_MEM_INX_i;
    RD_EXT_MEM_INX_j = 0;

    // Compute: copy each pixel to an even slot, floor-average it with its
    // right-hand neighbour for the odd slot.
    const int last = FM_BUF_DEP-1;
    for(int c = 0; c < last; ++c){
        const int left  = RD_FM_BUF_0[c];
        const int right = RD_FM_BUF_0[c+1];
        WT_FM_BUF_0[2*c]   = left;
        WT_FM_BUF_0[2*c+1] = (left + right) >>1;
    }

    // The final pixel is replicated into the last two slots.
    WT_FM_BUF_0[2*last+1] = RD_FM_BUF_0[last];
    WT_FM_BUF_0[2*last]   = RD_FM_BUF_0[last];
}


// PRE handler: load the first source row of the current column window into
// RD_FM_BUF_0 and advance the source row cursor.
void PRO_ST_PRE_RRDBUF0(){
    const int base = LEN*RD_EXT_MEM_INX_i + BOUND_COT_INX_j + RD_EXT_MEM_INX_j;
    int k = 0;
    while( k < FM_BUF_DEP ){
        RD_FM_BUF_0[k] = RD_EXT_MEM[base + k];
        ++k;
    }

    ++RD_EXT_MEM_INX_i;
    RD_EXT_MEM_INX_j = 0;
}


// Print the current FSM state and every cursor/counter, one per line, followed
// by two blank lines.
void PRO_DEBUG_INFO(){
    struct Field { const char *label; const int *value; };
    const Field fields[] = {
        { "cur_st           :: ", &cur_st           },
        { "RD_EXT_MEM_INX_i :: ", &RD_EXT_MEM_INX_i },
        { "RD_EXT_MEM_INX_j :: ", &RD_EXT_MEM_INX_j },
        { "WT_EXT_MEM_INX_i :: ", &WT_EXT_MEM_INX_i },
        { "WT_EXT_MEM_INX_j :: ", &WT_EXT_MEM_INX_j },
        { "BOUND_COT_INX_j  :: ", &BOUND_COT_INX_j  },
        { "BOUND_COT        :: ", &BOUND_COT        },
    };
    const int count = sizeof(fields) / sizeof(fields[0]);

    for(int f = 0; f < count; ++f){
        std::cout << fields[f].label << *fields[f].value << std::endl;
    }

    std::cout << std::endl;
    std::cout << std::endl;
}

// Drive the scaler FSM from ST_IDLE until ST_DONE is reached.
// In every step the successor state is chosen first, from the values the
// globals hold BEFORE the current state's handler runs; then the handler and
// the debug dump are executed.
void HW_SCALAR(){
    for( cur_st = ST_IDLE; cur_st != ST_DONE; cur_st = nxt_st ){
        switch(cur_st){
            case ST_IDLE:
                nxt_st = ST_PRE_RRDBUF0;
                break;
            case ST_PRE_RRDBUF0:
                nxt_st = ST_PRE_RRDBUF1_DRDBUF0toWTBUF0;
                break;
            case ST_PRE_RRDBUF1_DRDBUF0toWTBUF0:
                nxt_st = ST_REC_WWTBUF0_DRDBUF01toWTBUF1;
                break;
            case ST_REC_WWTBUF0_DRDBUF01toWTBUF1:
                nxt_st = ST_REC_SRDBUF1to0_SWTBUF1to0;
                break;
            case ST_REC_SRDBUF1to0_SWTBUF1to0:
                // Every source row has been read: enter the END sequence.
                if( RD_EXT_MEM_INX_i == LEN ){
                    nxt_st = ST_END_WWTBUF0_DRDBUF0toWTBUF1;
                }else{
                    nxt_st = ST_REC_RRDBUF1;
                }
                break;
            case ST_REC_RRDBUF1:
                nxt_st = ST_REC_WWTBUF0_DRDBUF0toWTBUF1;
                break;
            case ST_REC_WWTBUF0_DRDBUF0toWTBUF1:
                nxt_st = ST_REC_SWTBUF1to0;
                break;
            case ST_REC_SWTBUF1to0:
                nxt_st = ST_REC_WWTBUF0_DRDBUF01toWTBUF1;
                break;
            case ST_END_WWTBUF0_DRDBUF0toWTBUF1:
                nxt_st = ST_END_SWTBUF1to0;
                break;
            case ST_END_SWTBUF1to0:
                nxt_st = ST_END_WWTBUF0;
                break;
            case ST_END_WWTBUF0:
                nxt_st = ST_END_WWTBUF0_BND;
                break;
            case ST_END_WWTBUF0_BND:
                // No column window left: finish; otherwise start the next one.
                if( BOUND_COT == 0 ){
                    nxt_st = ST_DONE;
                }else{
                    nxt_st = ST_PRE_RRDBUF0;
                }
                break;
            case ST_DONE:
                break;
        }

        PRO_STATUS_DO();

        PRO_DEBUG_INFO();
    }
}

// Run the handler that belongs to the current state. A cur_st value that does
// not match any state runs nothing.
void PRO_STATUS_DO(){
    void (*handler)() = 0;

    switch(cur_st){
        case ST_IDLE:                         handler = PRO_ST_IDLE;                         break;
        case ST_PRE_RRDBUF0:                  handler = PRO_ST_PRE_RRDBUF0;                  break;
        case ST_PRE_RRDBUF1_DRDBUF0toWTBUF0:  handler = PRO_ST_PRE_RRDBUF1_DRDBUF0toWTBUF0;  break;
        case ST_REC_WWTBUF0_DRDBUF01toWTBUF1: handler = PRO_ST_REC_WWTBUF0_DRDBUF01toWTBUF1; break;
        case ST_REC_SRDBUF1to0_SWTBUF1to0:    handler = PRO_ST_REC_SRDBUF1to0_SWTBUF1to0;    break;
        case ST_REC_RRDBUF1:                  handler = PRO_ST_REC_RRDBUF1;                  break;
        case ST_REC_WWTBUF0_DRDBUF0toWTBUF1:  handler = PRO_ST_REC_WWTBUF0_DRDBUF0toWTBUF1;  break;
        case ST_REC_SWTBUF1to0:               handler = PRO_ST_REC_SWTBUF1to0;               break;
        case ST_END_WWTBUF0_DRDBUF0toWTBUF1:  handler = PRO_ST_END_WWTBUF0_DRDBUF0toWTBUF1;  break;
        case ST_END_SWTBUF1to0:               handler = PRO_ST_END_SWTBUF1to0;               break;
        case ST_END_WWTBUF0:                  handler = PRO_ST_END_WWTBUF0;                  break;
        case ST_END_WWTBUF0_BND:              handler = PRO_ST_END_WWTBUF0_BND;              break;
        case ST_DONE:                         handler = PRO_ST_DONE;                         break;
    }

    if( handler ){
        handler();
    }
}




int main(int argc,char *argv[]){
  if( FM_BUF_DEP <=1 ){    cout<< "HW Not Support Buf Dep <=1 case"<<endl; return -1; }
   
 INI_RD_EXT_MEM();
 HW_SCALAR();
 PRO_ST_DONE();

return 0;
}
Total Download: http://sites.google.com/site/funningboy/c/ESL_Scalar.tar.gz?attredirects=0&d=1

2010年7月1日 星期四

Image Scalar 4 ESL Flow HW Algorithm

結合 Platform 跟 Communication 上的關係做出的 Sample. 用 Pipe line架構來減少 computation time, Burst Mode 減少 Bus access 所浪費的時間...

Image Scalar 4 ESL Flow Golden Model

有 ESL Design Flow 概念後, 底下就用最簡單的 Image Scalar 當例子 ESL Design Flow Ref: image scaling 利用內差補點的方式,做 Pixel 的放大縮小. 以 2x2 放大成 4x4 的例子 以 4x4 縮小成 2x2的例子 Ref : CIC 96學年決賽競賽題目(研究所組/大學組) 有了image scaling 的概念後, 開始建立 Golden Model scalar.c

#include <stdio.h>
#include <stdlib.h>


#define LEN 4 


/* Source image (LEN x LEN), filled by INI_ARR(). */
int ARR[LEN][LEN];
/* Scaled image (2*LEN x 2*LEN), filled by SCALAR_ARR(). */
int RST[2*LEN][2*LEN];

/* Print the scaled image as tab-separated "row column value" lines, with a
 * blank line after every row. */
void DISPLAY_RST(){
    int row;
    int col;

    puts("i\tj\tRST");

    for(row = 0; row < 2*LEN; ++row){
        col = 0;
        while( col < 2*LEN ){
            printf("%d\t%d\t%d\n", row, col, RST[row][col]);
            ++col;
        }
        putchar('\n');
    }
}

/* Golden model of the 2x up-scaler: each source pixel ARR[i][j] produces the
 * 2x2 output patch whose top-left corner is RST[2i][2j]. The top-left value is
 * the pixel itself; the others are floor-averages with the right, lower and
 * diagonal neighbours. On the last row/column the missing neighbours are left
 * out of the average. */
void SCALAR_ARR(){
    int row;
    int col;

    for(row = 0; row < LEN; ++row){
        const int lastRow = ( row == LEN-1 );

        for(col = 0; col < LEN; ++col){
            const int lastCol = ( col == LEN-1 );
            const int r2   = 2*row;
            const int c2   = 2*col;
            const int here = ARR[row][col];

            RST[r2][c2] = here;

            if( !lastRow && !lastCol ){
                /* interior pixel: all three neighbours exist */
                RST[r2][c2+1]   = (here + ARR[row][col+1])>>1;
                RST[r2+1][c2]   = (here + ARR[row+1][col])>>1;
                RST[r2+1][c2+1] = (here + ARR[row+1][col] + ARR[row][col+1] + ARR[row+1][col+1] )>>2;
            } else if( !lastRow ){
                /* last column: only the lower neighbour exists */
                RST[r2][c2+1]   = here;
                RST[r2+1][c2]   = (here + ARR[row+1][col])>>1;
                RST[r2+1][c2+1] = (here + ARR[row+1][col])>>1;
            } else if( !lastCol ){
                /* last row: only the right-hand neighbour exists */
                RST[r2+1][c2]   = here;
                RST[r2][c2+1]   = (here + ARR[row][col+1])>>1;
                RST[r2+1][c2+1] = (here + ARR[row][col+1])>>1;
            } else {
                /* bottom-right corner: replicate the pixel */
                RST[r2+1][c2+1] = here;
                RST[r2+1][c2]   = here;
                RST[r2][c2+1]   = here;
            }
        }
    }
}



/* Fill the source image with the ramp 0, 1, 2, ... in row-major order, echo
 * every "row column value" triple and finish with a separator line. */
void INI_ARR(){
    int row;
    int col;
    int next = 0;

    for(row = 0; row < LEN; ++row){
        col = 0;
        while( col < LEN ){
            ARR[row][col] = next;
            ++next;
            printf("%d\t%d\t%d\n", row, col, ARR[row][col]);
            ++col;
        }
    }
    puts("===========");
}


/* Golden-model driver: initialise the source image, scale it 2x and print the
 * result. The parameter list now uses the standard form (int argc, char
 * *argv[]) instead of (int *argc, char argv[]). */
int main (int argc,char *argv[]){
    (void)argc;
    (void)argv;

    INI_ARR();
    SCALAR_ARR();
    DISPLAY_RST();

    return 0;
}
PS : 目前我們只考慮單向的"放大" function.

ESL Design Flow

感覺台灣的 RD 對這塊 "ESL Design Flow "很不重視, 只重視所謂的 "成品" 跟 "Code", 有做出來就好, 不管 Performance 跟 Power issue..., 等到要 Real Time Display or Real Time Work 時候,再來修修改改, 不外乎就加個 counter 或者是訊號亂拉一通, 反正只要能夠 function OK, 這也是為什麼台灣 RD 永遠也沒有做完的一天, 有改不完的 code, 跟燒不完的Rom code, 還有永不止息的SoftWare 戰爭. ESL Design Flow 可參考
System Level - SystemC Eclipse + SystemC + Cygwin
Design Automation Tool from Behavior Level to Transaction Level for Virtual Bus-Based Platforms virtual platform with OpenRisc ESL DESIGN Flow 1.System Golden Model : 當我們的function Check 的比對檔, 至少function 要work 吧. 不然搞什麼IC design 阿. 2. HW Algorithm Model : 利用符合 HW Design 的演算法來做硬體層面的實現, 在現今SOC的世代中, 系統的效能跟功率是很重要的課題, 如何減少Access 的次數 跟有效率的 Target Rate 不僅能簡化 Software 的複雜度,也能讓 PMU , MMU 更有效率. PS: 必須考慮 Communication + Computation Cycle Time 的關係. 3.HW Model : 實現 HW Algorithm 到實際的 "code" , 當然最好加入 Communication 跟 Software 的 issue, 最後要跟 golden 做驗證, 在此之中可加入 Power or Communication Monitor 來做 Performance and Power Estimation Power Monitor power monitor part1 power monitor part2 peak power Communication @ AMBA BUS AMBA 4 ARM AHB Master emulator @ SystemC AHB Slave emulator @ SystemC 4. RTL @ Verilog 等 HW 架構決定後, 開始RTL coding, 這樣可以很清楚的知道 Data Flow 跟 each control Path, 把每個 Time Stage 清楚的描述出來,就可以免去東拉西扯的 Signal 跟模糊不清的 Module Interface, 不然每次要亂七八糟開個 Port 給其他新的 IP, 真是只有 XXX 的想說. 4. GATE @ Verilog 這就要靠 Synthesis 的功力摟,看誰的 Design 夠力還有 constrain 夠好.... 不然 Garbage in = Garbage out...是很嚇人的, 明明就同樣的function code, 用同樣的 tool 為啥別人的就是比較好, 心中只會浮現 XXX PS:小弟對這塊比較不熟XD , PS: 自己有時候也為了趕時間,直接衝 RTL Design, 最後下場當然是 "砍掉重練"....