現今的 CPU 架構大多都支援
SIMD (single instruction, multiple data) 的概念,用來做到 hardware 加速。底下小弟就舉個最簡單的範例吧。
假設底下 ex(1) 為原始的 hardware code.
ex(1)
// Writes ((a + b) >> 1) + 2 to *c.
// All arithmetic is unsigned, so the sum wraps on overflow and the shift is a
// logical (zero-filling) shift.
void SCALAR(unsigned int a, unsigned int b, unsigned int *c){
  const unsigned sum = a + b;
  *c = (sum >> 1) + 2;
}
ex(2) 為 clang compile 後的 LLVM IR。在 ex(2) 中,我們發現 hardware 要先做 "add",再繼續做 "lshr" 的動作,所以要消耗兩個 cycle 的時間。今天假設我們的硬體 support "add+lshr" 的指令,是不是就只需要一個 cycle 的時間就可完成?如 ex(3) 所示。這樣我們就可以減少 hardware 的 instruction count,相對的效能就可以提升。有點類似 CUDA/ARM 裡面的特殊指令集啦...
ex(2)
; IR for SCALAR as emitted by clang: the add and the logical shift right are
; two separate instructions.
define void @SCALAR(i32 %a, i32 %b, i32* nocapture %c) nounwind {
entry:
; %add = a + b
%add = add i32 %b, %a
; %shr = (a + b) >> 1 (logical shift, zero-filling)
%shr = lshr i32 %add, 1
; %add3 = ((a + b) >> 1) + 2
%add3 = add i32 %shr, 2
; *c = %add3
store i32 %add3, i32* %c, align 4, !tbaa !0
ret void
}
ex(3)
; Same function, rewritten with the hypothetical fused instruction: a single
; add_lshr stands in for the add + lshr pair of the previous listing.
define void @SCALAR(i32 %a, i32 %b, i32* nocapture %c) nounwind {
entry:
; %shr = (a + b) >> 1, computed by one instruction
%shr = add_lshr i32 %b, %a, 1
; %add3 = ((a + b) >> 1) + 2
%add3 = add i32 %shr, 2
; *c = %add3
store i32 %add3, i32* %c, align 4, !tbaa !0
ret void
}
sample code @ llvm
#define DEBUG_TYPE "oIRPass"
#include "llvm/Pass.h"
#include "llvm/Module.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/Dominators.h"
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include <set>
#include <map>
using namespace llvm;
using namespace std;
class oIRPass : public FunctionPass {
public:
static char ID;
oIRPass() : FunctionPass(ID) {}
~oIRPass(){
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
bool runOnFunction(Function &F);
void setInstAddLshrSucc(Instruction *Inst);
void chkInstAddLshrSucc();
std::string getInstAddLshrToString(Instruction *iPreInst,Instruction *iCutInst);
std::string getResultToString(Instruction *Inst);
private:
std::vector<Instruction*> oIRInstSuccVec;
std::map<Instruction*,std::string> oIRInstMap;
};
// Returns the result name of Inst, i.e. the identifier that follows '%' on
// the left-hand side of the instruction (getNameStr() is empty for unnamed
// values).
//
// The name is a property of the instruction itself and does not depend on who
// consumes it, so it is returned directly. This also covers an instruction
// whose result has no users, and guarantees that every path returns a value.
std::string oIRPass::getResultToString(Instruction *Inst){
  assert( Inst!=NULL && "getResultToString Error");
  return Inst->getNameStr();
}
// Instruction 2 string
// ex: %tmp22 = addlshr i32 %i,%j, 1 <<---
std::string oIRPass::getInstAddLshrToString(Instruction *iPreInst,Instruction *iCutInst){
Instruction *PreInst = dyn_cast<Instruction>(iPreInst);
Instruction *CutInst = dyn_cast<Instruction>(iCutInst);
BinaryOperator *prebin = dyn_cast<BinaryOperator>(PreInst);
assert( PreInst!=NULL && prebin!=NULL && prebin->getOpcode()== Instruction::Add && "getInstAddLshrToString Error");
BinaryOperator *cutbin = dyn_cast<BinaryOperator>(CutInst);
assert( CutInst!=NULL && cutbin!=NULL && cutbin->getOpcode()== Instruction::LShr && "getInstAddLshrToString Error");
// @ const cehck
Value *val2 = cutbin->getOperand(1);
ConstantInt *c2 = dyn_cast<ConstantInt>(val2);
assert( c2!=NULL && "getInstAddLshrToString Error");
Value *val0 = prebin->getOperand(0);
Value *val1 = prebin->getOperand(1);
unsigned int prebinWidth = cast<IntegerType>(prebin->getType())->getBitWidth();
unsigned int cutbinWidth = cast<IntegerType>(cutbin->getType())->getBitWidth();
// @ width check
assert( cutbinWidth<=prebinWidth && prebinWidth==32 && "getInstAddLshrToString Error");
stringstream sb;
sb << c2->getZExtValue();
std::string st = "%"+getResultToString(CutInst)+" = addlshr i32 "+"%"+val0->getNameStr()+", "+"%"+val1->getNameStr()+", "+ sb.str();
return st;
}
// Rebuilds oIRInstSuccVec for Inst.
// The list is always emptied first. If Inst is an add (Instruction::Add) it
// is stored as element 0, followed by every user of it that is a logical
// shift right (Instruction::LShr). For any other instruction the list is
// left empty.
void oIRPass::setInstAddLshrSucc(Instruction *Inst){
  Instruction *head = dyn_cast<Instruction>(Inst);
  assert( head!=NULL && "setInstructionSucc Error");
  oIRInstSuccVec.clear();

  // Only an add can start an add -> lshr chain.
  BinaryOperator *headOp = dyn_cast<BinaryOperator>(head);
  if(headOp==NULL || headOp->getOpcode()!=Instruction::Add)
    return;

  oIRInstSuccVec.push_back(head);
  // Walk the users of the add and keep the lshr ones.
  for (Instruction::use_iterator user = head->use_begin(), last = head->use_end(); user!=last; ++user) {
    Instruction *userInst = dyn_cast<Instruction>(*user);
    BinaryOperator *userOp = dyn_cast<BinaryOperator>(userInst);
    if(userOp!=NULL && userOp->getOpcode()==Instruction::LShr)
      oIRInstSuccVec.push_back(userInst);
  }
}
// check Instruction successor match Add->LShr?
void oIRPass::chkInstAddLshrSucc(){
typedef std::vector<Instruction*>::iterator IT_iterator;
// @ Instruction::Add token
IT_iterator ITB = oIRInstSuccVec.begin();
Instruction *PreIT = dyn_cast<Instruction>(*ITB);
BinaryOperator *prebin = dyn_cast<BinaryOperator>(PreIT);
assert( PreIT!=NULL && prebin!=NULL && prebin->getOpcode()== Instruction::Add && "chkInstructionSucc Error");
// @ Instruction::LShr token
for(IT_iterator IB = oIRInstSuccVec.begin(), IE = oIRInstSuccVec.end(); IB != IE; ++IB) {
Instruction *CutIT = dyn_cast<Instruction>(*IB);
BinaryOperator *cutbin = dyn_cast<BinaryOperator>(CutIT);
assert( CutIT!=NULL && cutbin!=NULL && "chkInstructionSucc Error");
Value *val1 = cutbin->getOperand(1);
if(cutbin->getOpcode()== Instruction::LShr && dyn_cast<ConstantInt>(val1))
errs() << "oIR found :: " << getInstAddLshrToString(PreIT, CutIT) << "\n";
} //end for
}
// Visits every instruction of every basic block in F; for each one it
// rebuilds the add -> lshr candidate list and reports the matches.
// Returns false because the pass only prints what it finds and never
// modifies F.
bool oIRPass::runOnFunction(Function &F) {
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB) {
    for (BasicBlock::iterator IB = BB->begin(), IE = BB->end(); IB != IE; ++IB) {
      Instruction *IT = &*IB;
      setInstAddLshrSucc(IT);
      chkInstAddLshrSucc();
    }
  }
  return false;
}
// Storage for the pass identifier declared in oIRPass. Per the LLVM
// pass-writing guide it is the address of ID, not its value, that identifies
// the pass.
char oIRPass::ID = 0;
// Registers the pass under the command-line name "oIR_pass" with the
// description "oIR lib test".
// NOTE(review): the two trailing `false` arguments are presumably
// RegisterPass's CFGOnly and is_analysis flags — confirm against the LLVM
// version in use.
RegisterPass<oIRPass> PXX("oIR_pass", "oIR lib test",false,false);
Results:
oIR found :: %shr = addlshr i32 %b, %a, 1
refs:
LLVM + BOOST = LMBOOST....
Sample Pass @ LLVM
LLVM 2.8 env set && pass manager set
沒有留言:
張貼留言