

### Contributors to the course material

- Arvind, Rishiyur S. Nikhil, Joel Emer, Muralidaran Vijayaraghavan
- Staff and students in 6.375 (Spring 2013),
   6.S195 (Fall 2012), 6.S078 (Spring 2012)
  - Asif Khan, Richard Ruhler, Sang Woo Jun, Abhinav Agarwal, Myron King, Kermin Fleming, Ming Liu, Li-Shiuan Peh
- External
  - Prof Amey Karkare & students at IIT Kanpur
  - Prof Jihong Kim & students at Seoul Nation University
  - Prof Derek Chiou, University of Texas at Austin
  - Prof Yoav Etsion & students at Technion

October 23, 2013

http://csg.csail.mit.edu/6.S195

15-2





# Two-bit versus one-bit Branch prediction Consider the branch instruction needed to implement a loop with one bit, the prediction will always be set incorrectly on loop exit with two bits the prediction will not change on loop exit A little bit of hysteresis is good in changing predictions



## Where does BHT fit in the processor pipeline?

- ♦ BHT can only be used after instruction decode
- We still need the next instruction address predictor (e.g., BTB) at the fetch stage
- Need a mechanism to update the BHT
  - where does the update information come from?

Execute

October 23, 2013

http://csg.csail.mit.edu/6.S195

A step-by-step explanation of how pipelines with multiple predictors work



### Nomenclature Drop an instruction: What we really mean is poison the instruction so that the subsequent stages know not to update any architectural state. The poisoned instruction has to be passed down for book keeping reasons, i.e., to remove it from the scoreboard. Detecting a misprediction versus training/updating a predictor. On a pc misprediction, information about redirecting the pc has to be passed to the fetch stage. However for training the BTB and other predictors information has to be passed even when there is no misprediction. • we will first focus on pc redirection and then on predictor training October 23, 2013 http://csg.csail.mit.edu/6.S195 L15-10







```
4-Stage pipeline with Branch
     Prediction
     module mkProc(Proc);
      Reg#(Addr) pc <- mkRegU;
RFile rf <- mkBypassRFile;
                        iMem <- mkIMemory;
       IMemory
       DMemory dMem <- mkDMemory;
       Fifo#(1, Decode2Execute) d2e <- mkPipelineFifo;
       Fifo#(1, Exec2Commit) e2c <- mkPipelineFifo;
       Scoreboard#(2) sb <- mkPipelineScoreboard;</pre>
       Reg#(Bool) feEp <- mkReg(False);</pre>
       Reg#(Bool) fdEp <- mkReg(False);</pre>
       Reg#(Bool) dEp <- mkReg(False);
Reg#(Bool) deEp <- mkReg(False);
Reg#(Bool) eEp <- mkReg(False);</pre>
       Fifo#(ExecRedirect) redirect <- mkBypassFifo;</pre>
       Fifo#(DecRedirect) decRedirect <- mkBypassFifo;
       AddrPred#(16) addrPred <- mkBTB;
       DirPred#(1024) dirPred <- mkBHT;</pre>
October 23, 2013
                           http://csg.csail.mit.edu/6.S195
```

```
4-Stage-BP pipeline
    Fetch rule
    rule doFetch;
        let inst = iMem.req(pc);
        if(redirect.notEmpty) begin
         feEp <= !feEp; pc <= redirect.first.newPc;</pre>
          redirect.deg; end
        else if(decRedirect.notEmpty)
             begin
             if (decRedirect.first.eEp == feEp)
               fdEp <= !fdEp; pc <= decRedirect.first.newPc; end</pre>
             decRedirect.deq;
             end;
        else begin
          let ppc = addrPred.predPc(pc);
          f2d.enq(Fetch2Decoode{pc: pc, ppc: ppc, inst: inst,
                               eEp: feEp, dEp: fdEp});
        end
    endrule
October 23, 2013
                          http://csg.csail.mit.edu/6.S195
```

```
4-Stage-BP pipeline
    Decode&RegRead Action
    function Action decAndRegFetch (DInst dInst, Addr pc, Addr ppc,
    Bool eEp);
    action
         let stall = sb.search1(dInst.src1) | sb.search2(dInst.src2)
                     || sb.search3(dInst.dst);;
          if(!stall)
          begin
            let rVal1 = rf.rd1(validRegValue(dInst.src1));
            let rVal2 = rf.rd2(validRegValue(dInst.src2));
            d2e.enq(Decode2Execute{pc: pc, ppc: ppc,
                 dInst: dInst, epoch: eEp,
                 rVal1: rVal1, rVal2: rVal2});
             sb.insert(dInst.rDst);
          end
    endaction
    endfunction
October 23, 2013
                         http://csg.csail.mit.edu/6.S195
                                                                 L15-16
```

```
4-Stage-BP pipeline
    Decode&RegRead rule
    rule doDecode;
      let x = f2d.first; let inst = x.inst; let pc = x.pc;
      let ppc = x.ppc; let idEp = x.dEp; let ieEp = x.eEp;
      let dInst = decode(inst);
      let newPc = dirPrec.predAddr(pc, dInst);
      if (ieEp != deEp) begin // change Decode's epochs and
                            // continue normal instruction execution
          deEp <= ieEp; let newdEp = idEp;</pre>
          decAndRegRead(inst, pc, newPc, ieEp);
          if(ppc != newPc)
           newDEp = !newdEp; decRedirect.enq(DecRedirect{pc: pc,
                                  newPc: newPc, eEp: ieEp}); end
         dEp <= newdEp end
      else if (idEp == dEp) begin
          decAndRegRead(inst, pc, newPc, ieEp);
          if(ppc != newPc)
                                                           begin
            dEp <= !dEp; decRedirect.enq(DecRedirect{pc: pc,</pre>
                               newPc: newPc, eEp: ieEp}); end
                          end
       f2d.deg;
    endrule
October 23, 2013
                         http://csg.csail.mit.edu/6.S195
```

```
4-Stage-BP pipeline
     Execute rule
     rule doExecute;
                                                          no change
         let x = d2e.first;
        let dInst = x.dInst; let pc
         let ppc = x.ppc;
                            let epoch = x.epoch;
         let rVal1 = x.rVal1; let rVal2 = x.rVal2;
         if (epoch == eEpoch) begin
          let eInst = exec(dInst, rVal1, rVal2, pc, ppc);
          if(eInst.iType == Ld) eInst.data <-</pre>
            dMem.req(MemReq(op:Ld, addr:eInst.addr, data:?));
           else if (eInst.iType == St) let d <+
            dMem.req(MemReq(op:St, addr:eInst.addr, data:eInst.data));
           e2c.eng(Exec2Commit{dst:eInst.dst, data:eInst.data});
           if(eInst.mispredict) begin
           redirect.enq(eInst.addr); eEpoch <= !eEpoch; end
                            end
         else e2c.eng(Exec2Commit{dst:Invalid, data:?});
         d2e.deq;
     endrule
October 23, 2013
                          http://csg.csail.mit.edu/6.S195
```

```
4-Stage-BP pipeline

Commit rule

rule doCommit;

let dst = eInst.first.dst;

let data = eInst.first.data;

if(isValid(dst))

rf.wr(tuple2(validValue(dst), data);

e2c.deq;

sb.remove;

endrule

October 23, 2013 http://csg.csail.mit.edu/6.5195 L15-19
```

## Exploiting Spatial Correlation Yeh and Patt, 1992 if (x[i] < 7) then y += 1;if (x[i] < 5) then c -= 4;If first condition is false then so is second condition History register, H, records the direction of the last N branches executed by the processor and the predictor uses this information to predict the resolution of the next branch October 24, 2011 http://www.csg.csail.mit.edu/6.823







