Skip to content

Commit

Permalink
MemBlock: split store addr and store data (OpenXiangShan#781)
Browse files Browse the repository at this point in the history
* RSFeedback: add source type

* StoreQueue: split store addr and store data

* StoreQueue: update ls forward logic

* Now it supports split addr and data

* Chore: force assign name for load/store unit

* RS: add RS support for store a-d split

* StoreQueue: fix stlf logic

* StoreQueue: fix addr wb sq update logic

* AtomicsUnit: support split a/d

* StoreQueue: add sbuffer enq condition assertion

The store data op (std) may still be invalid after the store addr op (sta)
has committed, so datavalid needs to be checked before committing
store data to the sbuffer.

Note that at the current commit, a non-completed std op for a
committed store may exist. We should make sure that this uop
will not be cancelled by a later branch misprediction. More work
to be done!

* Roq: add std/sta split writeback logic

Now a store will commit only if both sta & std have been written back

Co-authored-by: ZhangZifei <[email protected]>
  • Loading branch information
AugustusWillisWang and Lemover authored Apr 30, 2021
1 parent 68f25d3 commit 1b7aded
Show file tree
Hide file tree
Showing 13 changed files with 353 additions and 115 deletions.
13 changes: 10 additions & 3 deletions src/main/scala/xiangshan/Bundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,14 @@ object ValidUndirectioned {
}
}

/** Encodings used for the `sourceType` field of [[RSFeedback]].
  *
  * Each code is a fixed-width unsigned literal; `apply()` yields the matching
  * hardware type so a bundle field can be declared as `RSFeedbackType()`.
  *
  * NOTE(review): the meaning of each code is inferred from its name only
  * (TLB miss, MSHR full, store data not yet valid) -- confirm against the
  * units that drive the feedback.
  */
object RSFeedbackType {
  // Single source of truth for the bit width shared by the codes and the type.
  private val codeWidth = 2

  val tlbMiss = 0.U(codeWidth.W)
  val mshrFull = 1.U(codeWidth.W)
  val dataInvalid = 2.U(codeWidth.W)

  /** Hardware type wide enough to carry any of the codes above. */
  def apply(): UInt = UInt(codeWidth.W)
}

class SCMeta(val useSC: Boolean)(implicit p: Parameters) extends XSBundle with HasSCParameter {
val tageTaken = if (useSC) Bool() else UInt(0.W)
val scUsed = if (useSC) Bool() else UInt(0.W)
Expand Down Expand Up @@ -407,14 +415,13 @@ class RoqCommitIO(implicit p: Parameters) extends XSBundle {
def hasCommitInstr = !isWalk && valid.asUInt.orR
}

class TlbFeedback(implicit p: Parameters) extends XSBundle {
class RSFeedback(implicit p: Parameters) extends XSBundle {
val rsIdx = UInt(log2Up(IssQueSize).W)
val hit = Bool()
val flushState = Bool()
val sourceType = RSFeedbackType()
}

class RSFeedback(implicit p: Parameters) extends TlbFeedback

class FrontendToBackendIO(implicit p: Parameters) extends XSBundle {
// to backend end
val cfVec = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
Expand Down
23 changes: 17 additions & 6 deletions src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val storeUnits = Seq.fill(exuParameters.StuCnt)(Module(new StoreUnit))
val exeUnits = loadUnits ++ storeUnits

loadUnits.zipWithIndex.map(x => x._1.suggestName("LoadUnit_"+x._2))
storeUnits.zipWithIndex.map(x => x._1.suggestName("StoreUnit_"+x._2))

val atomicsUnit = Module(new AtomicsUnit)

val loadWritebackOverride = Mux(atomicsUnit.io.out.valid, atomicsUnit.io.out.bits, loadUnits.head.io.ldout.bits)
Expand Down Expand Up @@ -221,7 +224,7 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
for (i <- 0 until exuParameters.LduCnt) {
loadUnits(i).io.redirect <> io.fromCtrlBlock.redirect
loadUnits(i).io.flush <> io.fromCtrlBlock.flush
loadUnits(i).io.tlbFeedback <> reservationStations(i).io.memfeedback
loadUnits(i).io.rsFeedback <> reservationStations(i).io.memfeedback
loadUnits(i).io.rsIdx := reservationStations(i).io.rsIdx // TODO: beautify it
loadUnits(i).io.isFirstIssue := reservationStations(i).io.isFirstIssue // NOTE: just for dtlb's perf cnt
loadUnits(i).io.dtlb <> dtlb.io.requestor(i)
Expand Down Expand Up @@ -255,13 +258,16 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)

stu.io.redirect <> io.fromCtrlBlock.redirect
stu.io.flush <> io.fromCtrlBlock.flush
stu.io.tlbFeedback <> rs.io.memfeedback
stu.io.rsFeedback <> rs.io.memfeedback
stu.io.rsIdx <> rs.io.rsIdx
stu.io.isFirstIssue <> rs.io.isFirstIssue // NOTE: just for dtlb's perf cnt
stu.io.dtlb <> dtlbReq
stu.io.stin <> rs.io.deq
stu.io.lsq <> lsq.io.storeIn(i)

// rs.io.storeData <> lsq.io.storeDataIn(i)
lsq.io.storeDataIn(i) := rs.io.stData

// sync issue info to rs
lsq.io.storeIssue(i).valid := rs.io.deq.valid
lsq.io.storeIssue(i).bits := rs.io.deq.bits
Expand Down Expand Up @@ -321,6 +327,9 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
val st0_atomics = reservationStations(atomic_rs0).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.deq.bits.uop.ctrl.fuType)
val st1_atomics = reservationStations(atomic_rs1).io.deq.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.deq.bits.uop.ctrl.fuType)

val st0_data_atomics = reservationStations(atomic_rs0).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs0).io.stData.bits.uop.ctrl.fuType)
val st1_data_atomics = reservationStations(atomic_rs1).io.stData.valid && FuType.storeIsAMO(reservationStations(atomic_rs1).io.stData.bits.uop.ctrl.fuType)

when (st0_atomics) {
reservationStations(atomic_rs0).io.deq.ready := atomicsUnit.io.in.ready
storeUnits(0).io.stin.valid := false.B
Expand All @@ -342,6 +351,8 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)

atomicsUnit.io.in.valid := st0_atomics || st1_atomics
atomicsUnit.io.in.bits := Mux(st0_atomics, reservationStations(atomic_rs0).io.deq.bits, reservationStations(atomic_rs1).io.deq.bits)
atomicsUnit.io.storeDataIn.valid := st0_data_atomics || st1_data_atomics
atomicsUnit.io.storeDataIn.bits := Mux(st0_data_atomics, reservationStations(atomic_rs0).io.stData.bits, reservationStations(atomic_rs1).io.stData.bits)
atomicsUnit.io.rsIdx := Mux(st0_atomics, reservationStations(atomic_rs0).io.rsIdx, reservationStations(atomic_rs1).io.rsIdx)
atomicsUnit.io.redirect <> io.fromCtrlBlock.redirect
atomicsUnit.io.flush <> io.fromCtrlBlock.flush
Expand All @@ -366,14 +377,14 @@ class MemBlockImp(outer: MemBlock) extends LazyModuleImp(outer)
}

when (state === s_atomics_0) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs0).io.memfeedback
atomicsUnit.io.rsFeedback <> reservationStations(atomic_rs0).io.memfeedback

assert(!storeUnits(0).io.tlbFeedback.valid)
assert(!storeUnits(0).io.rsFeedback.valid)
}
when (state === s_atomics_1) {
atomicsUnit.io.tlbFeedback <> reservationStations(atomic_rs1).io.memfeedback
atomicsUnit.io.rsFeedback <> reservationStations(atomic_rs1).io.memfeedback

assert(!storeUnits(1).io.tlbFeedback.valid)
assert(!storeUnits(1).io.rsFeedback.valid)
}

lsq.io.exceptionAddr.lsIdx := io.lsqio.exceptionAddr.lsIdx
Expand Down
107 changes: 99 additions & 8 deletions src/main/scala/xiangshan/backend/issue/ReservationStation.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import utils._
import xiangshan.backend.decode.{ImmUnion, Imm_U}
import xiangshan.backend.exu.{Exu, ExuConfig}
import xiangshan.backend.roq.RoqPtr
import xiangshan.mem.SqPtr
import xiangshan.mem.{SqPtr, StoreDataBundle}

import scala.math.max

Expand Down Expand Up @@ -102,6 +102,7 @@ class ReservationStation
val numExist = Output(UInt(iqIdxWidth.W))
val fromDispatch = Flipped(DecoupledIO(new MicroOp))
val deq = DecoupledIO(new ExuInput)
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new StoreDataBundle) else null
val srcRegValue = Input(Vec(srcNum, UInt(srcLen.W)))

val stIssuePtr = if (exuCfg == LdExeUnitCfg) Input(new SqPtr()) else null
Expand Down Expand Up @@ -143,6 +144,11 @@ class ReservationStation
select.io.memfeedback := io.memfeedback
select.io.flushState := io.memfeedback.bits.flushState
}
if (exuCfg == StExeUnitCfg) {
select.io.dataReadyVec := ctrl.io.dataReadyVec
} else {
select.io.dataReadyVec := DontCare
}

ctrl.io.in.valid := select.io.enq.ready && io.fromDispatch.valid // NOTE: ctrl doesnt care redirect for timing optimization
ctrl.io.flush := io.flush
Expand All @@ -162,6 +168,10 @@ class ReservationStation
if (exuCfg == LdExeUnitCfg) {
ctrl.io.stIssuePtr := RegNext(io.stIssuePtr)
}
if (exuCfg == StExeUnitCfg) {
ctrl.io.selData.valid := select.io.deqData.valid
ctrl.io.selData.bits := select.io.deqData.bits
}

data.io.in.valid := select.io.enq.fire()
data.io.in.addr := select.io.enq.bits
Expand All @@ -174,6 +184,7 @@ class ReservationStation
}
if (exuCfg == StExeUnitCfg) {
data.io.fpRegValue := io.fpRegValue
data.io.selData := select.io.deqData.bits
}
data.io.sel := select.io.deq.bits
data.io.listen.wen := ctrl.io.listen
Expand All @@ -196,6 +207,12 @@ class ReservationStation
if (srcNum > 1) { io.deq.bits.src2 := data.io.out(1) }
if (srcNum > 2) { io.deq.bits.src3 := data.io.out(2) }
if (exuCfg == JumpExeUnitCfg) { io.deq.bits.uop.cf.pc := data.io.pc }

if (exuCfg == StExeUnitCfg) {
io.stData.bits.uop := ctrl.io.stData.bits
io.stData.bits.data := data.io.stData
io.stData.valid := ctrl.io.stData.valid
}
}

class ReservationStationSelect
Expand Down Expand Up @@ -225,6 +242,7 @@ class ReservationStationSelect

val redirectVec = Input(Vec(iqSize, Bool()))
val readyVec = Input(Vec(iqSize, Bool()))
val dataReadyVec = Input(Vec(iqSize, Bool())) // NOTE: wanna dead code elimination eliminates the codes
val validVec = Output(Vec(iqSize, Bool()))
val indexVec = Output(Vec(iqSize, UInt(iqIdxWidth.W)))

Expand All @@ -236,6 +254,7 @@ class ReservationStationSelect
def fire() = valid && ready
}
val deq = DecoupledIO(UInt(iqIdxWidth.W))
val deqData = if (exuCfg == StExeUnitCfg) ValidIO(UInt(iqIdxWidth.W)) else null

val flushState = if (feedback) Input(Bool()) else null
val isFirstIssue = if (feedback) Output(Bool()) else null
Expand All @@ -251,7 +270,8 @@ class ReservationStationSelect
* count queue : record replay cycle
*/

val s_idle :: s_valid :: s_wait :: s_replay :: Nil = Enum(4)
val s_idle :: s_valid :: s_wait :: s_replay :: s_sent :: Nil = Enum(5)
val d_idle :: d_sent :: Nil = Enum(2)
/* state machine
* s_idle : empty slot, init state, set when deq
* s_valid : ready to be selected
Expand All @@ -270,6 +290,11 @@ class ReservationStationSelect
val emptyIdxQueue = widthMap(i => emptyQueue(indexQueue(i)))
val countIdxQueue = widthMap(i => countQueue(indexQueue(i)))

// NOTE: we want dead code elimination to remove the code below
val dataStateQueue = RegInit(VecInit(Seq.fill(iqSize)(d_idle)))
val dataValidQueue = VecInit(dataStateQueue.zip(stateQueue).map(a => a._1 === d_idle && a._2 =/= s_idle))
val dataReadyIdxQueue = widthMap(i => dataValidQueue(indexQueue(i)) && io.dataReadyVec(indexQueue(i)))

// select ready
// for no replay, select just equal to deq (attached)
// with replay, select is just two stage with deq.
Expand Down Expand Up @@ -305,6 +330,19 @@ class ReservationStationSelect
(if(feedback) ~(0.U(iqSize.W)) else
Mux(RegNext(selectValid && (io.redirect.valid || io.flush)), 0.U, ~(0.U(iqSize.W))))

// store deq data, receiver(the sq) must be ready
// NOTE: we want dead code elimination to remove the code below
val lastDataMask = Wire(UInt(iqSize.W))
val dataMask = WireInit(VecInit((0 until iqSize).map(i => dataReadyIdxQueue(i)))).asUInt & lastDataMask
val dataIdx = ParallelPriorityMux(dataMask.asBools zip indexQueue)
val dataPtr = ParallelPriorityMux(dataMask.asBools.zipWithIndex.map{ case (a,i) => (a, i.U)}) // NOTE: the idx of indexQueue
val haveData = Cat(dataMask).orR
val dataIdxReg = RegNext(dataIdx, init = 0.U)
val dataValid = haveData
val dataReg = RegNext(dataValid, init = false.B)
val dataPtrReg = RegNext(Mux(moveMask(dataPtr), dataPtr-1.U, dataPtr), init = 0.U)
lastDataMask := ~Mux(dataReg, UIntToOH(dataPtrReg), 0.U)

// deq
val dequeue = Mux(RegNext(io.flush), false.B,
if (feedback) bubbleReg else bubbleReg || issueFire)
Expand All @@ -327,11 +365,28 @@ class ReservationStationSelect
if (feedback) {
when (io.memfeedback.valid) {
when (stateQueue(io.memfeedback.bits.rsIdx) === s_wait) {
stateQueue(io.memfeedback.bits.rsIdx) := Mux(io.memfeedback.bits.hit, s_idle, s_replay)
val s_finish_state = if (exuCfg == StExeUnitCfg) {
Mux(dataStateQueue(io.memfeedback.bits.rsIdx) === d_sent || (dataReg && dataIdxReg === io.memfeedback.bits.rsIdx),
s_idle, s_sent)
} else { s_idle }
stateQueue(io.memfeedback.bits.rsIdx) := Mux(io.memfeedback.bits.hit, s_finish_state, s_replay)
}
when (!io.memfeedback.bits.hit) {
countQueue(io.memfeedback.bits.rsIdx) := replayDelay(cntCountQueue(io.memfeedback.bits.rsIdx))
}
assert(stateQueue(io.memfeedback.bits.rsIdx) === s_wait, "mem feedback but rs dont wait for it")
}
}

if (exuCfg == StExeUnitCfg) {
when (dataReg) {
dataStateQueue(dataIdxReg) := d_sent
}
when (dataReg && stateQueue(dataIdxReg) === s_sent) {
stateQueue(dataIdxReg) := s_idle
}
for (i <- 0 until iqSize) {
assert(stateQueue(i) =/= s_sent || dataStateQueue(i) =/= d_sent, "dont want the state that addr and data both sent, but still not idle")
}
}

Expand Down Expand Up @@ -383,6 +438,7 @@ class ReservationStationSelect
val enqIdx = indexQueue(enqPtr)
when (enqueue) {
stateQueue(enqIdx) := s_valid
dataStateQueue(enqIdx) := d_idle
cntCountQueue(enqIdx) := 0.U
}

Expand All @@ -394,6 +450,11 @@ class ReservationStationSelect
io.deq.valid := selectValid
io.deq.bits := selectIndex

if (exuCfg == StExeUnitCfg) {
io.deqData.valid := dataValid
io.deqData.bits := dataIdx
}

io.numExist := RegNext(Mux(nextTailPtr.flag, if(isPow2(iqSize)) (iqSize-1).U else iqSize.U, nextTailPtr.value), init = (iqSize - 1).U)

assert(RegNext(Mux(tailPtr.flag, tailPtr.value===0.U, true.B)))
Expand Down Expand Up @@ -463,10 +524,13 @@ class ReservationStationCtrl
val uop = new MicroOp
}))
val sel = Flipped(ValidIO(UInt(iqIdxWidth.W)))
val selData = if (exuCfg == StExeUnitCfg) Flipped(ValidIO(UInt(iqIdxWidth.W))) else null
val out = ValidIO(new MicroOp)
val stData = if (exuCfg == StExeUnitCfg) ValidIO(new MicroOp) else null

val redirectVec = Output(Vec(iqSize, Bool()))
val readyVec = Output(Vec(iqSize, Bool()))
val dataReadyVec = if (exuCfg == StExeUnitCfg) Output(Vec(IssQueSize, Bool())) else null
val validVec = Input(Vec(iqSize, Bool()))
val indexVec = Input(Vec(iqSize, UInt(iqIdxWidth.W)))

Expand All @@ -486,7 +550,6 @@ class ReservationStationCtrl
val enqEn = io.in.valid
val enqEnReg = RegNext(enqEn && !(io.redirect.valid || io.flush), init = false.B)
val enqUop = io.in.bits.uop
val enqUopReg = RegEnable(enqUop, selValid)
val selPtr = io.sel.bits
val selPtrReg = RegEnable(selPtr, selValid)
val data = io.listen
Expand Down Expand Up @@ -547,7 +610,12 @@ class ReservationStationCtrl
}

// load wait store
io.readyVec := srcQueueWire.map(Cat(_).andR)
if (exuCfg == StExeUnitCfg) {
io.readyVec := srcQueueWire.map(a => a(0))
io.dataReadyVec := srcQueueWire.map(a => a(1))
} else {
io.readyVec := srcQueueWire.map(Cat(_).andR)
}
if (exuCfg == LdExeUnitCfg) {
val ldWait = Reg(Vec(iqSize, Bool()))
val sqIdx = Reg(Vec(iqSize, new SqPtr()))
Expand All @@ -566,7 +634,7 @@ class ReservationStationCtrl
}

val redirectHit = io.redirectVec(selPtr)
val uop = Module(new SyncDataModuleTemplate(new MicroOp, iqSize, 1, 1))
val uop = Module(new SyncDataModuleTemplate(new MicroOp, iqSize, if (exuCfg == StExeUnitCfg) 2 else 1, 1))

uop.io.raddr(0) := selPtr
io.out.valid := RegNext(selValid && ~redirectHit)
Expand All @@ -575,7 +643,14 @@ class ReservationStationCtrl
uop.io.waddr(0) := enqPtr
uop.io.wdata(0) := enqUop

class fastSendUop extends Bundle {
if (exuCfg == StExeUnitCfg) { // NOTE: send data part of st
uop.io.raddr(1) := io.selData.bits
io.stData.bits := uop.io.rdata(1)
io.stData.valid := RegNext(io.selData.valid && ~io.redirectVec(io.selData.bits))
}
// NOTE: st dont fast wake others, dont care override

class fastSendUop extends XSBundle {
val pdest = UInt(PhyRegIdxWidth.W)
val rfWen = Bool()
val fpWen = Bool()
Expand All @@ -595,6 +670,9 @@ class ReservationStationCtrl
red := roq.needFlush(io.redirect, io.flush)
}
io.out.bits.roqIdx := roqIdx(selPtrReg)
if (exuCfg == StExeUnitCfg) {
io.stData.bits.roqIdx := roqIdx(RegEnable(io.selData.bits, io.selData.valid))
}

io.fastUopOut := DontCare
if (fastWakeup) {
Expand Down Expand Up @@ -790,7 +868,10 @@ class ReservationStationData
}

val sel = Input(UInt(iqIdxWidth.W))
val selData = if(exuCfg == StExeUnitCfg) Input(UInt(iqIdxWidth.W)) else null
val out = Output(Vec(srcNum, UInt(srcLen.W)))
val stData = if(exuCfg == StExeUnitCfg) Output(UInt(srcLen.W)) else null

val pc = if(exuCfg == JumpExeUnitCfg) Output(UInt(VAddrBits.W)) else null
})

Expand Down Expand Up @@ -870,8 +951,18 @@ class ReservationStationData
(0 until srcNum).foreach(i => data(i).w(0).wdata := io.srcRegValue(i) )
}
// deq
data.map(_.r.addr := io.sel)
if (exuCfg == StExeUnitCfg) {
data(0).r.addr := io.sel
data(1).r.addr := io.selData
io.stData := data(1).r.rdata
} else {
data.map(_.r.addr := io.sel)
}

io.out := data.map(_.r.rdata)
if (exuCfg == StExeUnitCfg) {
io.out(1) := DontCare
}
if(pcMem.nonEmpty){
pcMem.get.io.raddr(0) := io.sel
io.pc := pcMem.get.io.rdata(0)
Expand Down
Loading

0 comments on commit 1b7aded

Please sign in to comment.