Skip to content

Commit

Permalink
mem: simplify software prefetch logic (OpenXiangShan#1176)
Browse files Browse the repository at this point in the history
* mem: update lsu op encoding
* decode: remove prefetch bits from CtrlSignals
* mem: simplify software prefetch logic in loadpipe
* mem: fix wrong dcacheShouldResp assertion
  • Loading branch information
AugustusWillisWang authored Oct 27, 2021
1 parent a9f27ba commit d200f59
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 70 deletions.
3 changes: 0 additions & 3 deletions src/main/scala/xiangshan/Bundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -162,9 +162,6 @@ class CtrlSignals(implicit p: Parameters) extends XSBundle {
val fpu = new FPUCtrlSignals
val isMove = Bool()
val singleStep = Bool()
val isORI = Bool() //for softprefetch
val isSoftPrefetchRead = Bool() //for softprefetch
val isSoftPrefetchWrite = Bool() //for softprefetch
// This inst will flush all the pipe when it is the oldest inst in ROB,
// then replay from this inst itself
val replayInst = Bool()
Expand Down
21 changes: 8 additions & 13 deletions src/main/scala/xiangshan/backend/decode/DecodeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -574,8 +574,6 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
val cs = Wire(new CtrlSignals()).decode(ctrl_flow.instr, decode_table)
cs.singleStep := false.B
cs.replayInst := false.B
cs.isSoftPrefetchRead := false.B
cs.isSoftPrefetchWrite := false.B

val fpDecoder = Module(new FPDecoder)
fpDecoder.io.instr := ctrl_flow.instr
Expand Down Expand Up @@ -619,19 +617,16 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan

// select out Zicbop prefetch.r / prefetch.w, which are encoded as ORI with rd = x0
val isORI = BitPat("b?????????????????110?????0010011") === ctrl_flow.instr
cs.isORI := isORI
when(cs.isORI) {
when(isORI) {
// TODO: add CSR based Zicbop config
when(cs.ldest === 0.U) {
cs.selImm := SelImm.IMM_S
cs.fuType := FuType.ldu
when(cs.lsrc(1) === "b00001".U) {
cs.isSoftPrefetchRead := true.B
cs.isSoftPrefetchWrite := false.B
cs.fuOpType := LSUOpType.prefetch_r
}.otherwise {
cs.isSoftPrefetchRead := false.B
cs.isSoftPrefetchWrite := true.B
cs.fuOpType := LSUOpType.prefetch_w
}
cs.selImm := SelImm.IMM_S
cs.fuType := FuType.ldu
cs.fuOpType := LSUOpType.lb
}
}

Expand Down Expand Up @@ -667,10 +662,10 @@ class DecodeUnit(implicit p: Parameters) extends XSModule with DecodeUnitConstan
io.deq.cf_ctrl.ctrl.srcType(0), io.deq.cf_ctrl.ctrl.srcType(1), io.deq.cf_ctrl.ctrl.srcType(2),
io.deq.cf_ctrl.ctrl.lsrc(0), io.deq.cf_ctrl.ctrl.lsrc(1), io.deq.cf_ctrl.ctrl.lsrc(2),
io.deq.cf_ctrl.ctrl.ldest, io.deq.cf_ctrl.ctrl.fuType, io.deq.cf_ctrl.ctrl.fuOpType)
XSDebug("out: rfWen=%d fpWen=%d isXSTrap=%d noSpecExec=%d isBlocked=%d flushPipe=%d isRVF=%d isORI=%x imm=%x\n",
XSDebug("out: rfWen=%d fpWen=%d isXSTrap=%d noSpecExec=%d isBlocked=%d flushPipe=%d isRVF=%d imm=%x\n",
io.deq.cf_ctrl.ctrl.rfWen, io.deq.cf_ctrl.ctrl.fpWen, io.deq.cf_ctrl.ctrl.isXSTrap,
io.deq.cf_ctrl.ctrl.noSpecExec, io.deq.cf_ctrl.ctrl.blockBackward, io.deq.cf_ctrl.ctrl.flushPipe,
io.deq.cf_ctrl.ctrl.isRVF, io.deq.cf_ctrl.ctrl.isORI, io.deq.cf_ctrl.ctrl.imm)
io.deq.cf_ctrl.ctrl.isRVF, io.deq.cf_ctrl.ctrl.imm)
XSDebug("out: excepVec=%b intrVec=%b\n",
io.deq.cf_ctrl.cf.exceptionVec.asUInt, io.deq.cf_ctrl.cf.intrVec.asUInt)
}
10 changes: 4 additions & 6 deletions src/main/scala/xiangshan/mem/MemCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ object genWdata {
class LsPipelineBundle(implicit p: Parameters) extends XSBundle {
val vaddr = UInt(VAddrBits.W)
val paddr = UInt(PAddrBits.W)
val func = UInt(6.W) //fixme???
// val func = UInt(6.W)
val mask = UInt(8.W)
val data = UInt((XLEN+1).W)
val uop = new MicroOp
Expand All @@ -66,13 +66,11 @@ class LsPipelineBundle(implicit p: Parameters) extends XSBundle {
val forwardMask = Vec(8, Bool())
val forwardData = Vec(8, UInt(8.W))

// For debug usage
val isFirstIssue = Bool()
//softprefetch
val isSoftPrefetch = Bool()
//softprefetch except
val isSoftPreExcept = Bool()
val isSoftPremmio = Bool()

// For debug usage
val isFirstIssue = Bool()
}

class StoreDataBundle(implicit p: Parameters) extends XSBundle {
Expand Down
44 changes: 19 additions & 25 deletions src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,9 @@ class LoadUnit_S0(implicit p: Parameters) extends XSModule with HasDCacheParamet
val s0_mask = Mux(io.loadFastMatch.orR, fastpath_mask, slowpath_mask)
XSPerfAccumulate("load_to_load_forward", io.loadFastMatch.orR && io.in.fire())

val isSoftPrefetch = Wire(Bool())
isSoftPrefetch := s0_uop.ctrl.isORI //it's a ORI but it exists in ldu, which means it's a softprefecth
val isSoftPrefetchRead = Wire(Bool())
val isSoftPrefetchWrite = Wire(Bool())
isSoftPrefetchRead := s0_uop.ctrl.isSoftPrefetchRead
isSoftPrefetchWrite := s0_uop.ctrl.isSoftPrefetchWrite
val isSoftPrefetch = LSUOpType.isPrefetch(s0_uop.ctrl.fuOpType)
val isSoftPrefetchRead = s0_uop.ctrl.fuOpType === LSUOpType.prefetch_r
val isSoftPrefetchWrite = s0_uop.ctrl.fuOpType === LSUOpType.prefetch_w

// query DTLB
io.dtlbReq.valid := io.in.valid
Expand Down Expand Up @@ -290,10 +287,8 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
val s2_mmio = !isSoftPrefetch && actually_mmio && !s2_exception
val s2_cache_miss = io.dcacheResp.bits.miss
val s2_cache_replay = io.dcacheResp.bits.replay
val s2_is_prefetch = io.in.bits.isSoftPrefetch

val s2_cache_miss_enter = io.dcacheResp.bits.miss_enter //missReq enter the mshr successfully
val isSoftPreExcept = io.in.bits.isSoftPreExcept
val isSoftPremmio = isSoftPrefetch && actually_mmio //TODO, fix it
// val cnt = RegInit(127.U)
// cnt := cnt + io.in.valid.asUInt
// val s2_forward_fail = io.lsq.matchInvalid || io.sbuffer.matchInvalid || cnt === 0.U
Expand All @@ -302,8 +297,8 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
// assert(!s2_forward_fail)
io.dcache_kill := false.B // move pmp resp kill to outside
io.dcacheResp.ready := true.B
val dcacheShouldResp = !(s2_tlb_miss || s2_exception || s2_mmio)
assert(!(io.in.valid && (dcacheShouldResp && !io.dcacheResp.valid) && (!isSoftPreExcept) && (!isSoftPremmio)), "DCache response got lost")
val dcacheShouldResp = !(s2_tlb_miss || s2_exception || s2_mmio || s2_is_prefetch)
assert(!(io.in.valid && (dcacheShouldResp && !io.dcacheResp.valid)), "DCache response got lost")

// merge forward result
// lsq has higher priority than sbuffer
Expand Down Expand Up @@ -350,17 +345,16 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
io.out.bits.data := rdataPartialLoad
// when exception occurs, set it to not miss and let it write back to rob (via int port)
if (EnableFastForward) {
when(io.in.bits.isSoftPrefetch) {
io.out.bits.miss := s2_cache_miss && !s2_exception && !s2_forward_fail && !fullForward && !s2_cache_miss_enter && !isSoftPreExcept && !isSoftPremmio
}.otherwise {
io.out.bits.miss := s2_cache_miss && !s2_exception && !s2_forward_fail && !fullForward
}
io.out.bits.miss := s2_cache_miss &&
!s2_exception &&
!s2_forward_fail &&
!fullForward &&
!s2_is_prefetch
} else {
when(io.in.bits.isSoftPrefetch) {
io.out.bits.miss := s2_cache_miss && !s2_exception && !s2_forward_fail && !s2_cache_miss_enter && !isSoftPreExcept && !isSoftPremmio
}.otherwise {
io.out.bits.miss := s2_cache_miss && !s2_exception && !s2_forward_fail
}
io.out.bits.miss := s2_cache_miss &&
!s2_exception &&
!s2_forward_fail &&
!s2_is_prefetch
}
io.out.bits.uop.ctrl.fpWen := io.in.bits.uop.ctrl.fpWen && !s2_exception
// if forward fail, replay this inst from fetch
Expand Down Expand Up @@ -390,16 +384,16 @@ class LoadUnit_S2(implicit p: Parameters) extends XSModule with HasLoadHelper {
// feedback tlb result to RS
io.rsFeedback.valid := io.in.valid
when (io.in.bits.isSoftPrefetch) {
io.rsFeedback.bits.hit := (!s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception || fullForward) && !s2_data_invalid) || s2_cache_miss_enter || isSoftPreExcept || isSoftPremmio
io.rsFeedback.bits.hit := (!s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception))
}.otherwise {
io.rsFeedback.bits.hit := !s2_tlb_miss && (!s2_cache_replay || s2_mmio || s2_exception || fullForward) && !s2_data_invalid
}
io.rsFeedback.bits.rsIdx := io.in.bits.rsIdx
io.rsFeedback.bits.flushState := io.in.bits.ptwBack
io.rsFeedback.bits.sourceType := Mux(s2_tlb_miss, RSFeedbackType.tlbMiss,
Mux(io.lsq.dataInvalid,
RSFeedbackType.dataInvalid,
RSFeedbackType.mshrFull
Mux(s2_cache_replay,
RSFeedbackType.mshrFull,
RSFeedbackType.dataInvalid
)
)
io.rsFeedback.bits.dataInvalidSqIdx.value := io.dataInvalidSqIdx
Expand Down
64 changes: 41 additions & 23 deletions src/main/scala/xiangshan/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -356,34 +356,52 @@ package object xiangshan {
}

object LSUOpType {
// normal load/store
// bit(1, 0) are size
def lb = "b000000".U
def lh = "b000001".U
def lw = "b000010".U
def ld = "b000011".U
def lbu = "b000100".U
def lhu = "b000101".U
def lwu = "b000110".U
def sb = "b001000".U
def sh = "b001001".U
def sw = "b001010".U
def sd = "b001011".U

def cbo_zero = "b001111".U // l1 cache op

def cbo_clean = "b011111".U // llc op
def cbo_flush = "b101111".U // llc op
def cbo_inval = "b111111".U // llc op

def isLoad(op: UInt): Bool = !op(3)
def isStore(op: UInt): Bool = op(3)
def isCbo(op: UInt): Bool = op(3, 0) === "b1111".U
// load pipeline

// normal load
// Note: bit(1, 0) are size, DO NOT CHANGE
// bit encoding: | load 0 | is unsigned(1bit) | size(2bit) |
def lb = "b0000".U
def lh = "b0001".U
def lw = "b0010".U
def ld = "b0011".U
def lbu = "b0100".U
def lhu = "b0101".U
def lwu = "b0110".U

// Zicbop software prefetch
// bit encoding: | prefetch 1 | 0 | prefetch type (2bit) |
def prefetch_i = "b1000".U // TODO
def prefetch_r = "b1001".U
def prefetch_w = "b1010".U

def isPrefetch(op: UInt): Bool = op(3)

// store pipeline
// normal store
// bit encoding: | store 00 | size(2bit) |
def sb = "b0000".U
def sh = "b0001".U
def sw = "b0010".U
def sd = "b0011".U

// l1 cache op
// bit encoding: | cbo_zero 01 | size(2bit) 11 |
def cbo_zero = "b0111".U

// llc op
// bit encoding: | llc op 11 | suboptype(2bit) |
def cbo_clean = "b1100".U
def cbo_flush = "b1101".U
def cbo_inval = "b1110".U

def isCbo(op: UInt): Bool = op(3, 2) === "b11".U

// atomics
// bit(1, 0) are size
// atomics use a different fu type,
// so we can safely reuse other load/store's encodings
// bit encoding: | optype(4bit) | size (2bit) |
def lr_w = "b000010".U
def sc_w = "b000110".U
def amoswap_w = "b001010".U
Expand Down

0 comments on commit d200f59

Please sign in to comment.