Skip to content

Commit

Permalink
mmu: timing optimization (OpenXiangShan#1161)
Browse files Browse the repository at this point in the history
* l2tlb: opt timing of req_addr_low to page cache

* l2tlb: change signal names from first/second to stage1/2

* l2tlb: page cache previously did not handle satp.changed; add it

* util: update DataHoldBypass, more general

* util: update PipelineConnect, add block signal

* l2tlb: page cache now needs 3 cycles: in/read | read back/ecc check | out

* l2tlb: set ecc block to XLEN(64)

* l2tlb: on an ecc error, do not report a hit; do it in the same cycle

* tlb: optimize replace io, for covering refill and better timing
  • Loading branch information
Lemover authored Oct 23, 2021
1 parent eee4cb5 commit 3889e11
Show file tree
Hide file tree
Showing 8 changed files with 257 additions and 157 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/utils/Hold.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ object OneCycleValid {
* Hold the data when it is valid and bypass latest data
*/
// Helper that outputs `data` directly while `valid` is asserted and otherwise
// outputs the value held in a register that is only updated when `valid` is
// asserted.
object DataHoldBypass {
// NOTE(review): two `apply` headers appear here because this view interleaves
// the removed UInt-only signature with the generic `T <: Data` signature that
// replaces it; presumably only the generic one exists in the resulting file.
def apply(data: UInt, valid: Bool): UInt = {
def apply[T <: Data](data: T, valid: Bool): T = {
// valid  -> bypass the incoming data in the same cycle
// !valid -> fall back to the RegEnable copy, which is enabled by `valid`
Mux(valid, data, RegEnable(data, valid))
}
}
Expand Down
11 changes: 7 additions & 4 deletions src/main/scala/utils/PipelineConnect.scala
Original file line number Diff line number Diff line change
Expand Up @@ -27,26 +27,29 @@ object PipelineConnect {
val out = DecoupledIO(gen.cloneType)
val rightOutFire = Input(Bool())
val isFlush = Input(Bool())
val block = Input(Bool())
})

val valid = RegInit(false.B)
valid.suggestName("pipeline_valid")
val leftFire = io.in.valid && io.out.ready && !io.block
when (io.rightOutFire) { valid := false.B }
when (io.in.valid && io.out.ready) { valid := true.B }
when (leftFire) { valid := true.B }
when (io.isFlush) { valid := false.B }

io.in.ready := io.out.ready
io.out.bits := RegEnable(io.in.bits, io.in.valid && io.out.ready)
io.in.ready := io.out.ready && !io.block
io.out.bits := RegEnable(io.in.bits, leftFire)
io.out.valid := valid //&& !isFlush
}

def apply[T <: Data]
(left: DecoupledIO[T], right: DecoupledIO[T], rightOutFire: Bool, isFlush: Bool,
(left: DecoupledIO[T], right: DecoupledIO[T], rightOutFire: Bool, isFlush: Bool, block: Bool = false.B,
moduleName: Option[String] = None
){
val pipelineConnect = Module(new PipelineConnectModule[T](left.bits.cloneType))
if(moduleName.nonEmpty) pipelineConnect.suggestName(moduleName.get)
pipelineConnect.io.in <> left
pipelineConnect.io.block := block
pipelineConnect.io.rightOutFire := rightOutFire
pipelineConnect.io.isFlush := isFlush
right <> pipelineConnect.io.out
Expand Down
71 changes: 59 additions & 12 deletions src/main/scala/xiangshan/cache/mmu/MMUBundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,6 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int)(implicit p: Parameters) e
val hit = Output(Bool())
val ppn = Output(UInt(ppnLen.W))
val perm = Output(new TlbPermBundle())
val hitVec = Output(UInt(nWays.W))
}))
}
val w = Flipped(ValidIO(new Bundle {
Expand All @@ -304,14 +303,15 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int)(implicit p: Parameters) e
val entry = new TlbEntry(pageNormal = true, pageSuper = false)
})))
}
val access = Vec(ports, new ReplaceAccessBundle(nSets, nWays))

/** Drives read-request port `i` of this storage IO.
  *
  * @param valid request valid bit for port `i`
  * @param vpn   virtual page number placed in the request payload
  * @param asid  accepted for interface compatibility; this method does not
  *              connect it to anything
  * @param i     index of the read port to drive
  */
def r_req_apply(valid: Bool, vpn: UInt, asid: UInt, i: Int): Unit = {
  // The two connections are independent of each other.
  r.req(i).bits.vpn := vpn
  r.req(i).valid := valid
}

// Returns the response fields of read port `i` as a tuple.
// NOTE(review): two tuple expressions appear because this view interleaves the
// removed line (which also returned `hitVec`) with its replacement. As written,
// the last expression -- (hit, ppn, perm) -- is the value of the method.
def r_resp_apply(i: Int) = {
(this.r.resp(i).bits.hit, this.r.resp(i).bits.ppn, this.r.resp(i).bits.perm, this.r.resp(i).bits.hitVec)
(this.r.resp(i).bits.hit, this.r.resp(i).bits.ppn, this.r.resp(i).bits.perm)
}

def w_apply(valid: Bool, wayIdx: UInt, data: PtwResp): Unit = {
Expand All @@ -323,20 +323,23 @@ class TlbStorageIO(nSets: Int, nWays: Int, ports: Int)(implicit p: Parameters) e
// Chisel clone hook: constructs a fresh TlbStorageIO with the same
// nSets / nWays / ports and casts it back to this.type.
override def cloneType: this.type = new TlbStorageIO(nSets, nWays, ports).asInstanceOf[this.type]
}

/** One access record for a replacement structure with `nSets` sets and
  * `nWays` ways: a set index plus a valid-qualified way index.
  *
  * @param nSets number of sets; fixes the width of `sets`
  * @param nWays number of ways; fixes the width of the `touch_ways` payload
  */
class ReplaceAccessBundle(nSets: Int, nWays: Int)(implicit p: Parameters) extends TlbBundle {
  // Elaboration-time field widths (plain Ints, not hardware).
  private val setIdxBits = log2Up(nSets)
  private val wayIdxBits = log2Up(nWays)

  // Set index of the access.
  val sets = Output(UInt(setIdxBits.W))
  // Way index of the access, accompanied by a valid bit.
  val touch_ways = ValidIO(Output(UInt(wayIdxBits.W)))

  // Chisel clone hook: rebuild the bundle from the same constructor arguments.
  override def cloneType: this.type =
    new ReplaceAccessBundle(nSets, nWays).asInstanceOf[this.type]
}

class ReplaceIO(Width: Int, nSets: Int, nWays: Int)(implicit p: Parameters) extends TlbBundle {
val access = Flipped(new Bundle {
val sets = Output(Vec(Width, UInt(log2Up(nSets).W)))
val touch_ways = Vec(Width, ValidIO(Output(UInt(log2Up(nWays).W))))
})
val access = Vec(Width, Flipped(new ReplaceAccessBundle(nSets, nWays)))

val refillIdx = Output(UInt(log2Up(nWays).W))
val chosen_set = Flipped(Output(UInt(log2Up(nSets).W)))

// Wires this ReplaceIO to a sequence of per-port ReplaceIOs: for every index
// i in [0, Width), access slot i of this IO is driven from access slot 0 of
// in(i), and this IO's refillIdx is fanned out to in(i).refillIdx.
// chosen_set is driven from the set index extracted from `vpn`.
//
// NOTE(review): this view interleaves removed and added diff lines. The
// `access.sets(i)` / `access.touch_ways(i)` / `get_idx` lines look like the
// removed form, and the `access(i)` / `get_set_idx` lines look like their
// replacements -- confirm against the actual file.
def apply_sep(in: Seq[ReplaceIO], vpn: UInt): Unit = {
for (i <- 0 until Width) {
this.access.sets(i) := in(i).access.sets(0)
this.access.touch_ways(i) := in(i).access.touch_ways(0)
this.chosen_set := get_idx(vpn, nSets)
this.access(i) := in(i).access(0)
// Does not depend on i: the same value is re-assigned on every iteration.
this.chosen_set := get_set_idx(vpn, nSets)
in(i).refillIdx := this.refillIdx
}
}
Expand Down Expand Up @@ -609,9 +612,53 @@ class PtwEntries(num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p
class PTWEntriesWithEcc(eccCode: Code, num: Int, tagLen: Int, level: Int, hasPerm: Boolean)(implicit p: Parameters) extends PtwBundle {
val entries = new PtwEntries(num, tagLen, level, hasPerm)

private val encBits = eccCode.width(entries.getWidth)
private val eccBits = encBits - entries.getWidth
val ecc = UInt(eccBits.W)
val ecc_block = XLEN
val ecc_info = get_ecc_info()
val ecc = UInt(ecc_info._1.W)

/** Computes the ECC layout for `entries` when it is protected in independent
  * blocks of `ecc_block` data bits, plus one shorter tail block if the entry
  * width is not a multiple of `ecc_block`.
  *
  * @return a tuple of
  *         1. total number of ECC bits for the whole entry,
  *         2. ECC bits per full `ecc_block`-bit block,
  *         3. number of full blocks,
  *         4. number of data bits in the trailing partial block (0 if none).
  */
def get_ecc_info(): (Int, Int, Int, Int) = {
  val eccBits_per = eccCode.width(ecc_block) - ecc_block

  val data_length = entries.getWidth
  val data_align_num = data_length / ecc_block
  val data_unalign_length = data_length % ecc_block
  val data_not_align = data_unalign_length != 0
  // Only query the code for a tail width when a tail actually exists.
  // Asking a code for the width of zero data bits is not meaningful: depending
  // on the Code implementation it may fail at elaboration or report spare bits
  // that encode()/decode() never use (NOTE(review): confirm for the codes in use).
  val eccBits_unalign =
    if (data_not_align) eccCode.width(data_unalign_length) - data_unalign_length
    else 0

  val eccBits = eccBits_per * data_align_num + eccBits_unalign
  (eccBits, eccBits_per, data_align_num, data_unalign_length)
}

/** Drives `ecc` from the current value of `entries`.
  *
  * Every full `ecc_block`-bit slice of the flattened entries is encoded on its
  * own and the bits above the data part of each codeword are kept (presumably
  * the check bits). A trailing partial slice, if any, is encoded the same way
  * and placed above the per-block slices.
  */
def encode() = {
  // Layout from get_ecc_info(): (total, ecc bits per block, full blocks, tail data bits).
  val (_, perBlockEccBits, fullBlocks, tailWidth) = ecc_info
  val data = entries.asUInt()
  val ecc_slices = Wire(Vec(fullBlocks, UInt(perBlockEccBits.W)))
  (0 until fullBlocks).foreach { i =>
    val lo = i * ecc_block
    val hi = lo + ecc_block - 1
    // Shift the data bits out of the codeword, leaving only the upper bits.
    ecc_slices(i) := eccCode.encode(data(hi, lo)) >> ecc_block
  }
  if (tailWidth == 0) {
    ecc := ecc_slices.asUInt()
  } else {
    val ecc_unaligned = eccCode.encode(data(data.getWidth - 1, fullBlocks * ecc_block)) >> tailWidth
    ecc := Cat(ecc_unaligned, ecc_slices.asUInt())
  }
}

/** Checks `entries` against the stored `ecc` bits, block by block.
  *
  * @return true when the decoder reports `.error` for at least one block
  *         (full blocks and, if present, the trailing partial block)
  */
def decode(): Bool = {
  // Layout from get_ecc_info(): (total, ecc bits per block, full blocks, tail data bits).
  val (totalEccBits, perBlockEccBits, fullBlocks, tailWidth) = ecc_info
  val data = entries.asUInt()
  // One flag per full block plus one slot reserved for the tail block.
  val res = Wire(Vec(fullBlocks + 1, Bool()))
  (0 until fullBlocks).foreach { i =>
    val eccLo = i * perBlockEccBits
    val dataLo = i * ecc_block
    // Rebuild the codeword as {ecc slice, data slice} before decoding.
    res(i) := eccCode.decode(
      Cat(ecc(eccLo + perBlockEccBits - 1, eccLo), data(dataLo + ecc_block - 1, dataLo))).error
  }
  if (tailWidth == 0) {
    // No partial block: the reserved slot never signals an error.
    res(fullBlocks) := false.B
  } else {
    res(fullBlocks) := eccCode.decode(
      Cat(ecc(totalEccBits - 1, perBlockEccBits * fullBlocks), data(data.getWidth - 1, fullBlocks * ecc_block))).error
  }

  Cat(res).orR
}

/** Fills `entries` from the given request/refill fields via
  * `entries.genEntries`, then recomputes `ecc` for the new contents.
  *
  * All arguments are forwarded unchanged to `genEntries`.
  */
def gen(vpn: UInt, asid: UInt, data: UInt, levelUInt: UInt, prefetch: Bool) = {
  entries := entries.genEntries(vpn, asid, data, levelUInt, prefetch)
  // Must follow the entries assignment in source order, as in the original.
  encode()
}

// Chisel clone hook: constructs a fresh PTWEntriesWithEcc from the same
// eccCode / num / tagLen / level / hasPerm and casts it back to this.type.
override def cloneType: this.type = new PTWEntriesWithEcc(eccCode, num, tagLen, level, hasPerm).asInstanceOf[this.type]
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/xiangshan/cache/mmu/MMUConst.scala
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ trait HasTlbConst extends HasXSParameter {

val timeOutThreshold = 5000

// Returns the low log2Up(nSets) bits of `vpn`, used as the set index.
// NOTE(review): two def headers appear because this view interleaves the
// removed name (`get_idx`) with its replacement (`get_set_idx`); presumably
// only `get_set_idx` exists in the resulting file.
def get_idx(vpn: UInt, nSets: Int): UInt = {
def get_set_idx(vpn: UInt, nSets: Int): UInt = {
vpn(log2Up(nSets)-1, 0)
}

Expand Down
8 changes: 4 additions & 4 deletions src/main/scala/xiangshan/cache/mmu/PTW.scala
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) with H
arb2.io.out.ready := cache.io.req.ready

cache.io.req.valid := arb2.io.out.valid
cache.io.req.bits.vpn := arb2.io.out.bits.vpn
cache.io.req.bits.source := arb2.io.out.bits.source
cache.io.req_isFirst := arb2.io.chosen =/= InArbMissQueuePort.U
cache.io.req.bits.req_info.vpn := arb2.io.out.bits.vpn
cache.io.req.bits.req_info.source := arb2.io.out.bits.source
cache.io.req.bits.isFirst := arb2.io.chosen =/= InArbMissQueuePort.U
cache.io.sfence := sfence
cache.io.csr := csr
cache.io.resp.ready := Mux(cache.io.resp.bits.hit, true.B, missQueue.io.in.ready || (!cache.io.resp.bits.toFsm.l2Hit && fsm.io.req.ready))
Expand Down Expand Up @@ -230,7 +230,7 @@ class PTWImp(outer: PTW)(implicit p: Parameters) extends PtwModule(outer) with H
cache.io.refill.bits.ptes := refill_data.asUInt
cache.io.refill.bits.req_info := Mux(refill_from_mq, mq_mem.refill, fsm.io.refill.req_info)
cache.io.refill.bits.level := Mux(refill_from_mq, 2.U, RegEnable(fsm.io.refill.level, init = 0.U, fsm.io.mem.req.fire()))
cache.io.refill.bits.addr_low := req_addr_low(RegNext(mem.d.bits.source))
cache.io.refill.bits.addr_low := RegNext(req_addr_low(mem.d.bits.source))

// pmp
pmp_check(0).req <> fsm.io.pmp.req
Expand Down
Loading

0 comments on commit 3889e11

Please sign in to comment.