-
Notifications
You must be signed in to change notification settings - Fork 0
qij3/CareerCup
Folders and files
Name | Name | Last commit message | Last commit date | |
---|---|---|---|---|
Repository files navigation
#include <stdio.h> #include <assert.h> #include <omp.h> #define CPU_READ 0 #define CPU_WRITE 1 #define MSI_INVALID 0 #define MSI_SHARED 1 #define MSI_MODIFIED 2 #define BUS_NULL 0 #define BUS_READ 1 #define BUS_WRITE 2 #define BUS_UPGRADE 3 // write with local shared(S) cache line #define BUS_REPLY 4 #define BUS_WRITEBACK 5 #define PHASE_ACTION 0 #define PHASE_REACTION 1 #define PRIORITY_NORMAL 0 #define PRIORITY_HIGH 1 #define CACHE_LINE_SIZE 4 #define MAX_NPROC 64 #define MAX_CACHE_SIZE 4096 #define MAX_TRACE_LEN 200000 #define MAX_SIM_CYCLE MAX_TRACE_LEN * 10000 const char * bus_cmd_names [] = { "null ", "read ", "write ", "write ", "reply ", "writeback" }; const char * priority_names [] = { "normal", "high " }; const char * msi_names [] = { "I", "S", "M" }; const char * phase_names [] = { "action ", "reaction" }; struct mem_rec { int addr; int cmd; }; struct thread_state { int pending_writeback_addr; int pending_addr; int pending_cmd; int completed_cycle; int cache_hit_cnt; int cache_miss_cnt; }; struct cache_entry { int tag; int state; }; struct bus_state { int winner_id; // can be CPU or memory int winner_cmd; // the bus command of the request int winner_addr; // the address of the request int priority; // priority of the request int last_winner_id; // last winner of the bus (for arbitration) }; int cache_size = 32; // size of each cache int nproc = 4; // # of threads int trace_len = 3; // trace length int sim_cycle = 0; int thread_completion_cnt = 0; struct mem_rec gmem_trace[MAX_NPROC][MAX_TRACE_LEN] = {}; struct thread_state thread_state[MAX_NPROC] = {}; struct cache_entry gcache[MAX_NPROC][MAX_CACHE_SIZE] = {}; struct bus_state gbus; // init void read_trace_file(); void init_sim_states(); // simulate void simulate(); void simulate_memory(int thread_id); void simulate_thread(int thread_id); // cache related void generate_memory_instruction(int thread_id, struct thread_state * tstate); int cache_snooping(int thread_id, int cmd, int addr, struct thread_state * tstate); void update_cache(int thread_id, int cmd, int addr); // bus related int arbitrate_bus(int thread_id, int cmd, int addr, int priority, int phase); // output related void print_cache_states(); void print_stat(); int cfloor(int x, int a) { return (x & ~(a-1)); } int main(int argc, char ** argv) { read_trace_file(); init_sim_states(); omp_set_num_threads(nproc+1); #pragma omp parallel simulate(); print_stat(); } void read_trace_file() { int i = 0; int j = 0; int addr = 0; int cpu_cmd = 0; int K = 0; scanf("%d %d %d \n", &nproc, &K, &trace_len); assert(trace_len < MAX_TRACE_LEN); assert(nproc < MAX_NPROC); cache_size = 1 << K; for(i = 0; i < trace_len; i++) { for(j = 0; j < nproc; j++) { scanf("%d %d ", &addr, &cpu_cmd); gmem_trace[j][i].addr = addr; gmem_trace[j][i].cmd = cpu_cmd; } scanf("\n"); } } void init_sim_states() { int i, j; assert(nproc < MAX_NPROC); assert(cache_size < MAX_CACHE_SIZE); for(i = 0; i < nproc; i++) { thread_state[i].pending_writeback_addr = -1; thread_state[i].pending_addr = -1; thread_state[i].pending_cmd = 0; thread_state[i].completed_cycle = 0; thread_state[i].cache_hit_cnt = 0; thread_state[i].cache_miss_cnt = 0; } for(i = 0; i < nproc; i++) { for(j = 0; j < cache_size; j++) { gcache[i][j].tag = -1; gcache[i][j].state = MSI_INVALID; } } gbus.winner_id = 0; gbus.winner_cmd = 0; gbus.winner_addr = 0; gbus.priority = PRIORITY_NORMAL; gbus.last_winner_id = nproc-1; // thread-0 will be selected for the first bus transaction } void print_cache_states() { int i = 0; int j = 0; if (nproc > 4 || cache_size > 8) { return; } printf("cache state: \n"); for(j = 0; j < cache_size; j++) { for(i = 0; i < nproc; i++) { printf("%d %s, ", gcache[i][j].tag, msi_names[gcache[i][j].state]); } printf("\n"); } } void print_stat() { int i; printf("completed cycle: "); for(i = 0; i < nproc; i++) { printf("%d, ", thread_state[i].completed_cycle); } printf("\n"); printf("cache hits: "); for(i = 0; i < nproc; i++) { printf("%d, ", thread_state[i].cache_hit_cnt); } printf("\n"); printf("cache misses: "); for(i = 0; i < nproc; i++) { printf("%d, ", thread_state[i].cache_miss_cnt); } printf("\n"); printf("cache miss rate: "); for(i = 0; i < nproc; i++) { printf("%6.3f, ", ((double)thread_state[i].cache_miss_cnt) / (thread_state[i].cache_miss_cnt + thread_state[i].cache_hit_cnt)); } printf("\n"); } void simulate() { int my_tid = omp_get_thread_num(); if (my_tid < nproc) { simulate_thread(my_tid); } else { assert(my_tid == nproc); simulate_memory(my_tid); } } void simulate_memory(int thread_id) { int my_tid = thread_id; while(sim_cycle < MAX_SIM_CYCLE) { //action phase arbitrate_bus(my_tid, BUS_NULL, -1, PRIORITY_NORMAL, PHASE_ACTION); if (gbus.winner_cmd == BUS_READ || gbus.winner_cmd == BUS_WRITE) { // memory feeds data with normal priority arbitrate_bus(my_tid, BUS_REPLY, gbus.winner_addr, PRIORITY_NORMAL, PHASE_REACTION); } else { arbitrate_bus(my_tid, BUS_NULL, -1, PRIORITY_NORMAL, PHASE_REACTION); } #pragma omp barrier if (thread_completion_cnt == nproc) { #pragma omp critical printf("THD:%d completed \n", my_tid); break; } } } void generate_memory_inst(int thread_id, struct thread_state * tstate, int rec_idx) { int trace_addr = -1; int trace_cmd = 0; int cache_addr = -1; int cache_idx = -1; assert(tstate->pending_writeback_addr == -1); assert(tstate->pending_addr == -1); assert(rec_idx < trace_len); trace_addr = gmem_trace[thread_id][rec_idx].addr; trace_cmd = gmem_trace[thread_id][rec_idx].cmd; cache_addr = cfloor(trace_addr, CACHE_LINE_SIZE); cache_idx = (cache_addr / CACHE_LINE_SIZE) % cache_size; if (trace_cmd == CPU_READ) { if (cache_addr == gcache[thread_id][cache_idx].tag) { assert(gcache[thread_id][cache_idx].state != MSI_INVALID); // hit in cache //printf("THD: %d addr %d read hit in cache \n", thread_id, cache_addr); tstate->cache_hit_cnt++; return; } else { // not hit in cache tstate->pending_addr = cache_addr; tstate->pending_cmd = BUS_READ; tstate->cache_miss_cnt++; } } else if (trace_cmd == CPU_WRITE) { if (cache_addr == gcache[thread_id][cache_idx].tag) { // hit in cache if (gcache[thread_id][cache_idx].state == MSI_MODIFIED) { // direct write } else { assert(gcache[thread_id][cache_idx].state == MSI_SHARED); tstate->pending_addr = cache_addr; tstate->pending_cmd = BUS_UPGRADE; // notify other processors to invalidate their copies } tstate->cache_hit_cnt++; } else { // not hit in cache tstate->pending_addr = cache_addr; tstate->pending_cmd = BUS_WRITE; tstate->cache_miss_cnt++; } } else { assert(0); } if (tstate->pending_addr != -1 && gcache[thread_id][cache_idx].tag != tstate->pending_addr) { // need to load new cache line // check if old cache line needs writeback if (gcache[thread_id][cache_idx].state == MSI_MODIFIED) { tstate->pending_writeback_addr = gcache[thread_id][cache_idx].tag; assert(tstate->pending_writeback_addr != -1); } else { // invalid old cache line gcache[thread_id][cache_idx].tag = -1; gcache[thread_id][cache_idx].state = MSI_INVALID; } } } int cache_snooping(int thread_id, int cmd, int addr, struct thread_state * tstate) { int cache_addr = cfloor(addr, CACHE_LINE_SIZE); assert(cache_addr == addr); int cache_idx = (cache_addr / CACHE_LINE_SIZE) % cache_size; int dirty_hit = 0; if (cache_addr != gcache[thread_id][cache_idx].tag) { // cache line not hit return 0; } // cache line hit if (gcache[thread_id][cache_idx].state == MSI_MODIFIED) { dirty_hit = 1; // clear pending writeback if (tstate->pending_writeback_addr == cache_addr) { tstate->pending_writeback_addr = -1; //printf("pending writeback addr snoop hit THD: %d, addr: %d\n", // thread_id, tstate->pending_snooping_writeback_addr); } } if (cmd == BUS_READ) { gcache[thread_id][cache_idx].state = MSI_SHARED; } else if (cmd == BUS_WRITE || cmd == BUS_UPGRADE) { if (tstate->pending_cmd == BUS_UPGRADE && tstate->pending_addr == cache_addr) { tstate->pending_cmd = BUS_WRITE; // the line has been invalidated by others // the write will be a cache miss instead of a cache hit tstate->cache_hit_cnt--; tstate->cache_miss_cnt++; } // invalidate this cache line gcache[thread_id][cache_idx].tag = -1; gcache[thread_id][cache_idx].state = MSI_INVALID; } //printf("snoop hit. THD:%d addr:%d \n", thread_id, addr); return dirty_hit; } void update_cache(int thread_id, int cmd, int addr) { int cache_addr = cfloor(addr, CACHE_LINE_SIZE); int cache_idx = (cache_addr / CACHE_LINE_SIZE) % cache_size; //printf("update cache: thd: %d cmd %s cache tag: %d, new addr: %d \n", // thread_id, bus_cmd_names[cmd], gcache[thread_id][cache_idx].tag, cache_addr); if(gcache[thread_id][cache_idx].tag != -1 && gcache[thread_id][cache_idx].tag != cache_addr) { // it is possible, // a pending writeback request is cancelled due to a remote read // the old line is still in S state assert(cmd != BUS_WRITEBACK); assert(gcache[thread_id][cache_idx].state == MSI_SHARED); // reset cache line gcache[thread_id][cache_idx].tag = -1; gcache[thread_id][cache_idx].state = MSI_INVALID; } if (cmd == BUS_WRITEBACK) { assert(gcache[thread_id][cache_idx].tag == cache_addr); assert(gcache[thread_id][cache_idx].state = MSI_MODIFIED); gcache[thread_id][cache_idx].tag = -1; gcache[thread_id][cache_idx].state = MSI_INVALID; } else if (cmd == BUS_UPGRADE) { assert(gcache[thread_id][cache_idx].tag == cache_addr); assert(gcache[thread_id][cache_idx].state = MSI_SHARED); gcache[thread_id][cache_idx].state = MSI_MODIFIED; } else if (cmd == BUS_WRITE) { assert(gcache[thread_id][cache_idx].tag == -1); gcache[thread_id][cache_idx].tag = cache_addr; gcache[thread_id][cache_idx].state = MSI_MODIFIED; } else if (cmd == BUS_READ) { assert(gcache[thread_id][cache_idx].tag == -1); gcache[thread_id][cache_idx].tag = cache_addr; gcache[thread_id][cache_idx].state = MSI_SHARED; } else { assert(0); } } void simulate_thread(int thread_id) { int my_tid = thread_id; struct thread_state * my_state = &thread_state[my_tid]; int win_bus = 0; int action_addr = -1; int action_cmd = 0; int reaction_addr = -1; int reaction_cmd = 0; int priority = 0; int trace_rec_idx = 0; while(sim_cycle < MAX_SIM_CYCLE) { if (my_state->pending_addr == -1) { // optional writeback must be processed first assert(my_state->pending_writeback_addr == -1); // check if more CPU commands to process if (trace_rec_idx < trace_len) { // process one CPU command generate_memory_inst(my_tid, my_state, trace_rec_idx); trace_rec_idx++; } } // get current BUS command to issue if (my_state->pending_writeback_addr != -1) { // writeback will be processed first action_cmd = BUS_WRITEBACK; action_addr = my_state->pending_writeback_addr; } else if (my_state->pending_addr != -1) { action_cmd = my_state->pending_cmd; action_addr = my_state->pending_addr; } else { action_cmd = BUS_NULL; action_addr = -1; } //active phase win_bus = arbitrate_bus(my_tid, action_cmd, action_addr, PRIORITY_NORMAL, PHASE_ACTION); if (!my_state->completed_cycle && trace_rec_idx == trace_len && action_cmd == BUS_NULL) { my_state->completed_cycle = sim_cycle; #pragma omp critical { thread_completion_cnt++; printf("THD:%d completed \n", my_tid); } } if (win_bus) { if (action_cmd == BUS_WRITEBACK) { reaction_cmd = action_cmd; reaction_addr = action_addr; } else { // waiting for reply reaction_cmd = BUS_NULL; reaction_addr = -1; } priority = PRIORITY_NORMAL; } else { int dirty_hit = 0; if (gbus.winner_id != -1) { // response to other thread's request dirty_hit = cache_snooping(my_tid, gbus.winner_cmd, gbus.winner_addr, my_state); } if (dirty_hit) { reaction_cmd = BUS_REPLY; reaction_addr = gbus.winner_addr; priority = PRIORITY_HIGH; } else { reaction_cmd = BUS_NULL; reaction_addr = -1; priority = PRIORITY_NORMAL; } } arbitrate_bus(my_tid, reaction_cmd, reaction_addr, priority, PHASE_REACTION); if (win_bus) { update_cache(my_tid, action_cmd, action_addr); if (action_cmd == BUS_WRITEBACK) { assert(my_state->pending_writeback_addr != -1); my_state->pending_writeback_addr = -1; } else { assert(my_state->pending_addr != -1); my_state->pending_addr = -1; } gbus.last_winner_id = my_tid; } #pragma omp master print_cache_states(); #pragma omp barrier // exit all threads after the barrier in arbitrate_bus if (thread_completion_cnt == nproc) { break; } } } int is_higher_priority(int thread_id, int priority) { if (gbus.winner_id == -1) { return 1; //no winner yet } // at most one priority is high if (priority == PRIORITY_HIGH && gbus.priority == PRIORITY_NORMAL) { return 1; } else if (priority == PRIORITY_NORMAL && gbus.priority == PRIORITY_HIGH) { return 0; } assert(priority == PRIORITY_NORMAL); assert(gbus.priority == PRIORITY_NORMAL); assert(thread_id <= nproc); assert(thread_id >= 0); assert(gbus.winner_id != thread_id); //compare the distance between the requesters and the last winner int winner_offset = ( nproc + nproc + gbus.winner_id - (gbus.last_winner_id + 1)) % nproc; int new_requester_offset = (nproc + nproc + thread_id - (gbus.last_winner_id + 1)) % nproc; //printf("is_higher_priority: %d %d %d %d %d \n", thread_id, gbus.winner_id, gbus.last_winner_id, new_requester_offset, winner_offset); if (new_requester_offset < winner_offset) { return 1; } else { assert(new_requester_offset > winner_offset); return 0; } } int arbitrate_bus(int thread_id, int cmd, int addr, int priority, int phase) { #pragma omp barrier #pragma omp single { // reset bus gbus.winner_id = -1; gbus.winner_addr = -1; gbus.winner_cmd = 0; gbus.priority = PRIORITY_NORMAL; if (phase == PHASE_ACTION) { sim_cycle++; } } // printf("arbitrate bus: THD:%d CMD:%s ADDR:%d PRI:%s \n", // thread_id, // bus_cmd_names[cmd], // addr, // priority_names[priority]); if (cmd != BUS_NULL) { #pragma omp critical { if (is_higher_priority(thread_id, priority)) { gbus.winner_id = thread_id; gbus.winner_cmd = cmd; gbus.winner_addr = addr; gbus.priority = priority; } } } #pragma omp barrier #pragma omp master { printf("C:%d P:%s THD:%d CMD:%s ADDR:%d PRI:%s\n", sim_cycle, phase_names[phase], gbus.winner_id, bus_cmd_names[gbus.winner_cmd], gbus.winner_addr, priority_names[gbus.priority]); } if (gbus.winner_id == thread_id) { return 1; } return 0; }
About
Practice
Resources
Stars
Watchers
Forks
Releases
No releases published
Packages 0
No packages published