From d8dc8913ac5d1e6c9654217481ba9fc76d0b909b Mon Sep 17 00:00:00 2001 From: Engin Kayraklioglu Date: Fri, 28 Jul 2017 13:17:04 -0400 Subject: [PATCH 1/4] Add graph500.chpl --- benchmark/graph500/graph500.chpl | 487 +++++++++++++++++++++++++++++++ 1 file changed, 487 insertions(+) create mode 100644 benchmark/graph500/graph500.chpl diff --git a/benchmark/graph500/graph500.chpl b/benchmark/graph500/graph500.chpl new file mode 100644 index 0000000..cc5bd63 --- /dev/null +++ b/benchmark/graph500/graph500.chpl @@ -0,0 +1,487 @@ + +use Random; +use BlockDist; +use LayoutCSR; +use Time; +use SyncList; +use Barrier; + +var r = new RandomStream(int, seed = 13); + +/** + * Generates (0, 1, 2, 3) with probabilities (0.57, 0.19, 0.19, 0.05) + */ +proc g500_generate_quartile(){ + const DENOM = 100; + const (A_NUM,B_NUM,C_NUM,D_NUM) = (57,19,19,5); + + var rand_num = (abs((r.getNext())%DENOM):int); + + if rand_num < A_NUM then return 0; + rand_num -= A_NUM; + if rand_num < B_NUM then return 1; + rand_num -= B_NUM; + if rand_num < C_NUM then return 2; + return 3; +} + +iter g500_vpair_generator(verticeIDs: [] int, numVertices: int, + edgefactor = 16, prng_seed=13){ + var r = new RandomStream(int, seed=prng_seed); + var u,v,q: int; + var quartile_size = numVertices/2; + + const numEdges = numVertices*edgefactor; + for edge in 1..numEdges { + (u,v) = (0,0); + var nv_temp = numVertices; + while nv_temp > 1 { + q = g500_generate_quartile(); + nv_temp /= 2; + u += nv_temp * (q/2); + v += nv_temp * (q%2); + } + + yield (verticeIDs[u], verticeIDs[v]); + } +} + +proc createGraph500(scale: int, edgefactor=16, prng_seed=13){ + const numVertices = 1< 1 { + writeln("Condition 3 fails " + e[1] + " " + e[2] + " " + + getLevel(e[1]) + " " + getLevel(e[2])); + /*writeln("Parents");*/ + /*[i in parents.domain] writeln(i, " ", parents[i]);*/ + /*writeln("Topology");*/ + /*g.print();*/ + /*writeln("Schedule");*/ + /*[i in vertexSpace] writeln(i, " ", g.bfIterator.scheduled_nb[i]);*/ + /*writeln("HAT Status");*/ + /*[l in g.bfs_state.levels] l.printStatus();*/ + return false; + } + } + + //the BFS tree spans an entire connected component's vertices, and + + + //a node and its parent are joined by an edge of the original graph. + + // FIXME this check is not easy do to lack of checking if an index + // exists in a sparseblockdom + /*forall i in 0..#(1< Date: Fri, 28 Jul 2017 13:20:22 -0400 Subject: [PATCH 2/4] Add skeleton build system for graph500 --- benchmark/graph500/Makefile | 5 +++++ benchmark/graph500/bin/.gitignore | 2 ++ 2 files changed, 7 insertions(+) create mode 100644 benchmark/graph500/Makefile create mode 100644 benchmark/graph500/bin/.gitignore diff --git a/benchmark/graph500/Makefile b/benchmark/graph500/Makefile new file mode 100644 index 0000000..3049e1e --- /dev/null +++ b/benchmark/graph500/Makefile @@ -0,0 +1,5 @@ +graph500: graph500.chpl + chpl -M../../queues -M../../queues/local -o bin/$@ $^ + +clean: + rm bin/graph500* diff --git a/benchmark/graph500/bin/.gitignore b/benchmark/graph500/bin/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/benchmark/graph500/bin/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From e6a58f1e3026ae5c7b840bf728f967d278511cd1 Mon Sep 17 00:00:00 2001 From: Louis Jenkins Date: Fri, 18 Aug 2017 14:40:39 -0400 Subject: [PATCH 3/4] Graph500 unit testing --- benchmark/graph500/Makefile | 2 +- benchmark/graph500/graph500.chpl | 95 +++++++++++++------------------ collections/DistributedDeque.chpl | 19 ++++++- collections/SynchronizedList.chpl | 12 ++++ 4 files changed, 69 insertions(+), 59 deletions(-) diff --git a/benchmark/graph500/Makefile b/benchmark/graph500/Makefile index 3049e1e..f646821 100644 --- a/benchmark/graph500/Makefile +++ b/benchmark/graph500/Makefile @@ -1,5 +1,5 @@ graph500: graph500.chpl - chpl -M../../queues -M../../queues/local -o bin/$@ $^ + chpl -M../../collections -o bin/$@ $^ clean: rm bin/graph500* diff --git a/benchmark/graph500/graph500.chpl b/benchmark/graph500/graph500.chpl index cc5bd63..dd3edb3 100644 --- a/benchmark/graph500/graph500.chpl +++ b/benchmark/graph500/graph500.chpl @@ -3,7 +3,7 @@ use Random; use BlockDist; use LayoutCSR; use Time; -use SyncList; +use SynchronizedList; use Barrier; var r = new RandomStream(int, seed = 13); @@ -22,12 +22,12 @@ proc g500_generate_quartile(){ if rand_num < B_NUM then return 1; rand_num -= B_NUM; if rand_num < C_NUM then return 2; - return 3; + return 3; } -iter g500_vpair_generator(verticeIDs: [] int, numVertices: int, +iter g500_vpair_generator(verticeIDs: [] int, numVertices: int, edgefactor = 16, prng_seed=13){ - var r = new RandomStream(int, seed=prng_seed); + var r = new RandomStream(int, seed=prng_seed); var u,v,q: int; var quartile_size = numVertices/2; @@ -36,7 +36,7 @@ iter g500_vpair_generator(verticeIDs: [] int, numVertices: int, (u,v) = (0,0); var nv_temp = numVertices; while nv_temp > 1 { - q = g500_generate_quartile(); + q = g500_generate_quartile(); nv_temp /= 2; u += nv_temp * (q/2); v += nv_temp * (q%2); @@ -114,8 +114,16 @@ proc getBFSParentArray(g, key: int, ref parents: [] int){ // you can assume queues to be a "work queue": // distributed/unbounded/concurrent-safe enqueue-dequeue - var l0 = new SyncList(int); - var l1 = new SyncList(int); + var l0 : Collection(int); + var l1 : Collection(int); + + if isBag { + l0 = new DistributedBag(int); + l1 = new DistributedBag(int); + } else if isDeque { + l0 = new DistributedDeque(int); + l1 = new DistributedDeque(int); + } inline proc produceQueue { if step%2 == 0 then return l0; @@ -127,54 +135,31 @@ proc getBFSParentArray(g, key: int, ref parents: [] int){ else return l1; } - inline proc uglyClearWorkAround() { - while true { - var (x,exists) = consumeQueue.dequeue(); - if !exists then break; - } - } - //parent[i] = j means j is an immediate parent of i in breadth first //iteration that starts from the root. - consumeQueue.enqueue(key); - + consumeQueue.add(key); + produceQueue.freeze(); var b = new Barrier(numLocales*here.maxTaskPar); //as long as there vertices to be visited - while consumeQueue.data.size != 0 { - coforall l in Locales with (ref step) do on l { - coforall t in 0..#here.maxTaskPar with (ref step) { - while true { - var (exists, v) = consumeQueue.dequeue(); - if exists { - for n in g.dimIter(2,v) { // assume a serial `neighbors` iterator - /*writeln("\t", v, ",", n);*/ - - // note that this benign race condition is allowed. but you can - // assume otherwise, if that makes things easier - - // than the enqueue should be enclosed in a CAS conditional on - // parent[n] - if parents[n] == -1 { // doesn't have a parent yet (unvisited) - parents[n] = v; // "visit" - produceQueue.enqueue(n); // add to the "to-be-visited" queue - } - } - } - else { - break; - } - } - //proceed to the next level - b.barrier(); - if l.id==0 && t==0 { - uglyClearWorkAround(); - /*writeln("Stepping ", consumeQueue.data.size, " ",*/ - /*produceQueue.data.size);*/ - step += 1; + while !consumeQueue.isEmpty() { + forall v in consumeQueue { + for n in g.dimIter(2,v) { // assume a serial `neighbors` iterator + /*writeln("\t", v, ",", n);*/ + + // note that this benign race condition is allowed. but you can + // assume otherwise, if that makes things easier + + // than the enqueue should be enclosed in a CAS conditional on + // parent[n] + if parents[n] == -1 { // doesn't have a parent yet (unvisited) + parents[n] = v; // "visit" + produceQueue.add(n); // add to the "to-be-visited" queue } } } + consumeQueue.clear(); + step += 1; } } @@ -182,7 +167,7 @@ proc getBFSStats(parents: [] int, g) { var traversedEdges = 0, traversedNodes = 0; for i in 0..#(1< 1 { - writeln("Condition 3 fails " + e[1] + " " + e[2] + " " + + writeln("Condition 3 fails " + e[1] + " " + e[2] + " " + getLevel(e[1]) + " " + getLevel(e[2])); /*writeln("Parents");*/ /*[i in parents.domain] writeln(i, " ", parents[i]);*/ @@ -348,7 +333,7 @@ proc main(){ if debugKey != -1 { writeln("Starting"); - t.start(); + t.start(); getBFSParentArray(g, debugKey, parents); t.stop(); @@ -371,7 +356,7 @@ proc main(){ writef("%3i %10i ", i, sampleKeys[i]); - t.start(); + t.start(); getBFSParentArray(g, sampleKeys[i], parents); t.stop(); @@ -382,9 +367,9 @@ proc main(){ writef("%10.06r %15.6er", t.elapsed(), te/t.elapsed()); /*writeln("Parents: \n", parents);*/ - if validation then writef("%10s", - if validate(g, sampleKeys[i], parents) - then "Success" + if validation then writef("%10s", + if validate(g, sampleKeys[i], parents) + then "Success" else "FAIL"); else writef("%10s", "N/A"); diff --git a/collections/DistributedDeque.chpl b/collections/DistributedDeque.chpl index 9324d5a..93034d1 100644 --- a/collections/DistributedDeque.chpl +++ b/collections/DistributedDeque.chpl @@ -9,7 +9,12 @@ use Collection; of checking whether you may proceed with an operation, that is wait-free under most cases, lock-free in the worst case, which guarantees scalability. In the barrier, we perform freeze checks (if DEQUE_NO_FREEZE is not enabled), and bounds - checking (if a capacity is given). + checking (if a capacity is given). Deque operations are separated into two + levels: global and local. At a global level, we use simple fetchAdd and fetchSub + counters to denote which local deque we apply our operation to, and at a local level + we use an unrolled linked list which further has its own (non-atomic) counter. + By maintaining an + */ /* @@ -791,7 +796,11 @@ class DistributedDeque : Collection { } } - iter FIFO() { + /* + Iterates over the deque in First-In-First-Out order, from front to back. The + deque must be frozen or it will result in a halt. This operation is sequential. + */ + iter FIFO() : eltType { if !isFrozen() { halt("Ordered iteration requires the queue to be frozen."); } @@ -850,7 +859,11 @@ class DistributedDeque : Collection { } } - iter LIFO() { + /* + Iterates over the deque in Last-In-First-Out order, from back to front. The + deque must be frozen or it will result in a halt. This operation is sequential. + */ + iter LIFO() : eltType { if !isFrozen() { halt("Ordered iteration requires the queue to be frozen."); } diff --git a/collections/SynchronizedList.chpl b/collections/SynchronizedList.chpl index af0ff50..f2ca026 100644 --- a/collections/SynchronizedList.chpl +++ b/collections/SynchronizedList.chpl @@ -10,6 +10,7 @@ class SynchronizedList : Collection { var head : SynchronizedListNode(eltType); var tail : SynchronizedListNode(eltType); var lock$ : sync bool; + var _size : int; proc add(elt : eltType) : bool { on this { @@ -22,8 +23,11 @@ class SynchronizedList : Collection { tail.next = node; tail = node; } + this._size += 1; lock$; } + + return true; } @@ -44,9 +48,17 @@ class SynchronizedList : Collection { head = head.next; delete tmp; } + + this._size -= 1; } + lock$; } + return (hasElem, elem); } + + proc size() : int { + return _size; + } } From 0253a8673498afdb197c840069effa1f39e91004 Mon Sep 17 00:00:00 2001 From: Louis Jenkins Date: Fri, 18 Aug 2017 16:28:12 -0400 Subject: [PATCH 4/4] Segfault with graph500 --- benchmark/graph500/graph500.chpl | 23 +- bt.svg | 689 +++++++++++++++++++++++++++++++ 2 files changed, 698 insertions(+), 14 deletions(-) create mode 100644 bt.svg diff --git a/benchmark/graph500/graph500.chpl b/benchmark/graph500/graph500.chpl index dd3edb3..286a055 100644 --- a/benchmark/graph500/graph500.chpl +++ b/benchmark/graph500/graph500.chpl @@ -3,7 +3,8 @@ use Random; use BlockDist; use LayoutCSR; use Time; -use SynchronizedList; +use DistributedBag; +use DistributedDeque; use Barrier; var r = new RandomStream(int, seed = 13); @@ -106,24 +107,17 @@ proc numActualNeighbors(g, v) { return count; } +config param isBag = false; + proc getBFSParentArray(g, key: int, ref parents: [] int){ var step = 0; parents[key] = key; - // you can assume queues to be a "work queue": - // distributed/unbounded/concurrent-safe enqueue-dequeue - var l0 : Collection(int); - var l1 : Collection(int); - - if isBag { - l0 = new DistributedBag(int); - l1 = new DistributedBag(int); - } else if isDeque { - l0 = new DistributedDeque(int); - l1 = new DistributedDeque(int); - } + // Our data structures... + var l0 = (if isBag then new DistributedBag(int) else new DistributedDeque(int)); + var l1 = (if isBag then new DistributedBag(int) else new DistributedDeque(int)); inline proc produceQueue { if step%2 == 0 then return l0; @@ -139,10 +133,10 @@ proc getBFSParentArray(g, key: int, ref parents: [] int){ //iteration that starts from the root. consumeQueue.add(key); - produceQueue.freeze(); var b = new Barrier(numLocales*here.maxTaskPar); //as long as there vertices to be visited while !consumeQueue.isEmpty() { + consumeQueue.freeze(); forall v in consumeQueue { for n in g.dimIter(2,v) { // assume a serial `neighbors` iterator /*writeln("\t", v, ",", n);*/ @@ -158,6 +152,7 @@ proc getBFSParentArray(g, key: int, ref parents: [] int){ } } } + consumeQueue.unfreeze(); consumeQueue.clear(); step += 1; } diff --git a/bt.svg b/bt.svg new file mode 100644 index 0000000..0bf4777 --- /dev/null +++ b/bt.svg @@ -0,0 +1,689 @@ + + + + + + +G + + +0 + +/ + + +315179265 + +_start@start.S:122 + + +0->315179265 + + +[0-31] + + +-691397763 + +[empty]@0xffffffffffffffff + + +0->-691397763 + + +[0-28,30] + + +-2093759535 + +start_thread@pthread_create.c:309 + + +0->-2093759535 + + +[0-31] + + +-2134501904 + +[Fault Summary] + + +0->-2134501904 + + +[0,5,7,11,13-15,19-28,30] + + +762131901 + +qthread_master$$CFE_id_58403e1c_qlib@qthread.c:536 + + +0->762131901 + + +[1-31] + + +-347316729 + +__libc_start_main@libc-start.c:285 + + +315179265->-347316729 + + +[0-31] + + +-1919329781 + +main@main.c:32 + + +-347316729->-1919329781 + + +[0] + + +-1919329777 + +main@main.c:36 + + +-347316729->-1919329777 + + +[1-31] + + +1032997230 + +chpl_task_callMain@tasks-qthreads.c:857 + + +-1919329781->1032997230 + + +[0] + + +498383936 + +qthread_syncvar_blocker_func$$CFE_id_7902d5df_e51184f1@syncvar.c:333 + + +1032997230->498383936 + + +[0] + + +395734293 + +__lll_lock_elision@elision-lock.c:94 + + +498383936->395734293 + + +[0] + + +-236289326 + +__lll_lock_elision@0x203125c9 + + +395734293->-236289326 + + +[0] + + +314791733 + +__lll_lock_wait@lowlevellock.S:135 + + +-236289326->314791733 + + +[0] + + +1539693960 + +qthread_wrapper$$CFE_id_58403e1c_qlib@qthread.c:2292 + + +-691397763->1539693960 + + +[0] + + +1539693967 + +qthread_wrapper$$CFE_id_58403e1c_qlib@qthread.c:2299 + + +-691397763->1539693967 + + +[0-28,30] + + +909284907 + +main_wrapper$$CFE_id_70d80183_ed8f0e2b@tasks-qthreads.c:791 + + +1539693960->909284907 + + +[0] + + +982260061 + +chpl_executable_init@chpl-init.c:268 + + +909284907->982260061 + + +[0] + + +1830505643 + +chpl_gen_main@graph500.chpl:279 + + +982260061->1830505643 + + +[0] + + +598495046 + +chpl_user_main$$CFE_id_f9ee5c55_9b0c5384@graph500.chpl:287 + + +1830505643->598495046 + + +[0] + + +-297919505 + +createGraph500_chpl$$CFE_id_f9ee5c55_9b0c5384@graph500.chpl:78 + + +598495046->-297919505 + + +[0] + + +825781990 + +chpl___PLUS__ASSIGN_$$CFE_id_f9ee5c55_9b0c5384@ChapelDistribution.chpl:606 + + +-297919505->825781990 + + +[0] + + +1725954841 + +dsiBulkAdd$$CFE_id_f9ee5c55_9b0c5384@ChapelDistribution.chpl:407 + + +825781990->1725954841 + + +[0] + + +1273159494 + +bulkAdd_help_chpl3$$CFE_id_f9ee5c55_9b0c5384@SparseBlockDist.chpl:163 + + +1725954841->1273159494 + + +[0] + + +-2024049826 + +_waitEndCount3$$CFE_id_f9ee5c55_9b0c5384@ChapelBase.chpl:964 + + +1273159494->-2024049826 + + +[0] + + +-1119827012 + +on_fn136$$CFE_id_f9ee5c55_9b0c5384@NetworkAtomics.chpl:170 + + +-2024049826->-1119827012 + + +[0] + + +-1782324773 + +do_remote_get$$CFE_id_f94beb93_9960313b@comm-ugni.c:4059 + + +-1119827012->-1782324773 + + +[0] + + +-1034640353 + +post_fma_and_wait$$CFE_id_f94beb93_9960313b@comm-ugni.c:6171 + + +-1782324773->-1034640353 + + +[0] + + +625971348 + +post_fma$$CFE_id_f94beb93_9960313b@comm-ugni.c:6147 + + +-1034640353->625971348 + + +[0] + + +-907068018 + +acquire_comm_dom$$CFE_id_f94beb93_9960313b@comm-ugni.c:5928 + + +625971348->-907068018 + + +[0] + + +-274508891 + +atomic_load_explicit_bool$$CFE_id_f94beb93_9960313b@chpl-atomics.h:321 + + +-907068018->-274508891 + + +[0] + + +-852437746 + +qthread_master$$CFE_id_58403e1c_qlib@qthread.c:536 + + +-2093759535->-852437746 + + +[0-31] + + +1425139660 + +comm_task_wrapper$$CFE_id_70d80183_ed8f0e2b@tasks-qthreads.c:831 + + +-2093759535->1425139660 + + +[0-31] + + +-1396505020 + +qt_scheduler_get_thread@nemesis_threadqueues.c:397 + + +-852437746->-1396505020 + + +[0-31] + + +859176990 + +__pthread_cond_timedwait@pthread_cond_timedwait.S:238 + + +-1396505020->859176990 + + +[0-31] + + +1615102860 + +chapel_wrapper$$CFE_id_70d80183_ed8f0e2b@tasks-qthreads.c:812 + + +1539693967->1615102860 + + +[0-28,30] + + +-919839913 + +wrapon_fn_chpl64$$CFE_id_f9ee5c55_9b0c5384@SparseBlockDist.chpl:163 + + +1615102860->-919839913 + + +[0] + + +1940492027 + +fork_call_wrapper_large$$CFE_id_f94beb93_9960313b@comm-ugni.c:2936 + + +1615102860->1940492027 + + +[1-28,30] + + +1063170007 + +on_fn_chpl64$$CFE_id_f9ee5c55_9b0c5384@SparseBlockDist.chpl:164 + + +-919839913->1063170007 + + +[0] + + +1890331699 + +bulkAdd$$CFE_id_f9ee5c55_9b0c5384@ChapelArray.chpl:1336 + + +1063170007->1890331699 + + +[0] + + +-1669328512 + +dsiBulkAdd2$$CFE_id_f9ee5c55_9b0c5384@ChapelDistribution.chpl:410 + + +1890331699->-1669328512 + + +[0] + + +40334548 + +bulkAdd_help_chpl$$CFE_id_f9ee5c55_9b0c5384@LayoutCSR.chpl:300 + + +-1669328512->40334548 + + +[0] + + +963249411 + +__cray_EXP2@0x2058d6cd + + +40334548->963249411 + + +[0] + + +-415726767 + +__cray_EXP2_Z_03@0x2058d70b + + +963249411->-415726767 + + +[0] + + +1511576705 + +polling_task$$CFE_id_f94beb93_9960313b@comm-ugni.c:2176 + + +1425139660->1511576705 + + +[0-31] + + +2020477110 + +sched_yield@syscall-template.S:81 + + +1511576705->2020477110 + + +[0-31] + + +549102563 + +[SIGILL(4)] + + +-2134501904->549102563 + + +[0,5,7,11,13-15,19-28,30] + + +741169504 + +chpl_exit_common$$CFE_id_85563794_39134d7b_clone_6119_1@chplexit.c:54 + + +-1919329777->741169504 + + +[1-31] + + +988388986 + +chpl_comm_pre_task_exit@comm-ugni.c:2617 + + +741169504->988388986 + + +[1-31] + + +-1335818896 + +chpl_comm_barrier@comm-ugni.c:2586 + + +988388986->-1335818896 + + +[1-31] + + +1819904903 + +sched_yield@syscall-template.S:81 + + +-1335818896->1819904903 + + +[1-31] + + +-533771405 + +qt_scheduler_get_thread@nemesis_threadqueues.c:397 + + +762131901->-533771405 + + +[1-31] + + +1277266351 + +__pthread_cond_timedwait@pthread_cond_timedwait.S:238 + + +-533771405->1277266351 + + +[1-31] + + +-1223359704 + +wrapon_fn_chpl64$$CFE_id_f9ee5c55_9b0c5384@SparseBlockDist.chpl:163 + + +1940492027->-1223359704 + + +[1-28,30] + + +-706931770 + +on_fn_chpl64$$CFE_id_f9ee5c55_9b0c5384@SparseBlockDist.chpl:164 + + +-1223359704->-706931770 + + +[1-28,30] + + +639807108 + +bulkAdd$$CFE_id_f9ee5c55_9b0c5384@ChapelArray.chpl:1336 + + +-706931770->639807108 + + +[1-28,30] + + +-927059089 + +dsiBulkAdd2$$CFE_id_f9ee5c55_9b0c5384@ChapelDistribution.chpl:410 + + +639807108->-927059089 + + +[1-28,30] + + +1840016963 + +bulkAdd_help_chpl$$CFE_id_f9ee5c55_9b0c5384@LayoutCSR.chpl:300 + + +-927059089->1840016963 + + +[1-28,30] + + +-1651790862 + +__cray_EXP2@0x2058d6cd + + +1840016963->-1651790862 + + +[1-10,12-28,30] + + +-38075676 + +__cray_EXP2_03@0x2058d6cd + + +1840016963->-38075676 + + +[11] + + +-874276446 + +__cray_EXP2_Z_03@0x2058d70b + + +-1651790862->-874276446 + + +[1-10,12-28,30] + + +-1131396048 + +__cray_EXP2_Z_03@0x2058d70b + + +-38075676->-1131396048 + + +[11] + + +