Skip to content

Commit

Permalink
Fixed setup on EC2 nodes, added nemeses for pausing, killing, and skewing
Browse files Browse the repository at this point in the history
  • Loading branch information
aphyr committed Nov 11, 2016
1 parent 9cb698d commit ecf606e
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 44 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ pom.xml.asc
/.nrepl-port
.hgignore
.hg/
.lein-repl-history
.lein-deps-sum
.lein-failures
*~
.*.swp
/store
2 changes: 1 addition & 1 deletion project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
:url "http://www.eclipse.org/legal/epl-v10.html"}
:dependencies [[org.clojure/clojure "1.8.0"]
[org.clojure/tools.cli "0.3.3"]
[jepsen "0.1.3"]
[jepsen "0.1.4-SNAPSHOT"]
[org.mongodb/mongodb-driver "3.4.0-rc1"]]
:jvm-opts ["-Xmx16g"
"-Xms16g"
Expand Down
Binary file removed src/jepsen/mongodb/.core.clj.swp
Binary file not shown.
Binary file removed src/jepsen/mongodb/.runner.clj.swp
Binary file not shown.
115 changes: 74 additions & 41 deletions src/jepsen/mongodb/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
[clojure.walk :as walk]
[jepsen [core :as jepsen]
[db :as db]
[util :as util :refer [meh timeout]]
[util :as util :refer [meh
random-nonempty-subset
timeout]]
[control :as c :refer [|]]
[client :as client]
[checker :as checker]
Expand All @@ -16,6 +18,7 @@
[store :as store]
[report :as report]
[tests :as tests]]
[jepsen.nemesis.time :as nt]
[jepsen.control [net :as net]
[util :as cu]]
[jepsen.os.debian :as debian]
Expand All @@ -33,40 +36,44 @@
(cu/ensure-user! username)

; Download tarball
(let [local-file (nth (re-find #"file://(.+)" url) 1)
file (or local-file (c/cd "/tmp" (str "/tmp/" (cu/wget! url))))]
(try
(c/cd "/opt"
; Clean up old dir
(c/exec :rm :-rf "mongodb")
; Create mongodb & data dir
(c/exec :mkdir :-p "mongodb/data")
; Extract to mongodb
(c/exec :tar :xvf file :-C "mongodb" :--strip-components=1)
; Permissions
(c/exec :chown :-R (str username ":" username) "mongodb"))
(catch RuntimeException e
(condp re-find (.getMessage e)
#"tar: Unexpected EOF"
(if local-file
; Nothing we can do to recover here
(throw (RuntimeException.
(str "Local tarball " local-file " on node " (name node)
" is corrupt: unexpected EOF.")))
(do (info "Retrying corrupt tarball download")
(c/exec :rm :-rf file)
(install! node url)))

; Throw by default
(throw e))))))
(c/su
(let [local-file (nth (re-find #"file://(.+)" url) 1)
file (or local-file (c/cd "/tmp" (str "/tmp/" (cu/wget! url))))]
(try
(c/cd "/opt"
; Clean up old dir
(c/exec :rm :-rf "mongodb")
; Create mongodb & data dir
(c/exec :mkdir :-p "mongodb/data")
; Extract to mongodb
(c/exec :tar :xvf file :-C "mongodb" :--strip-components=1)
; Permissions
(c/exec :chown :-R (str username ":" username) "mongodb"))
(catch RuntimeException e
(condp re-find (.getMessage e)
#"tar: Unexpected EOF"
(if local-file
; Nothing we can do to recover here
(throw (RuntimeException.
(str "Local tarball " local-file " on node " (name node)
" is corrupt: unexpected EOF.")))
(do (info "Retrying corrupt tarball download")
(c/exec :rm :-rf file)
(install! node url)))

; Throw by default
(throw e)))))))

(defn configure!
"Deploy configuration files to the node."
[test node]
(c/exec :echo (-> "mongod.conf" io/resource slurp
(str/replace #"%STORAGE_ENGINE%" (:storage-engine test))
(str/replace #"%PROTOCOL_VERSION%" (:protocol-version test)))
:> "/opt/mongodb/mongod.conf"))
(c/sudo username
(c/exec :echo (-> "mongod.conf" io/resource slurp
(str/replace #"%STORAGE_ENGINE%"
(:storage-engine test))
(str/replace #"%PROTOCOL_VERSION%"
(:protocol-version test)))
:> "/opt/mongodb/mongod.conf")))

(defn start!
"Starts Mongod"
Expand All @@ -84,13 +91,16 @@
"Stops Mongod"
[test node]
(cu/stop-daemon! "mongod" "/opt/mongodb/pidfile")
(c/su (c/exec :killall :-9 "mongod"))
:stopped)

(defn savelog!
  "Copies Mongod's mongod.log and stdout.log from /opt/mongodb to /root/.

  Before copying, creates /opt/mongodb if needed, chowns it to `username`,
  and touches both log files, so the final cp always has source files to
  work with. Every command is issued through c/exec inside c/su. `node` is
  only used in the log message."
  [node]
  (info node "copying mongod.log & stdout.log file to /root/")
  (let [mongo-dir  "/opt/mongodb"
        mongod-log "/opt/mongodb/mongod.log"
        stdout-log "/opt/mongodb/stdout.log"
        owner      (str username ":" username)]
    (c/su
      ; Ensure the directory and (possibly empty) log files exist
      (c/exec :mkdir :-p mongo-dir)
      (c/exec :chown owner mongo-dir)
      (c/exec :touch mongod-log stdout-log)
      ; Copy both logs out
      (c/exec :cp :-f mongod-log stdout-log "/root/"))))

Expand Down Expand Up @@ -155,10 +165,10 @@
(m/admin-command! conn :replSetReconfig conf))

(defn node+port->node
"Take a mongo \"n1:27107\" string and return just the node as a keyword:
"Take a mongo \"n1:27107\" string and return just the node as a string:
:n1."
[s]
(keyword ((re-find #"(\w+?):" s) 1)))
((re-find #"(.+):\d+" s) 1))

(defn primaries
"What nodes does this conn think are primaries?"
Expand Down Expand Up @@ -223,7 +233,7 @@
"Block until all nodes in the test are known to this connection's replset
status"
[test conn]
(while (try (not= (set (:nodes test))
(while (try (not= (set (map name (:nodes test)))
(->> (replica-set-status conn)
:members
(map :name)
Expand All @@ -234,6 +244,11 @@
(get-in (ex-data e) [:result "errmsg"]))
true
(throw e))))
(info :replica-set-status (with-out-str (->> (replica-set-status conn)
:members
(map :name)
(map node+port->node)
pprint)))
(Thread/sleep 1000)))

(defn target-replica-set-config
Expand Down Expand Up @@ -302,8 +317,9 @@
(setup! [_ test node]
(util/timeout 300000
(throw (RuntimeException.
(str "Mongo setup on " node "timed out!")))
(str "Mongo setup on " node " timed out!")))
(debian/install [:libc++1 :libsnmp30])
(nt/install!)
(install! node url)
(configure! test node)
(start! test node)
Expand Down Expand Up @@ -349,20 +365,37 @@
{:type :info :f :stop}
{:type :info :f :start}])))))

(defn random-nonempty-subset
  "Returns a randomly ordered selection of the given nodes: the collection is
  shuffled and the first k elements are taken, where k is chosen uniformly
  from 1 to (count nodes) inclusive. An empty collection yields an empty
  result."
  [nodes]
  (let [how-many (inc (rand-int (count nodes)))]
    (->> nodes
         shuffle
         (take how-many))))

(defn kill-nem
"A nemesis that kills/restarts Mongo on randomly selected nodes."
[]
(nemesis/node-start-stopper random-nonempty-subset start! stop!))
(nemesis/node-start-stopper random-nonempty-subset start! stop!))

(defn pause-nem
  "Constructs a nemesis that pauses the \"mongod\" process. Built with
  nemesis/hammer-time, using random-nonempty-subset to choose which nodes
  are targeted."
  []
  (let [process-name "mongod"]
    (nemesis/hammer-time random-nonempty-subset process-name)))

(defn clock-skew-nem
  "A nemesis that skews clocks on a random subset of nodes by dt seconds.

  Lifecycle:
    setup!     resets clocks via (nt/reset-time! test) and returns this.
    teardown!  resets clocks via (nt/reset-time! test).

  Operations, dispatched on (:f op); the op is returned with :value set:
    :start  Runs on the test's nodes via c/with-test-nodes. Each node, with
            probability 1/2, has its clock bumped by (* 1000 dt) through
            nt/bump-time! and yields dt; otherwise it is left alone and
            yields 0. :value is whatever c/with-test-nodes returns.
    :stop   Resets clocks via (nt/reset-time! test). :value is the result of
            that reset.

  Any other :f has no matching case clause and throws."
  [dt]
  (reify client/Client
    (setup! [this test _]
      (nt/reset-time! test)
      this)

    (invoke! [this test op]
      (assoc op :value
             (case (:f op)
               :start (c/with-test-nodes test
                        (if (< (rand) 0.5)
                          (do (nt/bump-time! (* 1000 dt))
                              dt)
                          0))
               ; Bind the reset result so it becomes the op's :value.
               ; Previously the logging call was the last form here, so the
               ; reset result was logged and then dropped from the history.
               ; NOTE(review): c/*host* is read outside c/with-test-nodes
               ; here, so it may not name a node -- confirm.
               :stop (let [reset-result (nt/reset-time! test)]
                       (info c/*host* "clock reset:" reset-result)
                       reset-result))))

    (teardown! [this test]
      (nt/reset-time! test))))

(defn test-
"Constructs a test with the given name prefixed by 'mongodb ', merging any
given options. Special options for Mongo:
Expand All @@ -379,5 +412,5 @@
:os debian/os
:db (db (:tarball opts))
:checker (checker/perf)
:nemesis (pause-nem))
:nemesis (clock-skew-nem 60000))
opts))
7 changes: 5 additions & 2 deletions src/jepsen/mongodb/runner.clj
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,18 @@
:validate [(complement neg?) "Must be non-negative"]]

[nil "--tarball URL" "URL for the Mongo tarball to install. May be either HTTP, HTTPS, or a local file. For instance, --tarball https://foo.com/mongo.tgz, or file:///tmp/mongo.tgz"
:default "https://fastdl.mongodb.org/linux/mongodb-linux-x86_64-debian81-3.4.0-rc2.tgz"
:default "https://fastdl.mongodb.org/linux/mongodb-linux-x86_64-debian81-3.4.0-rc3.tgz"
:validate [(partial re-find #"^(file|https?)://.*\.(tar\.gz|tgz)")
"Must be a file://, http://, or https:// URL ending in .tar.gz or .tgz"]]
])

(defn test-cmd
[]
{"test" {:opt-spec (into jc/test-opt-spec opt-spec)
:opt-fn #(-> % (jc/rename-options {:node :nodes}))
:opt-fn #(-> %
(jc/rename-options {:node :nodes})
jc/validate-tarball
jc/read-nodes-file)
:usage jc/test-usage
:run (fn [{:keys [options]}]
(info "Test options:\n" (with-out-str (pprint options)))
Expand Down

0 comments on commit ecf606e

Please sign in to comment.