#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
use DBI;
# PXC GTID replication consistency checker
# [email protected] - 2015-03-25
# This script is designed to monitor IST replication state between
# PXC wsrep nodes for various SQL instructions. An example of a
# bug this will test for is lp1421360. For reliable results,
# make sure there is no other traffic on the test cluster.
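#
# Example invocation (node addresses and credentials below are illustrative):
#   perl ./pxc-replication-check.pl -n 192.168.56.101,192.168.56.102 -u root -p secret -w 2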
my $VERSION='1.1';
### default config section, this can all be specified on command line ###
# first node in array is donor
my @nodes=('localhost');
my $user='root';
my $password='password';
my $database='test';
my $waitforIST=1; # seconds
my $input='';
my $dsn;
# these are the default SQL instructions that will be read
my $data=<<'_EOD_';
CREATE TABLE IF NOT EXISTS `my_test_table` (`id` INT PRIMARY KEY AUTO_INCREMENT, `value` CHAR(32)) ENGINE=InnoDB;
INSERT INTO `my_test_table` (`value`) VALUES (md5(rand()));
ANALYZE TABLE `my_test_table`;
OPTIMIZE TABLE `my_test_table`;
REPAIR TABLE `my_test_table`;
DROP TABLE `my_test_table`;
FLUSH DES_KEY_FILE;
FLUSH HOSTS;
FLUSH LOGS;
FLUSH BINARY LOGS;
FLUSH ENGINE LOGS;
FLUSH ERROR LOGS;
FLUSH GENERAL LOGS;
FLUSH RELAY LOGS;
FLUSH SLOW LOGS;
FLUSH PRIVILEGES;
FLUSH QUERY CACHE;
FLUSH STATUS;
FLUSH TABLES;
FLUSH USER_RESOURCES;
FLUSH TABLES WITH READ LOCK;
UNLOCK TABLES;
CREATE TABLE IF NOT EXISTS `my_test_table` (`id` INT PRIMARY KEY AUTO_INCREMENT, `value` CHAR(32)) ENGINE=InnoDB;
INSERT INTO `my_test_table` (`value`) VALUES (md5(rand()));
FLUSH TABLE `my_test_table`;
FLUSH TABLE `my_test_table` WITH READ LOCK;
UNLOCK TABLES;
FLUSH TABLES `my_test_table` FOR EXPORT;
UNLOCK TABLES;
DROP TABLE `my_test_table`;
_EOD_
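# A custom statement list can be supplied with -i; statements are read one per
# line, and surrounding whitespace plus any trailing ';' are stripped before
# execution (see the main loop below).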
### subroutines ###
# connect to a host
#
# parameter: {host address}
#
# returns: {db handle}
#
sub dbconnect {
    my $host=shift(@_);
    return(DBI->connect($dsn.$host, $user, $password,{ 'RaiseError' => 1, 'AutoCommit' => 1 }));
}
# check to make sure node is in GTID mode and synced
#
# parameters: {host address}
#
# returns: 2 on success, !2 on failure
#
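# (wsrep_local_state 4 corresponds to 'Synced', so a node that is both in
# GTID mode and Synced scores 2)
#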
sub check_node_status {
    my $ret=0;
    my $dbh=dbconnect(shift(@_));
    my $sth=$dbh->prepare('SELECT @@global.gtid_mode');
    $sth->execute();
    if (my $row=$sth->fetchrow_hashref()) {
        $ret=$row->{'@@global.gtid_mode'} eq 'ON';
    }
    $sth->finish();
    if ($ret) {
        $sth=$dbh->prepare("SHOW GLOBAL STATUS LIKE 'wsrep_local_state'");
        $sth->execute();
        if (my $row=$sth->fetchrow_hashref()) {
            $ret += ($row->{'Value'} == 4);
        }
        $sth->finish();
    }
    $dbh->disconnect();
    return($ret);
}
# get the server uuid
#
# parameters: {db handle}
#
# returns server uuid or 0 on failure
#
sub get_server_uuid {
    my $ret=0;
    my $dbh=shift(@_);
    my $sth=$dbh->prepare('SELECT @@global.server_uuid');
    $sth->execute();
    if (my $row=$sth->fetchrow_hashref()) {
        $ret=$row->{'@@global.server_uuid'};
    }
    $sth->finish();
    return($ret);
}
# get last transaction from a gtid component
#
# parameter: {gtid component, either uuid:# or uuid:#-#}
#
# returns: the latest transaction number (last number) or 0 on error
#
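# illustrative examples:
#   '3e11fa47-71ca-11e1-9e33-c80aa9429562:1-27' -> 27
#   '3e11fa47-71ca-11e1-9e33-c80aa9429562:5'    -> 5
#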
sub get_last_transaction {
    my $gtid=shift(@_);
    my $ret=0;
    if ($gtid =~ m/^([^:]+):([0-9]+)-([0-9]+)/) {
        $ret=$3;
    } elsif ($gtid =~ m/^([^:]+):([0-9]+)/) {
        $ret=$2;
    }
    return($ret);
}
# get latest gtid transaction numbers
#
# parameters: {db handle}
#
# returns (glgtn ref): { 'local' => {#}, 'cluster' => {#}, 'wsrep_last_committed' => {#} }
#
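# gtid_executed is a comma-separated GTID set, for example (illustrative UUIDs):
#   3e11fa47-71ca-11e1-9e33-c80aa9429562:1-27,
#   7a9f2e10-71ca-11e1-9e33-c80aa9429111:1-5
# components matching @@global.server_uuid count as 'local', all others as 'cluster'
#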
sub get_latest_gtid_transaction_numbers {
    my $ret={'local'=>0,'cluster'=>0,'wsrep_last_committed'=>0};
    my $dbh=shift(@_);
    # get @@global.gtid_executed
    my $sth=$dbh->prepare('SELECT @@global.gtid_executed');
    $sth->execute();
    my $gtid_executed;
    if (my $row=$sth->fetchrow_hashref()) {
        $gtid_executed=$row->{'@@global.gtid_executed'};
    }
    $sth->finish();
    # parse gtid_executed to get latest local and cluster transaction
    my @gtids=split(/,[\s\r\n]+/m,$gtid_executed);
    if ($#gtids > -1) {
        for my $gtid (@gtids) {
            if (index($gtid,get_server_uuid($dbh)) > -1) {
                $ret->{'local'}=get_last_transaction($gtid);
            } else {
                $ret->{'cluster'}=get_last_transaction($gtid);
            }
        }
    }
    # get wsrep_last_committed
    $sth=$dbh->prepare("SELECT VARIABLE_VALUE FROM ".
        "INFORMATION_SCHEMA.SESSION_STATUS WHERE " .
        "VARIABLE_NAME = 'wsrep_last_committed'");
    $sth->execute();
    if (my $row=$sth->fetchrow_hashref()) {
        $ret->{'wsrep_last_committed'}=$row->{'VARIABLE_VALUE'};
    }
    $sth->finish();
    return($ret);
}
# report replication consistency to STDOUT
#
# parameters: {host addr}, {glgtn ref before}, {glgtn ref after},
#             {'joinerMode'=>0|1}
#
# returns: number of errors reported
#
sub report_replication_consistency {
    my $errs=0;
    my($host,$before,$after,$opts)=@_;
    my $joinerMode=0;
    if (defined $opts) {
        if (defined $opts->{'joinerMode'}) {
            $joinerMode=$opts->{'joinerMode'};
        }
    }
    # check cluster trans
    if ($before->{'cluster'} >= $after->{'cluster'}) {
        print "\tERROR ($host): GTID cluster trans " .
            "before:$before->{'cluster'} " .
            "after:$after->{'cluster'}\n";
        $errs++;
    }
    # check local trans (if not in joiner mode)
    if (!$joinerMode && $before->{'local'} != $after->{'local'}) {
        print "\tERROR ($host): GTID local trans " .
            "before:$before->{'local'} " .
            "after:$after->{'local'}\n";
        $errs++;
    }
    # check wsrep_last_committed
    if ($before->{'wsrep_last_committed'} == $after->{'wsrep_last_committed'}) {
        print "\tERROR ($host): wsrep_last_committed did not advance: $after->{'wsrep_last_committed'}\n";
        $errs++;
    }
    # report if ok
    if (!$errs) {
        print "\tOK ($host): GTID trans#: $after->{'cluster'} seq: $after->{'wsrep_last_committed'}\n";
    }
    return($errs);
}
# get master status position
#
# parameters: {db handle}
#
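# note: this assumes binary logging is enabled on the donor; otherwise
# SHOW MASTER STATUS returns no row and the position stays undefined
#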
sub get_master_status_position {
    my($dbh)=@_;
    # get master position
    my $sth=$dbh->prepare('SHOW MASTER STATUS');
    $sth->execute();
    my $position;
    if (my $row=$sth->fetchrow_hashref()) {
        $position=$row->{'Position'};
    }
    $sth->finish();
    # return the position
    return $position;
}
# usage
#
#
sub HELP_MESSAGE {
    print <<"_EOH_";
Usage: [perl] ./pxc-replication-check.pl {options}
This script will check for consistency between the binary log used for async
replication and the WSREP synchronization status to ensure that everything
that is written to the binary log is replicated to the WSREP cluster.
Note: In order for this script to run properly, you must have a cluster
set up and fully Synced with GTID mode on.
Options:
-h,--help\tPrints this help message and exits.
-i\t\tThe input file to read SQL instructions from.
\t\tBy default, the script reads from the embedded \$data
\t\t(a value of - can be used for standard input)
-n\t\tThe node IP address to communicate with. If multiple
\t\tnodes are specified separated by commas, the secondary
\t\tnodes will be checked as IST joiners. (default: localhost)
-u\t\tMySQL user account to use (default: root)
-p\t\tMySQL password to use (default: password)
-d\t\tMySQL database to use (default: test)
-w\t\tNumber of seconds to wait for IST (default: 1)
-v,--version\tPrints version and exits.
_EOH_
    exit 1;
}
# version
#
sub VERSION_MESSAGE {
    print "pxc-replication-check.pl Version: $VERSION\n\n";
}
### main program ###
$Getopt::Std::STANDARD_HELP_VERSION=1;
my %options=();
getopts('hvu:p:d:w:n:i:',\%options);
# usage and version
if (defined $options{h}) {
    VERSION_MESSAGE();
    HELP_MESSAGE();
}
if (defined $options{v}) {
    VERSION_MESSAGE();
    exit;
}
# parse arguments
if (defined $options{n}) {
    @nodes=split(',',$options{n});
}
if (defined $options{u}) {
    $user=$options{u};
}
if (defined $options{p}) {
    $password=$options{p};
}
if (defined $options{d}) {
    $database=$options{d};
}
if (defined $options{w}) {
    $waitforIST=$options{w};
}
if (defined $options{i}) {
    $input=$options{i};
    # load input from file (or standard input when '-') into $data
    local $/;
    open(SQL_INPUT,"<$input") or die "ERROR: cannot open '$input': $!\n";
    $data = <SQL_INPUT>;
    close(SQL_INPUT);
}
# construct DSN
$dsn="DBI:mysql:database=$database;host="; # host appended
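# e.g. with the defaults this yields 'DBI:mysql:database=test;host=localhost'
# once dbconnect() appends the node address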
# check all nodes are in GTID mode and Synced
for my $host (@nodes) {
    if (check_node_status($host) != 2) {
        die "ERROR ($host): not in GTID mode or not synced\n";
    }
}
print "OK: All nodes are in GTID mode and Synced\n";
# Send SQL data to the first host and check replication consistency between
# nodes
#
my $hostDonor=shift(@nodes);
my $dbhDonor=dbconnect($hostDonor);
my $totalErrors=0;
my $lastMasterPosition=-1;
my $masterPosition=-1;
my $line=0;
for (split /^/, $data) {
    chomp;
    s/^\s+//g;
    s/\s+$//g;
    s/;$//g;
    if ($_ gt '') {
        print ++$line.":".substr($_,0,70).(length($_)>70?'...':'')."\n";
        # record replication position before
        $lastMasterPosition=$masterPosition;
        if ($lastMasterPosition == -1) {
            $lastMasterPosition=get_master_status_position($dbhDonor);
        }
        my $refDonorBefore=get_latest_gtid_transaction_numbers($dbhDonor);
        $dbhDonor->do($_);
        # record replication position after
        $masterPosition=get_master_status_position($dbhDonor);
        my $refDonorAfter=get_latest_gtid_transaction_numbers($dbhDonor);
        if ($masterPosition > $lastMasterPosition) {
            # report findings
            $totalErrors += report_replication_consistency($hostDonor, $refDonorBefore, $refDonorAfter);
            # check joiners
            sleep($waitforIST);
            for my $hostJoiner (@nodes) {
                my $dbhJoiner=dbconnect($hostJoiner);
                my $refJoinerAfter=get_latest_gtid_transaction_numbers($dbhJoiner);
                $totalErrors += report_replication_consistency($hostJoiner,$refDonorBefore,$refJoinerAfter,
                    {'joinerMode'=>1});
                $dbhJoiner->disconnect();
            }
        } else {
            # Make sure cluster GTID didn't change
            if ($refDonorBefore->{'cluster'} != $refDonorAfter->{'cluster'}) {
                print "\tERROR Master position did not increment but cluster sequence did.\n";
                $totalErrors++;
            }
            elsif ($masterPosition == $lastMasterPosition) {
                print "\tOK Master Position unchanged, cluster sequence unchanged.\n";
            } else {
                print "\tOK Master Position reset, cluster sequence unchanged.\n"; # FLUSH LOGS
            }
        }
    }
}
$dbhDonor->disconnect();
if ($totalErrors) {
print "ERRORS REPORTED: $totalErrors\n";
}
1;