226
226
TDEXLogSmgrInitWrite (bool encrypt_xlog )
227
227
{
228
228
WalEncryptionKey * key = pg_tde_read_last_wal_key ();
229
+ WalLocation start = {.tli = 1 ,.lsn = 0 };
230
+ WALKeyCacheRec * keys ;
229
231
230
232
/*
231
233
* Always generate a new key on starting PostgreSQL to protect against
@@ -246,6 +248,14 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
246
248
TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
247
249
}
248
250
251
+ keys = pg_tde_get_wal_cache_keys ();
252
+
253
+ if (keys == NULL )
254
+ {
255
+ /* TODO cache is empty, try to preread keys from disk */
256
+ keys = pg_tde_fetch_wal_keys (start );
257
+ }
258
+
249
259
if (key )
250
260
pfree (key );
251
261
}
@@ -263,6 +273,32 @@ TDEXLogSmgrInitWriteReuseKey()
263
273
}
264
274
}
265
275
276
+ /*
277
+ * Encrypt XLog page(s) from the buf and write to the segment file.
+ *
+ * Unlike TDEXLogWriteEncryptedPages, the crypt step is delegated to
+ * TDEXLogCryptBuffer. tdeheap_xlog_seg_write uses this path when the
+ * current key is of the encrypted type but the write starts before that
+ * key's start LSN, or the key has no start LSN yet.
+ *
+ * The input buf is left untouched: the data is staged in EncryptionBuf,
+ * and that buffer is what gets written at the given offset of fd.
+ * Returns the result of pg_pwrite().
278
+ */
279
+ static ssize_t
280
+ TDEXLogWriteEncryptedPagesOldKeys (int fd , const void * buf , size_t count , off_t offset ,
281
+ TimeLineID tli , XLogSegNo segno , int segSize )
282
+ {
283
+ char * enc_buff = EncryptionBuf ;
284
+
285
+ #ifndef FRONTEND
286
+ Assert (count <= TDEXLogEncryptBuffSize ());
287
+ #endif
288
+
289
+ /* Copy the data as-is first: parts that are not encrypted must still end up in the output buffer unchanged */
290
+ memcpy (enc_buff , buf , count );
291
+
292
+ /*
293
+ * TDEXLogCryptBuffer potentially allocates, but only in very early
+ * execution (presumably when it has to fetch the WAL keys because the
+ * key cache is still empty -- TODO confirm).
294
+ * That shouldn't happen in a write, where we are in a critical section.
295
+ */
296
+ TDEXLogCryptBuffer (buf , enc_buff , count , offset , tli , segno , segSize );
297
+
298
+ return pg_pwrite (fd , enc_buff , count , offset );
299
+ }
300
+
301
+
266
302
/*
267
303
* Encrypt XLog page(s) from the buf and write to the segment file.
268
304
*/
@@ -284,6 +320,7 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
284
320
#endif
285
321
286
322
CalcXLogPageIVPrefix (tli , segno , key -> base_iv , iv_prefix );
323
+
287
324
pg_tde_stream_crypt (iv_prefix ,
288
325
offset ,
289
326
(char * ) buf ,
@@ -299,26 +336,59 @@ static ssize_t
299
336
tdeheap_xlog_seg_write (int fd , const void * buf , size_t count , off_t offset ,
300
337
TimeLineID tli , XLogSegNo segno , int segSize )
301
338
{
339
+ bool lastKeyUsable ;
340
+ bool afterLastKey ;
341
+ #ifdef FRONTEND
342
+ bool crashRecovery = false;
343
+ #else
344
+ bool crashRecovery = GetRecoveryState () == RECOVERY_STATE_CRASH ;
345
+ #endif
346
+
347
+ WalLocation loc = {.tli = tli };
348
+
349
+ XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
350
+
302
351
/*
303
352
* Set the last (most recent) key's start LSN if not set.
304
353
*
305
354
* This function is called with WALWriteLock held, so no extra
* synchronization is needed.
*
* lastKeyUsable: the current key already has a start LSN
* (TDEXLogGetEncKeyLsn() != 0), or gets one assigned below.
* afterLastKey: this write starts at or after the current key's start
* LSN. Note that it is computed before the start LSN is possibly
* assigned below and is not recomputed afterwards.
*
* If the current key is of the encrypted type but either flag is false,
* the write goes through TDEXLogWriteEncryptedPagesOldKeys instead of
* TDEXLogWriteEncryptedPages. Any other key type is written as-is.
306
355
*/
307
- if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && TDEXLogGetEncKeyLsn () == 0 )
308
- {
309
- WalLocation loc = {.tli = tli };
310
356
311
- XLogSegNoOffsetToRecPtr (segno , offset , segSize , loc .lsn );
357
+ lastKeyUsable = (TDEXLogGetEncKeyLsn () != 0 );
358
+ afterLastKey = (TDEXLogGetEncKeyLsn () <= loc .lsn );
312
359
313
- pg_tde_wal_last_key_set_location (loc );
314
- EncryptionKey .wal_start = loc ;
315
- TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
360
+ if (EncryptionKey .type != WAL_KEY_TYPE_INVALID && !lastKeyUsable )
361
+ {
362
+ WALKeyCacheRec * last_key = pg_tde_get_last_wal_key ();
363
+
364
+ if (!crashRecovery || EncryptionKey .type == WAL_KEY_TYPE_UNENCRYPTED )
365
+ {
366
+ /*
367
+ * TODO: the unencrypted case is still not perfect; we need to
368
+ * report an error in some corner cases.
+ *
+ * During crash recovery only a key of the unencrypted type gets
+ * here. The start location is assigned only when there is no
+ * last cached key, or that key starts before this write.
369
+ */
370
+ if (last_key == NULL || last_key -> start .lsn < loc .lsn )
371
+ {
372
+ pg_tde_wal_last_key_set_location (loc );
373
+ EncryptionKey .wal_start = loc ;
374
+ TDEXLogSetEncKeyLocation (EncryptionKey .wal_start );
375
+ lastKeyUsable = true;
376
+ }
377
+ }
316
378
}
317
379
318
- if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
380
+ if ((!afterLastKey || !lastKeyUsable ) && EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
381
+ {
382
+ return TDEXLogWriteEncryptedPagesOldKeys (fd , buf , count , offset , tli , segno , segSize );
383
+ }
384
+ else if (EncryptionKey .type == WAL_KEY_TYPE_ENCRYPTED )
385
+ {
319
386
return TDEXLogWriteEncryptedPages (fd , buf , count , offset , tli , segno );
387
+ }
320
388
else
389
+ {
321
390
return pg_pwrite (fd , buf , count , offset );
391
+ }
322
392
}
323
393
324
394
/*
@@ -340,7 +410,7 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
340
410
if (readsz <= 0 )
341
411
return readsz ;
342
412
343
- TDEXLogCryptBuffer (buf , count , offset , tli , segno , segSize );
413
+ TDEXLogCryptBuffer (buf , buf , count , offset , tli , segno , segSize );
344
414
345
415
return readsz ;
346
416
}
@@ -349,20 +419,22 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
349
419
* [De]Crypt buffer if needed based on provided segment offset, number and TLI
350
420
*/
351
421
void
352
- TDEXLogCryptBuffer (void * buf , size_t count , off_t offset ,
422
+ TDEXLogCryptBuffer (const void * buf , void * out_buf , size_t count , off_t offset ,
353
423
TimeLineID tli , XLogSegNo segno , int segSize )
354
424
{
355
425
WALKeyCacheRec * keys = pg_tde_get_wal_cache_keys ();
356
426
XLogRecPtr write_key_lsn ;
357
427
WalLocation data_end = {.tli = tli };
358
428
WalLocation data_start = {.tli = tli };
359
429
360
- if (! keys )
430
+ if (keys == NULL )
361
431
{
362
432
WalLocation start = {.tli = 1 ,.lsn = 0 };
363
433
364
434
/* cache is empty, try to read keys from disk */
365
- keys = pg_tde_fetch_wal_keys (start );
435
+ pg_tde_fetch_wal_keys (start );
436
+
437
+ keys = pg_tde_get_wal_cache_keys ();
366
438
}
367
439
368
440
/*
@@ -421,6 +493,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
421
493
off_t dec_end = XLogSegmentOffset (minlsn , segSize );
422
494
size_t dec_sz ;
423
495
char * dec_buf = (char * ) buf + (dec_off - offset );
496
+ char * o_buf = (char * ) out_buf + (dec_off - offset );
424
497
425
498
Assert (dec_off >= offset );
426
499
@@ -432,17 +505,26 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
432
505
dec_end = offset + count ;
433
506
}
434
507
435
- dec_sz = dec_end - dec_off ;
508
+ if (dec_end > dec_off )
509
+ {
510
+ dec_sz = dec_end - dec_off ;
511
+ }
512
+ else
513
+ {
514
+ /* dec_end <= dec_off: nothing to [de]crypt in this range (TODO: should this be an Assert instead?) */
515
+ dec_sz = 0 ;
516
+ }
436
517
437
518
#ifdef TDE_XLOG_DEBUG
438
519
elog (DEBUG1 , "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %u_%X/%X" ,
439
520
dec_off , dec_off - offset , dec_sz , curr_key -> key .wal_start .tli , LSN_FORMAT_ARGS (curr_key -> key .wal_start .lsn ));
440
521
#endif
522
+
441
523
pg_tde_stream_crypt (iv_prefix ,
442
524
dec_off ,
443
525
dec_buf ,
444
526
dec_sz ,
445
- dec_buf ,
527
+ o_buf ,
446
528
curr_key -> key .key ,
447
529
& curr_key -> crypt_ctx );
448
530
}
0 commit comments