"""
PostgreSQL Reports Generator using PromQL

-This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007, H001, F005, F004)
+This script generates reports for specific PostgreSQL check types (A002, A003, A004, A007, H001, F005, F004, K001, K003)
by querying Prometheus metrics using PromQL queries.
"""

@@ -428,6 +428,325 @@ def generate_f004_heap_bloat_report(self, cluster: str = "local", node_name: str
            "total_bloat_size_pretty": self.format_bytes(total_bloat_size)
        }, node_name)

+    def generate_k001_query_calls_report(self, cluster: str = "local", node_name: str = "node-01",
+                                         time_range_minutes: int = 60) -> Dict[str, Any]:
+        """
+        Generate K001 Globally Aggregated Query Metrics report (sorted by calls).
+
+        Args:
+            cluster: Cluster name
+            node_name: Node name
+            time_range_minutes: Time range in minutes for metrics collection
+
+        Returns:
+            Dictionary containing query metrics sorted by calls
+        """
+        print("Generating K001 Globally Aggregated Query Metrics report...")
+
+        # Calculate time range
+        end_time = datetime.now()
+        start_time = end_time - timedelta(minutes=time_range_minutes)
+
+        # Get pg_stat_statements metrics using the same logic as CSV endpoint
+        query_metrics = self._get_pgss_metrics_data(cluster, node_name, start_time, end_time)
+
+        # Sort by calls (descending)
+        sorted_metrics = sorted(query_metrics, key=lambda x: x.get('calls', 0), reverse=True)
+
+        # Calculate totals
+        total_calls = sum(q.get('calls', 0) for q in sorted_metrics)
+        total_time = sum(q.get('total_time', 0) for q in sorted_metrics)
+        total_rows = sum(q.get('rows', 0) for q in sorted_metrics)
+
+        return self.format_report_data("K001", {
+            "query_metrics": sorted_metrics,
+            "summary": {
+                "total_queries": len(sorted_metrics),
+                "total_calls": total_calls,
+                "total_time_ms": total_time,
+                "total_rows": total_rows,
+                "time_range_minutes": time_range_minutes,
+                "start_time": start_time.isoformat(),
+                "end_time": end_time.isoformat()
+            }
+        }, node_name)
+
+    def generate_k003_top_queries_report(self, cluster: str = "local", node_name: str = "node-01",
+                                         time_range_minutes: int = 60, limit: int = 50) -> Dict[str, Any]:
+        """
+        Generate K003 Top-50 Queries by total_time report.
+
+        Args:
+            cluster: Cluster name
+            node_name: Node name
+            time_range_minutes: Time range in minutes for metrics collection
+            limit: Number of top queries to return (default: 50)
+
+        Returns:
+            Dictionary containing top queries sorted by total execution time
+        """
+        print("Generating K003 Top-50 Queries by total_time report...")
+
+        # Calculate time range
+        end_time = datetime.now()
+        start_time = end_time - timedelta(minutes=time_range_minutes)
+
+        # Get pg_stat_statements metrics using the same logic as CSV endpoint
+        query_metrics = self._get_pgss_metrics_data(cluster, node_name, start_time, end_time)
+
+        # Sort by total_time (descending) and limit to top N
+        sorted_metrics = sorted(query_metrics, key=lambda x: x.get('total_time', 0), reverse=True)[:limit]
+
+        # Calculate totals for the top queries
+        total_calls = sum(q.get('calls', 0) for q in sorted_metrics)
+        total_time = sum(q.get('total_time', 0) for q in sorted_metrics)
+        total_rows = sum(q.get('rows', 0) for q in sorted_metrics)
+
+        return self.format_report_data("K003", {
+            "top_queries": sorted_metrics,
+            "summary": {
+                "queries_returned": len(sorted_metrics),
+                "total_calls": total_calls,
+                "total_time_ms": total_time,
+                "total_rows": total_rows,
+                "time_range_minutes": time_range_minutes,
+                "start_time": start_time.isoformat(),
+                "end_time": end_time.isoformat(),
+                "limit": limit
+            }
+        }, node_name)
+
+    def _get_pgss_metrics_data(self, cluster: str, node_name: str, start_time: datetime, end_time: datetime) -> List[Dict[str, Any]]:
+        """
+        Get pg_stat_statements metrics data between two time points.
+        Adapted from the logic in flask-backend/app.py get_pgss_metrics_csv().
+
+        Args:
+            cluster: Cluster name
+            node_name: Node name
+            start_time: Start datetime
+            end_time: End datetime
+
+        Returns:
+            List of query metrics with calculated differences
+        """
+        # Metric name mapping for cleaner output
+        METRIC_NAME_MAPPING = {
+            'calls': 'calls',
+            'exec_time_total': 'total_time',
+            'rows': 'rows',
+            'shared_bytes_hit_total': 'shared_blks_hit',
+            'shared_bytes_read_total': 'shared_blks_read',
+            'shared_bytes_dirtied_total': 'shared_blks_dirtied',
+            'shared_bytes_written_total': 'shared_blks_written',
+            'block_read_total': 'blk_read_time',
+            'block_write_total': 'blk_write_time'
+        }
+
+        # Build filters
+        filters = [f'cluster="{cluster}"', f'node_name="{node_name}"']
+        filter_str = '{' + ','.join(filters) + '}'
+
+        # Get all pg_stat_statements metrics
+        all_metrics = [
+            'pgwatch_pg_stat_statements_calls',
+            'pgwatch_pg_stat_statements_exec_time_total',
+            'pgwatch_pg_stat_statements_rows',
+            'pgwatch_pg_stat_statements_shared_bytes_hit_total',
+            'pgwatch_pg_stat_statements_shared_bytes_read_total',
+            'pgwatch_pg_stat_statements_shared_bytes_dirtied_total',
+            'pgwatch_pg_stat_statements_shared_bytes_written_total',
+            'pgwatch_pg_stat_statements_block_read_total',
+            'pgwatch_pg_stat_statements_block_write_total'
+        ]
+
+        # Get metrics at start and end times
+        start_data = []
+        end_data = []
+
+        for metric in all_metrics:
+            metric_with_filters = f'{metric}{filter_str}'
+
+            try:
+                # Query metrics around start time - use instant queries at specific timestamps
+                start_result = self.query_range(metric_with_filters, start_time - timedelta(minutes=1), start_time + timedelta(minutes=1))
+                if start_result:
+                    start_data.extend(start_result)
+
+                # Query metrics around end time
+                end_result = self.query_range(metric_with_filters, end_time - timedelta(minutes=1), end_time + timedelta(minutes=1))
+                if end_result:
+                    end_data.extend(end_result)
+
+            except Exception as e:
+                print(f"Warning: Failed to query metric {metric}: {e}")
+                continue
+
+        # Process the data to calculate differences
+        return self._process_pgss_data(start_data, end_data, start_time, end_time, METRIC_NAME_MAPPING)
+
+    def query_range(self, query: str, start_time: datetime, end_time: datetime, step: str = "30s") -> List[Dict[str, Any]]:
+        """
+        Execute a range PromQL query.
+
+        Args:
+            query: PromQL query string
+            start_time: Start time
+            end_time: End time
+            step: Query step interval
+
+        Returns:
+            List of query results
+        """
+        params = {
+            'query': query,
+            'start': start_time.timestamp(),
+            'end': end_time.timestamp(),
+            'step': step
+        }
+
+        try:
+            response = requests.get(f"{self.base_url}/query_range", params=params)
+            if response.status_code == 200:
+                result = response.json()
+                if result.get('status') == 'success':
+                    return result.get('data', {}).get('result', [])
+            else:
+                print(f"Range query failed with status {response.status_code}: {response.text}")
+        except Exception as e:
+            print(f"Range query error: {e}")
+
+        return []
+
+    def _process_pgss_data(self, start_data: List[Dict], end_data: List[Dict],
+                           start_time: datetime, end_time: datetime,
+                           metric_mapping: Dict[str, str]) -> List[Dict[str, Any]]:
+        """
+        Process pg_stat_statements data and calculate differences between start and end times.
+        Adapted from the logic in flask-backend/app.py process_pgss_data().
+        """
+        # Convert Prometheus data to dictionaries
+        start_metrics = self._prometheus_to_dict(start_data, start_time)
+        end_metrics = self._prometheus_to_dict(end_data, end_time)
+
+        if not start_metrics and not end_metrics:
+            return []
+
+        # Create a combined dictionary with all unique query identifiers
+        all_keys = set()
+        all_keys.update(start_metrics.keys())
+        all_keys.update(end_metrics.keys())
+
+        result_rows = []
+
+        # Calculate differences for each query
+        for key in all_keys:
+            start_metric = start_metrics.get(key, {})
+            end_metric = end_metrics.get(key, {})
+
+            # Extract identifier components from key
+            db_name, query_id, user, instance = key
+
+            # Calculate actual duration from metric timestamps
+            start_timestamp = start_metric.get('timestamp')
+            end_timestamp = end_metric.get('timestamp')
+
+            if start_timestamp and end_timestamp:
+                start_dt = datetime.fromisoformat(start_timestamp)
+                end_dt = datetime.fromisoformat(end_timestamp)
+                actual_duration = (end_dt - start_dt).total_seconds()
+            else:
+                # Fallback to query parameter duration if timestamps are missing
+                actual_duration = (end_time - start_time).total_seconds()
+
+            # Create result row
+            row = {
+                'queryid': query_id,
+                'database': db_name,
+                'user': user,
+                'duration_seconds': actual_duration
+            }
+
+            # Numeric columns to calculate differences for (using original metric names)
+            numeric_cols = list(metric_mapping.keys())
+
+            # Calculate differences and rates
+            for col in numeric_cols:
+                start_val = start_metric.get(col, 0)
+                end_val = end_metric.get(col, 0)
+                diff = end_val - start_val
+
+                # Use simplified display name
+                display_name = metric_mapping[col]
+
+                # Convert bytes to blocks for block-related metrics (PostgreSQL uses 8KB blocks)
+                if 'blks' in display_name and 'bytes' in col:
+                    diff = diff / 8192  # Convert bytes to 8KB blocks
+
+                row[display_name] = diff
+
+                # Calculate rates per second
+                if row['duration_seconds'] > 0:
+                    row[f'{display_name}_per_sec'] = diff / row['duration_seconds']
+                else:
+                    row[f'{display_name}_per_sec'] = 0
+
+                # Calculate per-call averages
+                calls_diff = row.get('calls', 0)
+                if calls_diff > 0:
+                    row[f'{display_name}_per_call'] = diff / calls_diff
+                else:
+                    row[f'{display_name}_per_call'] = 0
+
+            result_rows.append(row)
+
+        return result_rows
+
+    def _prometheus_to_dict(self, prom_data: List[Dict], timestamp: datetime) -> Dict:
+        """
+        Convert Prometheus API response to dictionary keyed by query identifiers.
+        Adapted from the logic in flask-backend/app.py prometheus_to_dict().
+        """
+        if not prom_data:
+            return {}
+
+        metrics_dict = {}
+
+        for metric_data in prom_data:
+            metric = metric_data.get('metric', {})
+            values = metric_data.get('values', [])
+
+            if not values:
+                continue
+
+            # Get the closest value to our timestamp
+            closest_value = min(values, key=lambda x: abs(float(x[0]) - timestamp.timestamp()))
+
+            # Create unique key for this query
+            key = (
+                metric.get('datname', ''),
+                metric.get('queryid', ''),
+                metric.get('user', ''),
+                metric.get('instance', '')
+            )
+
+            # Initialize metric dict if not exists
+            if key not in metrics_dict:
+                metrics_dict[key] = {
+                    'timestamp': datetime.fromtimestamp(float(closest_value[0])).isoformat(),
+                }
+
+            # Add metric value
+            metric_name = metric.get('__name__', 'pgwatch_pg_stat_statements_calls')
+            clean_name = metric_name.replace('pgwatch_pg_stat_statements_', '')
+
+            try:
+                metrics_dict[key][clean_name] = float(closest_value[1])
+            except (ValueError, IndexError):
+                metrics_dict[key][clean_name] = 0
+
+        return metrics_dict
+

    def format_bytes(self, bytes_value: float) -> str:
        """Format bytes value for human readable display."""
        if bytes_value == 0:
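The core of the new helpers is a snapshot-diff over cumulative counters: `_get_pgss_metrics_data` reads each `pgwatch_pg_stat_statements_*` series near the start and end of the window, and `_process_pgss_data` subtracts the two readings per query key to produce interval deltas, per-second rates, and per-call averages. A minimal self-contained sketch of that calculation, using hypothetical sample values rather than live Prometheus data:

# Standalone sketch of the snapshot-diff technique in _process_pgss_data:
# two readings of cumulative pg_stat_statements counters, keyed by
# (database, queryid, user, instance), are subtracted and turned into
# per-second rates and per-call averages. All values are hypothetical.
start_snapshot = {('mydb', '123', 'app', 'pg:5432'): {'calls': 100.0, 'total_time': 2500.0}}
end_snapshot = {('mydb', '123', 'app', 'pg:5432'): {'calls': 160.0, 'total_time': 4300.0}}
duration = 60.0  # seconds between the two snapshots

for key in set(start_snapshot) | set(end_snapshot):
    start = start_snapshot.get(key, {})
    end = end_snapshot.get(key, {})
    calls = end.get('calls', 0) - start.get('calls', 0)
    total_time = end.get('total_time', 0) - start.get('total_time', 0)
    print({
        'queryid': key[1],
        'calls': calls,                                             # 60 calls in the interval
        'calls_per_sec': calls / duration if duration > 0 else 0,   # 1.0/s
        'total_time_per_call': total_time / calls if calls > 0 else 0,  # 30 ms avg
    })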
@@ -619,6 +938,8 @@ def generate_all_reports(self, cluster: str = "local", node_name: str = "node-01
        reports['H001'] = self.generate_h001_invalid_indexes_report(cluster, node_name)
        reports['F005'] = self.generate_f005_btree_bloat_report(cluster, node_name)
        reports['F004'] = self.generate_f004_heap_bloat_report(cluster, node_name)
+        reports['K001'] = self.generate_k001_query_calls_report(cluster, node_name)
+        reports['K003'] = self.generate_k003_top_queries_report(cluster, node_name)

        return reports
    def create_report(self, api_url, token, project, epoch):
@@ -673,7 +994,7 @@ def main():
                        help='Cluster name (default: local)')
    parser.add_argument('--node-name', default='node-01',
                        help='Node name (default: node-01)')
-    parser.add_argument('--check-id', choices=['A002', 'A003', 'A004', 'A007', 'H001', 'F005', 'F004', 'ALL'],
+    parser.add_argument('--check-id', choices=['A002', 'A003', 'A004', 'A007', 'H001', 'F005', 'F004', 'K001', 'K003', 'ALL'],
                        help='Specific check ID to generate (default: ALL)')
    parser.add_argument('--output', default='-',
                        help='Output file (default: stdout)')
@@ -704,7 +1025,6 @@ def main():
        if not args.no_upload:
            generator.upload_report_file(args.api_url, args.token, report_id, f"{report}.json")
        if args.output == '-':
-
            pass
        else:
            with open(args.output, 'w') as f:
@@ -726,6 +1046,10 @@ def main():
            report = generator.generate_f005_btree_bloat_report(args.cluster, args.node_name)
        elif args.check_id == 'F004':
            report = generator.generate_f004_heap_bloat_report(args.cluster, args.node_name)
+        elif args.check_id == 'K001':
+            report = generator.generate_k001_query_calls_report(args.cluster, args.node_name)
+        elif args.check_id == 'K003':
+            report = generator.generate_k003_top_queries_report(args.cluster, args.node_name)

        if args.output == '-':
            print(json.dumps(report, indent=2))
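With the extended `--check-id` choices in place, the K-checks run like any other check ID. A hypothetical invocation follows; the script filename is an assumption (the diff does not show it), and only flags visible in this diff are used:

# "generate_reports.py" is a placeholder name for this script.
python generate_reports.py --check-id K001 --cluster local --node-name node-01 --output k001_report.json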