Skip to content

Commit

Permalink
CPU子系统参数例程 + 项目说明文档
Browse files Browse the repository at this point in the history
  • Loading branch information
Zera-Algorithm committed Jul 22, 2022
1 parent c7e8c4d commit 3a99816
Show file tree
Hide file tree
Showing 51 changed files with 16,645 additions and 18 deletions.
54 changes: 54 additions & 0 deletions eBPF_Supermarket/CPU-subsystem/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
## CPU子系统指标捕获例程

### 0. 介绍

本目录是由一系列捕获CPU子系统指标(主要是调度指标)的例程组成的。

bpftrace_application 是一些 Bpftrace 构建的例程,需要预装 bpftrace,其特点是代码简单,能很快上手,缺点是不能支撑高复杂性的 eBPF 应用。

其余以 go_ 开头的各个文件夹是用 go语言 + eBPF 构建的eBPF例程,使用了开源的cilium/eBPF库,可以支撑高复杂性、模块化的 eBPF 应用。

### 1. 准备工作

环境:Ubuntu 20.04, 内核版本 5.13.0-30-generic

注:由于 eBPF 的 kprobe 逻辑与内核数据结构定义高度相关,而现在 BTF 的应用(可消除不同内核版本间数据结构的不兼容)还不是很成熟,因此在使用此例程前,需首先适配内核版本。

软件:

* Go SDK(安装 cilium/ebpf 库)

* llvm
* bpftrace

### 2. bpftrace应用

runqlen_percpu.c: 打印每个CPU的runqlen分布情况。使用了kprobe,挂载点是update_rq_clock.

runqlen_se.c: 打印每个CPU的 CFS 调度的队列长度分布情况。使用了kprobe,挂载点是update_rq_clock.

使用方法:

```shell
cd bpftrace_application
sudo ./runqlen_percpu.c
```

### 3. go_* 应用

**go_migrate_info**: 以事件的形式打印CPU间进程迁移的情况。每次迁移都打印一条信息,包括时间戳、进程pid、源CPU、目标CPU、进程优先级。这可用于后期前端开发可视化地显示进程迁移情况。

**go_schedule**: 打印每个CPU的runqlen分布情况。

**go_schedule_uninterruptible**: 打印整个kernel所有处于**不可打断阻塞状态**的任务的数目。

**go_switch_info**:每1s打印现有所有进程的进程切换数。

使用方法:

```shell
cd go_schedule
cd schedule
./run.sh
```

Original file line number Diff line number Diff line change
@@ -1,15 +1,4 @@
#!/usr/bin/env bpftrace
/*
* runqlen.bt CPU scheduler run queue length as a histogram.
* For Linux, uses bpftrace, eBPF.
*
* This is a bpftrace version of the bcc tool of the same name.
*
* Copyright 2018 Netflix, Inc.
* Licensed under the Apache License, Version 2.0 (the "License")
*
* 07-Oct-2018 Brendan Gregg Created this.
*/

#include <linux/sched.h>

Expand Down Expand Up @@ -58,16 +47,17 @@ struct cfs_rq_partial {

BEGIN
{
	// NOTE(review): this commit view renders both the removed and the
	// added banner line of the diff; only one of the two printf calls
	// belongs in the actual file — confirm against the raw source.
	printf("Sampling run queue length at 99 Hertz... Hit Ctrl-C to end.\n");
	printf("Sampling run queue length... Hit Ctrl-C to end.\n");
}

// Per-CPU histogram of run queue length, sampled on every
// update_rq_clock() call.
// "Run queue length" counts RUNNABLE tasks, which differs slightly from load.
kprobe:update_rq_clock
{
	// NOTE(review): this commit view interleaves removed and added diff
	// lines — the duplicated "$task = ..." assignment below is old/new
	// residue; only one of the two code paths belongs in the real file.
	$task = (struct task_struct *)curtask;
	$cpu = $task->on_cpu;
	$rq = (struct rq *)arg0;
	@q[cpu] = lhist($rq->nr_running, 0, 100, 1);
	// printf("now = %d\n", nsecs);

	$task = (struct task_struct *)curtask;
	$my_q = (struct cfs_rq_partial *)$task->se.cfs_rq;
	$len = $my_q->nr_running;
	// $len = $len > 0 ? $len - 1 : 0;
	@runqlen = lhist($len, 0, 100, 1);
}
111 changes: 111 additions & 0 deletions eBPF_Supermarket/CPU-subsystem/bpftrace_application/runqlen_se.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/usr/bin/env bpftrace

#include <linux/sched.h>

/* Trimmed mirror of the kernel's CFS run queue (struct cfs_rq).
 * Only the leading fields are declared, so field order and offsets must
 * match the running kernel (5.13 per the README) — re-check on upgrade. */
struct cfs_rq {
	struct load_weight load;
	unsigned int nr_running;
	unsigned int h_nr_running;
	unsigned int idle_h_nr_running;
};
/* end of CFS mirror */


/* Trimmed mirror of the real-time scheduler run queue. */
// Kept for reference: list_head layout (already provided by linux/sched.h).
// struct list_head {
// struct list_head *next;
// struct list_head *prev;
// };

struct rt_prio_array {
	long unsigned int bitmap[2];
	struct list_head queue[100];
};

struct rt_rq {
	struct rt_prio_array active;
	unsigned int rt_nr_running;
	unsigned int rr_nr_running;
};
/* end of RT mirror */

/* Structs for deadline (SCHED_DEADLINE) scheduling. */
// Kept for reference: rbtree layouts (already provided by linux/sched.h).
// struct rb_node {
// long unsigned int __rb_parent_color;
// struct rb_node *rb_right;
// struct rb_node *rb_left;
// };

// struct rb_root {
// struct rb_node *rb_node;
// };

// struct rb_root_cached {
// struct rb_root rb_root;
// struct rb_node *rb_leftmost;
// };

struct dl_rq {
	struct rb_root_cached root;
	long unsigned int dl_nr_running;
};
/* end of deadline mirror */


// Utilization-clamp bucket, mirrored from the kernel definition.
struct uclamp_bucket {
	long unsigned int value: 11;
	long unsigned int tasks: 53;
};

struct uclamp_rq {
	unsigned int value;
	struct uclamp_bucket bucket[5];
};

/* Trimmed mirror of the kernel's struct rq, up to and including the
 * per-class sub-queues. The anonymous `long:` bit-field members are
 * explicit padding intended to keep the later fields at their kernel
 * offsets — TODO confirm against the 5.13 headers after any kernel bump. */
struct rq_partial {
	raw_spinlock_t lock;
	unsigned int nr_running;
	unsigned int nr_numa_running;
	unsigned int nr_preferred_running;
	unsigned int numa_migrate_on;
	long unsigned int last_blocked_load_update_tick;
	unsigned int has_blocked_load;
	long: 32;
	long: 64;
	long: 64;
	long: 64;
	call_single_data_t nohz_csd;
	unsigned int nohz_tick_stopped;
	atomic_t nohz_flags;
	unsigned int ttwu_pending;
	u64 nr_switches;
	long: 64;
	struct uclamp_rq uclamp[2];
	unsigned int uclamp_flags;
	long: 32;
	long: 64;
	long: 64;
	long: 64;
	struct cfs_rq cfs;
	struct rt_rq rt;
	struct dl_rq dl;
};

// NOTE: struct definitions must appear before the BEGIN block.
BEGIN {
	// Trailing newline added so the startup banner is actually emitted as
	// a complete line instead of sitting unterminated in the output.
	printf("starting collecting se data...\n");
}

// Sample scheduler queue lengths on every update_rq_clock() call and
// store their distributions (the histograms double as max/min/avg data).
kprobe:update_rq_clock {
	$rq = (struct rq_partial *)arg0;
	$len = $rq->cfs.nr_running;
	// Per-CPU distribution of CFS runnable-task counts.
	@cfs_hist[cpu] = lhist($len, 0, 100, 1);
	$dl = ($rq->dl);
	// @rt_hist = lhist($rq->rt.rt_nr_running + $rq->rt.rr_nr_running, 0, 200, 1); // @-variables persist data across probe firings
	// printf("rt_nr_running = %lu, rr_nr_running = %lu\n", $rq->rt.rt_nr_running, $rq->rt.rr_nr_running);
	// Global (not per-CPU) distribution of deadline-class task counts.
	@dl_hist = lhist($rq->dl.dl_nr_running, 0, 100, 1);
	printf("dl_nr_running = %lu\n", $dl.dl_nr_running);
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
//go:build linux
// +build linux

// This program demonstrates attaching an eBPF program to a kernel tracepoint.
// The eBPF program will be attached to the page allocation tracepoint and
// prints out the number of times it has been reached. The tracepoint fields
// are printed into /sys/kernel/debug/tracing/trace_pipe.
package main

import (
"log"
"time"

"github.com/cilium/ebpf/link"
"github.com/cilium/ebpf/rlimit"
)

// $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc $BPF_CLANG -cflags $BPF_CFLAGS bpf tracepoint.c -- -I../headers -I../..

// mapKey is the index of the single counter slot in the BPF array map.
// NOTE(review): unused — main() declares its own local `key`; confirm and
// drop one of the two.
const mapKey uint32 = 0

// migrate_value mirrors `struct migrate_value` in tracepoint.c
// (u64 timestamp followed by four 32-bit ints, 24 bytes total).
// NOTE(review): unused — and Go `int` is 8 bytes on 64-bit targets, so
// this struct does NOT match the C layout; the read loop in main()
// decodes the raw value into a [6]int32 instead. Kept for reference.
type migrate_value struct {
	time     uint64
	pid      int
	prio     int
	orig_cpu int
	dest_cpu int
}

// main attaches the BPF program to the sched:sched_migrate_task tracepoint
// and drains one migration event per sequence number from the "queue" map
// once per second, printing timestamp, pid, priority and source/target CPU.
func main() {
	// Allow the current process to lock memory for eBPF resources.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}

	// Load pre-compiled programs and maps into the kernel.
	objs := bpfObjects{}
	if err := loadBpfObjects(&objs, nil); err != nil {
		log.Fatalf("loading objects: %v", err)
	}
	defer objs.Close()

	// Attach to the tracepoint at
	// /sys/kernel/debug/tracing/events/sched/sched_migrate_task.
	// The BPF side stores one 24-byte migrate_value per migration in the
	// "queue" map and bumps the counter in kprobe_map slot 0.
	kp, err := link.Tracepoint("sched", "sched_migrate_task", objs.SchedSwitch, nil)
	if err != nil {
		log.Fatalf("opening tracepoint: %s", err)
	}
	defer kp.Close()

	// Poll once per second, draining everything recorded since last tick.
	ticker := time.NewTicker(1 * time.Second)
	defer ticker.Stop()

	log.Println("Waiting for events..")

	// pos is the next event sequence number to consume; the BPF program
	// keys events starting at 1.
	var pos uint64 = 1
	for range ticker.C {
		var key uint32 = 0
		var total uint64
		var delList []uint32

		// The original ignored this error and could print a stale/zero
		// total; skip the tick instead.
		if err := objs.KprobeMap.Lookup(&key, &total); err != nil {
			log.Printf("Failed to read migration counter: %v\n", err)
			continue
		}
		log.Printf("--------total migration: %v-------\n", total)

		for i := pos; i <= total; i++ {
			var info [6]int32
			key = uint32(i)

			if err := objs.Queue.Lookup(key, &info); err != nil {
				log.Printf("Failed to read key(%v) %v\n", key, err)
				return
			}
			// Reassemble the u64 timestamp from the first two 32-bit
			// words of the value (assumes little-endian — TODO confirm on
			// other architectures). The original performed a second
			// Lookup into a bare uint64, whose 8-byte size does not match
			// the map's 24-byte value, and ignored the resulting error.
			ts := uint64(uint32(info[0])) | uint64(uint32(info[1]))<<32

			log.Printf("timestamp(%v) #%v: pid %v, prio %v, core%v -> core%v\n",
				ts, key, info[2], info[3], info[4], info[5])

			delList = append(delList, key)
		}
		pos = total + 1

		// Remove consumed entries so the 4096-entry hash map never fills.
		for _, k := range delList {
			objs.Queue.Delete(k)
		}
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// +build ignore

#include "common.h"

char __license[] SEC("license") = "Dual MIT/GPL";

// The tracepoint's pid field is a plain int in the kernel.
typedef int pid_t;

// One record per migration event, stored in the "queue" map and decoded
// by the Go reader (8-byte timestamp + four 4-byte ints = 24 bytes).
struct migrate_value {
	u64 time;     // bpf_ktime_get_ns() at the time of the migration
	pid_t pid;    // migrating task's pid
	int prio;     // task priority
	int orig_cpu; // CPU the task left
	int dest_cpu; // CPU the task moved to
};

// Event store keyed by the per-event sequence number taken from
// kprobe_map. Despite the name this is a HASH map, not BPF_MAP_TYPE_QUEUE,
// so userspace looks entries up — and must delete them — by key.
struct bpf_map_def SEC("maps") queue = {
	.type = BPF_MAP_TYPE_HASH,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct migrate_value),
	.max_entries = 4096,
};

// Single-slot array holding the running total of migration events;
// slot 0 doubles as the event sequence counter for "queue" keys.
struct bpf_map_def SEC("maps") kprobe_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u64),
	.max_entries = 1,
};

// Layout of the sched:sched_migrate_task tracepoint context.
// NOTE(review): `pad` presumably covers the 8-byte common_* tracepoint
// header — confirm against this event's `format` file under
// /sys/kernel/debug/tracing/events/sched/sched_migrate_task/.
struct migrate_info {
	u64 pad;
	char comm[16];
	pid_t pid;
	int prio;
	int orig_cpu;
	int dest_cpu;
};

// Fires on every task migration: increments the global counter in
// kprobe_map and stores one migrate_value in "queue" keyed by the new
// counter value (so userspace can consume events 1..N in order).
SEC("tracepoint/sched/sched_migrate_task")
int sched_switch(struct migrate_info *info) {
	u32 key = 0;
	u64 initval = 1, *valp;
	u64 seq;

	valp = bpf_map_lookup_elem(&kprobe_map, &key);
	if (!valp) {
		// Defensive: for an ARRAY map a lookup of a valid index should
		// never fail, but the verifier requires the NULL check. Record
		// this event as #1 instead of silently dropping it (the original
		// returned here, losing the event).
		bpf_map_update_elem(&kprobe_map, &key, &initval, BPF_ANY);
		seq = initval;
	} else {
		__sync_fetch_and_add(valp, 1);
		seq = *valp;
	}

	struct migrate_value val;
	val.time = bpf_ktime_get_ns();
	val.pid = info->pid;
	val.prio = info->prio;
	val.orig_cpu = info->orig_cpu;
	val.dest_cpu = info->dest_cpu;

	// The queue map's key_size is sizeof(u32): pass an actual u32. The
	// original passed valp (pointer to a u64 counter), which only worked
	// through little-endian truncation of the first 4 bytes.
	u32 qkey = (u32)seq;
	bpf_map_update_elem(&queue, &qkey, &val, BPF_ANY);
	return 0;
}
7 changes: 7 additions & 0 deletions eBPF_Supermarket/CPU-subsystem/go_schedule/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
module github.com/cilium/ebpf/examples

go 1.17

require github.com/cilium/ebpf v0.9.0

require golang.org/x/sys v0.0.0-20211001092434-39dca1131b70 // indirect
Loading

0 comments on commit 3a99816

Please sign in to comment.