Skip to content

Commit

Permalink
Fixed up quiescence code
Browse files Browse the repository at this point in the history
  • Loading branch information
msiniavine committed Mar 16, 2012
1 parent 05a859a commit 929e92f
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 37 deletions.
152 changes: 136 additions & 16 deletions arch/x86/kernel/save_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
#include <linux/socket.h>
#include <linux/un.h>
#include <net/af_unix.h>
#include <linux/kprobes.h>
#include <linux/kallsyms.h>

#include <linux/set_state.h>

Expand Down Expand Up @@ -1081,20 +1083,6 @@ static void save_creds(struct task_struct* task, struct saved_task_struct* state
state->cap_bset = task->cap_bset;
}

/*
 * Debugging stub: takes a task but currently does nothing.
 * The whole body is commented-out diagnostic code that would have printed
 * the task's state and orig_ax; it is kept only as a reference.
 */
static void check_status(struct task_struct* task)
{
/* if(task->state == TASK_UNINTERRUPTIBLE) */
/* { */
/* sprint("Task uninterruptible\n"); */
/* } */
/* else if (task->state == TASK_RUNNING && (task_pt_regs(task)->orig_ax < 0)) */
/* { */
/* sprint("Task desheduled\n"); */
/* } */

// sprint("%s %d state %d ax %d\n", task->comm, task->pid, task->state, task_pt_regs(task)->orig_ax);
// busy_wait(2);
}

static struct saved_task_struct* save_process(struct task_struct* task, struct map_entry* head)
{
Expand All @@ -1110,7 +1098,6 @@ static struct saved_task_struct* save_process(struct task_struct* task, struct m
INIT_LIST_HEAD(&current_task->thread_group);
INIT_LIST_HEAD(&current_task->vm_areas);

check_status(task);

sprint( "Target task %s pid: %d will be saved at %p\n", task->comm, task->pid, current_task);
strcpy(current_task->name, task->comm);
Expand Down Expand Up @@ -1244,6 +1231,139 @@ void save_running_processes(void)
read_unlock(&tasklist_lock);
}

/*
 * Map a task state value to a short human-readable label.
 *
 * TASK_RUNNING, TASK_INTERRUPTIBLE and TASK_UNINTERRUPTIBLE index the label
 * table directly by their numeric value; every other state yields "Other".
 */
static char* task_states(int state)
{
	static char* labels[] = {"Running", "Interruptible", "Uninterruptible", "Other"};

	if (state == TASK_RUNNING ||
	    state == TASK_INTERRUPTIBLE ||
	    state == TASK_UNINTERRUPTIBLE)
		return labels[state];

	return labels[3]; /* other */
}

/*
 * Decide whether a task is in a state that prevents saving right now.
 *
 * Returns 1 if the caller has to wait for this task, 0 if it is acceptable:
 *  - TASK_INTERRUPTIBLE   -> acceptable.
 *  - TASK_UNINTERRUPTIBLE -> not acceptable (logged).
 *  - TASK_RUNNING         -> not acceptable (logged), except for the
 *                            exemptions listed in the body.
 *  - any other state      -> acceptable.
 */
static int is_bad_task(struct task_struct* task)
{
	/* Prefix of the per-CPU stop-machine worker names ("kstop/<cpu>"). */
	static const char kstop_prefix[] = "kstop/";
	struct pt_regs* regs;

	if (task->state == TASK_INTERRUPTIBLE)
		return 0;

	if (task->state == TASK_UNINTERRUPTIBLE) {
		sprint("%s[%d] uninterruptible, need to wait\n", task->comm, task->pid);
		return 1;
	}

	if (task->state != TASK_RUNNING)
		return 0;

	regs = task_pt_regs(task);

	/*
	 * Exemptions. Match every "kstop/<cpu>" worker rather than only
	 * "kstop/0": on a multi-CPU machine the workers on the other CPUs are
	 * runnable as well and would otherwise be reported as blocking.
	 */
	if (!strncmp(task->comm, kstop_prefix, sizeof(kstop_prefix) - 1))
		return 0;
	if (!strcmp(task->comm, "save_state"))
		return 0;

	/*
	 * NOTE(review): -240 equals ~0xef; this looks like the orig_ax value
	 * left by interrupt entry for vector 0xef (presumably the local timer
	 * interrupt), i.e. the task was merely preempted - confirm.
	 */
	if (regs->orig_ax == -240)
		return 0;

	sprint("%s[%d] running orig_ax %ld, pt_ip %p ts_ip %p\n", task->comm, task->pid, regs->orig_ax, (void*)regs->ip, (void*)task->thread.ip);
	return 1;
}

/*
 * Check every process, and every entry on each process's thread_group list,
 * with is_bad_task() while holding tasklist_lock for reading.
 *
 * Returns 1 when no task was reported bad, 0 as soon as one is found.
 */
int is_ready_to_save(void)
{
	struct task_struct* leader;
	struct task_struct* member;
	int ready = 0;	/* flipped to 1 only after a complete clean scan */

	read_lock(&tasklist_lock);

	for_each_process(leader) {
		if (is_bad_task(leader))
			goto unlock;

		list_for_each_entry(member, &leader->thread_group, thread_group) {
			if (is_bad_task(member))
				goto unlock;
		}
	}

	ready = 1;

unlock:
	read_unlock(&tasklist_lock);
	return ready;
}

// Syscall dispatch table, reached through a hard-coded raw address.
// NOTE(review): this constant is presumably valid for one specific kernel build only - confirm before reuse.
static unsigned long* sys_call_table = (unsigned long*)0xc03776b0;

// Probes registered by install_syscall_blocker(); both use block_handler as their pre-handler.
static struct kprobe int80;
static struct kprobe sysenter;
// Set to 1 by activate_syscall_blocker(); while it is 0, block_handler leaves the table untouched.
static int activated = 0;

/*
 * Replacement entry that block_handler writes into sys_call_table.
 * Marks the calling task interruptible, gives up the CPU, and returns
 * -EINVAL if the task is ever scheduled again.
 */
static int redirected_syscall(void)
{
	const int result = -EINVAL;

	set_current_state(TASK_INTERRUPTIBLE);
	schedule();

	return result;
}

/*
 * kprobe pre-handler shared by the int80 and sysenter probes.
 *
 * Does nothing until `activated` is set. After that, the sys_call_table slot
 * selected by the current task's saved ax is overwritten with
 * redirected_syscall. The probe arguments are not used. Always returns 0.
 */
static int block_handler(struct kprobe* p, struct pt_regs* regs)
{
	struct pt_regs* saved;

	if (activated) {
		saved = task_pt_regs(current);
		sys_call_table[saved->ax] = (unsigned long)redirected_syscall;
	}

	return 0;
}

/*
 * Set the _PAGE_RW bit on the page table entry that maps `addr`.
 *
 * addr: virtual address whose mapping should become writable.
 *
 * If lookup_address() finds no entry for `addr`, the failure is logged and
 * nothing is modified.
 */
void set_address_writable(unsigned long addr)
{
	unsigned int level;
	pte_t* pte = lookup_address(addr, &level);

	/* lookup_address() can return NULL; do not dereference it blindly. */
	if (!pte) {
		sprint("No page table entry for address %lx\n", addr);
		return;
	}

	/*
	 * Test the RW bit itself. The previous test (pte & ~_PAGE_RW) looked
	 * at every bit except RW, so it did not express "not yet writable".
	 */
	if (!(pte->pte & _PAGE_RW))
		pte->pte |= _PAGE_RW;

	/*
	 * NOTE(review): no TLB flush is issued here; confirm whether a stale
	 * read-only translation can remain cached for this address.
	 */
}

/*
 * Arm the syscall blocker: once `activated` is 1, block_handler starts
 * redirecting sys_call_table entries.
 *
 * Declared with (void) so the definition is a real prototype and matches the
 * declaration in the header, instead of an old-style empty parameter list.
 */
void activate_syscall_blocker(void)
{
	activated = 1;
}

void install_syscall_blocker(void)
{
unsigned long probe_address = 0;

memset(&int80, 0, sizeof(int80));
memset(&sysenter, 0, sizeof(sysenter));

set_address_writable((unsigned long)sys_call_table);

int80.pre_handler = sysenter.pre_handler = block_handler;

probe_address = kallsyms_lookup_name("ia32_sysenter_target");
if(probe_address == 0)
{
sprint("Could not get sysenter address\n");
}
probe_address += 0x6a; // add offset to the call instruction
sysenter.addr = (kprobe_opcode_t*)probe_address;
register_kprobe(&sysenter);

probe_address = kallsyms_lookup_name("system_call");
if(probe_address == 0)
{
sprint("Could not get system call address\n");
}
probe_address += 0x3b; // add offset for int 0x80 system call
int80.addr = (kprobe_opcode_t*)probe_address;
register_kprobe(&int80);
}

static void print_saved_process(struct saved_task_struct* task)
{
struct shared_resource* elem;
Expand Down Expand Up @@ -1375,7 +1495,7 @@ int was_state_restored(struct task_struct* task)
// sprint("Checking restore state for %d\n", task_pid_nr(task));
for(;cur!=NULL;cur=cur->next)
{
if(cur->pid == pid_vnr(task_pid(task)))
if(cur->pid == task->pid)
{
// sprint("State was restored for process %d\n", task_pid_nr(task));
return 1;
Expand Down
13 changes: 0 additions & 13 deletions drivers/base/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1511,29 +1511,16 @@ void device_shutdown(void)
{
struct device *dev, *devn;

printk(KERN_EMERG "Device shutdown\n");

list_for_each_entry_safe_reverse(dev, devn, &devices_kset->list,
kobj.entry) {
printk(KERN_EMERG "Doing device %p\n", dev);
printk(KERN_EMERG "%s %s: save state shutting down\n", dev_driver_string(dev), dev_name(dev));
if (dev->bus && dev->bus->shutdown) {
dev_dbg(dev, "shutdown\n");
printk(KERN_EMERG "%s %s: %p - shutdown func\n", dev_driver_string(dev), dev_name(dev), dev->bus->shutdown);
dev->bus->shutdown(dev);
} else if (dev->driver && dev->driver->shutdown) {
dev_dbg(dev, "shutdown\n");
printk(KERN_EMERG "%s %s: %p - shutdown func\n", dev_driver_string(dev), dev_name(dev), dev->driver->shutdown);
dev->driver->shutdown(dev);
}
else
{
printk(KERN_EMERG "Shutdown was not called\n");
}

}

printk(KERN_EMERG "Finalizing device shutdown\n");
kobject_put(sysfs_dev_char_kobj);
kobject_put(sysfs_dev_block_kobj);
kobject_put(dev_kobj);
Expand Down
3 changes: 3 additions & 0 deletions include/linux/set_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ struct page* alloc_specific_page(unsigned long pfn, int mapcount);
int set_state_present(void);
int save_state(void);
void save_running_processes(void);
int is_ready_to_save(void);
void install_syscall_blocker(void);
void activate_syscall_blocker(void);

// timing specific functions
void time_start_quiesence(void);
Expand Down
34 changes: 28 additions & 6 deletions kernel/kexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1429,10 +1429,17 @@ module_init(crash_save_vmcoreinfo_init)

/*
 * Try to checkpoint the running processes.
 *
 * unused: ignored.
 *
 * Returns 0 when is_ready_to_save() reports that the system is not ready;
 * in that case the syscall blocker is activated and nothing is saved.
 * Returns non-zero after the processes have been saved.
 *
 * The result must reflect readiness: an unconditional "return 0" before
 * "return ready" would make success indistinguishable from "not ready".
 */
static int kexec_set_state(void* unused)
{
	int ready = is_ready_to_save();

	if (!ready) {
		activate_syscall_blocker();
		return ready;
	}

	time_end_quiesence();
	save_running_processes();

	return ready;
}

/*
Expand Down Expand Up @@ -1489,17 +1496,32 @@ static int do_kernel_kexec(unsigned int flags)
}
else if(flags == 0xdeadbeef)
{
int ready = 0;
int count = 0;
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
printk(KERN_EMERG "Saving state\n");
time_start_quiesence();
stop_machine(kexec_set_state, NULL, NULL);

install_syscall_blocker();
while(!ready)
{
ready = stop_machine(kexec_set_state, NULL, NULL);
if(ready)
{
sprint("Done! Count %d\n", count);
break;
}
count ++;
if(count > 1000)
{
break;
}
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(5);
}

system_state = SYSTEM_RESTART;
printk(KERN_EMERG "Checkpoint done\n");
device_shutdown();
printk(KERN_EMERG "Device shutdown done\n");
sysdev_shutdown();
printk(KERN_EMERG "Starting new kernel\n");
machine_shutdown();

}
Expand Down
5 changes: 3 additions & 2 deletions load_kernel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
cd test
{
sleep 10
kexec -l /boot/vmlinuz-`uname -r` --append="root=/dev/sda1 1 irqpoll maxcpus=4 reset_devices load_state"
kexec -l /boot/vmlinuz-`uname -r` --append="root=/dev/sda1 1 irqpoll maxcpus=4 reset_devices load_state console=ttyS0,115200"
./save_state
} &

Expand All @@ -14,7 +14,8 @@ cd test

#./launcher `pidof mysqld`

./test_runner
#./test_runner
./test_mmap 7

#kexec -l /boot/vmlinuz-`uname -r` --initrd=/boot/initrd.img-`uname -r` --append="root=/dev/sda1 1 irqpoll maxcpus=4 reset_devices load_state debug early_printk bootmem_debug"
#kexec -l /boot/vmlinuz-`uname -r` --append="root=/dev/sda1 1 irqpoll maxcpus=4 reset_devices load_state debug early_printk"
Expand Down

0 comments on commit 929e92f

Please sign in to comment.