diff --git a/bio.c b/bio.c index a6cefc8b47..916968d3b1 100644 --- a/bio.c +++ b/bio.c @@ -80,6 +80,8 @@ bget(uint dev, uint sector) } // Not cached; recycle some non-busy and clean buffer. + // "clean" because B_DIRTY and !B_BUSY means log.c + // hasn't yet committed the changes to the buffer. for(b = bcache.head.prev; b != &bcache.head; b = b->prev){ if((b->flags & B_BUSY) == 0 && (b->flags & B_DIRTY) == 0){ b->dev = dev; diff --git a/defs.h b/defs.h index 23b1019ba1..560b19af0f 100644 --- a/defs.h +++ b/defs.h @@ -81,8 +81,8 @@ void microdelay(int); // log.c void initlog(void); void log_write(struct buf*); -void begin_trans(); -void commit_trans(); +void begin_op(); +void end_op(); // mp.c extern int ismp; diff --git a/exec.c b/exec.c index 7eaef5b5ad..8dbbdb66b1 100644 --- a/exec.c +++ b/exec.c @@ -18,9 +18,9 @@ exec(char *path, char **argv) struct proghdr ph; pde_t *pgdir, *oldpgdir; - begin_trans(); + begin_op(); if((ip = namei(path)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); @@ -50,7 +50,7 @@ exec(char *path, char **argv) goto bad; } iunlockput(ip); - commit_trans(); + end_op(); ip = 0; // Allocate two pages at the next page boundary. 
@@ -101,7 +101,7 @@ exec(char *path, char **argv) freevm(pgdir); if(ip){ iunlockput(ip); - commit_trans(); + end_op(); } return -1; } diff --git a/file.c b/file.c index 53c5af242f..98cad1e9a1 100644 --- a/file.c +++ b/file.c @@ -72,9 +72,9 @@ fileclose(struct file *f) if(ff.type == FD_PIPE) pipeclose(ff.pipe, ff.writable); else if(ff.type == FD_INODE){ - begin_trans(); + begin_op(); iput(ff.ip); - commit_trans(); + end_op(); } } @@ -136,12 +136,12 @@ filewrite(struct file *f, char *addr, int n) if(n1 > max) n1 = max; - begin_trans(); + begin_op(); ilock(f->ip); if ((r = writei(f->ip, addr + i, f->off, n1)) > 0) f->off += r; iunlock(f->ip); - commit_trans(); + end_op(); if(r < 0) break; diff --git a/log.c b/log.c index 95cc4d5752..0abe1fe7d2 100644 --- a/log.c +++ b/log.c @@ -5,18 +5,19 @@ #include "fs.h" #include "buf.h" -// Simple logging. Each file system system call -// should be surrounded with begin_trans() and commit_trans() calls. +// Simple logging that allows concurrent FS system calls. // -// The log holds at most one transaction at a time. Commit forces -// the log (with commit record) to disk, then installs the affected -// blocks to disk, then erases the log. begin_trans() ensures that -// only one system call can be in a transaction; others must wait. -// -// Allowing only one transaction at a time means that the file -// system code doesn't have to worry about the possibility of -// one transaction reading a block that another one has modified, -// for example an i-node block. +// A log transaction contains the updates of multiple FS system +// calls. The logging system only commits when there are +// no FS system calls active. Thus there is never +// any reasoning required about whether a commit might +// write an uncommitted system call's updates to disk. +// +// A system call should call begin_op()/end_op() to mark +// its start and end. Usually begin_op() just increments +// the count of in-progress FS system calls and returns. 
+// But if it thinks the log is close to running out, it +// sleeps until the last outstanding end_op() commits. // // The log is a physical re-do log containing disk blocks. // The on-disk log format: @@ -38,13 +39,15 @@ struct log { struct spinlock lock; int start; int size; - int busy; // a transaction is active + int outstanding; // how many FS sys calls are executing. + int committing; // in commit(), please wait. int dev; struct logheader lh; }; struct log log; static void recover_from_log(void); +static void commit(); void initlog(void) @@ -117,36 +120,88 @@ recover_from_log(void) write_head(); // clear the log } +// called at the start of each FS system call. void -begin_trans(void) +begin_op(void) { acquire(&log.lock); - while (log.busy) { - sleep(&log, &log.lock); + while(1){ + if(log.committing){ + sleep(&log, &log.lock); + } else if(log.lh.n + (log.outstanding+1)*MAXOPBLOCKS > LOGSIZE){ + // this op might exhaust log space; wait for commit. + sleep(&log, &log.lock); + } else { + log.outstanding += 1; + release(&log.lock); + break; + } } - log.busy = 1; - release(&log.lock); } +// called at the end of each FS system call. +// commits if this was the last outstanding operation. void -commit_trans(void) +end_op(void) +{ + int do_commit = 0; + + acquire(&log.lock); + log.outstanding -= 1; + if(log.committing) + panic("log.committing"); + if(log.outstanding == 0){ + do_commit = 1; + log.committing = 1; + } else { + // begin_op() may be waiting for log space. + wakeup(&log); + } + release(&log.lock); + + if(do_commit){ + // call commit w/o holding locks, since not allowed + // to sleep with locks. + commit(); + acquire(&log.lock); + log.committing = 0; + wakeup(&log); + release(&log.lock); + } +} + +// Copy modified blocks from cache to log. 
+static void +write_log(void) +{ + int tail; + + for (tail = 0; tail < log.lh.n; tail++) { + struct buf *to = bread(log.dev, log.start+tail+1); // log block + struct buf *from = bread(log.dev, log.lh.sector[tail]); // cache block + memmove(to->data, from->data, BSIZE); + bwrite(to); // write the log + brelse(from); + brelse(to); + } +} + +static void +commit() { if (log.lh.n > 0) { + write_log(); // Write modified blocks from cache to log write_head(); // Write header to disk -- the real commit install_trans(); // Now install writes to home locations log.lh.n = 0; write_head(); // Erase the transaction from the log } - - acquire(&log.lock); - log.busy = 0; - wakeup(&log); - release(&log.lock); } // Caller has modified b->data and is done with the buffer. -// Append the block to the log and record the block number, -// but don't write the log header (which would commit the write). +// Record the block number and pin in the cache with B_DIRTY. +// commit()/write_log() will do the disk write. +// // log_write() replaces bwrite(); a typical use is: // bp = bread(...) // modify bp->data[] @@ -159,21 +214,19 @@ log_write(struct buf *b) if (log.lh.n >= LOGSIZE || log.lh.n >= log.size - 1) panic("too big a transaction"); - if (!log.busy) - panic("write outside of trans"); + if (log.outstanding < 1) + panic("log_write outside of trans"); + acquire(&log.lock); // log.lh is shared by every in-progress FS system call for (i = 0; i < log.lh.n; i++) { - if (log.lh.sector[i] == b->sector) // log absorbtion? + if (log.lh.sector[i] == b->sector) // log absorption break; } log.lh.sector[i] = b->sector; - struct buf *lbuf = bread(b->dev, log.start+i+1); - memmove(lbuf->data, b->data, BSIZE); - bwrite(lbuf); - brelse(lbuf); if (i == log.lh.n) log.lh.n++; - b->flags |= B_DIRTY; // XXX prevent eviction + b->flags |= B_DIRTY; // prevent eviction + release(&log.lock); } //PAGEBREAK! 
diff --git a/mkfs.c b/mkfs.c index 4b0e3298dc..c168377275 100644 --- a/mkfs.c +++ b/mkfs.c @@ -13,7 +13,7 @@ #define static_assert(a, b) do { switch (0) case 0: case (a): ; } while (0) -int nblocks = 985; +int nblocks = (995-LOGSIZE); int nlog = LOGSIZE; int ninodes = 200; int size = 1024; diff --git a/param.h b/param.h index b6f6f46967..bdac60c9b9 100644 --- a/param.h +++ b/param.h @@ -3,10 +3,11 @@ #define NCPU 8 // maximum number of CPUs #define NOFILE 16 // open files per process #define NFILE 100 // open files per system -#define NBUF 10 // size of disk block cache #define NINODE 50 // maximum number of active i-nodes #define NDEV 10 // maximum major device number #define ROOTDEV 1 // device number of file system root disk #define MAXARG 32 // max exec arguments -#define LOGSIZE 10 // max data sectors in on-disk log +#define MAXOPBLOCKS 10 // max # of blocks any FS op writes +#define LOGSIZE (MAXOPBLOCKS*3) // max data sectors in on-disk log +#define NBUF (MAXOPBLOCKS*3) // size of disk block cache (>= LOGSIZE) diff --git a/proc.c b/proc.c index db0e9c7b82..a642f5a4da 100644 --- a/proc.c +++ b/proc.c @@ -186,9 +186,9 @@ exit(void) } } - begin_trans(); + begin_op(); iput(proc->cwd); - commit_trans(); + end_op(); proc->cwd = 0; acquire(&ptable.lock); diff --git a/sysfile.c b/sysfile.c index 095fca749f..2209f6e1eb 100644 --- a/sysfile.c +++ b/sysfile.c @@ -121,16 +121,16 @@ sys_link(void) if(argstr(0, &old) < 0 || argstr(1, &new) < 0) return -1; - begin_trans(); + begin_op(); if((ip = namei(old)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); if(ip->type == T_DIR){ iunlockput(ip); - commit_trans(); + end_op(); return -1; } @@ -148,7 +148,7 @@ sys_link(void) iunlockput(dp); iput(ip); - commit_trans(); + end_op(); return 0; @@ -157,7 +157,7 @@ sys_link(void) ip->nlink--; iupdate(ip); iunlockput(ip); - commit_trans(); + end_op(); return -1; } @@ -189,9 +189,9 @@ sys_unlink(void) if(argstr(0, &path) < 0) return -1; - begin_trans(); + begin_op(); if((dp 
= nameiparent(path, name)) == 0){ - commit_trans(); + end_op(); return -1; } @@ -225,13 +225,13 @@ sys_unlink(void) iupdate(ip); iunlockput(ip); - commit_trans(); + end_op(); return 0; bad: iunlockput(dp); - commit_trans(); + end_op(); return -1; } @@ -291,23 +291,23 @@ sys_open(void) if(argstr(0, &path) < 0 || argint(1, &omode) < 0) return -1; - begin_trans(); + begin_op(); if(omode & O_CREATE){ ip = create(path, T_FILE, 0, 0); if(ip == 0){ - commit_trans(); + end_op(); return -1; } } else { if((ip = namei(path)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); if(ip->type == T_DIR && omode != O_RDONLY){ iunlockput(ip); - commit_trans(); + end_op(); return -1; } } @@ -316,11 +316,11 @@ sys_open(void) if(f) fileclose(f); iunlockput(ip); - commit_trans(); + end_op(); return -1; } iunlock(ip); - commit_trans(); + end_op(); f->type = FD_INODE; f->ip = ip; @@ -336,13 +336,13 @@ sys_mkdir(void) char *path; struct inode *ip; - begin_trans(); + begin_op(); if(argstr(0, &path) < 0 || (ip = create(path, T_DIR, 0, 0)) == 0){ - commit_trans(); + end_op(); return -1; } iunlockput(ip); - commit_trans(); + end_op(); return 0; } @@ -354,16 +354,16 @@ sys_mknod(void) int len; int major, minor; - begin_trans(); + begin_op(); if((len=argstr(0, &path)) < 0 || argint(1, &major) < 0 || argint(2, &minor) < 0 || (ip = create(path, T_DEV, major, minor)) == 0){ - commit_trans(); + end_op(); return -1; } iunlockput(ip); - commit_trans(); + end_op(); return 0; } @@ -373,20 +373,20 @@ sys_chdir(void) char *path; struct inode *ip; - begin_trans(); + begin_op(); if(argstr(0, &path) < 0 || (ip = namei(path)) == 0){ - commit_trans(); + end_op(); return -1; } ilock(ip); if(ip->type != T_DIR){ iunlockput(ip); - commit_trans(); + end_op(); return -1; } iunlock(ip); iput(proc->cwd); - commit_trans(); + end_op(); proc->cwd = ip; return 0; } diff --git a/usertests.c b/usertests.c index 5a78c7cfac..22a7bfbd5d 100644 --- a/usertests.c +++ b/usertests.c @@ -512,51 +512,56 @@ sharedfd(void) } } 
-// two processes write two different files at the same +// four processes write different files at the same // time, to test block allocation. void -twofiles(void) +fourfiles(void) { - int fd, pid, i, j, n, total; + int fd, pid, i, j, n, total, pi; + char *names[] = { "f0", "f1", "f2", "f3" }; char *fname; - printf(1, "twofiles test\n"); + printf(1, "fourfiles test\n"); - unlink("f1"); - unlink("f2"); + for(pi = 0; pi < 4; pi++){ + fname = names[pi]; + unlink(fname); - pid = fork(); - if(pid < 0){ - printf(1, "fork failed\n"); - exit(); - } - - fname = pid ? "f1" : "f2"; - fd = open(fname, O_CREATE | O_RDWR); - if(fd < 0){ - printf(1, "create failed\n"); - exit(); - } + pid = fork(); + if(pid < 0){ + printf(1, "fork failed\n"); + exit(); + } - memset(buf, pid?'p':'c', 512); - for(i = 0; i < 12; i++){ - if((n = write(fd, buf, 500)) != 500){ - printf(1, "write failed %d\n", n); + if(pid == 0){ + fd = open(fname, O_CREATE | O_RDWR); + if(fd < 0){ + printf(1, "create failed\n"); + exit(); + } + + memset(buf, '0'+pi, 512); + for(i = 0; i < 12; i++){ + if((n = write(fd, buf, 500)) != 500){ + printf(1, "write failed %d\n", n); + exit(); + } + } exit(); } } - close(fd); - if(pid) + + for(pi = 0; pi < 4; pi++){ wait(); - else - exit(); + } - for(i = 0; i < 2; i++){ - fd = open(i?"f1":"f2", 0); + for(i = 0; i < 4; i++){ + fname = names[i]; + fd = open(fname, 0); total = 0; while((n = read(fd, buf, sizeof(buf))) > 0){ for(j = 0; j < n; j++){ - if(buf[j] != (i?'p':'c')){ + if(buf[j] != '0'+i){ printf(1, "wrong char\n"); exit(); } @@ -568,87 +573,80 @@ twofiles(void) printf(1, "wrong length %d\n", total); exit(); } + unlink(fname); } - unlink("f1"); - unlink("f2"); - - printf(1, "twofiles ok\n"); + printf(1, "fourfiles ok\n"); } -// two processes create and delete different files in same directory +// four processes create and delete different files in same directory void createdelete(void) { enum { N = 20 }; - int pid, i, fd; + int pid, i, fd, pi; char name[32]; printf(1, "createdelete test\n"); - pid = 
fork(); - if(pid < 0){ - printf(1, "fork failed\n"); - exit(); - } - name[0] = pid ? 'p' : 'c'; - name[2] = '\0'; - for(i = 0; i < N; i++){ - name[1] = '0' + i; - fd = open(name, O_CREATE | O_RDWR); - if(fd < 0){ - printf(1, "create failed\n"); + for(pi = 0; pi < 4; pi++){ + pid = fork(); + if(pid < 0){ + printf(1, "fork failed\n"); exit(); } - close(fd); - if(i > 0 && (i % 2 ) == 0){ - name[1] = '0' + (i / 2); - if(unlink(name) < 0){ - printf(1, "unlink failed\n"); - exit(); + + if(pid == 0){ + name[0] = 'p' + pi; + name[2] = '\0'; + for(i = 0; i < N; i++){ + name[1] = '0' + i; + fd = open(name, O_CREATE | O_RDWR); + if(fd < 0){ + printf(1, "create failed\n"); + exit(); + } + close(fd); + if(i > 0 && (i % 2 ) == 0){ + name[1] = '0' + (i / 2); + if(unlink(name) < 0){ + printf(1, "unlink failed\n"); + exit(); + } + } } + exit(); } } - if(pid==0) - exit(); - else + for(pi = 0; pi < 4; pi++){ wait(); + } + name[0] = name[1] = name[2] = 0; for(i = 0; i < N; i++){ - name[0] = 'p'; - name[1] = '0' + i; - fd = open(name, 0); - if((i == 0 || i >= N/2) && fd < 0){ - printf(1, "oops createdelete %s didn't exist\n", name); - exit(); - } else if((i >= 1 && i < N/2) && fd >= 0){ - printf(1, "oops createdelete %s did exist\n", name); - exit(); - } - if(fd >= 0) - close(fd); - - name[0] = 'c'; - name[1] = '0' + i; - fd = open(name, 0); - if((i == 0 || i >= N/2) && fd < 0){ - printf(1, "oops createdelete %s didn't exist\n", name); - exit(); - } else if((i >= 1 && i < N/2) && fd >= 0){ - printf(1, "oops createdelete %s did exist\n", name); - exit(); + for(pi = 0; pi < 4; pi++){ + name[0] = 'p' + pi; + name[1] = '0' + i; + fd = open(name, 0); + if((i == 0 || i >= N/2) && fd < 0){ + printf(1, "oops createdelete %s didn't exist\n", name); + exit(); + } else if((i >= 1 && i < N/2) && fd >= 0){ + printf(1, "oops createdelete %s did exist\n", name); + exit(); + } + if(fd >= 0) + close(fd); } - if(fd >= 0) - close(fd); } for(i = 0; i < N; i++){ - name[0] = 'p'; - name[1] = '0' + i; - 
unlink(name); - name[0] = 'c'; - unlink(name); + for(pi = 0; pi < 4; pi++){ + name[0] = 'p' + pi; + name[1] = '0' + i; + unlink(name); + } } printf(1, "createdelete ok\n"); @@ -1716,6 +1714,12 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); + createdelete(); + linkunlink(); + concreate(); + fourfiles(); + sharedfd(); + bigargtest(); bigwrite(); bigargtest(); @@ -1741,18 +1745,12 @@ main(int argc, char *argv[]) fourteen(); bigfile(); subdir(); - concreate(); - linkunlink(); linktest(); unlinkread(); - createdelete(); - twofiles(); - sharedfd(); dirfile(); iref(); forktest(); bigdir(); // slow - exectest(); exit();