Skip to content

Commit

Permalink
Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/kdave/linux

Pull btrfs updates from David Sterba:
 "Mainly core changes, refactoring and optimizations.

  Performance is improved in some areas, overall there may be a
  cumulative improvement due to refactoring that removed lookups in the
  IO path or simplified IO submission tracking.

  Core:

   - submit IO synchronously for fast checksums (crc32c and xxhash),
     remove high priority worker kthread

   - read extent buffer in one go, simplify IO tracking, bio submission
     and locking

   - remove additional tracking of redirtied extent buffers, originally
     added for zoned mode but actually not needed

   - track ordered extent pointer in bio to avoid rbtree lookups during
     IO

   - scrub, use recovered data stripes as cache to avoid unnecessary
     read

   - in zoned mode, optimize logical to physical mappings of extents

   - remove PageError handling, not set by VFS nor writeback

   - cleanups, refactoring, better structure packing

   - lots of error handling improvements

   - more assertions, lockdep annotations

   - print assertion failure with the exact line where it happens

   - tracepoint updates

   - more debugging prints

  Performance:

   - speedup in fsync(), better tracking of inode logged status can
     avoid transaction commit

   - IO path structures track logical offsets in data structures and
     does not need to look it up

  User visible changes:

   - don't commit transaction for every created subvolume, this can
     reduce time when many subvolumes are created in a batch

   - print affected files when relocation fails

   - trigger orphan file cleanup during START_SYNC ioctl

  Notable fixes:

   - fix crash when disabling quota and relocation

   - fix crashes when removing roots from drity list

   - fix transacion abort during relocation when converting from newer
     profiles not covered by fallback

   - in zoned mode, stop reclaiming block groups if filesystem becomes
     read-only

   - fix rare race condition in tree mod log rewind that can miss some
     btree node slots

   - with enabled fsverity, drop up-to-date page bit in case the
     verification fails"

* tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits)
  btrfs: fix race between quota disable and relocation
  btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots
  btrfs: fix race when deleting free space root from the dirty cow roots list
  btrfs: fix race when deleting quota root from the dirty cow roots list
  btrfs: tracepoints: also show actual number of the outstanding extents
  btrfs: update i_version in update_dev_time
  btrfs: make btrfs_compressed_bioset static
  btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
  btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers
  btrfs: scrub: remove scrub_ctx::csum_list member
  btrfs: do not BUG_ON after failure to migrate space during truncation
  btrfs: do not BUG_ON on failure to get dir index for new snapshot
  btrfs: send: do not BUG_ON() on unexpected symlink data extent
  btrfs: do not BUG_ON() when dropping inode items from log root
  btrfs: replace BUG_ON() at split_item() with proper error handling
  btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr()
  btrfs: do not BUG_ON() on tree mod log failures at insert_ptr()
  btrfs: do not BUG_ON() on tree mod log failure at insert_new_root()
  btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert()
  btrfs: abort transaction at update_ref_for_cow() when ref count is zero
  ...
  • Loading branch information
torvalds committed Jun 26, 2023
2 parents e940efa + 8a4a0b2 commit cc423f6
Show file tree
Hide file tree
Showing 75 changed files with 2,737 additions and 2,667 deletions.
44 changes: 39 additions & 5 deletions fs/btrfs/async-thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
return atomic_read(&wq->pending) > wq->thresh * 2;
}

static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
struct btrfs_fs_info *fs_info)
{
wq->fs_info = fs_info;
atomic_set(&wq->pending, 0);
INIT_LIST_HEAD(&wq->ordered_list);
spin_lock_init(&wq->list_lock);
spin_lock_init(&wq->thres_lock);
}

struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
const char *name, unsigned int flags,
int limit_active, int thresh)
Expand All @@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
if (!ret)
return NULL;

ret->fs_info = fs_info;
btrfs_init_workqueue(ret, fs_info);

ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
Expand All @@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
return NULL;
}

INIT_LIST_HEAD(&ret->ordered_list);
spin_lock_init(&ret->list_lock);
spin_lock_init(&ret->thres_lock);
trace_btrfs_workqueue_alloc(ret, name);
return ret;
}

struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags)
{
struct btrfs_workqueue *ret;

ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;

btrfs_init_workqueue(ret, fs_info);

/* Ordered workqueues don't allow @max_active adjustments. */
ret->limit_active = 1;
ret->current_active = 1;
ret->thresh = NO_THRESHOLD;

ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
if (!ret->normal_wq) {
kfree(ret);
return NULL;
}

trace_btrfs_workqueue_alloc(ret, name);
return ret;
}
Expand Down
3 changes: 3 additions & 0 deletions fs/btrfs/async-thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
unsigned int flags,
int limit_active,
int thresh);
struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags);
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,
Expand Down
122 changes: 68 additions & 54 deletions fs/btrfs/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ struct btrfs_failed_bio {
atomic_t repair_count;
};

/* Is this a data path I/O that needs storage layer checksum and repair? */
static inline bool is_data_bbio(struct btrfs_bio *bbio)
{
return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
}

static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
{
return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
}

/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
Expand Down Expand Up @@ -61,20 +72,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
return bbio;
}

static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered;
int ret;

ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON_ONCE(!ordered))
return BLK_STS_IOERR;
ret = btrfs_extract_ordered_extent(bbio, ordered);
btrfs_put_ordered_extent(ordered);

return errno_to_blk_status(ret);
}

static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *orig_bbio,
u64 map_length, bool use_append)
Expand All @@ -95,13 +92,41 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
bbio->inode = orig_bbio->inode;
bbio->file_offset = orig_bbio->file_offset;
if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
orig_bbio->file_offset += map_length;

orig_bbio->file_offset += map_length;
if (bbio_has_ordered_extent(bbio)) {
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
}
atomic_inc(&orig_bbio->pending_ios);
return bbio;
}

/* Free a bio that was never submitted to the underlying device. */
static void btrfs_cleanup_bio(struct btrfs_bio *bbio)
{
if (bbio_has_ordered_extent(bbio))
btrfs_put_ordered_extent(bbio->ordered);
bio_put(&bbio->bio);
}

static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
{
if (bbio_has_ordered_extent(bbio)) {
struct btrfs_ordered_extent *ordered = bbio->ordered;

bbio->end_io(bbio);
btrfs_put_ordered_extent(ordered);
} else {
bbio->end_io(bbio);
}
}

void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
__btrfs_bio_end_io(bbio);
}

static void btrfs_orig_write_end_io(struct bio *bio);

static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
Expand Down Expand Up @@ -130,12 +155,12 @@ static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)

if (bbio->bio.bi_status)
btrfs_bbio_propagate_error(bbio, orig_bbio);
bio_put(&bbio->bio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}

if (atomic_dec_and_test(&bbio->pending_ios))
bbio->end_io(bbio);
__btrfs_bio_end_io(bbio);
}

static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
Expand Down Expand Up @@ -327,7 +352,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);

/* Metadata reads are checked and repaired by the submitter. */
if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
if (is_data_bbio(bbio))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
btrfs_orig_bbio_end_io(bbio);
Expand All @@ -348,7 +373,7 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_orig_bbio_end_io(bbio);
}
Expand All @@ -361,8 +386,7 @@ static void btrfs_raid56_end_io(struct bio *bio)

btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
!(bbio->bio.bi_opf & REQ_META))
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
btrfs_check_read_bio(bbio, NULL);
else
btrfs_orig_bbio_end_io(bbio);
Expand Down Expand Up @@ -472,13 +496,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
/* Do not leak our private flag into the block layer. */
bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;

if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
if (bio_op(bio) != REQ_OP_READ)
btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_private = smap->dev;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap->dev, bio);
Expand Down Expand Up @@ -574,27 +597,20 @@ static void run_one_async_free(struct btrfs_work *work)

static bool should_async_write(struct btrfs_bio *bbio)
{
/*
* If the I/O is not issued by fsync and friends, (->sync_writers != 0),
* then try to defer the submission to a workqueue to parallelize the
* checksum calculation.
*/
if (atomic_read(&bbio->inode->sync_writers))
/* Submit synchronously if the checksum implementation is fast. */
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;

/*
* Submit metadata writes synchronously if the checksum implementation
* is fast, or we are on a zoned device that wants I/O to be submitted
* in order.
* Try to defer the submission to a workqueue to parallelize the
* checksum calculation unless the I/O is issued synchronously.
*/
if (bbio->bio.bi_opf & REQ_META) {
struct btrfs_fs_info *fs_info = bbio->fs_info;
if (op_is_sync(bbio->bio.bi_opf))
return false;

if (btrfs_is_zoned(fs_info))
return false;
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return false;
}
/* Zoned devices require I/O to be submitted in order. */
if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
return false;

return true;
}
Expand Down Expand Up @@ -622,10 +638,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,

btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
run_one_async_free);
if (op_is_sync(bbio->bio.bi_opf))
btrfs_queue_work(fs_info->hipri_workers, &async->work);
else
btrfs_queue_work(fs_info->workers, &async->work);
btrfs_queue_work(fs_info->workers, &async->work);
return true;
}

Expand All @@ -635,7 +648,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
struct btrfs_fs_info *fs_info = bbio->fs_info;
struct btrfs_bio *orig_bbio = bbio;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << 9;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
bool use_append = btrfs_use_zone_append(bbio);
Expand All @@ -645,8 +658,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
int error;

btrfs_bio_counter_inc_blocked(fs_info);
error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (error) {
ret = errno_to_blk_status(error);
goto fail;
Expand All @@ -665,7 +678,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Save the iter for the end_io handler and preload the checksums for
* data reads.
*/
if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
Expand All @@ -676,9 +689,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
if (use_append) {
bio->bi_opf &= ~REQ_OP_WRITE;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
ret = btrfs_bio_extract_ordered_extent(bbio);
if (ret)
goto fail_put_bio;
}

/*
Expand All @@ -695,6 +705,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
goto fail_put_bio;
} else if (use_append) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
goto fail_put_bio;
}
}

Expand All @@ -704,7 +718,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)

fail_put_bio:
if (map_length < length)
bio_put(bio);
btrfs_cleanup_bio(bbio);
fail:
btrfs_bio_counter_dec(fs_info);
btrfs_bio_end_io(orig_bbio, ret);
Expand Down
29 changes: 17 additions & 12 deletions fs/btrfs/bio.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,29 @@ struct btrfs_bio {

union {
/*
* Data checksumming and original I/O information for internal
* use in the btrfs_submit_bio machinery.
* For data reads: checksumming and original I/O information.
* (for internal use in the btrfs_submit_bio machinery only)
*/
struct {
u8 *csum;
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
struct bvec_iter saved_iter;
};

/* For metadata parentness verification. */
/*
* For data writes:
* - ordered extent covering the bio
* - pointer to the checksums for this bio
* - original physical address from the allocator
* (for zone append only)
*/
struct {
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_sum *sums;
u64 orig_physical;
};

/* For metadata reads: parentness verification. */
struct btrfs_tree_parent_check parent_check;
};

Expand Down Expand Up @@ -84,15 +97,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
struct btrfs_fs_info *fs_info,
btrfs_bio_end_io_t end_io, void *private);

static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
bbio->end_io(bbio);
}

/* Bio only refers to one ordered extent. */
#define REQ_BTRFS_ONE_ORDERED REQ_DRV
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);

/* Submit using blkcg_punt_bio_submit. */
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
Expand Down
Loading

0 comments on commit cc423f6

Please sign in to comment.