Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Notable upstream pull request merges:
 #15516 da51bd1 Fix snap_obj_array memory leak in check_filesystem()
 #15519 35da345 L2ARC: Restrict write size to 1/4 of the device
 #15529 03e9caa Add a tunable to disable BRT support

Obtained from:	OpenZFS
OpenZFS commit:	03e9caa
  • Loading branch information
mmatuska committed Nov 17, 2023
2 parents 70e30ad + 03e9caa commit 47bb16f
Show file tree
Hide file tree
Showing 23 changed files with 87 additions and 71 deletions.
1 change: 1 addition & 0 deletions sys/contrib/openzfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
modules.order
Makefile
Makefile.in
changelog
*.patch
*.orig
*.tmp
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/META
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ Release: 1
Release-Tags: relext
License: CDDL
Author: OpenZFS
Linux-Maximum: 6.5
Linux-Maximum: 6.6
Linux-Minimum: 3.10
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ edonr
embedded_data
empty_bpobj
enabled_txg
encryption
extensible_dataset
filesystem_limits
hole_birth
Expand Down
1 change: 1 addition & 0 deletions sys/contrib/openzfs/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ ZFS_AC_DEBUG_INVARIANTS

AC_CONFIG_FILES([
contrib/debian/rules
contrib/debian/changelog
Makefile
include/Makefile
lib/libzfs/libzfs.pc
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
openzfs-linux (@VERSION@-1) unstable; urgency=low

* OpenZFS @VERSION@ is tagged.

-- Umer Saleem <[email protected]> Wed, 15 Nov 2023 15:00:00 +0500

openzfs-linux (2.2.99-1) unstable; urgency=low

* OpenZFS 2.2 is tagged.
Expand Down
4 changes: 1 addition & 3 deletions sys/contrib/openzfs/include/os/linux/spl/sys/kmem_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,6 @@ typedef enum kmem_cbrc {
#define KMC_REAP_CHUNK INT_MAX
#define KMC_DEFAULT_SEEKS 1

#define KMC_RECLAIM_ONCE 0x1 /* Force a single shrinker pass */

extern struct list_head spl_kmem_cache_list;
extern struct rw_semaphore spl_kmem_cache_sem;

Expand Down Expand Up @@ -108,7 +106,7 @@ typedef struct spl_kmem_magazine {
uint32_t skm_refill; /* Batch refill size */
struct spl_kmem_cache *skm_cache; /* Owned by cache */
unsigned int skm_cpu; /* Owned by cpu */
void *skm_objs[0]; /* Object pointers */
void *skm_objs[]; /* Object pointers */
} spl_kmem_magazine_t;

typedef struct spl_kmem_obj {
Expand Down
2 changes: 2 additions & 0 deletions sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vfsops_os.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ extern "C" {
typedef struct zfsvfs zfsvfs_t;
struct znode;

extern int zfs_bclone_enabled;

/*
* This structure emulates the vfs_t from other platforms. Its purpose
* is to facilitate the handling of mount options and minimize structural
Expand Down
4 changes: 2 additions & 2 deletions sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ typedef struct raidz_row {
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
#endif
raidz_col_t rr_col[0]; /* Flexible array of I/O columns */
raidz_col_t rr_col[]; /* Flexible array of I/O columns */
} raidz_row_t;

typedef struct raidz_map {
Expand All @@ -149,7 +149,7 @@ typedef struct raidz_map {
zfs_locked_range_t *rm_lr;
const raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
raidz_col_t *rm_phys_col; /* if non-NULL, read i/o aggregation */
raidz_row_t *rm_row[0]; /* flexible array of rows */
raidz_row_t *rm_row[]; /* flexible array of rows */
} raidz_map_t;

/*
Expand Down
8 changes: 0 additions & 8 deletions sys/contrib/openzfs/man/man4/spl.4
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,6 @@ for use by the kmem caches.
For the majority of systems and workloads only a small number of threads are
required.
.
.It Sy spl_kmem_cache_reclaim Ns = Ns Sy 0 Pq uint
When this is set it prevents Linux from being able to rapidly reclaim all the
memory held by the kmem caches.
This may be useful in circumstances where it's preferable that Linux
reclaim memory from some other subsystem first.
Setting this will increase the likelihood of out of memory events on a memory
constrained system.
.
.It Sy spl_kmem_cache_obj_per_slab Ns = Ns Sy 8 Pq uint
The preferred number of objects per slab in the cache.
In general, a larger value will increase the caches memory footprint
Expand Down
5 changes: 5 additions & 0 deletions sys/contrib/openzfs/man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,11 @@ Selecting any option other than
results in vector instructions
from the respective CPU instruction set being used.
.
.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable the experimental block cloning feature.
If this setting is 0, then even if feature@block_cloning is enabled,
attempts to clone blocks will act as though the feature is disabled.
.
.It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
Select a BLAKE3 implementation.
.Pp
Expand Down
9 changes: 9 additions & 0 deletions sys/contrib/openzfs/man/man7/zpool-features.7
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,11 @@ to the end of the line is ignored.
.Bd -literal -compact -offset 4n
.No example# Nm cat Pa /usr/share/zfs/compatibility.d/grub2
# Features which are supported by GRUB2
allocation_classes
async_destroy
block_cloning
bookmarks
device_rebuild
embedded_data
empty_bpobj
enabled_txg
Expand All @@ -229,8 +232,14 @@ filesystem_limits
hole_birth
large_blocks
livelist
log_spacemap
lz4_compress
project_quota
resilver_defer
spacemap_histogram
spacemap_v2
userobj_accounting
zilsaxattr
zpool_checkpoint

.No example# Nm zpool Cm create Fl o Sy compatibility Ns = Ns Ar grub2 Ar bootpool Ar vdev
Expand Down
4 changes: 4 additions & 0 deletions sys/contrib/openzfs/module/Kbuild.in
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))

UBSAN_SANITIZE_zap_leaf.o := n
UBSAN_SANITIZE_zap_micro.o := n
UBSAN_SANITIZE_sa.o := n

# Suppress incorrect warnings from versions of objtool which are not
# aware of x86 EVEX prefix instructions used for AVX512.
OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
Expand Down
11 changes: 0 additions & 11 deletions sys/contrib/openzfs/module/os/linux/spl/spl-kmem-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,6 @@ module_param(spl_kmem_cache_magazine_size, uint, 0444);
MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
"Default magazine size (2-256), set automatically (0)");

/*
* The default behavior is to report the number of objects remaining in the
* cache. This allows the Linux VM to repeatedly reclaim objects from the
* cache when memory is low to satisfy other memory allocations. Alternately,
* setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
* is reclaimed. This may increase the likelihood of out of memory events.
*/
static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
module_param(spl_kmem_cache_reclaim, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");

static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
Expand Down
4 changes: 4 additions & 0 deletions sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -4249,4 +4249,8 @@ EXPORT_SYMBOL(zfs_map);
module_param(zfs_delete_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");

/* CSTYLED */
module_param(zfs_bclone_enabled, uint, 0644);
MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");

#endif
5 changes: 5 additions & 0 deletions sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include <sys/zfs_vnops.h>
#include <sys/zfeature.h>

int zfs_bclone_enabled = 1;

/*
* Clone part of a file via block cloning.
*
Expand All @@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
fstrans_cookie_t cookie;
int err;

if (!zfs_bclone_enabled)
return (-EOPNOTSUPP);

if (!spa_feature_is_enabled(
dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
return (-EOPNOTSUPP);
Expand Down
30 changes: 4 additions & 26 deletions sys/contrib/openzfs/module/zfs/arc.c
Original file line number Diff line number Diff line change
Expand Up @@ -8035,9 +8035,8 @@ l2arc_write_size(l2arc_dev_t *dev)
*/
size = l2arc_write_max;
if (size == 0) {
cmn_err(CE_NOTE, "Bad value for l2arc_write_max, value must "
"be greater than zero, resetting it to the default (%d)",
L2ARC_WRITE_SIZE);
cmn_err(CE_NOTE, "l2arc_write_max must be greater than zero, "
"resetting it to the default (%d)", L2ARC_WRITE_SIZE);
size = l2arc_write_max = L2ARC_WRITE_SIZE;
}

Expand All @@ -8060,30 +8059,9 @@ l2arc_write_size(l2arc_dev_t *dev)
* device. This is important in l2arc_evict(), otherwise infinite
* iteration can occur.
*/
if (size > dev->l2ad_end - dev->l2ad_start) {
cmn_err(CE_NOTE, "l2arc_write_max or l2arc_write_boost "
"plus the overhead of log blocks (persistent L2ARC, "
"%llu bytes) exceeds the size of the cache device "
"(guid %llu), resetting them to the default (%d)",
(u_longlong_t)l2arc_log_blk_overhead(size, dev),
(u_longlong_t)dev->l2ad_vdev->vdev_guid, L2ARC_WRITE_SIZE);
size = MIN(size, (dev->l2ad_end - dev->l2ad_start) / 4);

size = l2arc_write_max = l2arc_write_boost = L2ARC_WRITE_SIZE;

if (l2arc_trim_ahead > 1) {
cmn_err(CE_NOTE, "l2arc_trim_ahead set to 1");
l2arc_trim_ahead = 1;
}

if (arc_warm == B_FALSE)
size += l2arc_write_boost;

size += l2arc_log_blk_overhead(size, dev);
if (dev->l2ad_vdev->vdev_has_trim && l2arc_trim_ahead > 0) {
size += MAX(64 * 1024 * 1024,
(size * l2arc_trim_ahead) / 100);
}
}
size = P2ROUNDUP(size, 1ULL << dev->l2ad_vdev->vdev_ashift);

return (size);

Expand Down
6 changes: 4 additions & 2 deletions sys/contrib/openzfs/module/zfs/spa_errlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,10 @@ check_filesystem(spa_t *spa, uint64_t head_ds, zbookmark_err_phys_t *zep,
dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG);
}

if (zap_clone == 0 || aff_snap_count == 0)
return (0);
if (zap_clone == 0 || aff_snap_count == 0) {
error = 0;
goto out;
}

/* Check clones. */
zap_cursor_t *zc;
Expand Down
15 changes: 15 additions & 0 deletions sys/contrib/openzfs/tests/zfs-tests/include/libtest.shlib
Original file line number Diff line number Diff line change
Expand Up @@ -3334,6 +3334,21 @@ function set_tunable_impl
esac
}

#
# Save the current value of a tunable to $TEST_BASE_DIR/tunable-<name> so
# that restore_tunable can put it back later.
#
# $1	tunable name, passed through to get_tunable
#
# Returns 1 if $TEST_BASE_DIR is not a directory, 2 if a value has already
# been saved for this tunable, and 3 if get_tunable reports failure.
# NOTE(review): assumes get_tunable exits non-zero when the tunable cannot
# be read -- confirm against its definition.
#
function save_tunable
{
	typeset val

	[[ ! -d $TEST_BASE_DIR ]] && return 1
	[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2

	# Read the value first so that a failed lookup neither creates an
	# empty save file nor gets reported as success.
	val="$(get_tunable "$1")" || return 3
	echo "$val" > "$TEST_BASE_DIR/tunable-$1"
}

#
# Restore a tunable from the value stored in $TEST_BASE_DIR/tunable-<name>
# and remove that file.
#
# $1	tunable name, passed through to set_tunable64
#
# Returns 1 if no saved value exists.  Otherwise returns the exit status of
# set_tunable64 if it failed, or the exit status of rm if removing the save
# file failed.
#
function restore_tunable
{
	typeset saved="$TEST_BASE_DIR/tunable-$1"
	typeset val
	typeset -i rc=0

	[[ ! -e $saved ]] && return 1

	val="$(cat "$saved")"
	set_tunable64 "$1" "$val" || rc=$?

	# Always drop the save file, even if the set failed, so a later
	# save_tunable for the same name is not blocked by a stale file.
	rm "$saved" || return $?
	return $rc
}

#
# Get a global system tunable
#
Expand Down
1 change: 1 addition & 0 deletions sys/contrib/openzfs/tests/zfs-tests/include/tunables.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev
VOL_MODE vol.mode zvol_volmode
VOL_RECURSIVE vol.recursive UNSUPPORTED
VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq
BCLONE_ENABLED zfs_bclone_enabled zfs_bclone_enabled
XATTR_COMPAT xattr_compat zfs_xattr_compat
ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max
ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ verify_runnable "global"

default_cleanup_noexit

if tunable_exists BCLONE_ENABLED ; then
log_must restore_tunable BCLONE_ENABLED
fi

log_pass
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,9 @@ fi

verify_runnable "global"

if tunable_exists BCLONE_ENABLED ; then
log_must save_tunable BCLONE_ENABLED
log_must set_tunable32 BCLONE_ENABLED 1
fi

log_pass
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,13 @@
# 2. Set l2arc_write_max to a value larger than the cache device.
# 3. Create a file larger than the cache device and random read
# for 10 sec.
# 4. Verify that l2arc_write_max is set back to the default.
# 5. Set l2arc_write_max to a value less than the cache device size but
# 4. Set l2arc_write_max to a value less than the cache device size but
# larger than the default (256MB).
# 6. Record the l2_size.
# 7. Random read for 1 sec.
# 8. Record the l2_size again.
# 9. If (6) <= (8) then we have not looped around yet.
# 10. If (6) > (8) then we looped around. Break out of the loop and test.
# 11. Destroy pool.
# 5. Record the l2_size.
# 6. Random read for 1 sec.
# 7. Record the l2_size again.
# 8. If (5) <= (7) then we have not looped around yet.
# 9. Destroy pool.
#

verify_runnable "global"
Expand Down Expand Up @@ -93,10 +91,6 @@ log_must zfs set relatime=off $TESTPOOL
log_must fio $FIO_SCRIPTS/mkfiles.fio
log_must fio $FIO_SCRIPTS/random_reads.fio

typeset write_max2=$(get_tunable L2ARC_WRITE_MAX)

log_must test $write_max2 -eq $write_max

log_must set_tunable32 L2ARC_WRITE_MAX $(( 256 * 1024 * 1024 ))
export RUNTIME=1

Expand All @@ -108,8 +102,6 @@ while $do_once || [[ $l2_size1 -le $l2_size2 ]]; do
do_once=false
done

log_must test $l2_size1 -gt $l2_size2

log_must zpool destroy $TESTPOOL

log_pass "Looping around a cache device succeeds."
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
# STRATEGY:
# 1. Create a pool with a known feature set.
# 2. Verify only those features are active/enabled.
# 3. Do this for all known feature sets
#

verify_runnable "global"
Expand All @@ -47,8 +48,11 @@ log_onexit cleanup

log_assert "creates a pool with a specified feature set enabled"

log_must zpool create -f -o compatibility=compat-2020 $TESTPOOL $DISKS
check_feature_set $TESTPOOL compat-2020
log_must zpool destroy -f $TESTPOOL
for compat in "$ZPOOL_COMPAT_DIR"/*
do
log_must zpool create -f -o compatibility="${compat##*/}" $TESTPOOL $DISKS
check_feature_set $TESTPOOL "${compat##*/}"
log_must zpool destroy -f $TESTPOOL
done

log_pass "creates a pool with a specified feature set enabled"

0 comments on commit 47bb16f

Please sign in to comment.