Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Notable upstream pull request merges:
 #15290 54b1b1d import: require force when cachefile hostid doesn't
                  match on-disk
 #15319 342357c Reduce number of metaslab preload taskq threads
 #15340 2a6c621 ARC: Remove b_cv from struct l1arc_buf_hdr
 #15347 75a2eb7 ARC: Drop different size headers for crypto
 #15350 96b9cf4 ARC: Remove b_bufcnt/b_ebufcnt from ARC headers
 #15353 66b81b3 ZIL: Reduce maximum size of WR_COPIED to 7.5K
 #15362 5b8688e zfsconcepts: add description of block cloning

Obtained from:	OpenZFS
OpenZFS commit:	66b81b3
  • Loading branch information
mmatuska committed Oct 8, 2023
2 parents 2821a74 + 66b81b3 commit b2526e8
Show file tree
Hide file tree
Showing 25 changed files with 512 additions and 332 deletions.
21 changes: 21 additions & 0 deletions sys/contrib/openzfs/.cirrus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
env:
CIRRUS_CLONE_DEPTH: 1
ARCH: amd64

build_task:
matrix:
freebsd_instance:
image_family: freebsd-12-4
freebsd_instance:
image_family: freebsd-13-2
freebsd_instance:
image_family: freebsd-14-0-snap
prepare_script:
- pkg install -y autoconf automake libtool gettext-runtime gmake ksh93 py39-packaging py39-cffi py39-sysctl
configure_script:
- env MAKE=gmake ./autogen.sh
- env MAKE=gmake ./configure --with-config="user" --with-python=3.9
build_script:
- gmake -j `sysctl -n kern.smp.cpus`
install_script:
- gmake install
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
!udev/**

!.editorconfig
!.cirrus.yml
!.gitignore
!.gitmodules
!AUTHORS
Expand All @@ -60,7 +61,6 @@
!TEST
!zfs.release.in


#
# Normal rules
#
Expand Down
23 changes: 19 additions & 4 deletions sys/contrib/openzfs/cmd/zpool/zpool_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3122,12 +3122,21 @@ zfs_force_import_required(nvlist_t *config)
nvlist_t *nvinfo;

state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);

/*
* The hostid on LOAD_INFO comes from the MOS label via
* spa_tryimport(). If it's not there then we're likely talking to an
* older kernel, so use the top one, which will be from the label
* discovered in zpool_find_import(), or if a cachefile is in use, the
* local hostid.
*/
if (nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_HOSTID, &hostid) != 0)
nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);

if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
return (B_TRUE);

nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
ZPOOL_CONFIG_MMP_STATE);
Expand Down Expand Up @@ -3198,15 +3207,21 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
time_t timestamp = 0;
uint64_t hostid = 0;

if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTNAME))
hostname = fnvlist_lookup_string(nvinfo,
ZPOOL_CONFIG_HOSTNAME);
else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
hostname = fnvlist_lookup_string(config,
ZPOOL_CONFIG_HOSTNAME);

if (nvlist_exists(config, ZPOOL_CONFIG_TIMESTAMP))
timestamp = fnvlist_lookup_uint64(config,
ZPOOL_CONFIG_TIMESTAMP);

if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_HOSTID))
hostid = fnvlist_lookup_uint64(nvinfo,
ZPOOL_CONFIG_HOSTID);
else if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
hostid = fnvlist_lookup_uint64(config,
ZPOOL_CONFIG_HOSTID);

Expand Down
3 changes: 3 additions & 0 deletions sys/contrib/openzfs/config/zfs-build.m4
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,9 @@ AC_DEFUN([ZFS_AC_RPM], [
AS_IF([test -n "$udevruledir" ], [
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_udevruledir $(udevruledir)"'
])
AS_IF([test -n "$bashcompletiondir" ], [
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' --define "_bashcompletiondir $(bashcompletiondir)"'
])
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_SYSTEMD)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PYZFS)'
RPM_DEFINE_UTIL=${RPM_DEFINE_UTIL}' $(DEFINE_PAM)'
Expand Down
12 changes: 4 additions & 8 deletions sys/contrib/openzfs/include/os/linux/zfs/sys/trace_arc.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags)
__field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type)
__field(uint16_t, hdr_psize)
__field(uint16_t, hdr_lsize)
Expand All @@ -70,7 +69,6 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
__entry->hdr_birth = ab->b_birth;
__entry->hdr_flags = ab->b_flags;
__entry->hdr_bufcnt = ab->b_l1hdr.b_bufcnt;
__entry->hdr_psize = ab->b_psize;
__entry->hdr_lsize = ab->b_lsize;
__entry->hdr_spa = ab->b_spa;
Expand All @@ -84,12 +82,12 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
"flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu "
"flags 0x%x type %u psize %u lsize %u spa %llu "
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags,
__entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize,
__entry->hdr_type, __entry->hdr_psize,
__entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
__entry->hdr_access, __entry->hdr_mru_hits,
__entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
Expand Down Expand Up @@ -192,7 +190,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags)
__field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type)
__field(uint16_t, hdr_psize)
__field(uint16_t, hdr_lsize)
Expand Down Expand Up @@ -223,7 +220,6 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
__entry->hdr_birth = hdr->b_birth;
__entry->hdr_flags = hdr->b_flags;
__entry->hdr_bufcnt = hdr->b_l1hdr.b_bufcnt;
__entry->hdr_psize = hdr->b_psize;
__entry->hdr_lsize = hdr->b_lsize;
__entry->hdr_spa = hdr->b_spa;
Expand Down Expand Up @@ -255,7 +251,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->zb_blkid = zb->zb_blkid;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
"flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u "
"flags 0x%x psize %u lsize %u spa %llu state_type %u "
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
Expand All @@ -264,7 +260,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
"blkid %llu }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags,
__entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize,
__entry->hdr_psize, __entry->hdr_lsize,
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
Expand Down
10 changes: 3 additions & 7 deletions sys/contrib/openzfs/include/sys/arc_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,6 @@ struct arc_write_callback {
* these two allocation states.
*/
typedef struct l1arc_buf_hdr {
/* for waiting on reads to complete */
kcondvar_t b_cv;
uint8_t b_byteswap;

/* protected by arc state mutex */
arc_state_t *b_state;
multilist_node_t b_arc_node;
Expand All @@ -173,7 +169,7 @@ typedef struct l1arc_buf_hdr {
uint32_t b_mru_ghost_hits;
uint32_t b_mfu_hits;
uint32_t b_mfu_ghost_hits;
uint32_t b_bufcnt;
uint8_t b_byteswap;
arc_buf_t *b_buf;

/* self protecting */
Expand Down Expand Up @@ -436,12 +432,12 @@ typedef struct l2arc_dev {
*/
typedef struct arc_buf_hdr_crypt {
abd_t *b_rabd; /* raw encrypted data */
dmu_object_type_t b_ot; /* object type */
uint32_t b_ebufcnt; /* count of encrypted buffers */

/* dsobj for looking up encryption key for l2arc encryption */
uint64_t b_dsobj;

dmu_object_type_t b_ot; /* object type */

/* encryption parameters */
uint8_t b_salt[ZIO_DATA_SALT_LEN];
uint8_t b_iv[ZIO_DATA_IV_LEN];
Expand Down
1 change: 0 additions & 1 deletion sys/contrib/openzfs/include/sys/metaslab_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ struct metaslab_group {
int64_t mg_activation_count;
metaslab_class_t *mg_class;
vdev_t *mg_vd;
taskq_t *mg_taskq;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;

Expand Down
4 changes: 2 additions & 2 deletions sys/contrib/openzfs/include/sys/spa_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,9 @@ struct spa {

hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
taskq_t *spa_zvol_taskq; /* Taskq for minor management */
taskq_t *spa_metaslab_taskq; /* Taskq for metaslab preload */
taskq_t *spa_prefetch_taskq; /* Taskq for prefetch threads */
taskq_t *spa_upgrade_taskq; /* Taskq for upgrade jobs */
uint64_t spa_multihost; /* multihost aware (mmp) */
mmp_thread_t spa_mmp; /* multihost mmp thread */
list_t spa_leaf_list; /* list of leaf vdevs */
Expand All @@ -448,8 +450,6 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
zfs_refcount_t spa_refcount; /* number of opens */

taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
};

extern char *spa_config_path;
Expand Down
11 changes: 11 additions & 0 deletions sys/contrib/openzfs/man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,12 @@ Practical upper limit of total metaslabs per top-level vdev.
.It Sy metaslab_preload_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Enable metaslab group preloading.
.
.It Sy metaslab_preload_limit Ns = Ns Sy 10 Pq uint
Maximum number of metaslabs per group to preload.
.
.It Sy metaslab_preload_pct Ns = Ns Sy 50 Pq uint
Percentage of CPUs to run a metaslab preload taskq.
.
.It Sy metaslab_lba_weighting_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
Give more weight to metaslabs with lower LBAs,
assuming they have greater bandwidth,
Expand Down Expand Up @@ -2144,6 +2150,11 @@ On very fragmented pools, lowering this
.Pq typically to Sy 36 KiB
can improve performance.
.
.It Sy zil_maxcopied Ns = Ns Sy 7680 Ns B Po 7.5 KiB Pc Pq uint
This sets the maximum number of write bytes logged via WR_COPIED.
It tunes a tradeoff between additional memory copy and possibly worse log
space efficiency vs additional range lock/unlock.
.
.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
This sets the minimum delay in nanoseconds ZIL cares to delay block commit,
waiting for more records.
Expand Down
40 changes: 39 additions & 1 deletion sys/contrib/openzfs/man/man7/zfsconcepts.7
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
.\" Copyright 2019 Richard Laager. All rights reserved.
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2023 Klara, Inc.
.\"
.Dd June 30, 2019
.Dd October 6, 2023
.Dt ZFSCONCEPTS 7
.Os
.
Expand Down Expand Up @@ -205,3 +206,40 @@ practices, such as regular backups.
Consider using the
.Sy compression
property as a less resource-intensive alternative.
.Ss Block cloning
Block cloning is a facility that allows a file (or parts of a file) to be
.Qq cloned ,
that is, a shallow copy made where the existing data blocks are referenced
rather than copied.
Later modifications to the data will cause a copy of the data block to be taken
and that copy modified.
This facility is used to implement
.Qq reflinks
or
.Qq file-level copy-on-write .
.Pp
Cloned blocks are tracked in a special on-disk structure called the Block
Reference Table
.Po BRT
.Pc .
Unlike deduplication, this table has minimal overhead, so it can be enabled at all
times.
.Pp
Also unlike deduplication, cloning must be requested by a user program.
Many common file copying programs, including newer versions of
.Nm /bin/cp ,
will try to create clones automatically.
Look for
.Qq clone ,
.Qq dedupe
or
.Qq reflink
in the documentation for more information.
.Pp
There are some limitations to block cloning.
Only whole blocks can be cloned, and blocks cannot be cloned if they are not
yet written to disk, or if they are encrypted, or the source and destination
.Sy recordsize
properties differ.
The OS may add additional restrictions;
for example, most versions of Linux will not allow clones across datasets.
22 changes: 0 additions & 22 deletions sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -614,28 +614,6 @@ SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, df_free_pct,
" space map to continue allocations in a first-fit fashion");
/* END CSTYLED */

/*
* Percentage of all cpus that can be used by the metaslab taskq.
*/
extern int metaslab_load_pct;

/* BEGIN CSTYLED */
SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, load_pct,
CTLFLAG_RWTUN, &metaslab_load_pct, 0,
"Percentage of cpus that can be used by the metaslab taskq");
/* END CSTYLED */

/*
* Max number of metaslabs per group to preload.
*/
extern uint_t metaslab_preload_limit;

/* BEGIN CSTYLED */
SYSCTL_UINT(_vfs_zfs_metaslab, OID_AUTO, preload_limit,
CTLFLAG_RWTUN, &metaslab_preload_limit, 0,
"Max number of metaslabs per group to preload");
/* END CSTYLED */

/* mmp.c */

int
Expand Down
Loading

0 comments on commit b2526e8

Please sign in to comment.