aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/super.c186
-rw-r--r--fs/affs/super.c374
-rw-r--r--fs/afs/dir.c25
-rw-r--r--fs/afs/dir_edit.c91
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/aio.c1
-rw-r--r--fs/attr.c61
-rw-r--r--fs/autofs/dev-ioctl.c5
-rw-r--r--fs/bcachefs/alloc_background.h3
-rw-r--r--fs/bcachefs/alloc_foreground.c19
-rw-r--r--fs/bcachefs/backpointers.c17
-rw-r--r--fs/bcachefs/bcachefs.h1
-rw-r--r--fs/bcachefs/bkey.c7
-rw-r--r--fs/bcachefs/btree_cache.c107
-rw-r--r--fs/bcachefs/btree_cache.h2
-rw-r--r--fs/bcachefs/btree_gc.c2
-rw-r--r--fs/bcachefs/btree_io.c6
-rw-r--r--fs/bcachefs/btree_iter.c13
-rw-r--r--fs/bcachefs/btree_node_scan.c2
-rw-r--r--fs/bcachefs/btree_update_interior.c33
-rw-r--r--fs/bcachefs/btree_write_buffer.c30
-rw-r--r--fs/bcachefs/btree_write_buffer.h1
-rw-r--r--fs/bcachefs/buckets.h19
-rw-r--r--fs/bcachefs/data_update.c21
-rw-r--r--fs/bcachefs/data_update.h3
-rw-r--r--fs/bcachefs/ec.c4
-rw-r--r--fs/bcachefs/errcode.h3
-rw-r--r--fs/bcachefs/extents.c91
-rw-r--r--fs/bcachefs/extents.h5
-rw-r--r--fs/bcachefs/fs-io.c17
-rw-r--r--fs/bcachefs/io_read.c10
-rw-r--r--fs/bcachefs/io_write.c7
-rw-r--r--fs/bcachefs/journal_io.c5
-rw-r--r--fs/bcachefs/move.c2
-rw-r--r--fs/bcachefs/opts.c4
-rw-r--r--fs/bcachefs/recovery.c21
-rw-r--r--fs/bcachefs/recovery_passes.c12
-rw-r--r--fs/bcachefs/recovery_passes_types.h1
-rw-r--r--fs/bcachefs/sb-downgrade.c3
-rw-r--r--fs/bcachefs/sb-errors_format.h6
-rw-r--r--fs/bcachefs/sb-members.c4
-rw-r--r--fs/bcachefs/sb-members_format.h6
-rw-r--r--fs/bcachefs/super-io.c5
-rw-r--r--fs/bcachefs/super.c1
-rw-r--r--fs/bcachefs/tests.c5
-rw-r--r--fs/befs/linuxvfs.c199
-rw-r--r--fs/btrfs/bio.c37
-rw-r--r--fs/btrfs/bio.h3
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/defrag.c10
-rw-r--r--fs/btrfs/delayed-ref.c4
-rw-r--r--fs/btrfs/extent_io.c7
-rw-r--r--fs/btrfs/extent_map.c7
-rw-r--r--fs/btrfs/file.c25
-rw-r--r--fs/btrfs/inode.c12
-rw-r--r--fs/btrfs/ordered-data.c4
-rw-r--r--fs/btrfs/super.c28
-rw-r--r--fs/btrfs/volumes.c1
-rw-r--r--fs/buffer.c5
-rw-r--r--fs/cachefiles/interface.c14
-rw-r--r--fs/cachefiles/namei.c5
-rw-r--r--fs/cachefiles/ondemand.c38
-rw-r--r--fs/ceph/addr.c20
-rw-r--r--fs/char_dev.c2
-rw-r--r--fs/coredump.c1
-rw-r--r--fs/dax.c45
-rw-r--r--fs/dcache.c16
-rw-r--r--fs/efs/super.c43
-rw-r--r--fs/erofs/super.c4
-rw-r--r--fs/eventpoll.c13
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/file.c24
-rw-r--r--fs/ext4/inode.c39
-rw-r--r--fs/ext4/namei.c5
-rw-r--r--fs/ext4/page-io.c2
-rw-r--r--fs/ext4/super.c33
-rw-r--r--fs/f2fs/data.c9
-rw-r--r--fs/fcntl.c4
-rw-r--r--fs/file.c288
-rw-r--r--fs/file_table.c50
-rw-r--r--fs/freevxfs/vxfs_dir.h2
-rw-r--r--fs/fs-writeback.c40
-rw-r--r--fs/fs_parser.c21
-rw-r--r--fs/gfs2/export.c1
-rw-r--r--fs/gfs2/file.c2
-rw-r--r--fs/gfs2/glock.c12
-rw-r--r--fs/hfs/super.c342
-rw-r--r--fs/hfsplus/hfsplus_fs.h7
-rw-r--r--fs/hfsplus/options.c263
-rw-r--r--fs/hfsplus/super.c84
-rw-r--r--fs/hfsplus/wrapper.c2
-rw-r--r--fs/hpfs/super.c414
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c313
-rw-r--r--fs/iomap/buffered-io.c19
-rw-r--r--fs/iomap/direct-io.c43
-rw-r--r--fs/iomap/trace.h3
-rw-r--r--fs/jfs/jfs_filsys.h1
-rw-r--r--fs/jfs/super.c469
-rw-r--r--fs/libfs.c12
-rw-r--r--fs/lockd/svclock.c7
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c41
-rw-r--r--fs/namespace.c161
-rw-r--r--fs/netfs/buffered_read.c8
-rw-r--r--fs/netfs/buffered_write.c41
-rw-r--r--fs/netfs/fscache_volume.c3
-rw-r--r--fs/nfs/client.c3
-rw-r--r--fs/nfs/inode.c70
-rw-r--r--fs/nfs/localio.c3
-rw-r--r--fs/nfs/nfs4proc.c4
-rw-r--r--fs/nfs/super.c10
-rw-r--r--fs/nfs_common/nfslocalio.c23
-rw-r--r--fs/nfsd/nfs4proc.c8
-rw-r--r--fs/nfsd/nfs4state.c19
-rw-r--r--fs/nfsd/vfs.c13
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/gcinode.c4
-rw-r--r--fs/nilfs2/mdt.c1
-rw-r--r--fs/nilfs2/namei.c3
-rw-r--r--fs/nilfs2/page.c25
-rw-r--r--fs/notify/dnotify/dnotify.c5
-rw-r--r--fs/notify/fanotify/fanotify.c1
-rw-r--r--fs/notify/fanotify/fanotify_user.c1
-rw-r--r--fs/ocfs2/export.c1
-rw-r--r--fs/ocfs2/file.c10
-rw-r--r--fs/ocfs2/resize.c2
-rw-r--r--fs/ocfs2/super.c13
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/open.c17
-rw-r--r--fs/overlayfs/copy_up.c1
-rw-r--r--fs/overlayfs/params.c116
-rw-r--r--fs/pidfs.c86
-rw-r--r--fs/posix_acl.c13
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/fd.c12
-rw-r--r--fs/proc/softirqs.c2
-rw-r--r--fs/proc/task_mmu.c4
-rw-r--r--fs/proc/vmcore.c9
-rw-r--r--fs/read_write.c16
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/smb/client/connect.c14
-rw-r--r--fs/smb/server/connection.c1
-rw-r--r--fs/smb/server/connection.h1
-rw-r--r--fs/smb/server/mgmt/user_session.c15
-rw-r--r--fs/smb/server/server.c20
-rw-r--r--fs/smb/server/smb_common.c10
-rw-r--r--fs/smb/server/smb_common.h2
-rw-r--r--fs/squashfs/file_direct.c9
-rw-r--r--fs/stat.c46
-rw-r--r--fs/super.c26
-rw-r--r--fs/tracefs/inode.c12
-rw-r--r--fs/ubifs/super.c399
-rw-r--r--fs/unicode/utf8-core.c26
-rw-r--r--fs/unicode/utf8-selftest.c3
-rw-r--r--fs/userfaultfd.c28
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c2
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c6
-rw-r--r--fs/xfs/xfs_buf.c7
-rw-r--r--fs/xfs/xfs_buf.h4
-rw-r--r--fs/xfs/xfs_file.c16
-rw-r--r--fs/xfs/xfs_filestream.c99
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_inode.h20
-rw-r--r--fs/xfs/xfs_ioctl.c4
-rw-r--r--fs/xfs/xfs_iomap.c2
-rw-r--r--fs/xfs/xfs_iops.c32
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--fs/xfs/xfs_trace.h15
169 files changed, 3617 insertions, 2302 deletions
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index f0b999a4961b..017c48a80203 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -6,7 +6,8 @@
*/
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
+#include <linux/fs_context.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
@@ -115,87 +116,61 @@ static int adfs_show_options(struct seq_file *seq, struct dentry *root)
return 0;
}
-enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix, Opt_err};
+enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_ftsuffix};
-static const match_table_t tokens = {
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_ownmask, "ownmask=%o"},
- {Opt_othmask, "othmask=%o"},
- {Opt_ftsuffix, "ftsuffix=%u"},
- {Opt_err, NULL}
+static const struct fs_parameter_spec adfs_param_spec[] = {
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_u32oct ("ownmask", Opt_ownmask),
+ fsparam_u32oct ("othmask", Opt_othmask),
+ fsparam_u32 ("ftsuffix", Opt_ftsuffix),
+ {}
};
-static int parse_options(struct super_block *sb, struct adfs_sb_info *asb,
- char *options)
+static int adfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- int option;
-
- if (!options)
- return 0;
-
- while ((p = strsep(&options, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(args, &option))
- return -EINVAL;
- asb->s_uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(asb->s_uid))
- return -EINVAL;
- break;
- case Opt_gid:
- if (match_int(args, &option))
- return -EINVAL;
- asb->s_gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(asb->s_gid))
- return -EINVAL;
- break;
- case Opt_ownmask:
- if (match_octal(args, &option))
- return -EINVAL;
- asb->s_owner_mask = option;
- break;
- case Opt_othmask:
- if (match_octal(args, &option))
- return -EINVAL;
- asb->s_other_mask = option;
- break;
- case Opt_ftsuffix:
- if (match_int(args, &option))
- return -EINVAL;
- asb->s_ftsuffix = option;
- break;
- default:
- adfs_msg(sb, KERN_ERR,
- "unrecognised mount option \"%s\" or missing value",
- p);
- return -EINVAL;
- }
+ struct adfs_sb_info *asb = fc->s_fs_info;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, adfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_uid:
+ asb->s_uid = result.uid;
+ break;
+ case Opt_gid:
+ asb->s_gid = result.gid;
+ break;
+ case Opt_ownmask:
+ asb->s_owner_mask = result.uint_32;
+ break;
+ case Opt_othmask:
+ asb->s_other_mask = result.uint_32;
+ break;
+ case Opt_ftsuffix:
+ asb->s_ftsuffix = result.uint_32;
+ break;
+ default:
+ return -EINVAL;
}
return 0;
}
-static int adfs_remount(struct super_block *sb, int *flags, char *data)
+static int adfs_reconfigure(struct fs_context *fc)
{
- struct adfs_sb_info temp_asb;
- int ret;
+ struct adfs_sb_info *new_asb = fc->s_fs_info;
+ struct adfs_sb_info *asb = ADFS_SB(fc->root->d_sb);
- sync_filesystem(sb);
- *flags |= ADFS_SB_FLAGS;
+ sync_filesystem(fc->root->d_sb);
+ fc->sb_flags |= ADFS_SB_FLAGS;
- temp_asb = *ADFS_SB(sb);
- ret = parse_options(sb, &temp_asb, data);
- if (ret == 0)
- *ADFS_SB(sb) = temp_asb;
+ /* Structure copy newly parsed options */
+ *asb = *new_asb;
- return ret;
+ return 0;
}
static int adfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -273,7 +248,6 @@ static const struct super_operations adfs_sops = {
.write_inode = adfs_write_inode,
.put_super = adfs_put_super,
.statfs = adfs_statfs,
- .remount_fs = adfs_remount,
.show_options = adfs_show_options,
};
@@ -361,34 +335,21 @@ static int adfs_validate_dr0(struct super_block *sb, struct buffer_head *bh,
return 0;
}
-static int adfs_fill_super(struct super_block *sb, void *data, int silent)
+static int adfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct adfs_discrecord *dr;
struct object_info root_obj;
- struct adfs_sb_info *asb;
+ struct adfs_sb_info *asb = sb->s_fs_info;
struct inode *root;
int ret = -EINVAL;
+ int silent = fc->sb_flags & SB_SILENT;
sb->s_flags |= ADFS_SB_FLAGS;
- asb = kzalloc(sizeof(*asb), GFP_KERNEL);
- if (!asb)
- return -ENOMEM;
-
sb->s_fs_info = asb;
sb->s_magic = ADFS_SUPER_MAGIC;
sb->s_time_gran = 10000000;
- /* set default options */
- asb->s_uid = GLOBAL_ROOT_UID;
- asb->s_gid = GLOBAL_ROOT_GID;
- asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
- asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
- asb->s_ftsuffix = 0;
-
- if (parse_options(sb, asb, data))
- goto error;
-
/* Try to probe the filesystem boot block */
ret = adfs_probe(sb, ADFS_DISCRECORD, 1, adfs_validate_bblk);
if (ret == -EILSEQ)
@@ -453,18 +414,61 @@ error:
return ret;
}
-static struct dentry *adfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int adfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, adfs_fill_super);
+}
+
+static void adfs_free_fc(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
+ struct adfs_context *asb = fc->s_fs_info;
+
+ kfree(asb);
+}
+
+static const struct fs_context_operations adfs_context_ops = {
+ .parse_param = adfs_parse_param,
+ .get_tree = adfs_get_tree,
+ .reconfigure = adfs_reconfigure,
+ .free = adfs_free_fc,
+};
+
+static int adfs_init_fs_context(struct fs_context *fc)
+{
+ struct adfs_sb_info *asb;
+
+ asb = kzalloc(sizeof(struct adfs_sb_info), GFP_KERNEL);
+ if (!asb)
+ return -ENOMEM;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct super_block *sb = fc->root->d_sb;
+ struct adfs_sb_info *old_asb = ADFS_SB(sb);
+
+ /* structure copy existing options before parsing */
+ *asb = *old_asb;
+ } else {
+ /* set default options */
+ asb->s_uid = GLOBAL_ROOT_UID;
+ asb->s_gid = GLOBAL_ROOT_GID;
+ asb->s_owner_mask = ADFS_DEFAULT_OWNER_MASK;
+ asb->s_other_mask = ADFS_DEFAULT_OTHER_MASK;
+ asb->s_ftsuffix = 0;
+ }
+
+ fc->ops = &adfs_context_ops;
+ fc->s_fs_info = asb;
+
+ return 0;
}
static struct file_system_type adfs_fs_type = {
.owner = THIS_MODULE,
.name = "adfs",
- .mount = adfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = adfs_init_fs_context,
+ .parameters = adfs_param_spec,
};
MODULE_ALIAS_FS("adfs");
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3c5821339609..2fa40337776d 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -14,7 +14,8 @@
#include <linux/module.h>
#include <linux/init.h>
#include <linux/statfs.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
+#include <linux/fs_context.h>
#include <linux/magic.h>
#include <linux/sched.h>
#include <linux/cred.h>
@@ -27,7 +28,6 @@
static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int affs_show_options(struct seq_file *m, struct dentry *root);
-static int affs_remount (struct super_block *sb, int *flags, char *data);
static void
affs_commit_super(struct super_block *sb, int wait)
@@ -155,140 +155,114 @@ static const struct super_operations affs_sops = {
.put_super = affs_put_super,
.sync_fs = affs_sync_fs,
.statfs = affs_statfs,
- .remount_fs = affs_remount,
.show_options = affs_show_options,
};
enum {
Opt_bs, Opt_mode, Opt_mufs, Opt_notruncate, Opt_prefix, Opt_protect,
Opt_reserved, Opt_root, Opt_setgid, Opt_setuid,
- Opt_verbose, Opt_volume, Opt_ignore, Opt_err,
+ Opt_verbose, Opt_volume, Opt_ignore,
};
-static const match_table_t tokens = {
- {Opt_bs, "bs=%u"},
- {Opt_mode, "mode=%o"},
- {Opt_mufs, "mufs"},
- {Opt_notruncate, "nofilenametruncate"},
- {Opt_prefix, "prefix=%s"},
- {Opt_protect, "protect"},
- {Opt_reserved, "reserved=%u"},
- {Opt_root, "root=%u"},
- {Opt_setgid, "setgid=%u"},
- {Opt_setuid, "setuid=%u"},
- {Opt_verbose, "verbose"},
- {Opt_volume, "volume=%s"},
- {Opt_ignore, "grpquota"},
- {Opt_ignore, "noquota"},
- {Opt_ignore, "quota"},
- {Opt_ignore, "usrquota"},
- {Opt_err, NULL},
+struct affs_context {
+ kuid_t uid; /* uid to override */
+ kgid_t gid; /* gid to override */
+ unsigned int mode; /* mode to override */
+ unsigned int reserved; /* Number of reserved blocks */
+ int root_block; /* FFS root block number */
+ int blocksize; /* Initial device blksize */
+ char *prefix; /* Prefix for volumes and assigns */
+ char volume[32]; /* Vol. prefix for absolute symlinks */
+ unsigned long mount_flags; /* Options */
};
-static int
-parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, s32 *root,
- int *blocksize, char **prefix, char *volume, unsigned long *mount_opts)
+static const struct fs_parameter_spec affs_param_spec[] = {
+ fsparam_u32 ("bs", Opt_bs),
+ fsparam_u32oct ("mode", Opt_mode),
+ fsparam_flag ("mufs", Opt_mufs),
+ fsparam_flag ("nofilenametruncate", Opt_notruncate),
+ fsparam_string ("prefix", Opt_prefix),
+ fsparam_flag ("protect", Opt_protect),
+ fsparam_u32 ("reserved", Opt_reserved),
+ fsparam_u32 ("root", Opt_root),
+ fsparam_gid ("setgid", Opt_setgid),
+ fsparam_uid ("setuid", Opt_setuid),
+ fsparam_flag ("verbose", Opt_verbose),
+ fsparam_string ("volume", Opt_volume),
+ fsparam_flag ("grpquota", Opt_ignore),
+ fsparam_flag ("noquota", Opt_ignore),
+ fsparam_flag ("quota", Opt_ignore),
+ fsparam_flag ("usrquota", Opt_ignore),
+ {},
+};
+
+static int affs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- /* Fill in defaults */
-
- *uid = current_uid();
- *gid = current_gid();
- *reserved = 2;
- *root = -1;
- *blocksize = -1;
- volume[0] = ':';
- volume[1] = 0;
- *mount_opts = 0;
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token, n, option;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_bs:
- if (match_int(&args[0], &n))
- return 0;
- if (n != 512 && n != 1024 && n != 2048
- && n != 4096) {
- pr_warn("Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
- return 0;
- }
- *blocksize = n;
- break;
- case Opt_mode:
- if (match_octal(&args[0], &option))
- return 0;
- *mode = option & 0777;
- affs_set_opt(*mount_opts, SF_SETMODE);
- break;
- case Opt_mufs:
- affs_set_opt(*mount_opts, SF_MUFS);
- break;
- case Opt_notruncate:
- affs_set_opt(*mount_opts, SF_NO_TRUNCATE);
- break;
- case Opt_prefix:
- kfree(*prefix);
- *prefix = match_strdup(&args[0]);
- if (!*prefix)
- return 0;
- affs_set_opt(*mount_opts, SF_PREFIX);
- break;
- case Opt_protect:
- affs_set_opt(*mount_opts, SF_IMMUTABLE);
- break;
- case Opt_reserved:
- if (match_int(&args[0], reserved))
- return 0;
- break;
- case Opt_root:
- if (match_int(&args[0], root))
- return 0;
- break;
- case Opt_setgid:
- if (match_int(&args[0], &option))
- return 0;
- *gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(*gid))
- return 0;
- affs_set_opt(*mount_opts, SF_SETGID);
- break;
- case Opt_setuid:
- if (match_int(&args[0], &option))
- return 0;
- *uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(*uid))
- return 0;
- affs_set_opt(*mount_opts, SF_SETUID);
- break;
- case Opt_verbose:
- affs_set_opt(*mount_opts, SF_VERBOSE);
- break;
- case Opt_volume: {
- char *vol = match_strdup(&args[0]);
- if (!vol)
- return 0;
- strscpy(volume, vol, 32);
- kfree(vol);
- break;
- }
- case Opt_ignore:
- /* Silently ignore the quota options */
- break;
- default:
- pr_warn("Unrecognized mount option \"%s\" or missing value\n",
- p);
- return 0;
+ struct affs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ int n;
+ int opt;
+
+ opt = fs_parse(fc, affs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_bs:
+ n = result.uint_32;
+ if (n != 512 && n != 1024 && n != 2048
+ && n != 4096) {
+ pr_warn("Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
+ return -EINVAL;
}
+ ctx->blocksize = n;
+ break;
+ case Opt_mode:
+ ctx->mode = result.uint_32 & 0777;
+ affs_set_opt(ctx->mount_flags, SF_SETMODE);
+ break;
+ case Opt_mufs:
+ affs_set_opt(ctx->mount_flags, SF_MUFS);
+ break;
+ case Opt_notruncate:
+ affs_set_opt(ctx->mount_flags, SF_NO_TRUNCATE);
+ break;
+ case Opt_prefix:
+ kfree(ctx->prefix);
+ ctx->prefix = param->string;
+ param->string = NULL;
+ affs_set_opt(ctx->mount_flags, SF_PREFIX);
+ break;
+ case Opt_protect:
+ affs_set_opt(ctx->mount_flags, SF_IMMUTABLE);
+ break;
+ case Opt_reserved:
+ ctx->reserved = result.uint_32;
+ break;
+ case Opt_root:
+ ctx->root_block = result.uint_32;
+ break;
+ case Opt_setgid:
+ ctx->gid = result.gid;
+ affs_set_opt(ctx->mount_flags, SF_SETGID);
+ break;
+ case Opt_setuid:
+ ctx->uid = result.uid;
+ affs_set_opt(ctx->mount_flags, SF_SETUID);
+ break;
+ case Opt_verbose:
+ affs_set_opt(ctx->mount_flags, SF_VERBOSE);
+ break;
+ case Opt_volume:
+ strscpy(ctx->volume, param->string, 32);
+ break;
+ case Opt_ignore:
+ /* Silently ignore the quota options */
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
+ return 0;
}
static int affs_show_options(struct seq_file *m, struct dentry *root)
@@ -329,27 +303,22 @@ static int affs_show_options(struct seq_file *m, struct dentry *root)
* hopefully have the guts to do so. Until then: sorry for the mess.
*/
-static int affs_fill_super(struct super_block *sb, void *data, int silent)
+static int affs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct affs_sb_info *sbi;
+ struct affs_context *ctx = fc->fs_private;
struct buffer_head *root_bh = NULL;
struct buffer_head *boot_bh;
struct inode *root_inode = NULL;
- s32 root_block;
+ int silent = fc->sb_flags & SB_SILENT;
int size, blocksize;
u32 chksum;
int num_bm;
int i, j;
- kuid_t uid;
- kgid_t gid;
- int reserved;
- unsigned long mount_flags;
int tmp_flags; /* fix remount prototype... */
u8 sig[4];
int ret;
- pr_debug("read_super(%s)\n", data ? (const char *)data : "no options");
-
sb->s_magic = AFFS_SUPER_MAGIC;
sb->s_op = &affs_sops;
sb->s_flags |= SB_NODIRATIME;
@@ -369,19 +338,16 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->sb_work, flush_superblock);
- if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
- &blocksize,&sbi->s_prefix,
- sbi->s_volume, &mount_flags)) {
- pr_err("Error parsing options\n");
- return -EINVAL;
- }
- /* N.B. after this point s_prefix must be released */
+ sbi->s_flags = ctx->mount_flags;
+ sbi->s_mode = ctx->mode;
+ sbi->s_uid = ctx->uid;
+ sbi->s_gid = ctx->gid;
+ sbi->s_reserved = ctx->reserved;
+ sbi->s_prefix = ctx->prefix;
+ ctx->prefix = NULL;
+ memcpy(sbi->s_volume, ctx->volume, 32);
- sbi->s_flags = mount_flags;
- sbi->s_mode = i;
- sbi->s_uid = uid;
- sbi->s_gid = gid;
- sbi->s_reserved= reserved;
+ /* N.B. after this point s_prefix must be released */
/* Get the size of the device in 512-byte blocks.
* If we later see that the partition uses bigger
@@ -396,15 +362,16 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
i = bdev_logical_block_size(sb->s_bdev);
j = PAGE_SIZE;
+ blocksize = ctx->blocksize;
if (blocksize > 0) {
i = j = blocksize;
size = size / (blocksize / 512);
}
for (blocksize = i; blocksize <= j; blocksize <<= 1, size >>= 1) {
- sbi->s_root_block = root_block;
- if (root_block < 0)
- sbi->s_root_block = (reserved + size - 1) / 2;
+ sbi->s_root_block = ctx->root_block;
+ if (ctx->root_block < 0)
+ sbi->s_root_block = (ctx->reserved + size - 1) / 2;
pr_debug("setting blocksize to %d\n", blocksize);
affs_set_blocksize(sb, blocksize);
sbi->s_partition_size = size;
@@ -424,7 +391,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
"size=%d, reserved=%d\n",
sb->s_id,
sbi->s_root_block + num_bm,
- blocksize, size, reserved);
+ ctx->blocksize, size, ctx->reserved);
root_bh = affs_bread(sb, sbi->s_root_block + num_bm);
if (!root_bh)
continue;
@@ -447,7 +414,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
got_root:
/* Keep super block in cache */
sbi->s_root_bh = root_bh;
- root_block = sbi->s_root_block;
+ ctx->root_block = sbi->s_root_block;
/* Find out which kind of FS we have */
boot_bh = sb_bread(sb, 0);
@@ -506,7 +473,7 @@ got_root:
return -EINVAL;
}
- if (affs_test_opt(mount_flags, SF_VERBOSE)) {
+ if (affs_test_opt(ctx->mount_flags, SF_VERBOSE)) {
u8 len = AFFS_ROOT_TAIL(sb, root_bh)->disk_name[0];
pr_notice("Mounting volume \"%.*s\": Type=%.3s\\%c, Blocksize=%d\n",
len > 31 ? 31 : len,
@@ -528,7 +495,7 @@ got_root:
/* set up enough so that it can read an inode */
- root_inode = affs_iget(sb, root_block);
+ root_inode = affs_iget(sb, ctx->root_block);
if (IS_ERR(root_inode))
return PTR_ERR(root_inode);
@@ -548,56 +515,43 @@ got_root:
return 0;
}
-static int
-affs_remount(struct super_block *sb, int *flags, char *data)
+static int affs_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
+ struct affs_context *ctx = fc->fs_private;
struct affs_sb_info *sbi = AFFS_SB(sb);
- int blocksize;
- kuid_t uid;
- kgid_t gid;
- int mode;
- int reserved;
- int root_block;
- unsigned long mount_flags;
int res = 0;
- char volume[32];
- char *prefix = NULL;
-
- pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
sync_filesystem(sb);
- *flags |= SB_NODIRATIME;
-
- memcpy(volume, sbi->s_volume, 32);
- if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
- &blocksize, &prefix, volume,
- &mount_flags)) {
- kfree(prefix);
- return -EINVAL;
- }
+ fc->sb_flags |= SB_NODIRATIME;
flush_delayed_work(&sbi->sb_work);
- sbi->s_flags = mount_flags;
- sbi->s_mode = mode;
- sbi->s_uid = uid;
- sbi->s_gid = gid;
+ /*
+ * NB: Historically, only mount_flags, mode, uid, gic, prefix,
+ * and volume are accepted during remount.
+ */
+ sbi->s_flags = ctx->mount_flags;
+ sbi->s_mode = ctx->mode;
+ sbi->s_uid = ctx->uid;
+ sbi->s_gid = ctx->gid;
/* protect against readers */
spin_lock(&sbi->symlink_lock);
- if (prefix) {
+ if (ctx->prefix) {
kfree(sbi->s_prefix);
- sbi->s_prefix = prefix;
+ sbi->s_prefix = ctx->prefix;
+ ctx->prefix = NULL;
}
- memcpy(sbi->s_volume, volume, 32);
+ memcpy(sbi->s_volume, ctx->volume, 32);
spin_unlock(&sbi->symlink_lock);
- if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (*flags & SB_RDONLY)
+ if (fc->sb_flags & SB_RDONLY)
affs_free_bitmap(sb);
else
- res = affs_init_bitmap(sb, flags);
+ res = affs_init_bitmap(sb, &fc->sb_flags);
return res;
}
@@ -624,10 +578,9 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static struct dentry *affs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int affs_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
+ return get_tree_bdev(fc, affs_fill_super);
}
static void affs_kill_sb(struct super_block *sb)
@@ -643,12 +596,61 @@ static void affs_kill_sb(struct super_block *sb)
}
}
+static void affs_free_fc(struct fs_context *fc)
+{
+ struct affs_context *ctx = fc->fs_private;
+
+ kfree(ctx->prefix);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations affs_context_ops = {
+ .parse_param = affs_parse_param,
+ .get_tree = affs_get_tree,
+ .reconfigure = affs_reconfigure,
+ .free = affs_free_fc,
+};
+
+static int affs_init_fs_context(struct fs_context *fc)
+{
+ struct affs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct affs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct super_block *sb = fc->root->d_sb;
+ struct affs_sb_info *sbi = AFFS_SB(sb);
+
+ /*
+ * NB: historically, no options other than volume were
+ * preserved across a remount unless they were explicitly
+ * passed in.
+ */
+ memcpy(ctx->volume, sbi->s_volume, 32);
+ } else {
+ ctx->uid = current_uid();
+ ctx->gid = current_gid();
+ ctx->reserved = 2;
+ ctx->root_block = -1;
+ ctx->blocksize = -1;
+ ctx->volume[0] = ':';
+ }
+
+ fc->ops = &affs_context_ops;
+ fc->fs_private = ctx;
+
+ return 0;
+}
+
static struct file_system_type affs_fs_type = {
.owner = THIS_MODULE,
.name = "affs",
- .mount = affs_mount,
.kill_sb = affs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = affs_init_fs_context,
+ .parameters = affs_param_spec,
};
MODULE_ALIAS_FS("affs");
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index f8622ed72e08..ada363af5aab 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -12,6 +12,7 @@
#include <linux/swap.h>
#include <linux/ctype.h>
#include <linux/sched.h>
+#include <linux/iversion.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
#include "afs_fs.h"
@@ -1823,6 +1824,8 @@ error:
static void afs_rename_success(struct afs_operation *op)
{
+ struct afs_vnode *vnode = AFS_FS_I(d_inode(op->dentry));
+
_enter("op=%08x", op->debug_id);
op->ctime = op->file[0].scb.status.mtime_client;
@@ -1832,6 +1835,22 @@ static void afs_rename_success(struct afs_operation *op)
op->ctime = op->file[1].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[1]);
}
+
+ /* If we're moving a subdir between dirs, we need to update
+ * its DV counter too as the ".." will be altered.
+ */
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ op->file[0].vnode != op->file[1].vnode) {
+ u64 new_dv;
+
+ write_seqlock(&vnode->cb_lock);
+
+ new_dv = vnode->status.data_version + 1;
+ vnode->status.data_version = new_dv;
+ inode_set_iversion_raw(&vnode->netfs.inode, new_dv);
+
+ write_sequnlock(&vnode->cb_lock);
+ }
}
static void afs_rename_edit_dir(struct afs_operation *op)
@@ -1873,6 +1892,12 @@ static void afs_rename_edit_dir(struct afs_operation *op)
&vnode->fid, afs_edit_dir_for_rename_2);
}
+ if (S_ISDIR(vnode->netfs.inode.i_mode) &&
+ new_dvnode != orig_dvnode &&
+ test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ afs_edit_dir_update_dotdot(vnode, new_dvnode,
+ afs_edit_dir_for_rename_sub);
+
new_inode = d_inode(new_dentry);
if (new_inode) {
spin_lock(&new_inode->i_lock);
diff --git a/fs/afs/dir_edit.c b/fs/afs/dir_edit.c
index a71bff10496b..fe223fb78111 100644
--- a/fs/afs/dir_edit.c
+++ b/fs/afs/dir_edit.c
@@ -127,10 +127,10 @@ static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
/*
* Scan a directory block looking for a dirent of the right name.
*/
-static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
+static int afs_dir_scan_block(const union afs_xdr_dir_block *block, const struct qstr *name,
unsigned int blocknum)
{
- union afs_xdr_dirent *de;
+ const union afs_xdr_dirent *de;
u64 bitmap;
int d, len, n;
@@ -492,3 +492,90 @@ error:
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
goto out_unmap;
}
+
+/*
+ * Edit a subdirectory that has been moved between directories to update the
+ * ".." entry.
+ */
+void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
+ enum afs_edit_dir_reason why)
+{
+ union afs_xdr_dir_block *block;
+ union afs_xdr_dirent *de;
+ struct folio *folio;
+ unsigned int nr_blocks, b;
+ pgoff_t index;
+ loff_t i_size;
+ int slot;
+
+ _enter("");
+
+ i_size = i_size_read(&vnode->netfs.inode);
+ if (i_size < AFS_DIR_BLOCK_SIZE) {
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ return;
+ }
+ nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
+
+ /* Find a block that has sufficient slots available. Each folio
+ * contains two or more directory blocks.
+ */
+ for (b = 0; b < nr_blocks; b++) {
+ index = b / AFS_DIR_BLOCKS_PER_PAGE;
+ folio = afs_dir_get_folio(vnode, index);
+ if (!folio)
+ goto error;
+
+ block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_pos(folio));
+
+ /* Abandon the edit if we got a callback break. */
+ if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
+ goto invalidated;
+
+ slot = afs_dir_scan_block(block, &dotdot_name, b);
+ if (slot >= 0)
+ goto found_dirent;
+
+ kunmap_local(block);
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+ /* Didn't find the dirent to clobber. Download the directory again. */
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_nodd,
+ 0, 0, 0, 0, "..");
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out;
+
+found_dirent:
+ de = &block->dirents[slot];
+ de->u.vnode = htonl(new_dvnode->fid.vnode);
+ de->u.unique = htonl(new_dvnode->fid.unique);
+
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_dd, b, slot,
+ ntohl(de->u.vnode), ntohl(de->u.unique), "..");
+
+ kunmap_local(block);
+ folio_unlock(folio);
+ folio_put(folio);
+ inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
+
+out:
+ _leave("");
+ return;
+
+invalidated:
+ kunmap_local(block);
+ folio_unlock(folio);
+ folio_put(folio);
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_inval,
+ 0, 0, 0, 0, "..");
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out;
+
+error:
+ trace_afs_edit_dir(vnode, why, afs_edit_dir_update_error,
+ 0, 0, 0, 0, "..");
+ clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+ goto out;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 52aab09a32a9..c9d620175e80 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -1073,6 +1073,8 @@ extern void afs_check_for_remote_deletion(struct afs_operation *);
extern void afs_edit_dir_add(struct afs_vnode *, struct qstr *, struct afs_fid *,
enum afs_edit_dir_reason);
extern void afs_edit_dir_remove(struct afs_vnode *, struct qstr *, enum afs_edit_dir_reason);
+void afs_edit_dir_update_dotdot(struct afs_vnode *vnode, struct afs_vnode *new_dvnode,
+ enum afs_edit_dir_reason why);
/*
* dir_silly.c
diff --git a/fs/aio.c b/fs/aio.c
index e8920178b50f..72e3970f4225 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2191,7 +2191,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
- /* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_res.obj == obj) {
ret = kiocb->ki_cancel(&kiocb->rw);
diff --git a/fs/attr.c b/fs/attr.c
index c04d19b58f12..9caf63d20d03 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -272,6 +272,47 @@ out_big:
EXPORT_SYMBOL(inode_newsize_ok);
/**
+ * setattr_copy_mgtime - update timestamps for mgtime inodes
+ * @inode: inode timestamps to be updated
+ * @attr: attrs for the update
+ *
+ * With multigrain timestamps, take more care to prevent races when
+ * updating the ctime. Always update the ctime to the very latest using
+ * the standard mechanism, and use that to populate the atime and mtime
+ * appropriately (unless those are being set to specific values).
+ */
+static void setattr_copy_mgtime(struct inode *inode, const struct iattr *attr)
+{
+ unsigned int ia_valid = attr->ia_valid;
+ struct timespec64 now;
+
+ if (ia_valid & ATTR_CTIME) {
+ /*
+ * In the case of an update for a write delegation, we must respect
+ * the value in ia_ctime and not use the current time.
+ */
+ if (ia_valid & ATTR_DELEG)
+ now = inode_set_ctime_deleg(inode, attr->ia_ctime);
+ else
+ now = inode_set_ctime_current(inode);
+ } else {
+ /* If ATTR_CTIME isn't set, then ATTR_MTIME shouldn't be either. */
+ WARN_ON_ONCE(ia_valid & ATTR_MTIME);
+ now = current_time(inode);
+ }
+
+ if (ia_valid & ATTR_ATIME_SET)
+ inode_set_atime_to_ts(inode, attr->ia_atime);
+ else if (ia_valid & ATTR_ATIME)
+ inode_set_atime_to_ts(inode, now);
+
+ if (ia_valid & ATTR_MTIME_SET)
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
+ else if (ia_valid & ATTR_MTIME)
+ inode_set_mtime_to_ts(inode, now);
+}
+
+/**
* setattr_copy - copy simple metadata updates into the generic inode
* @idmap: idmap of the mount the inode was found from
* @inode: the inode to be updated
@@ -303,12 +344,6 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
i_uid_update(idmap, attr, inode);
i_gid_update(idmap, attr, inode);
- if (ia_valid & ATTR_ATIME)
- inode_set_atime_to_ts(inode, attr->ia_atime);
- if (ia_valid & ATTR_MTIME)
- inode_set_mtime_to_ts(inode, attr->ia_mtime);
- if (ia_valid & ATTR_CTIME)
- inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
if (!in_group_or_capable(idmap, inode,
@@ -316,6 +351,20 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
mode &= ~S_ISGID;
inode->i_mode = mode;
}
+
+ if (is_mgtime(inode))
+ return setattr_copy_mgtime(inode, attr);
+
+ if (ia_valid & ATTR_ATIME)
+ inode_set_atime_to_ts(inode, attr->ia_atime);
+ if (ia_valid & ATTR_MTIME)
+ inode_set_mtime_to_ts(inode, attr->ia_mtime);
+ if (ia_valid & ATTR_CTIME) {
+ if (ia_valid & ATTR_DELEG)
+ inode_set_ctime_deleg(inode, attr->ia_ctime);
+ else
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
+ }
}
EXPORT_SYMBOL(setattr_copy);
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index f011e026358e..6d57efbb8110 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -110,6 +110,7 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
*/
static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
{
+ unsigned int inr = _IOC_NR(cmd);
int err;
err = check_dev_ioctl_version(cmd, param);
@@ -133,7 +134,7 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
* check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD.
*/
err = check_name(param->path);
- if (cmd == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
+ if (inr == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
err = err ? 0 : -EINVAL;
if (err) {
pr_warn("invalid path supplied for cmd(0x%08x)\n",
@@ -141,8 +142,6 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
goto out;
}
} else {
- unsigned int inr = _IOC_NR(cmd);
-
if (inr == AUTOFS_DEV_IOCTL_OPENMOUNT_CMD ||
inr == AUTOFS_DEV_IOCTL_REQUESTER_CMD ||
inr == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) {
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index f8e87c6721b1..163a67b97a40 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -168,6 +168,9 @@ static inline bool data_type_movable(enum bch_data_type type)
static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
struct bch_dev *ca)
{
+ if (a.data_type >= BCH_DATA_NR)
+ return 0;
+
if (!data_type_movable(a.data_type) ||
!bch2_bucket_sectors_fragmented(ca, a))
return 0;
diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c
index 5836870ab882..372178c8d416 100644
--- a/fs/bcachefs/alloc_foreground.c
+++ b/fs/bcachefs/alloc_foreground.c
@@ -162,6 +162,10 @@ static void open_bucket_free_unused(struct bch_fs *c, struct open_bucket *ob)
ARRAY_SIZE(c->open_buckets_partial));
spin_lock(&c->freelist_lock);
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets++;
+ rcu_read_unlock();
+
ob->on_partial_list = true;
c->open_buckets_partial[c->open_buckets_partial_nr++] =
ob - c->open_buckets;
@@ -972,7 +976,7 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
u64 avail;
bch2_dev_usage_read_fast(ca, &usage);
- avail = dev_buckets_free(ca, usage, watermark);
+ avail = dev_buckets_free(ca, usage, watermark) + ca->nr_partial_buckets;
if (!avail)
continue;
@@ -981,6 +985,10 @@ static int bucket_alloc_set_partial(struct bch_fs *c,
i);
ob->on_partial_list = false;
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+ rcu_read_unlock();
+
ret = add_new_bucket(c, ptrs, devs_may_alloc,
nr_replicas, nr_effective,
have_cache, ob);
@@ -1191,7 +1199,13 @@ void bch2_open_buckets_stop(struct bch_fs *c, struct bch_dev *ca,
--c->open_buckets_partial_nr;
swap(c->open_buckets_partial[i],
c->open_buckets_partial[c->open_buckets_partial_nr]);
+
ob->on_partial_list = false;
+
+ rcu_read_lock();
+ bch2_dev_rcu(c, ob->dev)->nr_partial_buckets--;
+ rcu_read_unlock();
+
spin_unlock(&c->freelist_lock);
bch2_open_bucket_put(c, ob);
spin_lock(&c->freelist_lock);
@@ -1610,8 +1624,7 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c,
ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
ob++) {
spin_lock(&ob->lock);
- if (ob->valid && !ob->on_partial_list &&
- (!ca || ob->dev == ca->dev_idx))
+ if (ob->valid && (!ca || ob->dev == ca->dev_idx))
bch2_open_bucket_to_text(out, c, ob);
spin_unlock(&ob->lock);
}
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 47455a85c909..654a58132a4d 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -52,6 +52,12 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
enum bch_validate_flags flags)
{
struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+ int ret = 0;
+
+ bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
+ c, backpointer_level_bad,
+ "backpointer level bad: %u >= %u",
+ bp.v->level, BTREE_MAX_DEPTH);
rcu_read_lock();
struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
@@ -64,7 +70,6 @@ int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
rcu_read_unlock();
- int ret = 0;
bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
!bpos_eq(bp.k->p, bp_pos),
@@ -947,9 +952,13 @@ int bch2_check_extents_to_backpointers(struct bch_fs *c)
static int check_one_backpointer(struct btree_trans *trans,
struct bbpos start,
struct bbpos end,
- struct bkey_s_c_backpointer bp,
+ struct bkey_s_c bp_k,
struct bkey_buf *last_flushed)
{
+ if (bp_k.k->type != KEY_TYPE_backpointer)
+ return 0;
+
+ struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bbpos pos = bp_to_bbpos(*bp.v);
@@ -1004,9 +1013,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
POS_MIN, BTREE_ITER_prefetch, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
- check_one_backpointer(trans, start, end,
- bkey_s_c_to_backpointer(k),
- &last_flushed);
+ check_one_backpointer(trans, start, end, k, &last_flushed);
}));
bch2_bkey_buf_exit(&last_flushed, c);
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index f4151ee51b03..e94a83b8113e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -555,6 +555,7 @@ struct bch_dev {
u64 alloc_cursor[3];
unsigned nr_open_buckets;
+ unsigned nr_partial_buckets;
unsigned nr_btree_reserve;
size_t inc_gen_needs_gc;
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index 587d7318a2e8..995ba32e9b6e 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -643,7 +643,7 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
enum bch_validate_flags flags,
struct printbuf *err)
{
- unsigned i, bits = KEY_PACKED_BITS_START;
+ unsigned bits = KEY_PACKED_BITS_START;
if (f->nr_fields != BKEY_NR_FIELDS) {
prt_printf(err, "incorrect number of fields: got %u, should be %u",
@@ -655,9 +655,8 @@ int bch2_bkey_format_invalid(struct bch_fs *c,
* Verify that the packed format can't represent fields larger than the
* unpacked format:
*/
- for (i = 0; i < f->nr_fields; i++) {
- if ((!c || c->sb.version_min >= bcachefs_metadata_version_snapshot) &&
- bch2_bkey_format_field_overflows(f, i)) {
+ for (unsigned i = 0; i < f->nr_fields; i++) {
+ if (bch2_bkey_format_field_overflows(f, i)) {
unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
u64 unpacked_max = ~((~0ULL << 1) << (unpacked_bits - 1));
unsigned packed_bits = min(64, f->bits_per_field[i]);
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 6e4afb2b5441..7123019ab3bc 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -59,16 +59,38 @@ static inline size_t btree_cache_can_free(struct btree_cache_list *list)
static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b)
{
+ BUG_ON(!list_empty(&b->list));
+
if (b->c.lock.readers)
- list_move(&b->list, &bc->freed_pcpu);
+ list_add(&b->list, &bc->freed_pcpu);
else
- list_move(&b->list, &bc->freed_nonpcpu);
+ list_add(&b->list, &bc->freed_nonpcpu);
+}
+
+static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b)
+{
+ BUG_ON(!list_empty(&b->list));
+ BUG_ON(!b->data);
+
+ bc->nr_freeable++;
+ list_add(&b->list, &bc->freeable);
}
-static void btree_node_data_free(struct bch_fs *c, struct btree *b)
+void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
{
struct btree_cache *bc = &c->btree_cache;
+ mutex_lock(&bc->lock);
+ __bch2_btree_node_to_freelist(bc, b);
+ mutex_unlock(&bc->lock);
+
+ six_unlock_write(&b->c.lock);
+ six_unlock_intent(&b->c.lock);
+}
+
+static void __btree_node_data_free(struct btree_cache *bc, struct btree *b)
+{
+ BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));
/*
@@ -94,11 +116,17 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b)
#endif
b->aux_data = NULL;
- bc->nr_freeable--;
-
btree_node_to_freedlist(bc, b);
}
+static void btree_node_data_free(struct btree_cache *bc, struct btree *b)
+{
+ BUG_ON(list_empty(&b->list));
+ list_del_init(&b->list);
+ --bc->nr_freeable;
+ __btree_node_data_free(bc, b);
+}
+
static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
const void *obj)
{
@@ -174,21 +202,10 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
bch2_btree_lock_init(&b->c, 0);
- bc->nr_freeable++;
- list_add(&b->list, &bc->freeable);
+ __bch2_btree_node_to_freelist(bc, b);
return b;
}
-void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b)
-{
- mutex_lock(&c->btree_cache.lock);
- list_move(&b->list, &c->btree_cache.freeable);
- mutex_unlock(&c->btree_cache.lock);
-
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
-}
-
static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b)
{
struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p);
@@ -236,11 +253,11 @@ void bch2_btree_cache_unpin(struct bch_fs *c)
/* Btree in memory cache - hash table */
-void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
+void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
{
lockdep_assert_held(&bc->lock);
- int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
+ int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params);
BUG_ON(ret);
/* Cause future lookups for this node to fail: */
@@ -248,17 +265,22 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
if (b->c.btree_id < BTREE_ID_NR)
--bc->nr_by_btree[b->c.btree_id];
+ --bc->live[btree_node_pinned(b)].nr;
+ list_del_init(&b->list);
+}
- bc->live[btree_node_pinned(b)].nr--;
- bc->nr_freeable++;
- list_move(&b->list, &bc->freeable);
+void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b)
+{
+ __bch2_btree_node_hash_remove(bc, b);
+ __bch2_btree_node_to_freelist(bc, b);
}
int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
{
+ BUG_ON(!list_empty(&b->list));
BUG_ON(b->hash_val);
- b->hash_val = btree_ptr_hash_val(&b->key);
+ b->hash_val = btree_ptr_hash_val(&b->key);
int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash,
bch_btree_cache_params);
if (ret)
@@ -270,10 +292,8 @@ int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b)
bool p = __btree_node_pinned(bc, b);
mod_bit(BTREE_NODE_pinned, &b->flags, p);
- list_move_tail(&b->list, &bc->live[p].list);
+ list_add_tail(&b->list, &bc->live[p].list);
bc->live[p].nr++;
-
- bc->nr_freeable--;
return 0;
}
@@ -485,7 +505,7 @@ static unsigned long bch2_btree_cache_scan(struct shrinker *shrink,
goto out;
if (!btree_node_reclaim(c, b, true)) {
- btree_node_data_free(c, b);
+ btree_node_data_free(bc, b);
six_unlock_write(&b->c.lock);
six_unlock_intent(&b->c.lock);
freed++;
@@ -501,10 +521,10 @@ restart:
bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++;
--touched;;
} else if (!btree_node_reclaim(c, b, true)) {
- bch2_btree_node_hash_remove(bc, b);
+ __bch2_btree_node_hash_remove(bc, b);
+ __btree_node_data_free(bc, b);
freed++;
- btree_node_data_free(c, b);
bc->nr_freed++;
six_unlock_write(&b->c.lock);
@@ -587,7 +607,7 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c)
BUG_ON(btree_node_read_in_flight(b) ||
btree_node_write_in_flight(b));
- btree_node_data_free(c, b);
+ btree_node_data_free(bc, b);
}
BUG_ON(!bch2_journal_error(&c->journal) &&
@@ -786,8 +806,8 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
BUG_ON(!six_trylock_intent(&b->c.lock));
BUG_ON(!six_trylock_write(&b->c.lock));
-got_node:
+got_node:
/*
* btree_free() doesn't free memory; it sticks the node on the end of
* the list. Check if there's any freed nodes there:
@@ -796,7 +816,12 @@ got_node:
if (!btree_node_reclaim(c, b2, false)) {
swap(b->data, b2->data);
swap(b->aux_data, b2->aux_data);
+
+ list_del_init(&b2->list);
+ --bc->nr_freeable;
btree_node_to_freedlist(bc, b2);
+ mutex_unlock(&bc->lock);
+
six_unlock_write(&b2->c.lock);
six_unlock_intent(&b2->c.lock);
goto got_mem;
@@ -810,11 +835,8 @@ got_node:
goto err;
}
- mutex_lock(&bc->lock);
- bc->nr_freeable++;
got_mem:
- mutex_unlock(&bc->lock);
-
+ BUG_ON(!list_empty(&b->list));
BUG_ON(btree_node_hashed(b));
BUG_ON(btree_node_dirty(b));
BUG_ON(btree_node_write_in_flight(b));
@@ -845,7 +867,7 @@ err:
if (bc->alloc_lock == current) {
b2 = btree_node_cannibalize(c);
clear_btree_node_just_written(b2);
- bch2_btree_node_hash_remove(bc, b2);
+ __bch2_btree_node_hash_remove(bc, b2);
if (b) {
swap(b->data, b2->data);
@@ -855,9 +877,9 @@ err:
six_unlock_intent(&b2->c.lock);
} else {
b = b2;
- list_del_init(&b->list);
}
+ BUG_ON(!list_empty(&b->list));
mutex_unlock(&bc->lock);
trace_and_count(c, btree_cache_cannibalize, trans);
@@ -936,7 +958,7 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
b->hash_val = 0;
mutex_lock(&bc->lock);
- list_add(&b->list, &bc->freeable);
+ __bch2_btree_node_to_freelist(bc, b);
mutex_unlock(&bc->lock);
six_unlock_write(&b->c.lock);
@@ -1312,9 +1334,12 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
b = bch2_btree_node_fill(trans, path, k, btree_id,
level, SIX_LOCK_read, false);
- if (!IS_ERR_OR_NULL(b))
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ return ret;
+ if (b)
six_unlock_read(&b->c.lock);
- return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
+ return 0;
}
void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1353,7 +1378,7 @@ wait_on_io:
mutex_lock(&bc->lock);
bch2_btree_node_hash_remove(bc, b);
- btree_node_data_free(c, b);
+ btree_node_data_free(bc, b);
mutex_unlock(&bc->lock);
out:
six_unlock_write(&b->c.lock);
diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h
index 367acd217c6a..66e86d1a178d 100644
--- a/fs/bcachefs/btree_cache.h
+++ b/fs/bcachefs/btree_cache.h
@@ -14,7 +14,9 @@ void bch2_recalc_btree_reserve(struct bch_fs *);
void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *);
+void __bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *);
+
int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *);
int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *,
unsigned, enum btree_id);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0ca3feeb42c8..81dcf9e512c0 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -182,7 +182,7 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
bch2_btree_node_drop_keys_outside_node(b);
mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, &new->k_i);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 6296a11ccb09..839d68802e42 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -733,11 +733,8 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
c, ca, b, i, NULL,
bset_past_end_of_btree_node,
"bset past end of btree node (offset %u len %u but written %zu)",
- offset, sectors, ptr_written ?: btree_sectors(c))) {
+ offset, sectors, ptr_written ?: btree_sectors(c)))
i->u64s = 0;
- ret = 0;
- goto out;
- }
btree_err_on(offset && !i->u64s,
-BCH_ERR_btree_node_read_err_fixable,
@@ -829,7 +826,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
BSET_BIG_ENDIAN(i), write,
&bn->format);
}
-out:
fsck_err:
printbuf_exit(&buf2);
printbuf_exit(&buf1);
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 0883cf6e1a3e..eef9b89c561d 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -882,6 +882,18 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
__bch2_btree_and_journal_iter_init_node_iter(trans, &jiter, l->b, l->iter, path->pos);
k = bch2_btree_and_journal_iter_peek(&jiter);
+ if (!k.k) {
+ struct printbuf buf = PRINTBUF;
+
+ prt_str(&buf, "node not found at pos ");
+ bch2_bpos_to_text(&buf, path->pos);
+ prt_str(&buf, " at btree ");
+ bch2_btree_pos_to_text(&buf, c, l->b);
+
+ ret = bch2_fs_topology_error(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ goto err;
+ }
bch2_bkey_buf_reassemble(out, c, k);
@@ -889,6 +901,7 @@ static noinline int btree_node_iter_and_journal_peek(struct btree_trans *trans,
c->opts.btree_node_prefetch)
ret = btree_path_prefetch_j(trans, path, &jiter);
+err:
bch2_btree_and_journal_iter_exit(&jiter);
return ret;
}
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index a7aedb134e9f..30131c3bdd97 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -186,7 +186,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
.ptrs[0].type = 1 << BCH_EXTENT_ENTRY_ptr,
.ptrs[0].offset = offset,
.ptrs[0].dev = ca->dev_idx,
- .ptrs[0].gen = *bucket_gen(ca, sector_to_bucket(ca, offset)),
+ .ptrs[0].gen = bucket_gen_get(ca, sector_to_bucket(ca, offset)),
};
rcu_read_unlock();
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 64f0928e1137..d596ef93239f 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -237,10 +237,6 @@ static void __btree_node_free(struct btree_trans *trans, struct btree *b)
BUG_ON(b->will_make_reachable);
clear_btree_node_noevict(b);
-
- mutex_lock(&c->btree_cache.lock);
- list_move(&b->list, &c->btree_cache.freeable);
- mutex_unlock(&c->btree_cache.lock);
}
static void bch2_btree_node_free_inmem(struct btree_trans *trans,
@@ -252,12 +248,12 @@ static void bch2_btree_node_free_inmem(struct btree_trans *trans,
bch2_btree_node_lock_write_nofail(trans, path, &b->c);
+ __btree_node_free(trans, b);
+
mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_unlock(&c->btree_cache.lock);
- __btree_node_free(trans, b);
-
six_unlock_write(&b->c.lock);
mark_btree_node_locked_noreset(path, level, BTREE_NODE_INTENT_LOCKED);
@@ -289,7 +285,7 @@ static void bch2_btree_node_free_never_used(struct btree_update *as,
clear_btree_node_need_write(b);
mutex_lock(&c->btree_cache.lock);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
mutex_unlock(&c->btree_cache.lock);
BUG_ON(p->nr >= ARRAY_SIZE(p->b));
@@ -521,8 +517,7 @@ static void bch2_btree_reserve_put(struct btree_update *as, struct btree_trans *
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
__btree_node_free(trans, b);
- six_unlock_write(&b->c.lock);
- six_unlock_intent(&b->c.lock);
+ bch2_btree_node_to_freelist(c, b);
}
}
}
@@ -1434,6 +1429,15 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
}
}
+static bool key_deleted_in_insert(struct keylist *insert_keys, struct bpos pos)
+{
+ if (insert_keys)
+ for_each_keylist_key(insert_keys, k)
+ if (bkey_deleted(&k->k) && bpos_eq(k->k.p, pos))
+ return true;
+ return false;
+}
+
/*
* Move keys from n1 (original replacement node, now lower node) to n2 (higher
* node)
@@ -1441,7 +1445,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as,
static void __btree_split_node(struct btree_update *as,
struct btree_trans *trans,
struct btree *b,
- struct btree *n[2])
+ struct btree *n[2],
+ struct keylist *insert_keys)
{
struct bkey_packed *k;
struct bpos n1_pos = POS_MIN;
@@ -1476,7 +1481,8 @@ static void __btree_split_node(struct btree_update *as,
if (b->c.level &&
u64s < n1_u64s &&
u64s + k->u64s >= n1_u64s &&
- bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
+ (bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p) ||
+ key_deleted_in_insert(insert_keys, uk.p)))
n1_u64s += k->u64s;
i = u64s >= n1_u64s;
@@ -1603,7 +1609,7 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
n[0] = n1 = bch2_btree_node_alloc(as, trans, b->c.level);
n[1] = n2 = bch2_btree_node_alloc(as, trans, b->c.level);
- __btree_split_node(as, trans, b, n);
+ __btree_split_node(as, trans, b, n, keys);
if (keys) {
btree_split_insert_keys(as, trans, path, n1, keys);
@@ -2392,7 +2398,8 @@ static int __bch2_btree_node_update_key(struct btree_trans *trans,
if (new_hash) {
mutex_lock(&c->btree_cache.lock);
bch2_btree_node_hash_remove(&c->btree_cache, new_hash);
- bch2_btree_node_hash_remove(&c->btree_cache, b);
+
+ __bch2_btree_node_hash_remove(&c->btree_cache, b);
bkey_copy(&b->key, new_key);
ret = __bch2_btree_node_hash_insert(&c->btree_cache, b);
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index 3f56b584f8ec..1639c60dffa0 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -277,6 +277,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
bool accounting_replay_done = test_bit(BCH_FS_accounting_replay_done, &c->flags);
int ret = 0;
+ ret = bch2_journal_error(&c->journal);
+ if (ret)
+ return ret;
+
bch2_trans_unlock(trans);
bch2_trans_begin(trans);
@@ -491,7 +495,8 @@ static int fetch_wb_keys_from_journal(struct bch_fs *c, u64 seq)
return ret;
}
-static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq)
+static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq,
+ bool *did_work)
{
struct bch_fs *c = trans->c;
struct btree_write_buffer *wb = &c->btree_write_buffer;
@@ -502,6 +507,8 @@ static int btree_write_buffer_flush_seq(struct btree_trans *trans, u64 seq)
fetch_from_journal_err = fetch_wb_keys_from_journal(c, seq);
+ *did_work |= wb->inc.keys.nr || wb->flushing.keys.nr;
+
/*
* On memory allocation failure, bch2_btree_write_buffer_flush_locked()
* is not guaranteed to empty wb->inc:
@@ -521,17 +528,34 @@ static int bch2_btree_write_buffer_journal_flush(struct journal *j,
struct journal_entry_pin *_pin, u64 seq)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ bool did_work = false;
- return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq));
+ return bch2_trans_run(c, btree_write_buffer_flush_seq(trans, seq, &did_work));
}
int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
+ bool did_work = false;
trace_and_count(c, write_buffer_flush_sync, trans, _RET_IP_);
- return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal));
+ return btree_write_buffer_flush_seq(trans, journal_cur_seq(&c->journal), &did_work);
+}
+
+/*
+ * The write buffer requires flushing when going RO: keys in the journal for the
+ * write buffer don't have a journal pin yet
+ */
+bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *c)
+{
+ if (bch2_journal_error(&c->journal))
+ return false;
+
+ bool did_work = false;
+ bch2_trans_run(c, btree_write_buffer_flush_seq(trans,
+ journal_cur_seq(&c->journal), &did_work));
+ return did_work;
}
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *trans)
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index 725e79654216..d535cea28bde 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -21,6 +21,7 @@ static inline bool bch2_btree_write_buffer_must_wait(struct bch_fs *c)
struct btree_trans;
int bch2_btree_write_buffer_flush_sync(struct btree_trans *);
+bool bch2_btree_write_buffer_flush_going_ro(struct bch_fs *);
int bch2_btree_write_buffer_flush_nocheck_rw(struct btree_trans *);
int bch2_btree_write_buffer_tryflush(struct btree_trans *);
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index fd5e6ccad45e..ccc78bfe2fd4 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -103,12 +103,18 @@ static inline u8 *bucket_gen(struct bch_dev *ca, size_t b)
return gens->b + b;
}
-static inline u8 bucket_gen_get(struct bch_dev *ca, size_t b)
+static inline int bucket_gen_get_rcu(struct bch_dev *ca, size_t b)
+{
+ u8 *gen = bucket_gen(ca, b);
+ return gen ? *gen : -1;
+}
+
+static inline int bucket_gen_get(struct bch_dev *ca, size_t b)
{
rcu_read_lock();
- u8 gen = *bucket_gen(ca, b);
+ int ret = bucket_gen_get_rcu(ca, b);
rcu_read_unlock();
- return gen;
+ return ret;
}
static inline size_t PTR_BUCKET_NR(const struct bch_dev *ca,
@@ -169,10 +175,8 @@ static inline int gen_after(u8 a, u8 b)
static inline int dev_ptr_stale_rcu(struct bch_dev *ca, const struct bch_extent_ptr *ptr)
{
- u8 *gen = bucket_gen(ca, PTR_BUCKET_NR(ca, ptr));
- if (!gen)
- return -1;
- return gen_after(*gen, ptr->gen);
+ int gen = bucket_gen_get_rcu(ca, PTR_BUCKET_NR(ca, ptr));
+ return gen < 0 ? gen : gen_after(gen, ptr->gen);
}
/**
@@ -184,7 +188,6 @@ static inline int dev_ptr_stale(struct bch_dev *ca, const struct bch_extent_ptr
rcu_read_lock();
int ret = dev_ptr_stale_rcu(ca, ptr);
rcu_read_unlock();
-
return ret;
}
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index a6ee0beee6b0..8e75a852b358 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -236,7 +236,8 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
if (((1U << i) & m->data_opts.rewrite_ptrs) &&
(ptr = bch2_extent_has_ptr(old, p, bkey_i_to_s(insert))) &&
!ptr->cached) {
- bch2_extent_ptr_set_cached(bkey_i_to_s(insert), ptr);
+ bch2_extent_ptr_set_cached(c, &m->op.opts,
+ bkey_i_to_s(insert), ptr);
rewrites_found |= 1U << i;
}
i++;
@@ -284,7 +285,8 @@ restart_drop_extra_replicas:
durability - ptr_durability >= m->op.opts.data_replicas) {
durability -= ptr_durability;
- bch2_extent_ptr_set_cached(bkey_i_to_s(insert), &entry->ptr);
+ bch2_extent_ptr_set_cached(c, &m->op.opts,
+ bkey_i_to_s(insert), &entry->ptr);
goto restart_drop_extra_replicas;
}
}
@@ -295,7 +297,7 @@ restart_drop_extra_replicas:
bch2_extent_ptr_decoded_append(insert, &p);
bch2_bkey_narrow_crcs(insert, (struct bch_extent_crc_unpacked) { 0 });
- bch2_extent_normalize(c, bkey_i_to_s(insert));
+ bch2_extent_normalize_by_opts(c, &m->op.opts, bkey_i_to_s(insert));
ret = bch2_sum_sector_overwrites(trans, &iter, insert,
&should_check_enospc,
@@ -558,7 +560,8 @@ void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
int bch2_extent_drop_ptrs(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
- struct data_update_opts data_opts)
+ struct bch_io_opts *io_opts,
+ struct data_update_opts *data_opts)
{
struct bch_fs *c = trans->c;
struct bkey_i *n;
@@ -569,11 +572,11 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
if (ret)
return ret;
- while (data_opts.kill_ptrs) {
- unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+ while (data_opts->kill_ptrs) {
+ unsigned i = 0, drop = __fls(data_opts->kill_ptrs);
bch2_bkey_drop_ptrs_noerror(bkey_i_to_s(n), ptr, i++ == drop);
- data_opts.kill_ptrs ^= 1U << drop;
+ data_opts->kill_ptrs ^= 1U << drop;
}
/*
@@ -581,7 +584,7 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
* will do the appropriate thing with it (turning it into a
* KEY_TYPE_error key, or just a discard if it was a cached extent)
*/
- bch2_extent_normalize(c, bkey_i_to_s(n));
+ bch2_extent_normalize_by_opts(c, io_opts, bkey_i_to_s(n));
/*
* Since we're not inserting through an extent iterator
@@ -720,7 +723,7 @@ int bch2_data_update_init(struct btree_trans *trans,
m->data_opts.rewrite_ptrs = 0;
/* if iter == NULL, it's just a promote */
if (iter)
- ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts);
+ ret = bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &m->data_opts);
goto out;
}
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 8d36365bdea8..e4b50723428e 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -40,7 +40,8 @@ void bch2_data_update_read_done(struct data_update *,
int bch2_extent_drop_ptrs(struct btree_trans *,
struct btree_iter *,
struct bkey_s_c,
- struct data_update_opts);
+ struct bch_io_opts *,
+ struct data_update_opts *);
void bch2_data_update_exit(struct data_update *);
int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index a0aa5bb467d9..749dcf368841 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1870,6 +1870,10 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
}
h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
+ if (!h) {
+ h = ERR_PTR(-BCH_ERR_ENOMEM_stripe_head_alloc);
+ goto err;
+ }
found:
if (h->rw_devs_change_count != c->rw_devs_change_count)
ec_stripe_head_devs_update(c, h);
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index 649263516ab1..9c4fe5cdbfb7 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -83,6 +83,8 @@
x(ENOMEM, ENOMEM_fs_other_alloc) \
x(ENOMEM, ENOMEM_dev_alloc) \
x(ENOMEM, ENOMEM_disk_accounting) \
+ x(ENOMEM, ENOMEM_stripe_head_alloc) \
+ x(ENOMEM, ENOMEM_journal_read_bucket) \
x(ENOSPC, ENOSPC_disk_reservation) \
x(ENOSPC, ENOSPC_bucket_alloc) \
x(ENOSPC, ENOSPC_disk_label_add) \
@@ -222,6 +224,7 @@
x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_type) \
x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_nr_superblocks) \
x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_superblocks_overlap) \
+ x(BCH_ERR_invalid_sb_layout, invalid_sb_layout_sb_max_size_bits) \
x(BCH_ERR_invalid_sb, invalid_sb_members_missing) \
x(BCH_ERR_invalid_sb, invalid_sb_members) \
x(BCH_ERR_invalid_sb, invalid_sb_disk_groups) \
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index cc0d22085aef..37e3d69bec06 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -978,31 +978,54 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke
return NULL;
}
-void bch2_extent_ptr_set_cached(struct bkey_s k, struct bch_extent_ptr *ptr)
+static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
+ struct bch_extent_ptr *ptr)
+{
+ if (!opts->promote_target ||
+ !bch2_dev_in_target(c, ptr->dev, opts->promote_target))
+ return false;
+
+ struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
+
+ return ca && bch2_dev_is_readable(ca) && !dev_ptr_stale_rcu(ca, ptr);
+}
+
+void bch2_extent_ptr_set_cached(struct bch_fs *c,
+ struct bch_io_opts *opts,
+ struct bkey_s k,
+ struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry;
- union bch_extent_entry *ec = NULL;
+ struct extent_ptr_decoded p;
- bkey_extent_entry_for_each(ptrs, entry) {
+ rcu_read_lock();
+ if (!want_cached_ptr(c, opts, ptr)) {
+ bch2_bkey_drop_ptr_noerror(k, ptr);
+ goto out;
+ }
+
+ /*
+ * Stripes can't contain cached data, for - reasons.
+ *
+ * Possibly something we can fix in the future?
+ */
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
if (&entry->ptr == ptr) {
- ptr->cached = true;
- if (ec)
- extent_entry_drop(k, ec);
- return;
+ if (p.has_ec)
+ bch2_bkey_drop_ptr_noerror(k, ptr);
+ else
+ ptr->cached = true;
+ goto out;
}
- if (extent_entry_is_stripe_ptr(entry))
- ec = entry;
- else if (extent_entry_is_ptr(entry))
- ec = NULL;
- }
-
BUG();
+out:
+ rcu_read_unlock();
}
/*
- * bch_extent_normalize - clean up an extent, dropping stale pointers etc.
+ * bch2_extent_normalize - clean up an extent, dropping stale pointers etc.
*
* Returns true if @k should be dropped entirely
*
@@ -1016,8 +1039,39 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
rcu_read_lock();
bch2_bkey_drop_ptrs(k, ptr,
ptr->cached &&
- (ca = bch2_dev_rcu(c, ptr->dev)) &&
- dev_ptr_stale_rcu(ca, ptr) > 0);
+ (!(ca = bch2_dev_rcu(c, ptr->dev)) ||
+ dev_ptr_stale_rcu(ca, ptr) > 0));
+ rcu_read_unlock();
+
+ return bkey_deleted(k.k);
+}
+
+/*
+ * bch2_extent_normalize_by_opts - clean up an extent, dropping stale pointers etc.
+ *
+ * Like bch2_extent_normalize(), but also only keeps a single cached pointer on
+ * the promote target.
+ */
+bool bch2_extent_normalize_by_opts(struct bch_fs *c,
+ struct bch_io_opts *opts,
+ struct bkey_s k)
+{
+ struct bkey_ptrs ptrs;
+ bool have_cached_ptr;
+
+ rcu_read_lock();
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(k);
+ have_cached_ptr = false;
+
+ bkey_for_each_ptr(ptrs, ptr)
+ if (ptr->cached) {
+ if (have_cached_ptr || !want_cached_ptr(c, opts, ptr)) {
+ bch2_bkey_drop_ptr(k, ptr);
+ goto restart_drop_ptrs;
+ }
+ have_cached_ptr = true;
+ }
rcu_read_unlock();
return bkey_deleted(k.k);
@@ -1310,7 +1364,7 @@ void bch2_ptr_swab(struct bkey_s k)
for (entry = ptrs.start;
entry < ptrs.end;
entry = extent_entry_next(entry)) {
- switch (extent_entry_type(entry)) {
+ switch (__extent_entry_type(entry)) {
case BCH_EXTENT_ENTRY_ptr:
break;
case BCH_EXTENT_ENTRY_crc32:
@@ -1330,6 +1384,9 @@ void bch2_ptr_swab(struct bkey_s k)
break;
case BCH_EXTENT_ENTRY_rebalance:
break;
+ default:
+ /* Bad entry type: will be caught by validate() */
+ return;
}
}
}
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 923a5f1849a8..bcffcf60aaaf 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -686,9 +686,12 @@ bool bch2_extents_match(struct bkey_s_c, struct bkey_s_c);
struct bch_extent_ptr *
bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
-void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
+void bch2_extent_ptr_set_cached(struct bch_fs *, struct bch_io_opts *,
+ struct bkey_s, struct bch_extent_ptr *);
+bool bch2_extent_normalize_by_opts(struct bch_fs *, struct bch_io_opts *, struct bkey_s);
bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+
void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
struct bkey_s_c);
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 15d3f073b824..2456c41b215e 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -587,7 +587,7 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
POS(inode->v.i_ino, start_sector),
BTREE_ITER_slots|BTREE_ITER_intent);
- while (!ret && bkey_lt(iter.pos, end_pos)) {
+ while (!ret) {
s64 i_sectors_delta = 0;
struct quota_res quota_res = { 0 };
struct bkey_s_c k;
@@ -598,6 +598,9 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_trans_begin(trans);
+ if (bkey_ge(iter.pos, end_pos))
+ break;
+
ret = bch2_subvolume_get_snapshot(trans,
inode->ei_inum.subvol, &snapshot);
if (ret)
@@ -634,12 +637,15 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
if (bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
- opts.data_replicas, true))
+ opts.data_replicas, true)) {
ret = drop_locks_do(trans,
(bch2_clamp_data_hole(&inode->v,
&hole_start,
&hole_end,
opts.data_replicas, false), 0));
+ if (ret)
+ goto bkey_err;
+ }
bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
if (ret)
@@ -667,10 +673,13 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
bch2_i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
if (bch2_mark_pagecache_reserved(inode, &hole_start,
- iter.pos.offset, true))
- drop_locks_do(trans,
+ iter.pos.offset, true)) {
+ ret = drop_locks_do(trans,
bch2_mark_pagecache_reserved(inode, &hole_start,
iter.pos.offset, false));
+ if (ret)
+ goto bkey_err;
+ }
bkey_err:
bch2_quota_reservation_put(c, inode, &quota_res);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index fc246f342820..b3b934a87c6d 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -262,7 +262,8 @@ err:
bio_free_pages(&(*rbio)->bio);
kfree(*rbio);
*rbio = NULL;
- kfree(op);
+ /* We may have added to the rhashtable and thus need rcu freeing: */
+ kfree_rcu(op, rcu);
bch2_write_ref_put(c, BCH_WRITE_REF_promote);
return ERR_PTR(ret);
}
@@ -802,16 +803,15 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
PTR_BUCKET_POS(ca, &ptr),
BTREE_ITER_cached);
- u8 *gen = bucket_gen(ca, iter.pos.offset);
- if (gen) {
-
+ int gen = bucket_gen_get(ca, iter.pos.offset);
+ if (gen >= 0) {
prt_printf(&buf, "Attempting to read from stale dirty pointer:\n");
printbuf_indent_add(&buf, 2);
bch2_bkey_val_to_text(&buf, c, k);
prt_newline(&buf);
- prt_printf(&buf, "memory gen: %u", *gen);
+ prt_printf(&buf, "memory gen: %u", gen);
ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
if (!ret) {
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index 8609e25e450f..96720adcfee0 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -1300,11 +1300,8 @@ retry:
bucket_to_u64(i->b),
BUCKET_NOCOW_LOCK_UPDATE);
- rcu_read_lock();
- u8 *gen = bucket_gen(ca, i->b.offset);
- stale = !gen ? -1 : gen_after(*gen, i->gen);
- rcu_read_unlock();
-
+ int gen = bucket_gen_get(ca, i->b.offset);
+ stale = gen < 0 ? gen : gen_after(gen, i->gen);
if (unlikely(stale)) {
stale_at = i;
goto err_bucket_stale;
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 954f6a96e0f4..fb35dd336331 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -708,6 +708,9 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
container_of(entry, struct jset_entry_dev_usage, entry);
unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
+ if (vstruct_bytes(entry) < sizeof(*u))
+ return;
+
prt_printf(out, "dev=%u", le32_to_cpu(u->dev));
printbuf_indent_add(out, 2);
@@ -1012,6 +1015,8 @@ reread:
nr_bvecs = buf_pages(buf->data, sectors_read << 9);
bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
+ if (!bio)
+ return -BCH_ERR_ENOMEM_journal_read_bucket;
bio_init(bio, ca->disk_sb.bdev, bio->bi_inline_vecs, nr_bvecs, REQ_OP_READ);
bio->bi_iter.bi_sector = offset;
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 8c456d8b8b99..0ef4a86850bb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -266,7 +266,7 @@ int bch2_move_extent(struct moving_context *ctxt,
if (!data_opts.rewrite_ptrs &&
!data_opts.extra_replicas) {
if (data_opts.kill_ptrs)
- return bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+ return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
return 0;
}
diff --git a/fs/bcachefs/opts.c b/fs/bcachefs/opts.c
index 6673cbd8bdb9..0e2ee262fbd4 100644
--- a/fs/bcachefs/opts.c
+++ b/fs/bcachefs/opts.c
@@ -226,7 +226,7 @@ const struct bch_option bch2_opt_table[] = {
#define OPT_UINT(_min, _max) .type = BCH_OPT_UINT, \
.min = _min, .max = _max
#define OPT_STR(_choices) .type = BCH_OPT_STR, \
- .min = 0, .max = ARRAY_SIZE(_choices), \
+ .min = 0, .max = ARRAY_SIZE(_choices) - 1, \
.choices = _choices
#define OPT_STR_NOLIMIT(_choices) .type = BCH_OPT_STR, \
.min = 0, .max = U64_MAX, \
@@ -428,7 +428,7 @@ void bch2_opt_to_text(struct printbuf *out,
prt_printf(out, "%lli", v);
break;
case BCH_OPT_STR:
- if (v < opt->min || v >= opt->max - 1)
+ if (v < opt->min || v >= opt->max)
prt_printf(out, "(invalid option %lli)", v);
else if (flags & OPT_SHOW_FULL_LIST)
prt_string_option(out, opt->choices, v);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 454b5a32dd7f..3c7f941dde39 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -94,11 +94,10 @@ static void bch2_reconstruct_alloc(struct bch_fs *c)
__set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
-
c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
@@ -863,6 +862,13 @@ use_clean:
if (ret)
goto err;
+ /*
+ * Normally set by the appropriate recovery pass: when cleared, this
+ * indicates we're in early recovery and btree updates should be done by
+ * being applied to the journal replay keys. _Must_ be cleared before
+ * multithreaded use:
+ */
+ set_bit(BCH_FS_may_go_rw, &c->flags);
clear_bit(BCH_FS_fsck_running, &c->flags);
/* in case we don't run journal replay, i.e. norecovery mode */
@@ -1002,6 +1008,7 @@ int bch2_fs_initialize(struct bch_fs *c)
struct bch_inode_unpacked root_inode, lostfound_inode;
struct bkey_inode_buf packed_inode;
struct qstr lostfound = QSTR("lost+found");
+ struct bch_member *m;
int ret;
bch_notice(c, "initializing new filesystem");
@@ -1018,6 +1025,14 @@ int bch2_fs_initialize(struct bch_fs *c)
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
bch2_write_super(c);
}
+
+ for_each_member_device(c, ca) {
+ m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
+ SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false);
+ ca->mi = bch2_mi_to_cpu(m);
+ }
+
+ bch2_write_super(c);
mutex_unlock(&c->sb_lock);
c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
index 735b8adc8f9d..dff589ddc984 100644
--- a/fs/bcachefs/recovery_passes.c
+++ b/fs/bcachefs/recovery_passes.c
@@ -27,6 +27,12 @@ const char * const bch2_recovery_passes[] = {
NULL
};
+/* Fake recovery pass, so that scan_for_btree_nodes isn't 0: */
+static int bch2_recovery_pass_empty(struct bch_fs *c)
+{
+ return 0;
+}
+
static int bch2_set_may_go_rw(struct bch_fs *c)
{
struct journal_keys *keys = &c->journal_keys;
@@ -221,6 +227,12 @@ int bch2_run_recovery_passes(struct bch_fs *c)
{
int ret = 0;
+ /*
+ * We can't allow set_may_go_rw to be excluded; that would cause us to
+ * use the journal replay keys for updates where it's not expected.
+ */
+ c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
+
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
if (c->opts.recovery_pass_last &&
c->curr_recovery_pass > c->opts.recovery_pass_last)
diff --git a/fs/bcachefs/recovery_passes_types.h b/fs/bcachefs/recovery_passes_types.h
index 9d96c06e365c..94dc20ca2065 100644
--- a/fs/bcachefs/recovery_passes_types.h
+++ b/fs/bcachefs/recovery_passes_types.h
@@ -13,6 +13,7 @@
* must never change:
*/
#define BCH_RECOVERY_PASSES() \
+ x(recovery_pass_empty, 41, PASS_SILENT) \
x(scan_for_btree_nodes, 37, 0) \
x(check_topology, 4, 0) \
x(accounting_read, 39, PASS_ALWAYS) \
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index ae715ff658e8..8767c33c2b51 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -143,6 +143,9 @@ UPGRADE_TABLE()
static int have_stripes(struct bch_fs *c)
{
+ if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
+ return 0;
+
return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
}
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index 937275d061fe..9feb6739f77a 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -136,7 +136,9 @@ enum bch_fsck_flags {
x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \
x(need_discard_freespace_key_bad, 124, 0) \
+ x(discarding_bucket_not_in_need_discard_btree, 291, 0) \
x(backpointer_bucket_offset_wrong, 125, 0) \
+ x(backpointer_level_bad, 294, 0) \
x(backpointer_to_missing_device, 126, 0) \
x(backpointer_to_missing_alloc, 127, 0) \
x(backpointer_to_missing_ptr, 128, 0) \
@@ -177,7 +179,9 @@ enum bch_fsck_flags {
x(ptr_stripe_redundant, 163, 0) \
x(reservation_key_nr_replicas_invalid, 164, 0) \
x(reflink_v_refcount_wrong, 165, 0) \
+ x(reflink_v_pos_bad, 292, 0) \
x(reflink_p_to_missing_reflink_v, 166, 0) \
+ x(reflink_refcount_underflow, 293, 0) \
x(stripe_pos_bad, 167, 0) \
x(stripe_val_size_bad, 168, 0) \
x(stripe_csum_granularity_bad, 290, 0) \
@@ -302,7 +306,7 @@ enum bch_fsck_flags {
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
- x(MAX, 291, 0)
+ x(MAX, 295, 0)
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index fb08dd680dac..116131f95815 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -163,7 +163,7 @@ static int validate_member(struct printbuf *err,
return -BCH_ERR_invalid_sb_members;
}
- if (m.btree_bitmap_shift >= 64) {
+ if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) {
prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift);
return -BCH_ERR_invalid_sb_members;
}
@@ -450,7 +450,7 @@ static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, uns
m->btree_bitmap_shift += resize;
}
- BUG_ON(m->btree_bitmap_shift > 57);
+ BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
BUG_ON(end > 64ULL << m->btree_bitmap_shift);
for (unsigned bit = start >> m->btree_bitmap_shift;
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
index d727d2dfda08..2adf1221a440 100644
--- a/fs/bcachefs/sb-members_format.h
+++ b/fs/bcachefs/sb-members_format.h
@@ -66,6 +66,12 @@ struct bch_member {
};
/*
+ * btree_allocated_bitmap can represent sector addresses of a u64: it itself has
+ * 64 elements, so 64 - ilog2(64)
+ */
+#define BCH_MI_BTREE_BITMAP_SHIFT_MAX 58
+
+/*
* This limit comes from the bucket_gens array - it's a single allocation, and
* kernel allocation are limited to INT_MAX
*/
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index ce7410d72089..7c71594f6a8b 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -287,6 +287,11 @@ static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out
return -BCH_ERR_invalid_sb_layout_nr_superblocks;
}
+ if (layout->sb_max_size_bits > BCH_SB_LAYOUT_SIZE_BITS_MAX) {
+ prt_printf(out, "Invalid superblock layout: max_size_bits too high");
+ return -BCH_ERR_invalid_sb_layout_sb_max_size_bits;
+ }
+
max_sectors = 1 << layout->sb_max_size_bits;
prev_offset = le64_to_cpu(layout->sb_offset[0]);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 657fd3759e7b..a6ed9a0bf1c7 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -272,6 +272,7 @@ static void __bch2_fs_read_only(struct bch_fs *c)
clean_passes++;
if (bch2_btree_interior_updates_flush(c) ||
+ bch2_btree_write_buffer_flush_going_ro(c) ||
bch2_journal_flush_all_pins(&c->journal) ||
bch2_btree_flush_all_writes(c) ||
seq != atomic64_read(&c->journal.seq)) {
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index 315038a0a92d..fb5c1543e52f 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -809,6 +809,11 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
unsigned i;
u64 time;
+ if (nr == 0 || nr_threads == 0) {
+ pr_err("nr of iterations or threads is not allowed to be 0");
+ return -EINVAL;
+ }
+
atomic_set(&j.ready, nr_threads);
init_waitqueue_head(&j.ready_wait);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index f92f108840f5..8f430ff8e445 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -11,12 +11,13 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/errno.h>
#include <linux/stat.h>
#include <linux/nls.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
-#include <linux/parser.h>
#include <linux/namei.h>
#include <linux/sched.h>
#include <linux/cred.h>
@@ -54,22 +55,20 @@ static int befs_utf2nls(struct super_block *sb, const char *in, int in_len,
static int befs_nls2utf(struct super_block *sb, const char *in, int in_len,
char **out, int *out_len);
static void befs_put_super(struct super_block *);
-static int befs_remount(struct super_block *, int *, char *);
static int befs_statfs(struct dentry *, struct kstatfs *);
static int befs_show_options(struct seq_file *, struct dentry *);
-static int parse_options(char *, struct befs_mount_options *);
static struct dentry *befs_fh_to_dentry(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type);
static struct dentry *befs_fh_to_parent(struct super_block *sb,
struct fid *fid, int fh_len, int fh_type);
static struct dentry *befs_get_parent(struct dentry *child);
+static void befs_free_fc(struct fs_context *fc);
static const struct super_operations befs_sops = {
.alloc_inode = befs_alloc_inode, /* allocate a new inode */
.free_inode = befs_free_inode, /* deallocate an inode */
.put_super = befs_put_super, /* uninit super */
.statfs = befs_statfs, /* statfs */
- .remount_fs = befs_remount,
.show_options = befs_show_options,
};
@@ -672,92 +671,53 @@ static struct dentry *befs_get_parent(struct dentry *child)
}
enum {
- Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err,
+ Opt_uid, Opt_gid, Opt_charset, Opt_debug,
};
-static const match_table_t befs_tokens = {
- {Opt_uid, "uid=%d"},
- {Opt_gid, "gid=%d"},
- {Opt_charset, "iocharset=%s"},
- {Opt_debug, "debug"},
- {Opt_err, NULL}
+static const struct fs_parameter_spec befs_param_spec[] = {
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_string ("iocharset", Opt_charset),
+ fsparam_flag ("debug", Opt_debug),
+ {}
};
static int
-parse_options(char *options, struct befs_mount_options *opts)
+befs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int option;
- kuid_t uid;
- kgid_t gid;
-
- /* Initialize options */
- opts->uid = GLOBAL_ROOT_UID;
- opts->gid = GLOBAL_ROOT_GID;
- opts->use_uid = 0;
- opts->use_gid = 0;
- opts->iocharset = NULL;
- opts->debug = 0;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
-
- if (!*p)
- continue;
-
- token = match_token(p, befs_tokens, args);
- switch (token) {
- case Opt_uid:
- if (match_int(&args[0], &option))
- return 0;
- uid = INVALID_UID;
- if (option >= 0)
- uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(uid)) {
- pr_err("Invalid uid %d, "
- "using default\n", option);
- break;
- }
- opts->uid = uid;
- opts->use_uid = 1;
- break;
- case Opt_gid:
- if (match_int(&args[0], &option))
- return 0;
- gid = INVALID_GID;
- if (option >= 0)
- gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(gid)) {
- pr_err("Invalid gid %d, "
- "using default\n", option);
- break;
- }
- opts->gid = gid;
- opts->use_gid = 1;
- break;
- case Opt_charset:
- kfree(opts->iocharset);
- opts->iocharset = match_strdup(&args[0]);
- if (!opts->iocharset) {
- pr_err("allocation failure for "
- "iocharset string\n");
- return 0;
- }
- break;
- case Opt_debug:
- opts->debug = 1;
- break;
- default:
- pr_err("Unrecognized mount option \"%s\" "
- "or missing value\n", p);
- return 0;
- }
+ struct befs_mount_options *opts = fc->fs_private;
+ int token;
+ struct fs_parse_result result;
+
+ /* befs ignores all options on remount */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+ return 0;
+
+ token = fs_parse(fc, befs_param_spec, param, &result);
+ if (token < 0)
+ return token;
+
+ switch (token) {
+ case Opt_uid:
+ opts->uid = result.uid;
+ opts->use_uid = 1;
+ break;
+ case Opt_gid:
+ opts->gid = result.gid;
+ opts->use_gid = 1;
+ break;
+ case Opt_charset:
+ kfree(opts->iocharset);
+ opts->iocharset = param->string;
+ param->string = NULL;
+ break;
+ case Opt_debug:
+ opts->debug = 1;
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
+ return 0;
}
static int befs_show_options(struct seq_file *m, struct dentry *root)
@@ -793,6 +753,21 @@ befs_put_super(struct super_block *sb)
sb->s_fs_info = NULL;
}
+/*
+ * Copy the parsed options into the sbi mount_options member
+ */
+static void
+befs_set_options(struct befs_sb_info *sbi, struct befs_mount_options *opts)
+{
+ sbi->mount_opts.uid = opts->uid;
+ sbi->mount_opts.gid = opts->gid;
+ sbi->mount_opts.use_uid = opts->use_uid;
+ sbi->mount_opts.use_gid = opts->use_gid;
+ sbi->mount_opts.debug = opts->debug;
+ sbi->mount_opts.iocharset = opts->iocharset;
+ opts->iocharset = NULL;
+}
+
/* Allocate private field of the superblock, fill it.
*
* Finish filling the public superblock fields
@@ -800,7 +775,7 @@ befs_put_super(struct super_block *sb)
* Load a set of NLS translations if needed.
*/
static int
-befs_fill_super(struct super_block *sb, void *data, int silent)
+befs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct buffer_head *bh;
struct befs_sb_info *befs_sb;
@@ -810,6 +785,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
const unsigned long sb_block = 0;
const off_t x86_sb_off = 512;
int blocksize;
+ struct befs_mount_options *parsed_opts = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL);
if (sb->s_fs_info == NULL)
@@ -817,11 +794,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
befs_sb = BEFS_SB(sb);
- if (!parse_options((char *) data, &befs_sb->mount_opts)) {
- if (!silent)
- befs_error(sb, "cannot parse mount options");
- goto unacquire_priv_sbp;
- }
+ befs_set_options(befs_sb, parsed_opts);
befs_debug(sb, "---> %s", __func__);
@@ -934,10 +907,10 @@ unacquire_none:
}
static int
-befs_remount(struct super_block *sb, int *flags, char *data)
+befs_reconfigure(struct fs_context *fc)
{
- sync_filesystem(sb);
- if (!(*flags & SB_RDONLY))
+ sync_filesystem(fc->root->d_sb);
+ if (!(fc->sb_flags & SB_RDONLY))
return -EINVAL;
return 0;
}
@@ -965,19 +938,51 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static struct dentry *
-befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
- void *data)
+static int befs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, befs_fill_super);
+}
+
+static const struct fs_context_operations befs_context_ops = {
+ .parse_param = befs_parse_param,
+ .get_tree = befs_get_tree,
+ .reconfigure = befs_reconfigure,
+ .free = befs_free_fc,
+};
+
+static int befs_init_fs_context(struct fs_context *fc)
+{
+ struct befs_mount_options *opts;
+
+ opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ /* Initialize options */
+ opts->uid = GLOBAL_ROOT_UID;
+ opts->gid = GLOBAL_ROOT_GID;
+
+ fc->fs_private = opts;
+ fc->ops = &befs_context_ops;
+
+ return 0;
+}
+
+static void befs_free_fc(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
+ struct befs_mount_options *opts = fc->fs_private;
+
+ kfree(opts->iocharset);
+ kfree(fc->fs_private);
}
static struct file_system_type befs_fs_type = {
.owner = THIS_MODULE,
.name = "befs",
- .mount = befs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = befs_init_fs_context,
+ .parameters = befs_param_spec,
};
MODULE_ALIAS_FS("befs");
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
index fec5c6cde0a7..7e0f9600b80c 100644
--- a/fs/btrfs/bio.c
+++ b/fs/btrfs/bio.c
@@ -49,6 +49,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
bbio->end_io = end_io;
bbio->private = private;
atomic_set(&bbio->pending_ios, 1);
+ WRITE_ONCE(bbio->status, BLK_STS_OK);
}
/*
@@ -113,41 +114,29 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
}
}
-static void btrfs_orig_write_end_io(struct bio *bio);
-
-static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
- struct btrfs_bio *orig_bbio)
-{
- /*
- * For writes we tolerate nr_mirrors - 1 write failures, so we can't
- * just blindly propagate a write failure here. Instead increment the
- * error count in the original I/O context so that it is guaranteed to
- * be larger than the error tolerance.
- */
- if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
- struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
- struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
-
- atomic_add(orig_bioc->max_errors, &orig_bioc->error);
- } else {
- orig_bbio->bio.bi_status = bbio->bio.bi_status;
- }
-}
-
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
struct btrfs_bio *orig_bbio = bbio->private;
- if (bbio->bio.bi_status)
- btrfs_bbio_propagate_error(bbio, orig_bbio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
- if (atomic_dec_and_test(&bbio->pending_ios))
+ /*
+ * At this point, bbio always points to the original btrfs_bio. Save
+ * the first error in it.
+ */
+ if (status != BLK_STS_OK)
+ cmpxchg(&bbio->status, BLK_STS_OK, status);
+
+ if (atomic_dec_and_test(&bbio->pending_ios)) {
+ /* Load split bio's error which might be set above. */
+ if (status == BLK_STS_OK)
+ bbio->bio.bi_status = READ_ONCE(bbio->status);
__btrfs_bio_end_io(bbio);
+ }
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
index e48612340745..e2fe16074ad6 100644
--- a/fs/btrfs/bio.h
+++ b/fs/btrfs/bio.h
@@ -79,6 +79,9 @@ struct btrfs_bio {
/* File system that this I/O operates on. */
struct btrfs_fs_info *fs_info;
+ /* Save the first error status of split bio. */
+ blk_status_t status;
+
/*
* This member must come last, bio_alloc_bioset will allocate enough
* bytes for entire btrfs_bio but relies on bio being last.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 317a3712270f..307dedf95c70 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -744,16 +744,11 @@ const char *btrfs_super_csum_driver(u16 csum_type);
size_t __attribute_const__ btrfs_get_num_csums(void);
/*
- * We use page status Private2 to indicate there is an ordered extent with
+ * We use folio flag owner_2 to indicate there is an ordered extent with
* unfinished IO.
- *
- * Rename the Private2 accessors to Ordered, to improve readability.
*/
-#define PageOrdered(page) PagePrivate2(page)
-#define SetPageOrdered(page) SetPagePrivate2(page)
-#define ClearPageOrdered(page) ClearPagePrivate2(page)
-#define folio_test_ordered(folio) folio_test_private_2(folio)
-#define folio_set_ordered(folio) folio_set_private_2(folio)
-#define folio_clear_ordered(folio) folio_clear_private_2(folio)
+#define folio_test_ordered(folio) folio_test_owner_2(folio)
+#define folio_set_ordered(folio) folio_set_owner_2(folio)
+#define folio_clear_ordered(folio) folio_clear_owner_2(folio)
#endif
diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
index b95ef44c326b..968dae953948 100644
--- a/fs/btrfs/defrag.c
+++ b/fs/btrfs/defrag.c
@@ -763,12 +763,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
* We can get a merged extent, in that case, we need to re-search
* tree to get the original em for defrag.
*
- * If @newer_than is 0 or em::generation < newer_than, we can trust
- * this em, as either we don't care about the generation, or the
- * merged extent map will be rejected anyway.
+ * This is because even if we have adjacent extents that are contiguous
+ * and compatible (same type and flags), we still want to defrag them
+ * so that we use less metadata (extent items in the extent tree and
+ * file extent items in the inode's subvolume tree).
*/
- if (em && (em->flags & EXTENT_FLAG_MERGED) &&
- newer_than && em->generation >= newer_than) {
+ if (em && (em->flags & EXTENT_FLAG_MERGED)) {
free_extent_map(em);
em = NULL;
}
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 115b90d29b1d..cab94d141f66 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -298,7 +298,7 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1,
if (ref1->ref_root < ref2->ref_root)
return -1;
if (ref1->ref_root > ref2->ref_root)
- return -1;
+ return 1;
if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY)
ret = comp_data_refs(ref1, ref2);
}
@@ -649,7 +649,7 @@ static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
&href->ref_add_list);
else if (ref->action == BTRFS_DROP_DELAYED_REF) {
ASSERT(!list_empty(&exist->add_list));
- list_del(&exist->add_list);
+ list_del_init(&exist->add_list);
} else {
ASSERT(0);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 872cca54cc6c..42c9899d9241 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -786,7 +786,7 @@ static void submit_extent_folio(struct btrfs_bio_ctrl *bio_ctrl,
}
if (bio_ctrl->wbc)
- wbc_account_cgroup_owner(bio_ctrl->wbc, &folio->page,
+ wbc_account_cgroup_owner(bio_ctrl->wbc, folio,
len);
size -= len;
@@ -1708,7 +1708,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
ret = bio_add_folio(&bbio->bio, folio, eb->len,
eb->start - folio_pos(folio));
ASSERT(ret);
- wbc_account_cgroup_owner(wbc, folio_page(folio, 0), eb->len);
+ wbc_account_cgroup_owner(wbc, folio, eb->len);
folio_unlock(folio);
} else {
int num_folios = num_extent_folios(eb);
@@ -1722,8 +1722,7 @@ static noinline_for_stack void write_one_eb(struct extent_buffer *eb,
folio_start_writeback(folio);
ret = bio_add_folio(&bbio->bio, folio, eb->folio_size, 0);
ASSERT(ret);
- wbc_account_cgroup_owner(wbc, folio_page(folio, 0),
- eb->folio_size);
+ wbc_account_cgroup_owner(wbc, folio, eb->folio_size);
wbc->nr_to_write -= folio_nr_pages(folio);
folio_unlock(folio);
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 668c617444a5..1d93e1202c33 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -230,7 +230,12 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
if (extent_map_end(prev) != next->start)
return false;
- if (prev->flags != next->flags)
+ /*
+ * The merged flag is not an on-disk flag, it just indicates we had the
+ * extent maps of 2 (or more) adjacent extents merged, so factor it out.
+ */
+ if ((prev->flags & ~EXTENT_FLAG_MERGED) !=
+ (next->flags & ~EXTENT_FLAG_MERGED))
return false;
if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4fb521d91b06..e5384ceb8acf 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1120,26 +1120,6 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
btrfs_drew_write_unlock(&inode->root->snapshot_lock);
}
-static void update_time_for_write(struct inode *inode)
-{
- struct timespec64 now, ts;
-
- if (IS_NOCMTIME(inode))
- return;
-
- now = current_time(inode);
- ts = inode_get_mtime(inode);
- if (!timespec64_equal(&ts, &now))
- inode_set_mtime_to_ts(inode, now);
-
- ts = inode_get_ctime(inode);
- if (!timespec64_equal(&ts, &now))
- inode_set_ctime_to_ts(inode, now);
-
- if (IS_I_VERSION(inode))
- inode_inc_iversion(inode);
-}
-
int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
{
struct file *file = iocb->ki_filp;
@@ -1170,7 +1150,10 @@ int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
* need to start yet another transaction to update the inode as we will
* update the inode when we finish writing whatever data we write.
*/
- update_time_for_write(inode);
+ if (!IS_NOCMTIME(inode)) {
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ inode_inc_iversion(inode);
+ }
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index da51edbad6a0..e41bc7842577 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1513,7 +1513,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* (which the caller expects to stay locked), don't clear any
* dirty bits and don't set any writeback bits
*
- * Do set the Ordered (Private2) bit so we know this page was
+ * Do set the Ordered flag so we know this page was
* properly setup for writepage.
*/
page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
@@ -1618,7 +1618,7 @@ out_unlock:
clear_bits |= EXTENT_CLEAR_DATA_RESV;
extent_clear_unlock_delalloc(inode, start, end, locked_folio,
&cached, clear_bits, page_ops);
- btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
+ btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL);
}
return ret;
}
@@ -1729,7 +1729,7 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
* need full accuracy. Just account the whole thing
* against the first page.
*/
- wbc_account_cgroup_owner(wbc, &locked_folio->page,
+ wbc_account_cgroup_owner(wbc, locked_folio,
cur_end - start);
async_chunk[i].locked_folio = locked_folio;
locked_folio = NULL;
@@ -7294,7 +7294,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
*
* But already submitted bio can still be finished on this folio.
* Furthermore, endio function won't skip folio which has Ordered
- * (Private2) already cleared, so it's possible for endio and
+ * already cleared, so it's possible for endio and
* invalidate_folio to do the same ordered extent accounting twice
* on one folio.
*
@@ -7360,7 +7360,7 @@ static void btrfs_invalidate_folio(struct folio *folio, size_t offset,
range_len = range_end + 1 - cur;
if (!btrfs_folio_test_ordered(fs_info, folio, cur, range_len)) {
/*
- * If Ordered (Private2) is cleared, it means endio has
+ * If Ordered is cleared, it means endio has
* already been executed for the range.
* We can't delete the extent states as
* btrfs_finish_ordered_io() may still use some of them.
@@ -7433,7 +7433,7 @@ next:
}
/*
* We have iterated through all ordered extents of the page, the page
- * should not have Ordered (Private2) anymore, or the above iteration
+ * should not have Ordered anymore, or the above iteration
* did something wrong.
*/
ASSERT(!folio_test_ordered(folio));
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 2104d60c2161..95c8499a159a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -346,10 +346,10 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
ASSERT(file_offset + len <= folio_pos(folio) + folio_size(folio));
/*
- * Ordered (Private2) bit indicates whether we still have
+ * Ordered flag indicates whether we still have
* pending io unfinished for the ordered extent.
*
- * If there's no such bit, we need to skip to next range.
+ * If it's not set, we need to skip to next range.
*/
if (!btrfs_folio_test_ordered(fs_info, folio, file_offset, len))
return false;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 926d7a9ed99d..881d62d81b9f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1979,25 +1979,10 @@ error:
* fsconfig(FSCONFIG_SET_FLAG, "ro"). This option is seen by the filesystem
* in fc->sb_flags.
*
- * This disambiguation has rather positive consequences. Mounting a subvolume
- * ro will not also turn the superblock ro. Only the mount for the subvolume
- * will become ro.
- *
- * So, if the superblock creation request comes from the new mount API the
- * caller must have explicitly done:
- *
- * fsconfig(FSCONFIG_SET_FLAG, "ro")
- * fsmount/mount_setattr(MOUNT_ATTR_RDONLY)
- *
- * IOW, at some point the caller must have explicitly turned the whole
- * superblock ro and we shouldn't just undo it like we did for the old mount
- * API. In any case, it lets us avoid the hack in the new mount API.
- *
- * Consequently, the remounting hack must only be used for requests originating
- * from the old mount API and should be marked for full deprecation so it can be
- * turned off in a couple of years.
- *
- * The new mount API has no reason to support this hack.
+ * But, currently the util-linux mount command already utilizes the new mount
+ * API and is still setting fsconfig(FSCONFIG_SET_FLAG, "ro") no matter if it's
+ * btrfs or not, setting the whole super block RO. To make per-subvolume mounting
+ * work with different options work we need to keep backward compatibility.
*/
static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc)
{
@@ -2019,7 +2004,7 @@ static struct vfsmount *btrfs_reconfigure_for_mount(struct fs_context *fc)
if (IS_ERR(mnt))
return mnt;
- if (!fc->oldapi || !ro2rw)
+ if (!ro2rw)
return mnt;
/* We need to convert to rw, call reconfigure. */
@@ -2206,7 +2191,8 @@ static struct file_system_type btrfs_fs_type = {
.init_fs_context = btrfs_init_fs_context,
.parameters = btrfs_fs_parameters,
.kill_sb = btrfs_kill_super,
- .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA |
+ FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("btrfs");
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8f340ad1d938..eb51b609190f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1105,6 +1105,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
if (device->bdev) {
fs_devices->open_devices--;
device->bdev = NULL;
+ device->bdev_file = NULL;
}
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
btrfs_destroy_dev_zone_info(device);
diff --git a/fs/buffer.c b/fs/buffer.c
index 1fc9a50def0b..bb4a31b9559d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1649,6 +1649,7 @@ void block_invalidate_folio(struct folio *folio, size_t offset, size_t length)
if (length == folio_size(folio))
filemap_release_folio(folio, 0);
out:
+ folio_clear_mappedtodisk(folio);
return;
}
EXPORT_SYMBOL(block_invalidate_folio);
@@ -2803,7 +2804,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_write_hint = write_hint;
- __bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
+ bio_add_folio_nofail(bio, bh->b_folio, bh->b_size, bh_offset(bh));
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
@@ -2813,7 +2814,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
if (wbc) {
wbc_init_bio(wbc, bio);
- wbc_account_cgroup_owner(wbc, bh->b_page, bh->b_size);
+ wbc_account_cgroup_owner(wbc, bh->b_folio, bh->b_size);
}
submit_bio(bio);
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 35ba2117a6f6..3e63cfe15874 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -327,6 +327,8 @@ static void cachefiles_commit_object(struct cachefiles_object *object,
static void cachefiles_clean_up_object(struct cachefiles_object *object,
struct cachefiles_cache *cache)
{
+ struct file *file;
+
if (test_bit(FSCACHE_COOKIE_RETIRED, &object->cookie->flags)) {
if (!test_bit(CACHEFILES_OBJECT_USING_TMPFILE, &object->flags)) {
cachefiles_see_object(object, cachefiles_obj_see_clean_delete);
@@ -342,10 +344,14 @@ static void cachefiles_clean_up_object(struct cachefiles_object *object,
}
cachefiles_unmark_inode_in_use(object, object->file);
- if (object->file) {
- fput(object->file);
- object->file = NULL;
- }
+
+ spin_lock(&object->lock);
+ file = object->file;
+ object->file = NULL;
+ spin_unlock(&object->lock);
+
+ if (file)
+ fput(file);
}
/*
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 2b3f9935dbb4..7cf59713f0f7 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -691,11 +691,6 @@ bool cachefiles_commit_tmpfile(struct cachefiles_cache *cache,
}
if (!d_is_negative(dentry)) {
- if (d_backing_inode(dentry) == file_inode(object->file)) {
- success = true;
- goto out_dput;
- }
-
ret = cachefiles_unlink(volume->cache, object, fan, dentry,
FSCACHE_OBJECT_IS_STALE);
if (ret < 0)
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
index 470c96658385..fe3de9ad57bf 100644
--- a/fs/cachefiles/ondemand.c
+++ b/fs/cachefiles/ondemand.c
@@ -60,26 +60,36 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
{
struct cachefiles_object *object = kiocb->ki_filp->private_data;
struct cachefiles_cache *cache = object->volume->cache;
- struct file *file = object->file;
- size_t len = iter->count;
+ struct file *file;
+ size_t len = iter->count, aligned_len = len;
loff_t pos = kiocb->ki_pos;
const struct cred *saved_cred;
int ret;
- if (!file)
+ spin_lock(&object->lock);
+ file = object->file;
+ if (!file) {
+ spin_unlock(&object->lock);
return -ENOBUFS;
+ }
+ get_file(file);
+ spin_unlock(&object->lock);
cachefiles_begin_secure(cache, &saved_cred);
- ret = __cachefiles_prepare_write(object, file, &pos, &len, len, true);
+ ret = __cachefiles_prepare_write(object, file, &pos, &aligned_len, len, true);
cachefiles_end_secure(cache, saved_cred);
if (ret < 0)
- return ret;
+ goto out;
trace_cachefiles_ondemand_fd_write(object, file_inode(file), pos, len);
ret = __cachefiles_write(object, file, pos, iter, NULL, NULL);
- if (!ret)
+ if (!ret) {
ret = len;
+ kiocb->ki_pos += ret;
+ }
+out:
+ fput(file);
return ret;
}
@@ -87,12 +97,22 @@ static loff_t cachefiles_ondemand_fd_llseek(struct file *filp, loff_t pos,
int whence)
{
struct cachefiles_object *object = filp->private_data;
- struct file *file = object->file;
+ struct file *file;
+ loff_t ret;
- if (!file)
+ spin_lock(&object->lock);
+ file = object->file;
+ if (!file) {
+ spin_unlock(&object->lock);
return -ENOBUFS;
+ }
+ get_file(file);
+ spin_unlock(&object->lock);
- return vfs_llseek(file, pos, whence);
+ ret = vfs_llseek(file, pos, whence);
+ fput(file);
+
+ return ret;
}
static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl,
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c2a9e2cc03de..4c82348fe1e6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1054,7 +1054,9 @@ get_more_pages:
if (!nr_folios && !locked_pages)
break;
for (i = 0; i < nr_folios && locked_pages < max_pages; i++) {
- page = &fbatch.folios[i]->page;
+ struct folio *folio = fbatch.folios[i];
+
+ page = &folio->page;
doutc(cl, "? %p idx %lu\n", page, page->index);
if (locked_pages == 0)
lock_page(page); /* first page */
@@ -1081,8 +1083,6 @@ get_more_pages:
continue;
}
if (page_offset(page) >= ceph_wbc.i_size) {
- struct folio *folio = page_folio(page);
-
doutc(cl, "folio at %lu beyond eof %llu\n",
folio->index, ceph_wbc.i_size);
if ((ceph_wbc.size_stable ||
@@ -1098,16 +1098,16 @@ get_more_pages:
unlock_page(page);
break;
}
- if (PageWriteback(page) ||
- PagePrivate2(page) /* [DEPRECATED] */) {
+ if (folio_test_writeback(folio) ||
+ folio_test_private_2(folio) /* [DEPRECATED] */) {
if (wbc->sync_mode == WB_SYNC_NONE) {
- doutc(cl, "%p under writeback\n", page);
- unlock_page(page);
+ doutc(cl, "%p under writeback\n", folio);
+ folio_unlock(folio);
continue;
}
- doutc(cl, "waiting on writeback %p\n", page);
- wait_on_page_writeback(page);
- folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
+ doutc(cl, "waiting on writeback %p\n", folio);
+ folio_wait_writeback(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
}
if (!clear_page_dirty_for_io(page)) {
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 57cc096c498a..c2ddb998f3c9 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -562,8 +562,8 @@ int cdev_device_add(struct cdev *cdev, struct device *dev)
/**
* cdev_device_del() - inverse of cdev_device_add
- * @dev: the device structure
* @cdev: the cdev structure
+ * @dev: the device structure
*
* cdev_device_del() is a helper function to call cdev_del and device_del.
* It should be used whenever cdev_device_add is used.
diff --git a/fs/coredump.c b/fs/coredump.c
index 45737b43dda5..d48edb37bc35 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -951,6 +951,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
} else {
dump_skip(cprm, PAGE_SIZE);
}
+ cond_resched();
}
dump_page_free(dump_page);
return 1;
diff --git a/fs/dax.c b/fs/dax.c
index c62acd2812f8..21b47402b3dc 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1262,35 +1262,46 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
{
struct iomap *iomap = &iter->iomap;
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- loff_t pos = iter->pos;
- loff_t length = iomap_length(iter);
+ loff_t copy_pos = iter->pos;
+ u64 copy_len = iomap_length(iter);
+ u32 mod;
int id = 0;
s64 ret = 0;
void *daddr = NULL, *saddr = NULL;
- /* don't bother with blocks that are not shared to start with */
- if (!(iomap->flags & IOMAP_F_SHARED))
- return length;
+ if (!iomap_want_unshare_iter(iter))
+ return iomap_length(iter);
+
+ /*
+ * Extend the file range to be aligned to fsblock/pagesize, because
+ * we need to copy entire blocks, not just the byte range specified.
+ * Invalidate the mapping because we're about to CoW.
+ */
+ mod = offset_in_page(copy_pos);
+ if (mod) {
+ copy_len += mod;
+ copy_pos -= mod;
+ }
+
+ mod = offset_in_page(copy_pos + copy_len);
+ if (mod)
+ copy_len += PAGE_SIZE - mod;
+
+ invalidate_inode_pages2_range(iter->inode->i_mapping,
+ copy_pos >> PAGE_SHIFT,
+ (copy_pos + copy_len - 1) >> PAGE_SHIFT);
id = dax_read_lock();
- ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
+ ret = dax_iomap_direct_access(iomap, copy_pos, copy_len, &daddr, NULL);
if (ret < 0)
goto out_unlock;
- /* zero the distance if srcmap is HOLE or UNWRITTEN */
- if (srcmap->flags & IOMAP_F_SHARED || srcmap->type == IOMAP_UNWRITTEN) {
- memset(daddr, 0, length);
- dax_flush(iomap->dax_dev, daddr, length);
- ret = length;
- goto out_unlock;
- }
-
- ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
+ ret = dax_iomap_direct_access(srcmap, copy_pos, copy_len, &saddr, NULL);
if (ret < 0)
goto out_unlock;
- if (copy_mc_to_kernel(daddr, saddr, length) == 0)
- ret = length;
+ if (copy_mc_to_kernel(daddr, saddr, copy_len) == 0)
+ ret = iomap_length(iter);
else
ret = -EIO;
diff --git a/fs/dcache.c b/fs/dcache.c
index 0f6b16ba30d0..0099077a2982 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -135,6 +135,7 @@ struct dentry_stat_t {
static DEFINE_PER_CPU(long, nr_dentry);
static DEFINE_PER_CPU(long, nr_dentry_unused);
static DEFINE_PER_CPU(long, nr_dentry_negative);
+static int dentry_negative_policy;
#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
/* Statistics gathering. */
@@ -199,6 +200,15 @@ static struct ctl_table fs_dcache_sysctls[] = {
.mode = 0444,
.proc_handler = proc_nr_dentry,
},
+ {
+ .procname = "dentry-negative",
+ .data = &dentry_negative_policy,
+ .maxlen = sizeof(dentry_negative_policy),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
static int __init init_fs_dcache_sysctls(void)
@@ -2039,8 +2049,8 @@ EXPORT_SYMBOL(d_obtain_root);
/**
* d_add_ci - lookup or allocate new dentry with case-exact name
- * @inode: the inode case-insensitive lookup has found
* @dentry: the negative dentry that was passed to the parent's lookup func
+ * @inode: the inode case-insensitive lookup has found
* @name: the case-exact name to be associated with the returned dentry
*
* This is to avoid filling the dcache with case-insensitive names to the
@@ -2093,8 +2103,8 @@ EXPORT_SYMBOL(d_add_ci);
/**
* d_same_name - compare dentry name with case-exact name
- * @parent: parent dentry
* @dentry: the negative dentry that was passed to the parent's lookup func
+ * @parent: parent dentry
* @name: the case-exact name to be associated with the returned dentry
*
* Return: true if names are same, or false
@@ -2401,6 +2411,8 @@ void d_delete(struct dentry * dentry)
* Are we the only user?
*/
if (dentry->d_lockref.count == 1) {
+ if (dentry_negative_policy)
+ __d_drop(dentry);
dentry->d_flags &= ~DCACHE_CANT_MOUNT;
dentry_unlink_inode(dentry);
} else {
diff --git a/fs/efs/super.c b/fs/efs/super.c
index e4421c10caeb..c59086b7eabf 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -15,7 +15,6 @@
#include <linux/vfs.h>
#include <linux/blkdev.h>
#include <linux/fs_context.h>
-#include <linux/fs_parser.h>
#include "efs.h"
#include <linux/efs_vh.h>
#include <linux/efs_fs_sb.h>
@@ -49,15 +48,6 @@ static struct pt_types sgi_pt_types[] = {
{0, NULL}
};
-enum {
- Opt_explicit_open,
-};
-
-static const struct fs_parameter_spec efs_param_spec[] = {
- fsparam_flag ("explicit-open", Opt_explicit_open),
- {}
-};
-
/*
* File system definition and registration.
*/
@@ -67,7 +57,6 @@ static struct file_system_type efs_fs_type = {
.kill_sb = efs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
.init_fs_context = efs_init_fs_context,
- .parameters = efs_param_spec,
};
MODULE_ALIAS_FS("efs");
@@ -265,7 +254,8 @@ static int efs_fill_super(struct super_block *s, struct fs_context *fc)
if (!sb_set_blocksize(s, EFS_BLOCKSIZE)) {
pr_err("device does not support %d byte blocks\n",
EFS_BLOCKSIZE);
- return -EINVAL;
+ return invalf(fc, "device does not support %d byte blocks\n",
+ EFS_BLOCKSIZE);
}
/* read the vh (volume header) block */
@@ -327,43 +317,22 @@ static int efs_fill_super(struct super_block *s, struct fs_context *fc)
return 0;
}
-static void efs_free_fc(struct fs_context *fc)
-{
- kfree(fc->fs_private);
-}
-
static int efs_get_tree(struct fs_context *fc)
{
return get_tree_bdev(fc, efs_fill_super);
}
-static int efs_parse_param(struct fs_context *fc, struct fs_parameter *param)
-{
- int token;
- struct fs_parse_result result;
-
- token = fs_parse(fc, efs_param_spec, param, &result);
- if (token < 0)
- return token;
- return 0;
-}
-
static int efs_reconfigure(struct fs_context *fc)
{
sync_filesystem(fc->root->d_sb);
+ fc->sb_flags |= SB_RDONLY;
return 0;
}
-struct efs_context {
- unsigned long s_mount_opts;
-};
-
static const struct fs_context_operations efs_context_opts = {
- .parse_param = efs_parse_param,
.get_tree = efs_get_tree,
.reconfigure = efs_reconfigure,
- .free = efs_free_fc,
};
/*
@@ -371,12 +340,6 @@ static const struct fs_context_operations efs_context_opts = {
*/
static int efs_init_fs_context(struct fs_context *fc)
{
- struct efs_context *ctx;
-
- ctx = kzalloc(sizeof(struct efs_context), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
- fc->fs_private = ctx;
fc->ops = &efs_context_opts;
return 0;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 320d586c3896..bed3dbe5b7cb 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -709,7 +709,9 @@ static int erofs_fc_get_tree(struct fs_context *fc)
if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid)
return get_tree_nodev(fc, erofs_fc_fill_super);
- ret = get_tree_bdev(fc, erofs_fc_fill_super);
+ ret = get_tree_bdev_flags(fc, erofs_fc_fill_super,
+ IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) ?
+ GET_TREE_BDEV_QUIET_LOOKUP : 0);
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
if (ret == -ENOTBLK) {
if (!fc->source)
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1ae4542f0bd8..826d21c4486b 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -823,7 +823,8 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
to_free = NULL;
head = file->f_ep;
if (head->first == &epi->fllink && !epi->fllink.next) {
- file->f_ep = NULL;
+ /* See eventpoll_release() for details. */
+ WRITE_ONCE(file->f_ep, NULL);
if (!is_file_epoll(file)) {
struct epitems_head *v;
v = container_of(head, struct epitems_head, epitems);
@@ -1002,7 +1003,7 @@ static struct file *epi_fget(const struct epitem *epi)
struct file *file;
file = epi->ffd.file;
- if (!atomic_long_inc_not_zero(&file->f_count))
+ if (!file_ref_get(&file->f_ref))
file = NULL;
return file;
}
@@ -1372,7 +1373,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
break;
}
}
- wake_up(&ep->wq);
+ if (sync)
+ wake_up_sync(&ep->wq);
+ else
+ wake_up(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++;
@@ -1603,7 +1607,8 @@ allocate:
spin_unlock(&file->f_lock);
goto allocate;
}
- file->f_ep = head;
+ /* See eventpoll_release() for details. */
+ WRITE_ONCE(file->f_ep, head);
to_free = NULL;
}
hlist_add_head_rcu(&epi->fllink, file->f_ep);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 44b0d418143c..494d443e9fc9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1729,6 +1729,10 @@ struct ext4_sb_info {
*/
struct work_struct s_sb_upd_work;
+ /* Atomic write unit values in bytes */
+ unsigned int s_awu_min;
+ unsigned int s_awu_max;
+
/* Ext4 fast commit sub transaction ID */
atomic_t s_fc_subtid;
@@ -3855,6 +3859,12 @@ static inline int ext4_buffer_uptodate(struct buffer_head *bh)
return buffer_uptodate(bh);
}
+static inline bool ext4_inode_can_atomic_write(struct inode *inode)
+{
+
+ return S_ISREG(inode->i_mode) && EXT4_SB(inode->i_sb)->s_awu_min > 0;
+}
+
extern int ext4_block_write_begin(handle_t *handle, struct folio *folio,
loff_t pos, unsigned len,
get_block_t *get_block);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index f14aed14b9cf..a7de03e47db0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -599,6 +599,13 @@ out:
ssize_t err;
loff_t endbyte;
+ /*
+ * There is no support for atomic writes on buffered-io yet,
+ * we should never fallback to buffered-io for DIO atomic
+ * writes.
+ */
+ WARN_ON_ONCE(iocb->ki_flags & IOCB_ATOMIC);
+
offset = iocb->ki_pos;
err = ext4_buffered_write_iter(iocb, from);
if (err < 0)
@@ -692,6 +699,20 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (IS_DAX(inode))
return ext4_dax_write_iter(iocb, from);
#endif
+
+ if (iocb->ki_flags & IOCB_ATOMIC) {
+ size_t len = iov_iter_count(from);
+ int ret;
+
+ if (len < EXT4_SB(inode->i_sb)->s_awu_min ||
+ len > EXT4_SB(inode->i_sb)->s_awu_max)
+ return -EINVAL;
+
+ ret = generic_atomic_write_valid(iocb, from);
+ if (ret)
+ return ret;
+ }
+
if (iocb->ki_flags & IOCB_DIRECT)
return ext4_dio_write_iter(iocb, from);
else
@@ -884,6 +905,9 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
+ if (ext4_inode_can_atomic_write(inode))
+ filp->f_mode |= FMODE_CAN_ATOMIC_WRITE;
+
filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 54bdd4884fe6..5b9eeb74ce47 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3444,17 +3444,34 @@ static int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
return ret;
}
+static inline bool ext4_want_directio_fallback(unsigned flags, ssize_t written)
+{
+ /* must be a directio to fall back to buffered */
+ if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) !=
+ (IOMAP_WRITE | IOMAP_DIRECT))
+ return false;
+
+ /* atomic writes are all-or-nothing */
+ if (flags & IOMAP_ATOMIC)
+ return false;
+
+ /* can only try again if we wrote nothing */
+ return written == 0;
+}
+
static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
ssize_t written, unsigned flags, struct iomap *iomap)
{
/*
* Check to see whether an error occurred while writing out the data to
- * the allocated blocks. If so, return the magic error code so that we
- * fallback to buffered I/O and attempt to complete the remainder of
- * the I/O. Any blocks that may have been allocated in preparation for
- * the direct I/O will be reused during buffered I/O.
+ * the allocated blocks. If so, return the magic error code for
+ * non-atomic write so that we fallback to buffered I/O and attempt to
+ * complete the remainder of the I/O.
+ * For non-atomic writes, any blocks that may have been
+ * allocated in preparation for the direct I/O will be reused during
+ * buffered I/O. For atomic write, we never fallback to buffered-io.
*/
- if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0)
+ if (ext4_want_directio_fallback(flags, written))
return -ENOTBLK;
return 0;
@@ -5578,6 +5595,18 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
}
}
+ if ((request_mask & STATX_WRITE_ATOMIC) && S_ISREG(inode->i_mode)) {
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned int awu_min = 0, awu_max = 0;
+
+ if (ext4_inode_can_atomic_write(inode)) {
+ awu_min = sbi->s_awu_min;
+ awu_max = sbi->s_awu_max;
+ }
+
+ generic_fill_statx_atomic_writes(stat, awu_min, awu_max);
+ }
+
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (flags & EXT4_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 790db7eac6c2..612ccbeb493b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2395,11 +2395,8 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if (fscrypt_is_nokey_name(dentry))
return -ENOKEY;
-#if IS_ENABLED(CONFIG_UNICODE)
- if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
- utf8_validate(sb->s_encoding, &dentry->d_name))
+ if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
return -EINVAL;
-#endif
retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
if (retval)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ad5543866d21..b7b9261fec3b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -421,7 +421,7 @@ submit_and_retry:
io_submit_init_bio(io, bh);
if (!bio_add_folio(io->io_bio, io_folio, bh->b_size, bh_offset(bh)))
goto submit_and_retry;
- wbc_account_cgroup_owner(io->io_wbc, &folio->page, bh->b_size);
+ wbc_account_cgroup_owner(io->io_wbc, folio, bh->b_size);
io->io_next_block++;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 16a4ce704460..7ea7178750f2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4425,6 +4425,36 @@ static int ext4_handle_clustersize(struct super_block *sb)
return 0;
}
+/*
+ * ext4_atomic_write_init: Initializes filesystem min & max atomic write units.
+ * @sb: super block
+ * TODO: Later add support for bigalloc
+ */
+static void ext4_atomic_write_init(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct block_device *bdev = sb->s_bdev;
+
+ if (!bdev_can_atomic_write(bdev))
+ return;
+
+ if (!ext4_has_feature_extents(sb))
+ return;
+
+ sbi->s_awu_min = max(sb->s_blocksize,
+ bdev_atomic_write_unit_min_bytes(bdev));
+ sbi->s_awu_max = min(sb->s_blocksize,
+ bdev_atomic_write_unit_max_bytes(bdev));
+ if (sbi->s_awu_min && sbi->s_awu_max &&
+ sbi->s_awu_min <= sbi->s_awu_max) {
+ ext4_msg(sb, KERN_NOTICE, "Supports (experimental) DIO atomic writes awu_min: %u, awu_max: %u",
+ sbi->s_awu_min, sbi->s_awu_max);
+ } else {
+ sbi->s_awu_min = 0;
+ sbi->s_awu_max = 0;
+ }
+}
+
static void ext4_fast_commit_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -5336,6 +5366,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
spin_lock_init(&sbi->s_bdev_wb_lock);
+ ext4_atomic_write_init(sb);
ext4_fast_commit_init(sb);
sb->s_root = NULL;
@@ -7329,7 +7360,7 @@ static struct file_system_type ext4_fs_type = {
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
.kill_sb = ext4_kill_sb,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("ext4");
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 94f7b084f601..e3ce763cce18 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -711,7 +711,8 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
}
if (fio->io_wbc && !is_read_io(fio->op))
- wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
+ PAGE_SIZE);
inc_page_count(fio->sbi, is_read_io(fio->op) ?
__read_io_type(page) : WB_DATA_TYPE(fio->page, false));
@@ -911,7 +912,8 @@ alloc_new:
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
+ PAGE_SIZE);
inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
@@ -1011,7 +1013,8 @@ alloc_new:
}
if (fio->io_wbc)
- wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
+ wbc_account_cgroup_owner(fio->io_wbc, page_folio(fio->page),
+ PAGE_SIZE);
io->last_block_in_bio = fio->new_blkaddr;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 22dd9dcce7ec..a7947a615db6 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -12,7 +12,6 @@
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/file.h>
-#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/slab.h>
@@ -397,6 +396,9 @@ static long f_dupfd_query(int fd, struct file *filp)
{
CLASS(fd_raw, f)(fd);
+ if (fd_empty(f))
+ return -EBADF;
+
/*
* We can do the 'fdput()' immediately, as the only thing that
* matters is the pointer value which isn't changed by the fdput.
diff --git a/fs/file.c b/fs/file.c
index eb093e736972..fb1011cf6b4a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -20,10 +20,73 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/close_range.h>
+#include <linux/file_ref.h>
#include <net/sock.h>
#include "internal.h"
+/**
+ * __file_ref_put - Slowpath of file_ref_put()
+ * @ref: Pointer to the reference count
+ * @cnt: Current reference count
+ *
+ * Invoked when the reference count is outside of the valid zone.
+ *
+ * Return:
+ * True if this was the last reference with no future references
+ * possible. This signals the caller that it can safely schedule the
+ * object, which is protected by the reference counter, for
+ * deconstruction.
+ *
+ * False if there are still active references or the put() raced
+ * with a concurrent get()/put() pair. Caller is not allowed to
+ * deconstruct the protected object.
+ */
+bool __file_ref_put(file_ref_t *ref, unsigned long cnt)
+{
+ /* Did this drop the last reference? */
+ if (likely(cnt == FILE_REF_NOREF)) {
+ /*
+ * Carefully try to set the reference count to FILE_REF_DEAD.
+ *
+ * This can fail if a concurrent get() operation has
+ * elevated it again or the corresponding put() even marked
+ * it dead already. Both are valid situations and do not
+ * require a retry. If this fails the caller is not
+ * allowed to deconstruct the object.
+ */
+ if (!atomic_long_try_cmpxchg_release(&ref->refcnt, &cnt, FILE_REF_DEAD))
+ return false;
+
+ /*
+ * The caller can safely schedule the object for
+ * deconstruction. Provide acquire ordering.
+ */
+ smp_acquire__after_ctrl_dep();
+ return true;
+ }
+
+ /*
+ * If the reference count was already in the dead zone, then this
+ * put() operation is imbalanced. Warn, put the reference count back to
+ * DEAD and tell the caller to not deconstruct the object.
+ */
+ if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) {
+ atomic_long_set(&ref->refcnt, FILE_REF_DEAD);
+ return false;
+ }
+
+ /*
+ * This is a put() operation on a saturated refcount. Restore the
+ * mean saturation value and tell the caller to not deconstruct the
+ * object.
+ */
+ if (cnt > FILE_REF_MAXREF)
+ atomic_long_set(&ref->refcnt, FILE_REF_SATURATED);
+ return false;
+}
+EXPORT_SYMBOL_GPL(__file_ref_put);
+
unsigned int sysctl_nr_open __read_mostly = 1024*1024;
unsigned int sysctl_nr_open_min = BITS_PER_LONG;
/* our min() is unusable in constant expressions ;-/ */
@@ -89,18 +152,11 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
* 'unsigned long' in some places, but simply because that is how the Linux
* kernel bitmaps are defined to work: they are not "bits in an array of bytes",
* they are very much "bits in an array of unsigned long".
- *
- * The ALIGN(nr, BITS_PER_LONG) here is for clarity: since we just multiplied
- * by that "1024/sizeof(ptr)" before, we already know there are sufficient
- * clear low bits. Clang seems to realize that, gcc ends up being confused.
- *
- * On a 128-bit machine, the ALIGN() would actually matter. In the meantime,
- * let's consider it documentation (and maybe a test-case for gcc to improve
- * its code generation ;)
*/
-static struct fdtable * alloc_fdtable(unsigned int nr)
+static struct fdtable *alloc_fdtable(unsigned int slots_wanted)
{
struct fdtable *fdt;
+ unsigned int nr;
void *data;
/*
@@ -108,22 +164,32 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
* Allocation steps are keyed to the size of the fdarray, since it
* grows far faster than any of the other dynamic data. We try to fit
* the fdarray into comfortable page-tuned chunks: starting at 1024B
- * and growing in powers of two from there on.
+ * and growing in powers of two from there on. Since we called only
+ * with slots_wanted > BITS_PER_LONG (embedded instance in files->fdtab
+ * already gives BITS_PER_LONG slots), the above boils down to
+ * 1. use the smallest power of two large enough to give us that many
+ * slots.
+ * 2. on 32bit skip 64 and 128 - the minimal capacity we want there is
+ * 256 slots (i.e. 1Kb fd array).
+ * 3. on 64bit don't skip anything, 1Kb fd array means 128 slots there
+ * and we are never going to be asked for 64 or less.
*/
- nr /= (1024 / sizeof(struct file *));
- nr = roundup_pow_of_two(nr + 1);
- nr *= (1024 / sizeof(struct file *));
- nr = ALIGN(nr, BITS_PER_LONG);
+ if (IS_ENABLED(CONFIG_32BIT) && slots_wanted < 256)
+ nr = 256;
+ else
+ nr = roundup_pow_of_two(slots_wanted);
/*
* Note that this can drive nr *below* what we had passed if sysctl_nr_open
- * had been set lower between the check in expand_files() and here. Deal
- * with that in caller, it's cheaper that way.
+ * had been set lower between the check in expand_files() and here.
*
* We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
* bitmaps handling below becomes unpleasant, to put it mildly...
*/
- if (unlikely(nr > sysctl_nr_open))
- nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
+ if (unlikely(nr > sysctl_nr_open)) {
+ nr = round_down(sysctl_nr_open, BITS_PER_LONG);
+ if (nr < slots_wanted)
+ return ERR_PTR(-EMFILE);
+ }
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL_ACCOUNT);
if (!fdt)
@@ -152,14 +218,14 @@ out_arr:
out_fdt:
kfree(fdt);
out:
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
/*
* Expand the file descriptor table.
* This function will allocate a new fdtable and both fd array and fdset, of
* the given size.
- * Return <0 error code on error; 1 on successful completion.
+ * Return <0 error code on error; 0 on successful completion.
* The files->file_lock should be held on entry, and will be held on exit.
*/
static int expand_fdtable(struct files_struct *files, unsigned int nr)
@@ -169,7 +235,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
struct fdtable *new_fdt, *cur_fdt;
spin_unlock(&files->file_lock);
- new_fdt = alloc_fdtable(nr);
+ new_fdt = alloc_fdtable(nr + 1);
/* make sure all fd_install() have seen resize_in_progress
* or have finished their rcu_read_lock_sched() section.
@@ -178,16 +244,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
synchronize_rcu();
spin_lock(&files->file_lock);
- if (!new_fdt)
- return -ENOMEM;
- /*
- * extremely unlikely race - sysctl_nr_open decreased between the check in
- * caller and alloc_fdtable(). Cheaper to catch it here...
- */
- if (unlikely(new_fdt->max_fds <= nr)) {
- __free_fdtable(new_fdt);
- return -EMFILE;
- }
+ if (IS_ERR(new_fdt))
+ return PTR_ERR(new_fdt);
cur_fdt = files_fdtable(files);
BUG_ON(nr < cur_fdt->max_fds);
copy_fdtable(new_fdt, cur_fdt);
@@ -196,15 +254,14 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
/* coupled with smp_rmb() in fd_install() */
smp_wmb();
- return 1;
+ return 0;
}
/*
* Expand files.
* This function will expand the file structures, if the requested size exceeds
* the current capacity and there is room for expansion.
- * Return <0 error code on error; 0 when nothing done; 1 when files were
- * expanded and execution may have blocked.
+ * Return <0 error code on error; 0 on success.
* The files->file_lock should be held on entry, and will be held on exit.
*/
static int expand_files(struct files_struct *files, unsigned int nr)
@@ -212,14 +269,14 @@ static int expand_files(struct files_struct *files, unsigned int nr)
__acquires(files->file_lock)
{
struct fdtable *fdt;
- int expanded = 0;
+ int error;
repeat:
fdt = files_fdtable(files);
/* Do we need to expand? */
if (nr < fdt->max_fds)
- return expanded;
+ return 0;
/* Can we expand? */
if (nr >= sysctl_nr_open)
@@ -227,7 +284,6 @@ repeat:
if (unlikely(files->resize_in_progress)) {
spin_unlock(&files->file_lock);
- expanded = 1;
wait_event(files->resize_wait, !files->resize_in_progress);
spin_lock(&files->file_lock);
goto repeat;
@@ -235,27 +291,28 @@ repeat:
/* All good, so we try */
files->resize_in_progress = true;
- expanded = expand_fdtable(files, nr);
+ error = expand_fdtable(files, nr);
files->resize_in_progress = false;
wake_up_all(&files->resize_wait);
- return expanded;
-}
-
-static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt)
-{
- __set_bit(fd, fdt->close_on_exec);
+ return error;
}
-static inline void __clear_close_on_exec(unsigned int fd, struct fdtable *fdt)
+static inline void __set_close_on_exec(unsigned int fd, struct fdtable *fdt,
+ bool set)
{
- if (test_bit(fd, fdt->close_on_exec))
- __clear_bit(fd, fdt->close_on_exec);
+ if (set) {
+ __set_bit(fd, fdt->close_on_exec);
+ } else {
+ if (test_bit(fd, fdt->close_on_exec))
+ __clear_bit(fd, fdt->close_on_exec);
+ }
}
-static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
+static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt, bool set)
{
__set_bit(fd, fdt->open_fds);
+ __set_close_on_exec(fd, fdt, set);
fd /= BITS_PER_LONG;
if (!~fdt->open_fds[fd])
__set_bit(fd, fdt->full_fds_bits);
@@ -264,7 +321,9 @@ static inline void __set_open_fd(unsigned int fd, struct fdtable *fdt)
static inline void __clear_open_fd(unsigned int fd, struct fdtable *fdt)
{
__clear_bit(fd, fdt->open_fds);
- __clear_bit(fd / BITS_PER_LONG, fdt->full_fds_bits);
+ fd /= BITS_PER_LONG;
+ if (test_bit(fd, fdt->full_fds_bits))
+ __clear_bit(fd, fdt->full_fds_bits);
}
static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
@@ -306,7 +365,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
struct file **old_fds, **new_fds;
unsigned int open_files, i;
struct fdtable *old_fdt, *new_fdt;
- int error;
newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
if (!newf)
@@ -338,17 +396,10 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
if (new_fdt != &newf->fdtab)
__free_fdtable(new_fdt);
- new_fdt = alloc_fdtable(open_files - 1);
- if (!new_fdt) {
- error = -ENOMEM;
- goto out_release;
- }
-
- /* beyond sysctl_nr_open; nothing to do */
- if (unlikely(new_fdt->max_fds < open_files)) {
- __free_fdtable(new_fdt);
- error = -EMFILE;
- goto out_release;
+ new_fdt = alloc_fdtable(open_files);
+ if (IS_ERR(new_fdt)) {
+ kmem_cache_free(files_cachep, newf);
+ return ERR_CAST(new_fdt);
}
/*
@@ -389,10 +440,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, struct fd_range *punch_ho
rcu_assign_pointer(newf->fdt, new_fdt);
return newf;
-
-out_release:
- kmem_cache_free(files_cachep, newf);
- return ERR_PTR(error);
}
static struct fdtable *close_files(struct files_struct * files)
@@ -413,7 +460,7 @@ static struct fdtable *close_files(struct files_struct * files)
set = fdt->open_fds[j++];
while (set) {
if (set & 1) {
- struct file * file = xchg(&fdt->fd[i], NULL);
+ struct file *file = fdt->fd[i];
if (file) {
filp_close(file, files);
cond_resched();
@@ -470,6 +517,15 @@ static unsigned int find_next_fd(struct fdtable *fdt, unsigned int start)
unsigned int maxfd = fdt->max_fds; /* always multiple of BITS_PER_LONG */
unsigned int maxbit = maxfd / BITS_PER_LONG;
unsigned int bitbit = start / BITS_PER_LONG;
+ unsigned int bit;
+
+ /*
+ * Try to avoid looking at the second level bitmap
+ */
+ bit = find_next_zero_bit(&fdt->open_fds[bitbit], BITS_PER_LONG,
+ start & (BITS_PER_LONG - 1));
+ if (bit < BITS_PER_LONG)
+ return bit + bitbit * BITS_PER_LONG;
bitbit = find_next_zero_bit(fdt->full_fds_bits, maxbit, bitbit) * BITS_PER_LONG;
if (bitbit >= maxfd)
@@ -496,7 +552,7 @@ repeat:
if (fd < files->next_fd)
fd = files->next_fd;
- if (fd < fdt->max_fds)
+ if (likely(fd < fdt->max_fds))
fd = find_next_fd(fdt, fd);
/*
@@ -504,36 +560,22 @@ repeat:
* will limit the total number of files that can be opened.
*/
error = -EMFILE;
- if (fd >= end)
+ if (unlikely(fd >= end))
goto out;
- error = expand_files(files, fd);
- if (error < 0)
- goto out;
+ if (unlikely(fd >= fdt->max_fds)) {
+ error = expand_files(files, fd);
+ if (error < 0)
+ goto out;
- /*
- * If we needed to expand the fs array we
- * might have blocked - try again.
- */
- if (error)
goto repeat;
+ }
if (start <= files->next_fd)
files->next_fd = fd + 1;
- __set_open_fd(fd, fdt);
- if (flags & O_CLOEXEC)
- __set_close_on_exec(fd, fdt);
- else
- __clear_close_on_exec(fd, fdt);
+ __set_open_fd(fd, fdt, flags & O_CLOEXEC);
error = fd;
-#if 1
- /* Sanity check */
- if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
- printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
- rcu_assign_pointer(fdt->fd[fd], NULL);
- }
-#endif
out:
spin_unlock(&files->file_lock);
@@ -599,7 +641,7 @@ void fd_install(unsigned int fd, struct file *file)
rcu_read_unlock_sched();
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
- BUG_ON(fdt->fd[fd] != NULL);
+ WARN_ON(fdt->fd[fd] != NULL);
rcu_assign_pointer(fdt->fd[fd], file);
spin_unlock(&files->file_lock);
return;
@@ -713,7 +755,7 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
}
/**
- * __close_range() - Close all file descriptors in a given range.
+ * sys_close_range() - Close all file descriptors in a given range.
*
* @fd: starting file descriptor to close
* @max_fd: last file descriptor to close
@@ -721,8 +763,10 @@ static inline void __range_close(struct files_struct *files, unsigned int fd,
*
* This closes a range of file descriptors. All file descriptors
* from @fd up to and including @max_fd are closed.
+ * Currently, errors to close a given file descriptor are ignored.
*/
-int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
+SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
+ unsigned int, flags)
{
struct task_struct *me = current;
struct files_struct *cur_fds = me->files, *fds = NULL;
@@ -839,7 +883,7 @@ static struct file *__get_file_rcu(struct file __rcu **f)
if (!file)
return NULL;
- if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
+ if (unlikely(!file_ref_get(&file->f_ref)))
return ERR_PTR(-EAGAIN);
file_reloaded = rcu_dereference_raw(*f);
@@ -853,8 +897,8 @@ static struct file *__get_file_rcu(struct file __rcu **f)
OPTIMIZER_HIDE_VAR(file_reloaded_cmp);
/*
- * atomic_long_inc_not_zero() above provided a full memory
- * barrier when we acquired a reference.
+ * file_ref_get() above provided a full memory barrier when we
+ * acquired a reference.
*
* This is paired with the write barrier from assigning to the
* __rcu protected file pointer so that if that pointer still
@@ -952,11 +996,11 @@ static inline struct file *__fget_files_rcu(struct files_struct *files,
* We need to confirm it by incrementing the refcount
* and then check the lookup again.
*
- * atomic_long_inc_not_zero() gives us a full memory
- * barrier. We only really need an 'acquire' one to
- * protect the loads below, but we don't have that.
+ * file_ref_get() gives us a full memory barrier. We
+ * only really need an 'acquire' one to protect the
+ * loads below, but we don't have that.
*/
- if (unlikely(!atomic_long_inc_not_zero(&file->f_count)))
+ if (unlikely(!file_ref_get(&file->f_ref)))
continue;
/*
@@ -1037,29 +1081,7 @@ struct file *fget_task(struct task_struct *task, unsigned int fd)
return file;
}
-struct file *lookup_fdget_rcu(unsigned int fd)
-{
- return __fget_files_rcu(current->files, fd, 0);
-
-}
-EXPORT_SYMBOL_GPL(lookup_fdget_rcu);
-
-struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd)
-{
- /* Must be called with rcu_read_lock held */
- struct files_struct *files;
- struct file *file = NULL;
-
- task_lock(task);
- files = task->files;
- if (files)
- file = __fget_files_rcu(files, fd, 0);
- task_unlock(task);
-
- return file;
-}
-
-struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *ret_fd)
+struct file *fget_task_next(struct task_struct *task, unsigned int *ret_fd)
{
/* Must be called with rcu_read_lock held */
struct files_struct *files;
@@ -1069,17 +1091,19 @@ struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *
task_lock(task);
files = task->files;
if (files) {
+ rcu_read_lock();
for (; fd < files_fdtable(files)->max_fds; fd++) {
file = __fget_files_rcu(files, fd, 0);
if (file)
break;
}
+ rcu_read_unlock();
}
task_unlock(task);
*ret_fd = fd;
return file;
}
-EXPORT_SYMBOL(task_lookup_next_fdget_rcu);
+EXPORT_SYMBOL(fget_task_next);
/*
* Lightweight file lookup - no refcnt increment if fd table isn't shared.
@@ -1096,6 +1120,13 @@ EXPORT_SYMBOL(task_lookup_next_fdget_rcu);
*
* The fput_needed flag returned by fget_light should be passed to the
* corresponding fput_light.
+ *
+ * (As an exception to rule 2, you can call filp_close between fget_light and
+ * fput_light provided that you capture a real refcount with get_file before
+ * the call to filp_close, and ensure that this real refcount is fput *after*
+ * the fput_light call.)
+ *
+ * See also the documentation in rust/kernel/file.rs.
*/
static inline struct fd __fget_light(unsigned int fd, fmode_t mask)
{
@@ -1176,13 +1207,8 @@ void __f_unlock_pos(struct file *f)
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
- struct fdtable *fdt;
spin_lock(&files->file_lock);
- fdt = files_fdtable(files);
- if (flag)
- __set_close_on_exec(fd, fdt);
- else
- __clear_close_on_exec(fd, fdt);
+ __set_close_on_exec(fd, files_fdtable(files), flag);
spin_unlock(&files->file_lock);
}
@@ -1223,11 +1249,7 @@ __releases(&files->file_lock)
goto Ebusy;
get_file(file);
rcu_assign_pointer(fdt->fd[fd], file);
- __set_open_fd(fd, fdt);
- if (flags & O_CLOEXEC)
- __set_close_on_exec(fd, fdt);
- else
- __clear_close_on_exec(fd, fdt);
+ __set_open_fd(fd, fdt, flags & O_CLOEXEC);
spin_unlock(&files->file_lock);
if (tofree)
diff --git a/fs/file_table.c b/fs/file_table.c
index eed5ffad9997..976736be47cb 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -9,7 +9,6 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/file.h>
-#include <linux/fdtable.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
@@ -40,13 +39,17 @@ static struct files_stat_struct files_stat = {
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __ro_after_init;
+static struct kmem_cache *bfilp_cachep __ro_after_init;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
/* Container for backing file with optional user path */
struct backing_file {
struct file file;
- struct path user_path;
+ union {
+ struct path user_path;
+ freeptr_t bf_freeptr;
+ };
};
static inline struct backing_file *backing_file(struct file *f)
@@ -68,7 +71,7 @@ static inline void file_free(struct file *f)
put_cred(f->f_cred);
if (unlikely(f->f_mode & FMODE_BACKING)) {
path_put(backing_file_user_path(f));
- kfree(backing_file(f));
+ kmem_cache_free(bfilp_cachep, backing_file(f));
} else {
kmem_cache_free(filp_cachep, f);
}
@@ -165,16 +168,32 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
* the respective member when opening the file.
*/
mutex_init(&f->f_pos_lock);
- f->f_flags = flags;
- f->f_mode = OPEN_FMODE(flags);
- /* f->f_version: 0 */
+ memset(&f->f_path, 0, sizeof(f->f_path));
+ memset(&f->f_ra, 0, sizeof(f->f_ra));
+
+ f->f_flags = flags;
+ f->f_mode = OPEN_FMODE(flags);
+
+ f->f_op = NULL;
+ f->f_mapping = NULL;
+ f->private_data = NULL;
+ f->f_inode = NULL;
+ f->f_owner = NULL;
+#ifdef CONFIG_EPOLL
+ f->f_ep = NULL;
+#endif
+
+ f->f_iocb_flags = 0;
+ f->f_pos = 0;
+ f->f_wb_err = 0;
+ f->f_sb_err = 0;
/*
* We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While
* fget-rcu pattern users need to be able to handle spurious
* refcount bumps we should reinitialize the reused file first.
*/
- atomic_long_set(&f->f_count, 1);
+ file_ref_init(&f->f_ref, 1);
return 0;
}
@@ -206,7 +225,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
goto over;
}
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
if (unlikely(!f))
return ERR_PTR(-ENOMEM);
@@ -240,7 +259,7 @@ struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
struct file *f;
int error;
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+ f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
if (unlikely(!f))
return ERR_PTR(-ENOMEM);
@@ -267,13 +286,13 @@ struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
struct backing_file *ff;
int error;
- ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL);
+ ff = kmem_cache_alloc(bfilp_cachep, GFP_KERNEL);
if (unlikely(!ff))
return ERR_PTR(-ENOMEM);
error = init_file(&ff->file, flags, cred);
if (unlikely(error)) {
- kfree(ff);
+ kmem_cache_free(bfilp_cachep, ff);
return ERR_PTR(error);
}
@@ -479,7 +498,7 @@ static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
void fput(struct file *file)
{
- if (atomic_long_dec_and_test(&file->f_count)) {
+ if (file_ref_put(&file->f_ref)) {
struct task_struct *task = current;
if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) {
@@ -512,7 +531,7 @@ void fput(struct file *file)
*/
void __fput_sync(struct file *file)
{
- if (atomic_long_dec_and_test(&file->f_count))
+ if (file_ref_put(&file->f_ref))
__fput(file);
}
@@ -529,6 +548,11 @@ void __init files_init(void)
filp_cachep = kmem_cache_create("filp", sizeof(struct file), &args,
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU);
+
+ args.freeptr_offset = offsetof(struct backing_file, bf_freeptr);
+ bfilp_cachep = kmem_cache_create("bfilp", sizeof(struct backing_file),
+ &args, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
+ SLAB_ACCOUNT | SLAB_TYPESAFE_BY_RCU);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
diff --git a/fs/freevxfs/vxfs_dir.h b/fs/freevxfs/vxfs_dir.h
index fbcd603365ad..8c67627f2a3d 100644
--- a/fs/freevxfs/vxfs_dir.h
+++ b/fs/freevxfs/vxfs_dir.h
@@ -25,7 +25,7 @@
struct vxfs_dirblk {
__fs16 d_free; /* free space in dirblock */
__fs16 d_nhash; /* no of hash chains */
- __fs16 d_hash[1]; /* hash chain */
+ __fs16 d_hash[]; /* hash chain */
};
/*
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d8bec3c1bb1f..3cd99e2dc6ac 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -290,7 +290,6 @@ void __inode_attach_wb(struct inode *inode, struct folio *folio)
if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
wb_put(wb);
}
-EXPORT_SYMBOL_GPL(__inode_attach_wb);
/**
* inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
@@ -731,8 +730,9 @@ bool cleanup_offline_cgwb(struct bdi_writeback *wb)
* writeback completion, wbc_detach_inode() should be called. This is used
* to track the cgroup writeback context.
*/
-void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
- struct inode *inode)
+static void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
+ struct inode *inode)
+ __releases(&inode->i_lock)
{
if (!inode_cgwb_enabled(inode)) {
spin_unlock(&inode->i_lock);
@@ -762,7 +762,24 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
inode_switch_wbs(inode, wbc->wb_id);
}
-EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
+
+/**
+ * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite
+ * @wbc: writeback_control of interest
+ * @inode: target inode
+ *
+ * This function is to be used by __filemap_fdatawrite_range(), which is an
+ * alternative entry point into writeback code, and first ensures @inode is
+ * associated with a bdi_writeback and attaches it to @wbc.
+ */
+void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
+ struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ inode_attach_wb(inode, NULL);
+ wbc_attach_and_unlock_inode(wbc, inode);
+}
+EXPORT_SYMBOL_GPL(wbc_attach_fdatawrite_inode);
/**
* wbc_detach_inode - disassociate wbc from inode and perform foreign detection
@@ -890,17 +907,16 @@ EXPORT_SYMBOL_GPL(wbc_detach_inode);
/**
* wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
* @wbc: writeback_control of the writeback in progress
- * @page: page being written out
+ * @folio: folio being written out
* @bytes: number of bytes being written out
*
- * @bytes from @page are about to written out during the writeback
+ * @bytes from @folio are about to written out during the writeback
* controlled by @wbc. Keep the book for foreign inode detection. See
* wbc_detach_inode().
*/
-void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
+void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio,
size_t bytes)
{
- struct folio *folio;
struct cgroup_subsys_state *css;
int id;
@@ -913,7 +929,6 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
if (!wbc->wb || wbc->no_cgroup_owner)
return;
- folio = page_folio(page);
css = mem_cgroup_css_from_folio(folio);
/* dead cgroups shouldn't contribute to inode ownership arbitration */
if (!(css->flags & CSS_ONLINE))
@@ -1227,6 +1242,13 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
}
}
+static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
+ struct inode *inode)
+ __releases(&inode->i_lock)
+{
+ spin_unlock(&inode->i_lock);
+}
+
#endif /* CONFIG_CGROUP_WRITEBACK */
/*
diff --git a/fs/fs_parser.c b/fs/fs_parser.c
index 24727ec34e5a..16fa61ef56bf 100644
--- a/fs/fs_parser.c
+++ b/fs/fs_parser.c
@@ -156,6 +156,7 @@ int fs_lookup_param(struct fs_context *fc,
f = getname_kernel(param->string);
if (IS_ERR(f))
return PTR_ERR(f);
+ param->dirfd = AT_FDCWD;
put_f = true;
break;
case fs_value_is_filename:
@@ -308,6 +309,26 @@ int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p,
}
EXPORT_SYMBOL(fs_param_is_fd);
+int fs_param_is_file_or_string(struct p_log *log,
+ const struct fs_parameter_spec *p,
+ struct fs_parameter *param,
+ struct fs_parse_result *result)
+{
+ switch (param->type) {
+ case fs_value_is_string:
+ return fs_param_is_string(log, p, param, result);
+ case fs_value_is_file:
+ result->uint_32 = param->dirfd;
+ if (result->uint_32 <= INT_MAX)
+ return 0;
+ break;
+ default:
+ break;
+ }
+ return fs_param_bad_value(log, param);
+}
+EXPORT_SYMBOL(fs_param_is_file_or_string);
+
int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p,
struct fs_parameter *param, struct fs_parse_result *result)
{
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index d418d8b5367f..3334c394ce9c 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -190,6 +190,5 @@ const struct export_operations gfs2_export_ops = {
.fh_to_parent = gfs2_fh_to_parent,
.get_name = gfs2_get_name,
.get_parent = gfs2_get_parent,
- .flags = EXPORT_OP_ASYNC_LOCK,
};
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index f7dd64856c9b..1e73cf87ff88 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1586,6 +1586,7 @@ const struct file_operations gfs2_file_fops = {
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
+ .fop_flags = FOP_ASYNC_LOCK,
};
const struct file_operations gfs2_dir_fops = {
@@ -1598,6 +1599,7 @@ const struct file_operations gfs2_dir_fops = {
.lock = gfs2_lock,
.flock = gfs2_flock,
.llseek = default_llseek,
+ .fop_flags = FOP_ASYNC_LOCK,
};
#endif /* CONFIG_GFS2_FS_LOCKING_DLM */
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 269c3bc7fced..4701c4aafbf4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -34,7 +34,6 @@
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/pid_namespace.h>
-#include <linux/fdtable.h>
#include <linux/file.h>
#include "gfs2.h"
@@ -2768,25 +2767,18 @@ static struct file *gfs2_glockfd_next_file(struct gfs2_glockfd_iter *i)
i->file = NULL;
}
- rcu_read_lock();
for(;; i->fd++) {
- struct inode *inode;
-
- i->file = task_lookup_next_fdget_rcu(i->task, &i->fd);
+ i->file = fget_task_next(i->task, &i->fd);
if (!i->file) {
i->fd = 0;
break;
}
- inode = file_inode(i->file);
- if (inode->i_sb == i->sb)
+ if (file_inode(i->file)->i_sb == i->sb)
break;
- rcu_read_unlock();
fput(i->file);
- rcu_read_lock();
}
- rcu_read_unlock();
return i->file;
}
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index eeac99765f0d..3bee9b5dba5e 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -15,10 +15,11 @@
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/nls.h>
-#include <linux/parser.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/vfs.h>
@@ -111,21 +112,24 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int hfs_remount(struct super_block *sb, int *flags, char *data)
+static int hfs_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
+
sync_filesystem(sb);
- *flags |= SB_NODIRATIME;
- if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
+ fc->sb_flags |= SB_NODIRATIME;
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (!(*flags & SB_RDONLY)) {
+
+ if (!(fc->sb_flags & SB_RDONLY)) {
if (!(HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_UNMNT))) {
pr_warn("filesystem was not cleanly unmounted, running fsck.hfs is recommended. leaving read-only.\n");
sb->s_flags |= SB_RDONLY;
- *flags |= SB_RDONLY;
+ fc->sb_flags |= SB_RDONLY;
} else if (HFS_SB(sb)->mdb->drAtrb & cpu_to_be16(HFS_SB_ATTRIB_SLOCK)) {
pr_warn("filesystem is marked locked, leaving read-only.\n");
sb->s_flags |= SB_RDONLY;
- *flags |= SB_RDONLY;
+ fc->sb_flags |= SB_RDONLY;
}
}
return 0;
@@ -180,7 +184,6 @@ static const struct super_operations hfs_super_operations = {
.put_super = hfs_put_super,
.sync_fs = hfs_sync_fs,
.statfs = hfs_statfs,
- .remount_fs = hfs_remount,
.show_options = hfs_show_options,
};
@@ -188,181 +191,112 @@ enum {
opt_uid, opt_gid, opt_umask, opt_file_umask, opt_dir_umask,
opt_part, opt_session, opt_type, opt_creator, opt_quiet,
opt_codepage, opt_iocharset,
- opt_err
};
-static const match_table_t tokens = {
- { opt_uid, "uid=%u" },
- { opt_gid, "gid=%u" },
- { opt_umask, "umask=%o" },
- { opt_file_umask, "file_umask=%o" },
- { opt_dir_umask, "dir_umask=%o" },
- { opt_part, "part=%u" },
- { opt_session, "session=%u" },
- { opt_type, "type=%s" },
- { opt_creator, "creator=%s" },
- { opt_quiet, "quiet" },
- { opt_codepage, "codepage=%s" },
- { opt_iocharset, "iocharset=%s" },
- { opt_err, NULL }
+static const struct fs_parameter_spec hfs_param_spec[] = {
+ fsparam_u32 ("uid", opt_uid),
+ fsparam_u32 ("gid", opt_gid),
+ fsparam_u32oct ("umask", opt_umask),
+ fsparam_u32oct ("file_umask", opt_file_umask),
+ fsparam_u32oct ("dir_umask", opt_dir_umask),
+ fsparam_u32 ("part", opt_part),
+ fsparam_u32 ("session", opt_session),
+ fsparam_string ("type", opt_type),
+ fsparam_string ("creator", opt_creator),
+ fsparam_flag ("quiet", opt_quiet),
+ fsparam_string ("codepage", opt_codepage),
+ fsparam_string ("iocharset", opt_iocharset),
+ {}
};
-static inline int match_fourchar(substring_t *arg, u32 *result)
-{
- if (arg->to - arg->from != 4)
- return -EINVAL;
- memcpy(result, arg->from, 4);
- return 0;
-}
-
/*
- * parse_options()
+ * hfs_parse_param()
*
- * adapted from linux/fs/msdos/inode.c written 1992,93 by Werner Almesberger
- * This function is called by hfs_read_super() to parse the mount options.
+ * This function is called by the vfs to parse the mount options.
*/
-static int parse_options(char *options, struct hfs_sb_info *hsb)
+static int hfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int tmp, token;
-
- /* initialize the sb with defaults */
- hsb->s_uid = current_uid();
- hsb->s_gid = current_gid();
- hsb->s_file_umask = 0133;
- hsb->s_dir_umask = 0022;
- hsb->s_type = hsb->s_creator = cpu_to_be32(0x3f3f3f3f); /* == '????' */
- hsb->s_quiet = 0;
- hsb->part = -1;
- hsb->session = -1;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case opt_uid:
- if (match_int(&args[0], &tmp)) {
- pr_err("uid requires an argument\n");
- return 0;
- }
- hsb->s_uid = make_kuid(current_user_ns(), (uid_t)tmp);
- if (!uid_valid(hsb->s_uid)) {
- pr_err("invalid uid %d\n", tmp);
- return 0;
- }
- break;
- case opt_gid:
- if (match_int(&args[0], &tmp)) {
- pr_err("gid requires an argument\n");
- return 0;
- }
- hsb->s_gid = make_kgid(current_user_ns(), (gid_t)tmp);
- if (!gid_valid(hsb->s_gid)) {
- pr_err("invalid gid %d\n", tmp);
- return 0;
- }
- break;
- case opt_umask:
- if (match_octal(&args[0], &tmp)) {
- pr_err("umask requires a value\n");
- return 0;
- }
- hsb->s_file_umask = (umode_t)tmp;
- hsb->s_dir_umask = (umode_t)tmp;
- break;
- case opt_file_umask:
- if (match_octal(&args[0], &tmp)) {
- pr_err("file_umask requires a value\n");
- return 0;
- }
- hsb->s_file_umask = (umode_t)tmp;
- break;
- case opt_dir_umask:
- if (match_octal(&args[0], &tmp)) {
- pr_err("dir_umask requires a value\n");
- return 0;
- }
- hsb->s_dir_umask = (umode_t)tmp;
- break;
- case opt_part:
- if (match_int(&args[0], &hsb->part)) {
- pr_err("part requires an argument\n");
- return 0;
- }
- break;
- case opt_session:
- if (match_int(&args[0], &hsb->session)) {
- pr_err("session requires an argument\n");
- return 0;
- }
- break;
- case opt_type:
- if (match_fourchar(&args[0], &hsb->s_type)) {
- pr_err("type requires a 4 character value\n");
- return 0;
- }
- break;
- case opt_creator:
- if (match_fourchar(&args[0], &hsb->s_creator)) {
- pr_err("creator requires a 4 character value\n");
- return 0;
- }
- break;
- case opt_quiet:
- hsb->s_quiet = 1;
- break;
- case opt_codepage:
- if (hsb->nls_disk) {
- pr_err("unable to change codepage\n");
- return 0;
- }
- p = match_strdup(&args[0]);
- if (p)
- hsb->nls_disk = load_nls(p);
- if (!hsb->nls_disk) {
- pr_err("unable to load codepage \"%s\"\n", p);
- kfree(p);
- return 0;
- }
- kfree(p);
- break;
- case opt_iocharset:
- if (hsb->nls_io) {
- pr_err("unable to change iocharset\n");
- return 0;
- }
- p = match_strdup(&args[0]);
- if (p)
- hsb->nls_io = load_nls(p);
- if (!hsb->nls_io) {
- pr_err("unable to load iocharset \"%s\"\n", p);
- kfree(p);
- return 0;
- }
- kfree(p);
- break;
- default:
- return 0;
- }
- }
+ struct hfs_sb_info *hsb = fc->s_fs_info;
+ struct fs_parse_result result;
+ int opt;
+
+ /* hfs does not honor any fs-specific options on remount */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE)
+ return 0;
- if (hsb->nls_disk && !hsb->nls_io) {
- hsb->nls_io = load_nls_default();
+ opt = fs_parse(fc, hfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case opt_uid:
+ hsb->s_uid = result.uid;
+ break;
+ case opt_gid:
+ hsb->s_gid = result.gid;
+ break;
+ case opt_umask:
+ hsb->s_file_umask = (umode_t)result.uint_32;
+ hsb->s_dir_umask = (umode_t)result.uint_32;
+ break;
+ case opt_file_umask:
+ hsb->s_file_umask = (umode_t)result.uint_32;
+ break;
+ case opt_dir_umask:
+ hsb->s_dir_umask = (umode_t)result.uint_32;
+ break;
+ case opt_part:
+ hsb->part = result.uint_32;
+ break;
+ case opt_session:
+ hsb->session = result.uint_32;
+ break;
+ case opt_type:
+ if (strlen(param->string) != 4) {
+ pr_err("type requires a 4 character value\n");
+ return -EINVAL;
+ }
+ memcpy(&hsb->s_type, param->string, 4);
+ break;
+ case opt_creator:
+ if (strlen(param->string) != 4) {
+ pr_err("creator requires a 4 character value\n");
+ return -EINVAL;
+ }
+ memcpy(&hsb->s_creator, param->string, 4);
+ break;
+ case opt_quiet:
+ hsb->s_quiet = 1;
+ break;
+ case opt_codepage:
+ if (hsb->nls_disk) {
+ pr_err("unable to change codepage\n");
+ return -EINVAL;
+ }
+ hsb->nls_disk = load_nls(param->string);
+ if (!hsb->nls_disk) {
+ pr_err("unable to load codepage \"%s\"\n",
+ param->string);
+ return -EINVAL;
+ }
+ break;
+ case opt_iocharset:
+ if (hsb->nls_io) {
+ pr_err("unable to change iocharset\n");
+ return -EINVAL;
+ }
+ hsb->nls_io = load_nls(param->string);
if (!hsb->nls_io) {
- pr_err("unable to load default iocharset\n");
- return 0;
+ pr_err("unable to load iocharset \"%s\"\n",
+ param->string);
+ return -EINVAL;
}
+ break;
+ default:
+ return -EINVAL;
}
- hsb->s_dir_umask &= 0777;
- hsb->s_file_umask &= 0577;
- return 1;
+ return 0;
}
/*
@@ -376,29 +310,25 @@ static int parse_options(char *options, struct hfs_sb_info *hsb)
* hfs_btree_init() to get the necessary data about the extents and
* catalog B-trees and, finally, reading the root inode into memory.
*/
-static int hfs_fill_super(struct super_block *sb, void *data, int silent)
+static int hfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
- struct hfs_sb_info *sbi;
+ struct hfs_sb_info *sbi = HFS_SB(sb);
struct hfs_find_data fd;
hfs_cat_rec rec;
struct inode *root_inode;
+ int silent = fc->sb_flags & SB_SILENT;
int res;
- sbi = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL);
- if (!sbi)
- return -ENOMEM;
+ /* load_nls_default does not fail */
+ if (sbi->nls_disk && !sbi->nls_io)
+ sbi->nls_io = load_nls_default();
+ sbi->s_dir_umask &= 0777;
+ sbi->s_file_umask &= 0577;
- sbi->sb = sb;
- sb->s_fs_info = sbi;
spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->mdb_work, flush_mdb);
- res = -EINVAL;
- if (!parse_options((char *)data, sbi)) {
- pr_err("unable to parse mount options\n");
- goto bail;
- }
-
+ sbi->sb = sb;
sb->s_op = &hfs_super_operations;
sb->s_xattr = hfs_xattr_handlers;
sb->s_flags |= SB_NODIRATIME;
@@ -451,18 +381,56 @@ bail:
return res;
}
-static struct dentry *hfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int hfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, hfs_fill_super);
+}
+
+static void hfs_free_fc(struct fs_context *fc)
+{
+ kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations hfs_context_ops = {
+ .parse_param = hfs_parse_param,
+ .get_tree = hfs_get_tree,
+ .reconfigure = hfs_reconfigure,
+ .free = hfs_free_fc,
+};
+
+static int hfs_init_fs_context(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
+ struct hfs_sb_info *hsb;
+
+ hsb = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL);
+ if (!hsb)
+ return -ENOMEM;
+
+ fc->s_fs_info = hsb;
+ fc->ops = &hfs_context_ops;
+
+ if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE) {
+ /* initialize options with defaults */
+ hsb->s_uid = current_uid();
+ hsb->s_gid = current_gid();
+ hsb->s_file_umask = 0133;
+ hsb->s_dir_umask = 0022;
+ hsb->s_type = cpu_to_be32(0x3f3f3f3f); /* == '????' */
+ hsb->s_creator = cpu_to_be32(0x3f3f3f3f); /* == '????' */
+ hsb->s_quiet = 0;
+ hsb->part = -1;
+ hsb->session = -1;
+ }
+
+ return 0;
}
static struct file_system_type hfs_fs_type = {
.owner = THIS_MODULE,
.name = "hfs",
- .mount = hfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = hfs_init_fs_context,
};
MODULE_ALIAS_FS("hfs");
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 59ce81dca73f..2f089bff0095 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -21,6 +21,7 @@
#include <linux/mutex.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
+#include <linux/fs_context.h>
#include "hfsplus_raw.h"
#define DBG_BNODE_REFS 0x00000001
@@ -156,6 +157,7 @@ struct hfsplus_sb_info {
/* Runtime variables */
u32 blockoffset;
+ u32 min_io_size;
sector_t part_start;
sector_t sect_count;
int fs_shift;
@@ -307,7 +309,7 @@ struct hfsplus_readdir_data {
*/
static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
{
- return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
+ return max_t(unsigned short, HFSPLUS_SB(sb)->min_io_size,
HFSPLUS_SECTOR_SIZE);
}
@@ -496,8 +498,7 @@ long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
/* options.c */
void hfsplus_fill_defaults(struct hfsplus_sb_info *opts);
-int hfsplus_parse_options_remount(char *input, int *force);
-int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi);
+int hfsplus_parse_param(struct fs_context *fc, struct fs_parameter *param);
int hfsplus_show_options(struct seq_file *seq, struct dentry *root);
/* part_tbl.c */
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index c94a58762ad6..a66a09a56bf7 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -12,7 +12,8 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/sched.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/nls.h>
#include <linux/mount.h>
#include <linux/seq_file.h>
@@ -23,26 +24,23 @@ enum {
opt_creator, opt_type,
opt_umask, opt_uid, opt_gid,
opt_part, opt_session, opt_nls,
- opt_nodecompose, opt_decompose,
- opt_barrier, opt_nobarrier,
- opt_force, opt_err
+ opt_decompose, opt_barrier,
+ opt_force,
};
-static const match_table_t tokens = {
- { opt_creator, "creator=%s" },
- { opt_type, "type=%s" },
- { opt_umask, "umask=%o" },
- { opt_uid, "uid=%u" },
- { opt_gid, "gid=%u" },
- { opt_part, "part=%u" },
- { opt_session, "session=%u" },
- { opt_nls, "nls=%s" },
- { opt_decompose, "decompose" },
- { opt_nodecompose, "nodecompose" },
- { opt_barrier, "barrier" },
- { opt_nobarrier, "nobarrier" },
- { opt_force, "force" },
- { opt_err, NULL }
+static const struct fs_parameter_spec hfs_param_spec[] = {
+ fsparam_string ("creator", opt_creator),
+ fsparam_string ("type", opt_type),
+ fsparam_u32oct ("umask", opt_umask),
+ fsparam_u32 ("uid", opt_uid),
+ fsparam_u32 ("gid", opt_gid),
+ fsparam_u32 ("part", opt_part),
+ fsparam_u32 ("session", opt_session),
+ fsparam_string ("nls", opt_nls),
+ fsparam_flag_no ("decompose", opt_decompose),
+ fsparam_flag_no ("barrier", opt_barrier),
+ fsparam_flag ("force", opt_force),
+ {}
};
/* Initialize an options object to reasonable defaults */
@@ -60,162 +58,89 @@ void hfsplus_fill_defaults(struct hfsplus_sb_info *opts)
opts->session = -1;
}
-/* convert a "four byte character" to a 32 bit int with error checks */
-static inline int match_fourchar(substring_t *arg, u32 *result)
+/* Parse options from mount. Returns nonzero errno on failure */
+int hfsplus_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- if (arg->to - arg->from != 4)
- return -EINVAL;
- memcpy(result, arg->from, 4);
- return 0;
-}
-
-int hfsplus_parse_options_remount(char *input, int *force)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int token;
-
- if (!input)
- return 1;
-
- while ((p = strsep(&input, ",")) != NULL) {
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case opt_force:
- *force = 1;
- break;
- default:
- break;
+ struct hfsplus_sb_info *sbi = fc->s_fs_info;
+ struct fs_parse_result result;
+ int opt;
+
+ /*
+ * Only the force option is examined during remount, all others
+ * are ignored.
+ */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
+ strncmp(param->key, "force", 5))
+ return 0;
+
+ opt = fs_parse(fc, hfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case opt_creator:
+ if (strlen(param->string) != 4) {
+ pr_err("creator requires a 4 character value\n");
+ return -EINVAL;
}
- }
-
- return 1;
-}
-
-/* Parse options from mount. Returns 0 on failure */
-/* input is the options passed to mount() as a string */
-int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi)
-{
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int tmp, token;
-
- if (!input)
- goto done;
-
- while ((p = strsep(&input, ",")) != NULL) {
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case opt_creator:
- if (match_fourchar(&args[0], &sbi->creator)) {
- pr_err("creator requires a 4 character value\n");
- return 0;
- }
- break;
- case opt_type:
- if (match_fourchar(&args[0], &sbi->type)) {
- pr_err("type requires a 4 character value\n");
- return 0;
- }
- break;
- case opt_umask:
- if (match_octal(&args[0], &tmp)) {
- pr_err("umask requires a value\n");
- return 0;
- }
- sbi->umask = (umode_t)tmp;
- break;
- case opt_uid:
- if (match_int(&args[0], &tmp)) {
- pr_err("uid requires an argument\n");
- return 0;
- }
- sbi->uid = make_kuid(current_user_ns(), (uid_t)tmp);
- if (!uid_valid(sbi->uid)) {
- pr_err("invalid uid specified\n");
- return 0;
- } else {
- set_bit(HFSPLUS_SB_UID, &sbi->flags);
- }
- break;
- case opt_gid:
- if (match_int(&args[0], &tmp)) {
- pr_err("gid requires an argument\n");
- return 0;
- }
- sbi->gid = make_kgid(current_user_ns(), (gid_t)tmp);
- if (!gid_valid(sbi->gid)) {
- pr_err("invalid gid specified\n");
- return 0;
- } else {
- set_bit(HFSPLUS_SB_GID, &sbi->flags);
- }
- break;
- case opt_part:
- if (match_int(&args[0], &sbi->part)) {
- pr_err("part requires an argument\n");
- return 0;
- }
- break;
- case opt_session:
- if (match_int(&args[0], &sbi->session)) {
- pr_err("session requires an argument\n");
- return 0;
- }
- break;
- case opt_nls:
- if (sbi->nls) {
- pr_err("unable to change nls mapping\n");
- return 0;
- }
- p = match_strdup(&args[0]);
- if (p)
- sbi->nls = load_nls(p);
- if (!sbi->nls) {
- pr_err("unable to load nls mapping \"%s\"\n",
- p);
- kfree(p);
- return 0;
- }
- kfree(p);
- break;
- case opt_decompose:
- clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
- break;
- case opt_nodecompose:
+ memcpy(&sbi->creator, param->string, 4);
+ break;
+ case opt_type:
+ if (strlen(param->string) != 4) {
+ pr_err("type requires a 4 character value\n");
+ return -EINVAL;
+ }
+ memcpy(&sbi->type, param->string, 4);
+ break;
+ case opt_umask:
+ sbi->umask = (umode_t)result.uint_32;
+ break;
+ case opt_uid:
+ sbi->uid = result.uid;
+ set_bit(HFSPLUS_SB_UID, &sbi->flags);
+ break;
+ case opt_gid:
+ sbi->gid = result.gid;
+ set_bit(HFSPLUS_SB_GID, &sbi->flags);
+ break;
+ case opt_part:
+ sbi->part = result.uint_32;
+ break;
+ case opt_session:
+ sbi->session = result.uint_32;
+ break;
+ case opt_nls:
+ if (sbi->nls) {
+ pr_err("unable to change nls mapping\n");
+ return -EINVAL;
+ }
+ sbi->nls = load_nls(param->string);
+ if (!sbi->nls) {
+ pr_err("unable to load nls mapping \"%s\"\n",
+ param->string);
+ return -EINVAL;
+ }
+ break;
+ case opt_decompose:
+ if (result.negated)
set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
- break;
- case opt_barrier:
- clear_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
- break;
- case opt_nobarrier:
+ else
+ clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags);
+ break;
+ case opt_barrier:
+ if (result.negated)
set_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
- break;
- case opt_force:
- set_bit(HFSPLUS_SB_FORCE, &sbi->flags);
- break;
- default:
- return 0;
- }
- }
-
-done:
- if (!sbi->nls) {
- /* try utf8 first, as this is the old default behaviour */
- sbi->nls = load_nls("utf8");
- if (!sbi->nls)
- sbi->nls = load_nls_default();
- if (!sbi->nls)
- return 0;
+ else
+ clear_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags);
+ break;
+ case opt_force:
+ set_bit(HFSPLUS_SB_FORCE, &sbi->flags);
+ break;
+ default:
+ return -EINVAL;
}
- return 1;
+ return 0;
}
int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 97920202790f..948b8aaee33e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -14,6 +14,7 @@
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
#include <linux/slab.h>
#include <linux/vfs.h>
#include <linux/nls.h>
@@ -332,34 +333,33 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
+static int hfsplus_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
+
sync_filesystem(sb);
- if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb))
+ if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb))
return 0;
- if (!(*flags & SB_RDONLY)) {
- struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr;
- int force = 0;
-
- if (!hfsplus_parse_options_remount(data, &force))
- return -EINVAL;
+ if (!(fc->sb_flags & SB_RDONLY)) {
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
+ struct hfsplus_vh *vhdr = sbi->s_vhdr;
if (!(vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_UNMNT))) {
pr_warn("filesystem was not cleanly unmounted, running fsck.hfsplus is recommended. leaving read-only.\n");
sb->s_flags |= SB_RDONLY;
- *flags |= SB_RDONLY;
- } else if (force) {
+ fc->sb_flags |= SB_RDONLY;
+ } else if (test_bit(HFSPLUS_SB_FORCE, &sbi->flags)) {
/* nothing */
} else if (vhdr->attributes &
cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) {
pr_warn("filesystem is marked locked, leaving read-only.\n");
sb->s_flags |= SB_RDONLY;
- *flags |= SB_RDONLY;
+ fc->sb_flags |= SB_RDONLY;
} else if (vhdr->attributes &
cpu_to_be32(HFSPLUS_VOL_JOURNALED)) {
pr_warn("filesystem is marked journaled, leaving read-only.\n");
sb->s_flags |= SB_RDONLY;
- *flags |= SB_RDONLY;
+ fc->sb_flags |= SB_RDONLY;
}
}
return 0;
@@ -373,38 +373,33 @@ static const struct super_operations hfsplus_sops = {
.put_super = hfsplus_put_super,
.sync_fs = hfsplus_sync_fs,
.statfs = hfsplus_statfs,
- .remount_fs = hfsplus_remount,
.show_options = hfsplus_show_options,
};
-static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
+static int hfsplus_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct hfsplus_vh *vhdr;
- struct hfsplus_sb_info *sbi;
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
hfsplus_cat_entry entry;
struct hfs_find_data fd;
struct inode *root, *inode;
struct qstr str;
- struct nls_table *nls = NULL;
+ struct nls_table *nls;
u64 last_fs_block, last_fs_page;
+ int silent = fc->sb_flags & SB_SILENT;
int err;
- err = -ENOMEM;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- goto out;
-
- sb->s_fs_info = sbi;
mutex_init(&sbi->alloc_mutex);
mutex_init(&sbi->vh_mutex);
spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
- hfsplus_fill_defaults(sbi);
err = -EINVAL;
- if (!hfsplus_parse_options(data, sbi)) {
- pr_err("unable to parse mount options\n");
- goto out_unload_nls;
+ if (!sbi->nls) {
+ /* try utf8 first, as this is the old default behaviour */
+ sbi->nls = load_nls("utf8");
+ if (!sbi->nls)
+ sbi->nls = load_nls_default();
}
/* temporarily use utf8 to correctly find the hidden dir below */
@@ -616,7 +611,6 @@ out_unload_nls:
unload_nls(sbi->nls);
unload_nls(nls);
kfree(sbi);
-out:
return err;
}
@@ -641,18 +635,46 @@ static void hfsplus_free_inode(struct inode *inode)
#define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info)
-static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int hfsplus_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, hfsplus_fill_super);
+}
+
+static void hfsplus_free_fc(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
+ kfree(fc->s_fs_info);
+}
+
+static const struct fs_context_operations hfsplus_context_ops = {
+ .parse_param = hfsplus_parse_param,
+ .get_tree = hfsplus_get_tree,
+ .reconfigure = hfsplus_reconfigure,
+ .free = hfsplus_free_fc,
+};
+
+static int hfsplus_init_fs_context(struct fs_context *fc)
+{
+ struct hfsplus_sb_info *sbi;
+
+ sbi = kzalloc(sizeof(struct hfsplus_sb_info), GFP_KERNEL);
+ if (!sbi)
+ return -ENOMEM;
+
+ if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE)
+ hfsplus_fill_defaults(sbi);
+
+ fc->s_fs_info = sbi;
+ fc->ops = &hfsplus_context_ops;
+
+ return 0;
}
static struct file_system_type hfsplus_fs_type = {
.owner = THIS_MODULE,
.name = "hfsplus",
- .mount = hfsplus_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = hfsplus_init_fs_context,
};
MODULE_ALIAS_FS("hfsplus");
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 9592ffcb44e5..74801911bc1c 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -172,6 +172,8 @@ int hfsplus_read_wrapper(struct super_block *sb)
if (!blocksize)
goto out;
+ sbi->min_io_size = blocksize;
+
if (hfsplus_get_last_session(sb, &part_start, &part_size))
goto out;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index e73717daa5f9..27567920abe4 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -9,7 +9,8 @@
#include "hpfs_fn.h"
#include <linux/module.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/init.h>
#include <linux/statfs.h>
#include <linux/magic.h>
@@ -90,7 +91,7 @@ void hpfs_error(struct super_block *s, const char *fmt, ...)
hpfs_sb(s)->sb_was_error = 1;
}
-/*
+/*
* A little trick to detect cycles in many hpfs structures and don't let the
* kernel crash on corrupted filesystem. When first called, set c2 to 0.
*
@@ -272,146 +273,70 @@ static void destroy_inodecache(void)
kmem_cache_destroy(hpfs_inode_cachep);
}
-/*
- * A tiny parser for option strings, stolen from dosfs.
- * Stolen again from read-only hpfs.
- * And updated for table-driven option parsing.
- */
-
enum {
- Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case_lower, Opt_case_asis,
- Opt_check_none, Opt_check_normal, Opt_check_strict,
- Opt_err_cont, Opt_err_ro, Opt_err_panic,
- Opt_eas_no, Opt_eas_ro, Opt_eas_rw,
- Opt_chkdsk_no, Opt_chkdsk_errors, Opt_chkdsk_always,
- Opt_timeshift, Opt_err,
+ Opt_help, Opt_uid, Opt_gid, Opt_umask, Opt_case,
+ Opt_check, Opt_err, Opt_eas, Opt_chkdsk, Opt_timeshift,
};
-static const match_table_t tokens = {
- {Opt_help, "help"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_umask, "umask=%o"},
- {Opt_case_lower, "case=lower"},
- {Opt_case_asis, "case=asis"},
- {Opt_check_none, "check=none"},
- {Opt_check_normal, "check=normal"},
- {Opt_check_strict, "check=strict"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_err_panic, "errors=panic"},
- {Opt_eas_no, "eas=no"},
- {Opt_eas_ro, "eas=ro"},
- {Opt_eas_rw, "eas=rw"},
- {Opt_chkdsk_no, "chkdsk=no"},
- {Opt_chkdsk_errors, "chkdsk=errors"},
- {Opt_chkdsk_always, "chkdsk=always"},
- {Opt_timeshift, "timeshift=%d"},
- {Opt_err, NULL},
+static const struct constant_table hpfs_param_case[] = {
+ {"asis", 0},
+ {"lower", 1},
+ {}
};
-static int parse_opts(char *opts, kuid_t *uid, kgid_t *gid, umode_t *umask,
- int *lowercase, int *eas, int *chk, int *errs,
- int *chkdsk, int *timeshift)
-{
- char *p;
- int option;
+static const struct constant_table hpfs_param_check[] = {
+ {"none", 0},
+ {"normal", 1},
+ {"strict", 2},
+ {}
+};
- if (!opts)
- return 1;
+static const struct constant_table hpfs_param_err[] = {
+ {"continue", 0},
+ {"remount-ro", 1},
+ {"panic", 2},
+ {}
+};
- /*pr_info("Parsing opts: '%s'\n",opts);*/
-
- while ((p = strsep(&opts, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_help:
- return 2;
- case Opt_uid:
- if (match_int(args, &option))
- return 0;
- *uid = make_kuid(current_user_ns(), option);
- if (!uid_valid(*uid))
- return 0;
- break;
- case Opt_gid:
- if (match_int(args, &option))
- return 0;
- *gid = make_kgid(current_user_ns(), option);
- if (!gid_valid(*gid))
- return 0;
- break;
- case Opt_umask:
- if (match_octal(args, &option))
- return 0;
- *umask = option;
- break;
- case Opt_case_lower:
- *lowercase = 1;
- break;
- case Opt_case_asis:
- *lowercase = 0;
- break;
- case Opt_check_none:
- *chk = 0;
- break;
- case Opt_check_normal:
- *chk = 1;
- break;
- case Opt_check_strict:
- *chk = 2;
- break;
- case Opt_err_cont:
- *errs = 0;
- break;
- case Opt_err_ro:
- *errs = 1;
- break;
- case Opt_err_panic:
- *errs = 2;
- break;
- case Opt_eas_no:
- *eas = 0;
- break;
- case Opt_eas_ro:
- *eas = 1;
- break;
- case Opt_eas_rw:
- *eas = 2;
- break;
- case Opt_chkdsk_no:
- *chkdsk = 0;
- break;
- case Opt_chkdsk_errors:
- *chkdsk = 1;
- break;
- case Opt_chkdsk_always:
- *chkdsk = 2;
- break;
- case Opt_timeshift:
- {
- int m = 1;
- char *rhs = args[0].from;
- if (!rhs || !*rhs)
- return 0;
- if (*rhs == '-') m = -1;
- if (*rhs == '+' || *rhs == '-') rhs++;
- *timeshift = simple_strtoul(rhs, &rhs, 0) * m;
- if (*rhs)
- return 0;
- break;
- }
- default:
- return 0;
- }
- }
- return 1;
-}
+static const struct constant_table hpfs_param_eas[] = {
+ {"no", 0},
+ {"ro", 1},
+ {"rw", 2},
+ {}
+};
+
+static const struct constant_table hpfs_param_chkdsk[] = {
+ {"no", 0},
+ {"errors", 1},
+ {"always", 2},
+ {}
+};
+
+static const struct fs_parameter_spec hpfs_param_spec[] = {
+ fsparam_flag ("help", Opt_help),
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_u32oct ("umask", Opt_umask),
+ fsparam_enum ("case", Opt_case, hpfs_param_case),
+ fsparam_enum ("check", Opt_check, hpfs_param_check),
+ fsparam_enum ("errors", Opt_err, hpfs_param_err),
+ fsparam_enum ("eas", Opt_eas, hpfs_param_eas),
+ fsparam_enum ("chkdsk", Opt_chkdsk, hpfs_param_chkdsk),
+ fsparam_s32 ("timeshift", Opt_timeshift),
+ {}
+};
+
+struct hpfs_fc_context {
+ kuid_t uid;
+ kgid_t gid;
+ umode_t umask;
+ int lowercase;
+ int eas;
+ int chk;
+ int errs;
+ int chkdsk;
+ int timeshift;
+};
static inline void hpfs_help(void)
{
@@ -439,49 +364,92 @@ HPFS filesystem options:\n\
\n");
}
-static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
+static int hpfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- kuid_t uid;
- kgid_t gid;
- umode_t umask;
- int lowercase, eas, chk, errs, chkdsk, timeshift;
- int o;
+ struct hpfs_fc_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, hpfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_help:
+ hpfs_help();
+ return -EINVAL;
+ case Opt_uid:
+ ctx->uid = result.uid;
+ break;
+ case Opt_gid:
+ ctx->gid = result.gid;
+ break;
+ case Opt_umask:
+ ctx->umask = result.uint_32;
+ break;
+ case Opt_case:
+ ctx->lowercase = result.uint_32;
+ break;
+ case Opt_check:
+ ctx->chk = result.uint_32;
+ break;
+ case Opt_err:
+ ctx->errs = result.uint_32;
+ break;
+ case Opt_eas:
+ ctx->eas = result.uint_32;
+ break;
+ case Opt_chkdsk:
+ ctx->chkdsk = result.uint_32;
+ break;
+ case Opt_timeshift:
+ {
+ int m = 1;
+ char *rhs = param->string;
+ int timeshift;
+
+ if (*rhs == '-') m = -1;
+ if (*rhs == '+' || *rhs == '-') rhs++;
+ timeshift = simple_strtoul(rhs, &rhs, 0) * m;
+ if (*rhs)
+ return -EINVAL;
+ ctx->timeshift = timeshift;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hpfs_reconfigure(struct fs_context *fc)
+{
+ struct hpfs_fc_context *ctx = fc->fs_private;
+ struct super_block *s = fc->root->d_sb;
struct hpfs_sb_info *sbi = hpfs_sb(s);
sync_filesystem(s);
- *flags |= SB_NOATIME;
+ fc->sb_flags |= SB_NOATIME;
hpfs_lock(s);
- uid = sbi->sb_uid; gid = sbi->sb_gid;
- umask = 0777 & ~sbi->sb_mode;
- lowercase = sbi->sb_lowercase;
- eas = sbi->sb_eas; chk = sbi->sb_chk; chkdsk = sbi->sb_chkdsk;
- errs = sbi->sb_err; timeshift = sbi->sb_timeshift;
-
- if (!(o = parse_opts(data, &uid, &gid, &umask, &lowercase,
- &eas, &chk, &errs, &chkdsk, &timeshift))) {
- pr_err("bad mount options.\n");
- goto out_err;
- }
- if (o == 2) {
- hpfs_help();
- goto out_err;
- }
- if (timeshift != sbi->sb_timeshift) {
+
+ if (ctx->timeshift != sbi->sb_timeshift) {
pr_err("timeshift can't be changed using remount.\n");
goto out_err;
}
unmark_dirty(s);
- sbi->sb_uid = uid; sbi->sb_gid = gid;
- sbi->sb_mode = 0777 & ~umask;
- sbi->sb_lowercase = lowercase;
- sbi->sb_eas = eas; sbi->sb_chk = chk; sbi->sb_chkdsk = chkdsk;
- sbi->sb_err = errs; sbi->sb_timeshift = timeshift;
+ sbi->sb_uid = ctx->uid; sbi->sb_gid = ctx->gid;
+ sbi->sb_mode = 0777 & ~ctx->umask;
+ sbi->sb_lowercase = ctx->lowercase;
+ sbi->sb_eas = ctx->eas; sbi->sb_chk = ctx->chk;
+ sbi->sb_chkdsk = ctx->chkdsk;
+ sbi->sb_err = ctx->errs; sbi->sb_timeshift = ctx->timeshift;
- if (!(*flags & SB_RDONLY)) mark_dirty(s, 1);
+ if (!(fc->sb_flags & SB_RDONLY)) mark_dirty(s, 1);
hpfs_unlock(s);
return 0;
@@ -530,30 +498,24 @@ static const struct super_operations hpfs_sops =
.evict_inode = hpfs_evict_inode,
.put_super = hpfs_put_super,
.statfs = hpfs_statfs,
- .remount_fs = hpfs_remount_fs,
.show_options = hpfs_show_options,
};
-static int hpfs_fill_super(struct super_block *s, void *options, int silent)
+static int hpfs_fill_super(struct super_block *s, struct fs_context *fc)
{
+ struct hpfs_fc_context *ctx = fc->fs_private;
struct buffer_head *bh0, *bh1, *bh2;
struct hpfs_boot_block *bootblock;
struct hpfs_super_block *superblock;
struct hpfs_spare_block *spareblock;
struct hpfs_sb_info *sbi;
struct inode *root;
-
- kuid_t uid;
- kgid_t gid;
- umode_t umask;
- int lowercase, eas, chk, errs, chkdsk, timeshift;
+ int silent = fc->sb_flags & SB_SILENT;
dnode_secno root_dno;
struct hpfs_dirent *de = NULL;
struct quad_buffer_head qbh;
- int o;
-
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi) {
return -ENOMEM;
@@ -563,26 +525,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
mutex_init(&sbi->hpfs_mutex);
hpfs_lock(s);
- uid = current_uid();
- gid = current_gid();
- umask = current_umask();
- lowercase = 0;
- eas = 2;
- chk = 1;
- errs = 1;
- chkdsk = 1;
- timeshift = 0;
-
- if (!(o = parse_opts(options, &uid, &gid, &umask, &lowercase,
- &eas, &chk, &errs, &chkdsk, &timeshift))) {
- pr_err("bad mount options.\n");
- goto bail0;
- }
- if (o==2) {
- hpfs_help();
- goto bail0;
- }
-
/*sbi->sb_mounting = 1;*/
sb_set_blocksize(s, 512);
sbi->sb_fs_size = -1;
@@ -622,17 +564,17 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
sbi->sb_dirband_start = le32_to_cpu(superblock->dir_band_start);
sbi->sb_dirband_size = le32_to_cpu(superblock->n_dir_band);
sbi->sb_dmap = le32_to_cpu(superblock->dir_band_bitmap);
- sbi->sb_uid = uid;
- sbi->sb_gid = gid;
- sbi->sb_mode = 0777 & ~umask;
+ sbi->sb_uid = ctx->uid;
+ sbi->sb_gid = ctx->gid;
+ sbi->sb_mode = 0777 & ~ctx->umask;
sbi->sb_n_free = -1;
sbi->sb_n_free_dnodes = -1;
- sbi->sb_lowercase = lowercase;
- sbi->sb_eas = eas;
- sbi->sb_chk = chk;
- sbi->sb_chkdsk = chkdsk;
- sbi->sb_err = errs;
- sbi->sb_timeshift = timeshift;
+ sbi->sb_lowercase = ctx->lowercase;
+ sbi->sb_eas = ctx->eas;
+ sbi->sb_chk = ctx->chk;
+ sbi->sb_chkdsk = ctx->chkdsk;
+ sbi->sb_err = ctx->errs;
+ sbi->sb_timeshift = ctx->timeshift;
sbi->sb_was_error = 0;
sbi->sb_cp_table = NULL;
sbi->sb_c_bitmap = -1;
@@ -653,7 +595,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
/* Check for general fs errors*/
if (spareblock->dirty && !spareblock->old_wrote) {
- if (errs == 2) {
+ if (sbi->sb_err == 2) {
pr_err("Improperly stopped, not mounted\n");
goto bail4;
}
@@ -667,16 +609,16 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
}
if (le32_to_cpu(spareblock->n_dnode_spares) != le32_to_cpu(spareblock->n_dnode_spares_free)) {
- if (errs >= 2) {
+ if (sbi->sb_err >= 2) {
pr_err("Spare dnodes used, try chkdsk\n");
mark_dirty(s, 0);
goto bail4;
}
hpfs_error(s, "warning: spare dnodes used, try chkdsk");
- if (errs == 0)
+ if (sbi->sb_err == 0)
pr_err("Proceeding, but your filesystem could be corrupted if you delete files or directories\n");
}
- if (chk) {
+ if (sbi->sb_chk) {
unsigned a;
if (le32_to_cpu(superblock->dir_band_end) - le32_to_cpu(superblock->dir_band_start) + 1 != le32_to_cpu(superblock->n_dir_band) ||
le32_to_cpu(superblock->dir_band_end) < le32_to_cpu(superblock->dir_band_start) || le32_to_cpu(superblock->n_dir_band) > 0x4000) {
@@ -755,18 +697,70 @@ bail0:
return -EINVAL;
}
-static struct dentry *hpfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int hpfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, hpfs_fill_super);
+}
+
+static void hpfs_free_fc(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
+ kfree(fc->fs_private);
}
+static const struct fs_context_operations hpfs_fc_context_ops = {
+ .parse_param = hpfs_parse_param,
+ .get_tree = hpfs_get_tree,
+ .reconfigure = hpfs_reconfigure,
+ .free = hpfs_free_fc,
+};
+
+static int hpfs_init_fs_context(struct fs_context *fc)
+{
+ struct hpfs_fc_context *ctx;
+
+ ctx = kzalloc(sizeof(struct hpfs_fc_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct super_block *sb = fc->root->d_sb;
+ struct hpfs_sb_info *sbi = hpfs_sb(sb);
+
+ ctx->uid = sbi->sb_uid;
+ ctx->gid = sbi->sb_gid;
+ ctx->umask = 0777 & ~sbi->sb_mode;
+ ctx->lowercase = sbi->sb_lowercase;
+ ctx->eas = sbi->sb_eas;
+ ctx->chk = sbi->sb_chk;
+ ctx->chkdsk = sbi->sb_chkdsk;
+ ctx->errs = sbi->sb_err;
+ ctx->timeshift = sbi->sb_timeshift;
+
+ } else {
+ ctx->uid = current_uid();
+ ctx->gid = current_gid();
+ ctx->umask = current_umask();
+ ctx->lowercase = 0;
+ ctx->eas = 2;
+ ctx->chk = 1;
+ ctx->errs = 1;
+ ctx->chkdsk = 1;
+ ctx->timeshift = 0;
+ }
+
+ fc->fs_private = ctx;
+ fc->ops = &hpfs_fc_context_ops;
+
+ return 0;
+};
+
static struct file_system_type hpfs_fs_type = {
.owner = THIS_MODULE,
.name = "hpfs",
- .mount = hpfs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = hpfs_init_fs_context,
+ .parameters = hpfs_param_spec,
};
MODULE_ALIAS_FS("hpfs");
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 5cf327337e22..2dea122e5b93 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -39,6 +39,9 @@
#include <linux/uaccess.h>
#include <linux/sched/mm.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/hugetlbfs.h>
+
static const struct address_space_operations hugetlbfs_aops;
static const struct file_operations hugetlbfs_file_operations;
static const struct inode_operations hugetlbfs_dir_inode_operations;
@@ -687,6 +690,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
{
struct resv_map *resv_map;
+ trace_hugetlbfs_evict_inode(inode);
remove_inode_hugepages(inode, 0, LLONG_MAX);
/*
@@ -814,8 +818,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
return -EOPNOTSUPP;
- if (mode & FALLOC_FL_PUNCH_HOLE)
- return hugetlbfs_punch_hole(inode, offset, len);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ error = hugetlbfs_punch_hole(inode, offset, len);
+ goto out_nolock;
+ }
/*
* Default preallocate case.
@@ -919,6 +925,9 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
inode_set_ctime_current(inode);
out:
inode_unlock(inode);
+
+out_nolock:
+ trace_hugetlbfs_fallocate(inode, mode, offset, len, error);
return error;
}
@@ -935,6 +944,8 @@ static int hugetlbfs_setattr(struct mnt_idmap *idmap,
if (error)
return error;
+ trace_hugetlbfs_setattr(inode, dentry, attr);
+
if (ia_valid & ATTR_SIZE) {
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
@@ -1033,6 +1044,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
break;
}
lockdep_annotate_inode_mutex_key(inode);
+ trace_hugetlbfs_alloc_inode(inode, dir, mode);
} else {
if (resv_map)
kref_put(&resv_map->refs, resv_map_release);
@@ -1272,6 +1284,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
static void hugetlbfs_free_inode(struct inode *inode)
{
+ trace_hugetlbfs_free_inode(inode);
kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}
diff --git a/fs/inode.c b/fs/inode.c
index 8dabb224f941..b13b778257ae 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -21,7 +21,12 @@
#include <linux/list_lru.h>
#include <linux/iversion.h>
#include <linux/rw_hint.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
#include <trace/events/writeback.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/timestamp.h>
+
#include "internal.h"
/*
@@ -98,6 +103,70 @@ long get_nr_dirty_inodes(void)
return nr_dirty > 0 ? nr_dirty : 0;
}
+#ifdef CONFIG_DEBUG_FS
+static DEFINE_PER_CPU(long, mg_ctime_updates);
+static DEFINE_PER_CPU(long, mg_fine_stamps);
+static DEFINE_PER_CPU(long, mg_ctime_swaps);
+
+static unsigned long get_mg_ctime_updates(void)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ sum += data_race(per_cpu(mg_ctime_updates, i));
+ return sum;
+}
+
+static unsigned long get_mg_fine_stamps(void)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ sum += data_race(per_cpu(mg_fine_stamps, i));
+ return sum;
+}
+
+static unsigned long get_mg_ctime_swaps(void)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ sum += data_race(per_cpu(mg_ctime_swaps, i));
+ return sum;
+}
+
+#define mgtime_counter_inc(__var) this_cpu_inc(__var)
+
+static int mgts_show(struct seq_file *s, void *p)
+{
+ unsigned long ctime_updates = get_mg_ctime_updates();
+ unsigned long ctime_swaps = get_mg_ctime_swaps();
+ unsigned long fine_stamps = get_mg_fine_stamps();
+ unsigned long floor_swaps = timekeeping_get_mg_floor_swaps();
+
+ seq_printf(s, "%lu %lu %lu %lu\n",
+ ctime_updates, ctime_swaps, fine_stamps, floor_swaps);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(mgts);
+
+static int __init mg_debugfs_init(void)
+{
+ debugfs_create_file("multigrain_timestamps", S_IFREG | S_IRUGO, NULL, NULL, &mgts_fops);
+ return 0;
+}
+late_initcall(mg_debugfs_init);
+
+#else /* ! CONFIG_DEBUG_FS */
+
+#define mgtime_counter_inc(__var) do { } while (0)
+
+#endif /* CONFIG_DEBUG_FS */
+
/*
* Handle nr_inode sysctl
*/
@@ -174,6 +243,8 @@ int inode_init_always_gfp(struct super_block *sb, struct inode *inode, gfp_t gfp
inode->i_opflags = 0;
if (sb->s_xattr)
inode->i_opflags |= IOP_XATTR;
+ if (sb->s_type->fs_flags & FS_MGTIME)
+ inode->i_opflags |= IOP_MGTIME;
i_uid_write(inode, 0);
i_gid_write(inode, 0);
atomic_set(&inode->i_writecount, 0);
@@ -748,7 +819,7 @@ static void evict(struct inode *inode)
* ___wait_var_event() either sees the bit cleared or
* waitqueue_active() check in wake_up_var() sees the waiter.
*/
- smp_mb();
+ smp_mb__after_spinlock();
inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
@@ -1241,16 +1312,15 @@ EXPORT_SYMBOL(unlock_two_nondirectories);
* @data: opaque data pointer to pass to @test and @set
*
* Search for the inode specified by @hashval and @data in the inode cache,
- * and if present it is return it with an increased reference count. This is
- * a variant of iget5_locked() for callers that don't want to fail on memory
- * allocation of inode.
+ * and if present return it with an increased reference count. This is a
+ * variant of iget5_locked() that doesn't allocate an inode.
*
- * If the inode is not in cache, insert the pre-allocated inode to cache and
+ * If the inode is not present in the cache, insert the pre-allocated inode and
* return it locked, hashed, and with the I_NEW flag set. The file system gets
* to fill it in before unlocking it via unlock_new_inode().
*
- * Note both @test and @set are called with the inode_hash_lock held, so can't
- * sleep.
+ * Note that both @test and @set are called with the inode_hash_lock held, so
+ * they can't sleep.
*/
struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
int (*test)(struct inode *, void *),
@@ -1314,16 +1384,16 @@ EXPORT_SYMBOL(inode_insert5);
* @data: opaque data pointer to pass to @test and @set
*
* Search for the inode specified by @hashval and @data in the inode cache,
- * and if present it is return it with an increased reference count. This is
- * a generalized version of iget_locked() for file systems where the inode
+ * and if present return it with an increased reference count. This is a
+ * generalized version of iget_locked() for file systems where the inode
* number is not sufficient for unique identification of an inode.
*
- * If the inode is not in cache, allocate a new inode and return it locked,
- * hashed, and with the I_NEW flag set. The file system gets to fill it in
- * before unlocking it via unlock_new_inode().
+ * If the inode is not present in the cache, allocate and insert a new inode
+ * and return it locked, hashed, and with the I_NEW flag set. The file system
+ * gets to fill it in before unlocking it via unlock_new_inode().
*
- * Note both @test and @set are called with the inode_hash_lock held, so can't
- * sleep.
+ * Note that both @test and @set are called with the inode_hash_lock held, so
+ * they can't sleep.
*/
struct inode *iget5_locked(struct super_block *sb, unsigned long hashval,
int (*test)(struct inode *, void *),
@@ -2211,19 +2281,58 @@ int file_remove_privs(struct file *file)
}
EXPORT_SYMBOL(file_remove_privs);
+/**
+ * current_time - Return FS time (possibly fine-grained)
+ * @inode: inode.
+ *
+ * Return the current time truncated to the time granularity supported by
+ * the fs, as suitable for a ctime/mtime change. If the ctime is flagged
+ * as having been QUERIED, get a fine-grained timestamp, but don't update
+ * the floor.
+ *
+ * For a multigrain inode, this is effectively an estimate of the timestamp
+ * that a file would receive. An actual update must go through
+ * inode_set_ctime_current().
+ */
+struct timespec64 current_time(struct inode *inode)
+{
+ struct timespec64 now;
+ u32 cns;
+
+ ktime_get_coarse_real_ts64_mg(&now);
+
+ if (!is_mgtime(inode))
+ goto out;
+
+ /* If nothing has queried it, then coarse time is fine */
+ cns = smp_load_acquire(&inode->i_ctime_nsec);
+ if (cns & I_CTIME_QUERIED) {
+ /*
+ * If there is no apparent change, then get a fine-grained
+ * timestamp.
+ */
+ if (now.tv_nsec == (cns & ~I_CTIME_QUERIED))
+ ktime_get_real_ts64(&now);
+ }
+out:
+ return timestamp_truncate(now, inode);
+}
+EXPORT_SYMBOL(current_time);
+
static int inode_needs_update_time(struct inode *inode)
{
+ struct timespec64 now, ts;
int sync_it = 0;
- struct timespec64 now = current_time(inode);
- struct timespec64 ts;
/* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return 0;
+ now = current_time(inode);
+
ts = inode_get_mtime(inode);
if (!timespec64_equal(&ts, &now))
- sync_it = S_MTIME;
+ sync_it |= S_MTIME;
ts = inode_get_ctime(inode);
if (!timespec64_equal(&ts, &now))
@@ -2600,6 +2709,16 @@ void inode_nohighmem(struct inode *inode)
}
EXPORT_SYMBOL(inode_nohighmem);
+struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts)
+{
+ trace_inode_set_ctime_to_ts(inode, &ts);
+ set_normalized_timespec64(&ts, ts.tv_sec, ts.tv_nsec);
+ inode->i_ctime_sec = ts.tv_sec;
+ inode->i_ctime_nsec = ts.tv_nsec;
+ return ts;
+}
+EXPORT_SYMBOL(inode_set_ctime_to_ts);
+
/**
* timestamp_truncate - Truncate timespec to a granularity
* @t: Timespec
@@ -2632,39 +2751,159 @@ struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode)
EXPORT_SYMBOL(timestamp_truncate);
/**
- * current_time - Return FS time
- * @inode: inode.
+ * inode_set_ctime_current - set the ctime to current_time
+ * @inode: inode
*
- * Return the current time truncated to the time granularity supported by
- * the fs.
+ * Set the inode's ctime to the current value for the inode. Returns the
+ * current value that was assigned. If this is not a multigrain inode, then we
+ * set it to the later of the coarse time and floor value.
+ *
+ * If it is multigrain, then we first see if the coarse-grained timestamp is
+ * distinct from what is already there. If so, then use that. Otherwise, get a
+ * fine-grained timestamp.
*
- * Note that inode and inode->sb cannot be NULL.
- * Otherwise, the function warns and returns time without truncation.
+ * After that, try to swap the new value into i_ctime_nsec. Accept the
+ * resulting ctime, regardless of the outcome of the swap. If it has
+ * already been replaced, then that timestamp is later than the earlier
+ * unacceptable one, and is thus acceptable.
*/
-struct timespec64 current_time(struct inode *inode)
+struct timespec64 inode_set_ctime_current(struct inode *inode)
{
struct timespec64 now;
+ u32 cns, cur;
- ktime_get_coarse_real_ts64(&now);
- return timestamp_truncate(now, inode);
+ ktime_get_coarse_real_ts64_mg(&now);
+ now = timestamp_truncate(now, inode);
+
+ /* Just return that if this is not a multigrain fs */
+ if (!is_mgtime(inode)) {
+ inode_set_ctime_to_ts(inode, now);
+ goto out;
+ }
+
+ /*
+ * A fine-grained time is only needed if someone has queried
+ * for timestamps, and the current coarse grained time isn't
+ * later than what's already there.
+ */
+ cns = smp_load_acquire(&inode->i_ctime_nsec);
+ if (cns & I_CTIME_QUERIED) {
+ struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec,
+ .tv_nsec = cns & ~I_CTIME_QUERIED };
+
+ if (timespec64_compare(&now, &ctime) <= 0) {
+ ktime_get_real_ts64_mg(&now);
+ now = timestamp_truncate(now, inode);
+ mgtime_counter_inc(mg_fine_stamps);
+ }
+ }
+ mgtime_counter_inc(mg_ctime_updates);
+
+ /* No need to cmpxchg if it's exactly the same */
+ if (cns == now.tv_nsec && inode->i_ctime_sec == now.tv_sec) {
+ trace_ctime_xchg_skip(inode, &now);
+ goto out;
+ }
+ cur = cns;
+retry:
+ /* Try to swap the nsec value into place. */
+ if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) {
+ /* If swap occurred, then we're (mostly) done */
+ inode->i_ctime_sec = now.tv_sec;
+ trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
+ mgtime_counter_inc(mg_ctime_swaps);
+ } else {
+ /*
+ * Was the change due to someone marking the old ctime QUERIED?
+ * If so then retry the swap. This can only happen once since
+ * the only way to clear I_CTIME_QUERIED is to stamp the inode
+ * with a new ctime.
+ */
+ if (!(cns & I_CTIME_QUERIED) && (cns | I_CTIME_QUERIED) == cur) {
+ cns = cur;
+ goto retry;
+ }
+ /* Otherwise, keep the existing ctime */
+ now.tv_sec = inode->i_ctime_sec;
+ now.tv_nsec = cur & ~I_CTIME_QUERIED;
+ }
+out:
+ return now;
}
-EXPORT_SYMBOL(current_time);
+EXPORT_SYMBOL(inode_set_ctime_current);
/**
- * inode_set_ctime_current - set the ctime to current_time
- * @inode: inode
+ * inode_set_ctime_deleg - try to update the ctime on a delegated inode
+ * @inode: inode to update
+ * @update: timespec64 to set the ctime
*
- * Set the inode->i_ctime to the current value for the inode. Returns
- * the current value that was assigned to i_ctime.
+ * Attempt to atomically update the ctime on behalf of a delegation holder.
+ *
+ * The nfs server can call back the holder of a delegation to get updated
+ * inode attributes, including the mtime. When updating the mtime, update
+ * the ctime to a value at least equal to that.
+ *
+ * This can race with concurrent updates to the inode, in which
+ * case the update is skipped.
+ *
+ * Note that this works even when multigrain timestamps are not enabled,
+ * so it is used in either case.
*/
-struct timespec64 inode_set_ctime_current(struct inode *inode)
+struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 update)
{
- struct timespec64 now = current_time(inode);
+ struct timespec64 now, cur_ts;
+ u32 cur, old;
- inode_set_ctime_to_ts(inode, now);
- return now;
+ /* pairs with try_cmpxchg below */
+ cur = smp_load_acquire(&inode->i_ctime_nsec);
+ cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
+ cur_ts.tv_sec = inode->i_ctime_sec;
+
+ /* If the update is older than the existing value, skip it. */
+ if (timespec64_compare(&update, &cur_ts) <= 0)
+ return cur_ts;
+
+ ktime_get_coarse_real_ts64_mg(&now);
+
+ /* Clamp the update to "now" if it's in the future */
+ if (timespec64_compare(&update, &now) > 0)
+ update = now;
+
+ update = timestamp_truncate(update, inode);
+
+ /* No need to update if the values are already the same */
+ if (timespec64_equal(&update, &cur_ts))
+ return cur_ts;
+
+ /*
+ * Try to swap the nsec value into place. If it fails, that means
+ * it raced with an update due to a write or similar activity. That
+ * stamp takes precedence, so just skip the update.
+ */
+retry:
+ old = cur;
+ if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
+ inode->i_ctime_sec = update.tv_sec;
+ mgtime_counter_inc(mg_ctime_swaps);
+ return update;
+ }
+
+ /*
+ * Was the change due to another task marking the old ctime QUERIED?
+ *
+ * If so, then retry the swap. This can only happen once since
+ * the only way to clear I_CTIME_QUERIED is to stamp the inode
+ * with a new ctime.
+ */
+ if (!(old & I_CTIME_QUERIED) && (cur == (old | I_CTIME_QUERIED)))
+ goto retry;
+
+ /* Otherwise, it was a new timestamp. */
+ cur_ts.tv_sec = inode->i_ctime_sec;
+ cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
+ return cur_ts;
}
-EXPORT_SYMBOL(inode_set_ctime_current);
+EXPORT_SYMBOL(inode_set_ctime_deleg);
/**
* in_group_or_capable - check whether caller is CAP_FSETID privileged
@@ -2672,7 +2911,7 @@ EXPORT_SYMBOL(inode_set_ctime_current);
* @inode: inode to check
* @vfsgid: the new/current vfsgid of @inode
*
- * Check wether @vfsgid is in the caller's group list or if the caller is
+ * Check whether @vfsgid is in the caller's group list or if the caller is
* privileged with CAP_FSETID over @inode. This can be used to determine
* whether the setgid bit can be kept or must be dropped.
*
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index aa587b2142e2..ce73d2a48c1e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1277,22 +1277,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
loff_t length = iomap_length(iter);
loff_t written = 0;
- /* Don't bother with blocks that are not shared to start with. */
- if (!(iomap->flags & IOMAP_F_SHARED))
- return length;
-
- /*
- * Don't bother with delalloc reservations, holes or unwritten extents.
- *
- * Note that we use srcmap directly instead of iomap_iter_srcmap as
- * unsharing requires providing a separate source map, and the presence
- * of one is a good indicator that unsharing is needed, unlike
- * IOMAP_F_SHARED which can be set for any data that goes into the COW
- * fork for XFS.
- */
- if (iter->srcmap.type == IOMAP_HOLE ||
- iter->srcmap.type == IOMAP_DELALLOC ||
- iter->srcmap.type == IOMAP_UNWRITTEN)
+ if (!iomap_want_unshare_iter(iter))
return length;
do {
@@ -1799,7 +1784,7 @@ new_ioend:
if (ifs)
atomic_add(len, &ifs->write_bytes_pending);
wpc->ioend->io_size += len;
- wbc_account_cgroup_owner(wbc, &folio->page, len);
+ wbc_account_cgroup_owner(wbc, folio, len);
return 0;
}
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index f637aa0706a3..b521eb15759e 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -271,7 +271,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
* clearing the WRITE_THROUGH flag in the dio request.
*/
static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
- const struct iomap *iomap, bool use_fua)
+ const struct iomap *iomap, bool use_fua, bool atomic)
{
blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
@@ -283,6 +283,8 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
opflags |= REQ_FUA;
else
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
+ if (atomic)
+ opflags |= REQ_ATOMIC;
return opflags;
}
@@ -293,7 +295,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
const struct iomap *iomap = &iter->iomap;
struct inode *inode = iter->inode;
unsigned int fs_block_size = i_blocksize(inode), pad;
- loff_t length = iomap_length(iter);
+ const loff_t length = iomap_length(iter);
+ bool atomic = iter->flags & IOMAP_ATOMIC;
loff_t pos = iter->pos;
blk_opf_t bio_opf;
struct bio *bio;
@@ -303,6 +306,9 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
size_t copied = 0;
size_t orig_count;
+ if (atomic && length != fs_block_size)
+ return -EINVAL;
+
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
!bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
return -EINVAL;
@@ -377,12 +383,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
goto out;
}
- /*
- * Set the operation flags early so that bio_iov_iter_get_pages
- * can set up the page vector appropriately for a ZONE_APPEND
- * operation.
- */
- bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua);
+ bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
do {
@@ -415,6 +416,17 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
}
n = bio->bi_iter.bi_size;
+ if (WARN_ON_ONCE(atomic && n != length)) {
+ /*
+ * This bio should have covered the complete length,
+ * which it doesn't, so error. We may need to zero out
+ * the tail (complete FS block), similar to when
+ * bio_iov_iter_get_pages() returns an error, above.
+ */
+ ret = -EINVAL;
+ bio_put(bio);
+ goto zero_tail;
+ }
if (dio->flags & IOMAP_DIO_WRITE) {
task_io_account_write(n);
} else {
@@ -598,6 +610,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iocb->ki_flags & IOCB_NOWAIT)
iomi.flags |= IOMAP_NOWAIT;
+ if (iocb->ki_flags & IOCB_ATOMIC)
+ iomi.flags |= IOMAP_ATOMIC;
+
if (iov_iter_rw(iter) == READ) {
/* reads can always complete inline */
dio->flags |= IOMAP_DIO_INLINE_COMP;
@@ -659,7 +674,17 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (ret != -EAGAIN) {
trace_iomap_dio_invalidate_fail(inode, iomi.pos,
iomi.len);
- ret = -ENOTBLK;
+ if (iocb->ki_flags & IOCB_ATOMIC) {
+ /*
+ * folio invalidation failed, maybe
+ * this is transient, unlock and see if
+ * the caller tries again.
+ */
+ ret = -EAGAIN;
+ } else {
+ /* fall back to buffered write */
+ ret = -ENOTBLK;
+ }
}
goto out_free_dio;
}
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index 0a991c4ce87d..4118a42cdab0 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
{ IOMAP_REPORT, "REPORT" }, \
{ IOMAP_FAULT, "FAULT" }, \
{ IOMAP_DIRECT, "DIRECT" }, \
- { IOMAP_NOWAIT, "NOWAIT" }
+ { IOMAP_NOWAIT, "NOWAIT" }, \
+ { IOMAP_ATOMIC, "ATOMIC" }
#define IOMAP_F_FLAGS_STRINGS \
{ IOMAP_F_NEW, "NEW" }, \
diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h
index 33ef13a0b110..8794281f8ffd 100644
--- a/fs/jfs/jfs_filsys.h
+++ b/fs/jfs/jfs_filsys.h
@@ -24,6 +24,7 @@
#define JFS_ERR_REMOUNT_RO 0x00000002 /* remount read-only */
#define JFS_ERR_CONTINUE 0x00000004 /* continue */
#define JFS_ERR_PANIC 0x00000008 /* panic */
+#define JFS_ERR_MASK (JFS_ERR_REMOUNT_RO|JFS_ERR_CONTINUE|JFS_ERR_PANIC)
/* Quota support */
#define JFS_USRQUOTA 0x00000010
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index e1be21ca5d6e..223d9ac59839 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -6,11 +6,11 @@
#include <linux/fs.h>
#include <linux/module.h>
-#include <linux/parser.h>
#include <linux/completion.h>
#include <linux/vfs.h>
#include <linux/quotaops.h>
-#include <linux/mount.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/moduleparam.h>
#include <linux/kthread.h>
#include <linux/posix_acl.h>
@@ -210,240 +210,195 @@ enum {
Opt_discard, Opt_nodiscard, Opt_discard_minblk
};
-static const match_table_t tokens = {
- {Opt_integrity, "integrity"},
- {Opt_nointegrity, "nointegrity"},
- {Opt_iocharset, "iocharset=%s"},
- {Opt_resize, "resize=%u"},
- {Opt_resize_nosize, "resize"},
- {Opt_errors, "errors=%s"},
- {Opt_ignore, "noquota"},
- {Opt_quota, "quota"},
- {Opt_usrquota, "usrquota"},
- {Opt_grpquota, "grpquota"},
- {Opt_uid, "uid=%u"},
- {Opt_gid, "gid=%u"},
- {Opt_umask, "umask=%u"},
- {Opt_discard, "discard"},
- {Opt_nodiscard, "nodiscard"},
- {Opt_discard_minblk, "discard=%u"},
- {Opt_err, NULL}
+static const struct constant_table jfs_param_errors[] = {
+ {"continue", JFS_ERR_CONTINUE},
+ {"remount-ro", JFS_ERR_REMOUNT_RO},
+ {"panic", JFS_ERR_PANIC},
+ {}
};
-static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
- int *flag)
-{
- void *nls_map = (void *)-1; /* -1: no change; NULL: none */
- char *p;
- struct jfs_sb_info *sbi = JFS_SBI(sb);
+static const struct fs_parameter_spec jfs_param_spec[] = {
+ fsparam_flag_no ("integrity", Opt_integrity),
+ fsparam_string ("iocharset", Opt_iocharset),
+ fsparam_u64 ("resize", Opt_resize),
+ fsparam_flag ("resize", Opt_resize_nosize),
+ fsparam_enum ("errors", Opt_errors, jfs_param_errors),
+ fsparam_flag ("quota", Opt_quota),
+ fsparam_flag ("noquota", Opt_ignore),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_uid ("uid", Opt_uid),
+ fsparam_gid ("gid", Opt_gid),
+ fsparam_u32oct ("umask", Opt_umask),
+ fsparam_flag ("discard", Opt_discard),
+ fsparam_u32 ("discard", Opt_discard_minblk),
+ fsparam_flag ("nodiscard", Opt_nodiscard),
+ {}
+};
- *newLVSize = 0;
-
- if (!options)
- return 1;
-
- while ((p = strsep(&options, ",")) != NULL) {
- substring_t args[MAX_OPT_ARGS];
- int token;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_integrity:
- *flag &= ~JFS_NOINTEGRITY;
- break;
- case Opt_nointegrity:
- *flag |= JFS_NOINTEGRITY;
- break;
- case Opt_ignore:
- /* Silently ignore the quota options */
- /* Don't do anything ;-) */
- break;
- case Opt_iocharset:
- if (nls_map && nls_map != (void *) -1)
- unload_nls(nls_map);
- if (!strcmp(args[0].from, "none"))
- nls_map = NULL;
- else {
- nls_map = load_nls(args[0].from);
- if (!nls_map) {
- pr_err("JFS: charset not found\n");
- goto cleanup;
- }
- }
- break;
- case Opt_resize:
- {
- char *resize = args[0].from;
- int rc = kstrtoll(resize, 0, newLVSize);
+struct jfs_context {
+ int flag;
+ kuid_t uid;
+ kgid_t gid;
+ uint umask;
+ uint minblks_trim;
+ void *nls_map;
+ bool resize;
+ s64 newLVSize;
+};
- if (rc)
- goto cleanup;
- break;
- }
- case Opt_resize_nosize:
- {
- *newLVSize = sb_bdev_nr_blocks(sb);
- if (*newLVSize == 0)
- pr_err("JFS: Cannot determine volume size\n");
- break;
+static int jfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct jfs_context *ctx = fc->fs_private;
+ int reconfigure = (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE);
+ struct fs_parse_result result;
+ struct nls_table *nls_map;
+ int opt;
+
+ opt = fs_parse(fc, jfs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_integrity:
+ if (result.negated)
+ ctx->flag |= JFS_NOINTEGRITY;
+ else
+ ctx->flag &= ~JFS_NOINTEGRITY;
+ break;
+ case Opt_ignore:
+ /* Silently ignore the quota options */
+ /* Don't do anything ;-) */
+ break;
+ case Opt_iocharset:
+ if (ctx->nls_map && ctx->nls_map != (void *) -1) {
+ unload_nls(ctx->nls_map);
+ ctx->nls_map = NULL;
}
- case Opt_errors:
- {
- char *errors = args[0].from;
- if (!errors || !*errors)
- goto cleanup;
- if (!strcmp(errors, "continue")) {
- *flag &= ~JFS_ERR_REMOUNT_RO;
- *flag &= ~JFS_ERR_PANIC;
- *flag |= JFS_ERR_CONTINUE;
- } else if (!strcmp(errors, "remount-ro")) {
- *flag &= ~JFS_ERR_CONTINUE;
- *flag &= ~JFS_ERR_PANIC;
- *flag |= JFS_ERR_REMOUNT_RO;
- } else if (!strcmp(errors, "panic")) {
- *flag &= ~JFS_ERR_CONTINUE;
- *flag &= ~JFS_ERR_REMOUNT_RO;
- *flag |= JFS_ERR_PANIC;
- } else {
- pr_err("JFS: %s is an invalid error handler\n",
- errors);
- goto cleanup;
+ if (!strcmp(param->string, "none"))
+ ctx->nls_map = NULL;
+ else {
+ nls_map = load_nls(param->string);
+ if (!nls_map) {
+ pr_err("JFS: charset not found\n");
+ return -EINVAL;
}
- break;
+ ctx->nls_map = nls_map;
}
+ break;
+ case Opt_resize:
+ if (!reconfigure)
+ return -EINVAL;
+ ctx->resize = true;
+ ctx->newLVSize = result.uint_64;
+ break;
+ case Opt_resize_nosize:
+ if (!reconfigure)
+ return -EINVAL;
+ ctx->resize = true;
+ break;
+ case Opt_errors:
+ ctx->flag &= ~JFS_ERR_MASK;
+ ctx->flag |= result.uint_32;
+ break;
#ifdef CONFIG_QUOTA
- case Opt_quota:
- case Opt_usrquota:
- *flag |= JFS_USRQUOTA;
- break;
- case Opt_grpquota:
- *flag |= JFS_GRPQUOTA;
- break;
+ case Opt_quota:
+ case Opt_usrquota:
+ ctx->flag |= JFS_USRQUOTA;
+ break;
+ case Opt_grpquota:
+ ctx->flag |= JFS_GRPQUOTA;
+ break;
#else
- case Opt_usrquota:
- case Opt_grpquota:
- case Opt_quota:
- pr_err("JFS: quota operations not supported\n");
- break;
+ case Opt_usrquota:
+ case Opt_grpquota:
+ case Opt_quota:
+ pr_err("JFS: quota operations not supported\n");
+ break;
#endif
- case Opt_uid:
- {
- char *uid = args[0].from;
- uid_t val;
- int rc = kstrtouint(uid, 0, &val);
-
- if (rc)
- goto cleanup;
- sbi->uid = make_kuid(current_user_ns(), val);
- if (!uid_valid(sbi->uid))
- goto cleanup;
- break;
- }
-
- case Opt_gid:
- {
- char *gid = args[0].from;
- gid_t val;
- int rc = kstrtouint(gid, 0, &val);
-
- if (rc)
- goto cleanup;
- sbi->gid = make_kgid(current_user_ns(), val);
- if (!gid_valid(sbi->gid))
- goto cleanup;
- break;
+ case Opt_uid:
+ ctx->uid = result.uid;
+ break;
+
+ case Opt_gid:
+ ctx->gid = result.gid;
+ break;
+
+ case Opt_umask:
+ if (result.uint_32 & ~0777) {
+ pr_err("JFS: Invalid value of umask\n");
+ return -EINVAL;
}
+ ctx->umask = result.uint_32;
+ break;
- case Opt_umask:
- {
- char *umask = args[0].from;
- int rc = kstrtouint(umask, 8, &sbi->umask);
+ case Opt_discard:
+ /* if set to 1, even copying files will cause
+ * trimming :O
+ * -> user has more control over the online trimming
+ */
+ ctx->minblks_trim = 64;
+ ctx->flag |= JFS_DISCARD;
+ break;
- if (rc)
- goto cleanup;
- if (sbi->umask & ~0777) {
- pr_err("JFS: Invalid value of umask\n");
- goto cleanup;
- }
- break;
- }
+ case Opt_nodiscard:
+ ctx->flag &= ~JFS_DISCARD;
+ break;
- case Opt_discard:
- /* if set to 1, even copying files will cause
- * trimming :O
- * -> user has more control over the online trimming
- */
- sbi->minblks_trim = 64;
- if (bdev_max_discard_sectors(sb->s_bdev))
- *flag |= JFS_DISCARD;
- else
- pr_err("JFS: discard option not supported on device\n");
- break;
-
- case Opt_nodiscard:
- *flag &= ~JFS_DISCARD;
- break;
-
- case Opt_discard_minblk:
- {
- char *minblks_trim = args[0].from;
- int rc;
- if (bdev_max_discard_sectors(sb->s_bdev)) {
- *flag |= JFS_DISCARD;
- rc = kstrtouint(minblks_trim, 0,
- &sbi->minblks_trim);
- if (rc)
- goto cleanup;
- } else
- pr_err("JFS: discard option not supported on device\n");
- break;
- }
+ case Opt_discard_minblk:
+ ctx->minblks_trim = result.uint_32;
+ ctx->flag |= JFS_DISCARD;
+ break;
- default:
- printk("jfs: Unrecognized mount option \"%s\" or missing value\n",
- p);
- goto cleanup;
- }
- }
-
- if (nls_map != (void *) -1) {
- /* Discard old (if remount) */
- unload_nls(sbi->nls_tab);
- sbi->nls_tab = nls_map;
+ default:
+ return -EINVAL;
}
- return 1;
-cleanup:
- if (nls_map && nls_map != (void *) -1)
- unload_nls(nls_map);
return 0;
}
-static int jfs_remount(struct super_block *sb, int *flags, char *data)
+static int jfs_reconfigure(struct fs_context *fc)
{
- s64 newLVSize = 0;
+ struct jfs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
+ int readonly = fc->sb_flags & SB_RDONLY;
int rc = 0;
- int flag = JFS_SBI(sb)->flag;
+ int flag = ctx->flag;
int ret;
sync_filesystem(sb);
- if (!parse_options(data, sb, &newLVSize, &flag))
- return -EINVAL;
- if (newLVSize) {
+ /* Transfer results of parsing to the sbi */
+ JFS_SBI(sb)->flag = ctx->flag;
+ JFS_SBI(sb)->uid = ctx->uid;
+ JFS_SBI(sb)->gid = ctx->gid;
+ JFS_SBI(sb)->umask = ctx->umask;
+ JFS_SBI(sb)->minblks_trim = ctx->minblks_trim;
+ if (ctx->nls_map != (void *) -1) {
+ unload_nls(JFS_SBI(sb)->nls_tab);
+ JFS_SBI(sb)->nls_tab = ctx->nls_map;
+ }
+ ctx->nls_map = NULL;
+
+ if (ctx->resize) {
if (sb_rdonly(sb)) {
pr_err("JFS: resize requires volume to be mounted read-write\n");
return -EROFS;
}
- rc = jfs_extendfs(sb, newLVSize, 0);
+
+ if (!ctx->newLVSize) {
+ ctx->newLVSize = sb_bdev_nr_blocks(sb);
+ if (ctx->newLVSize == 0)
+ pr_err("JFS: Cannot determine volume size\n");
+ }
+
+ rc = jfs_extendfs(sb, ctx->newLVSize, 0);
if (rc)
return rc;
}
- if (sb_rdonly(sb) && !(*flags & SB_RDONLY)) {
+ if (sb_rdonly(sb) && !readonly) {
/*
* Invalidate any previously read metadata. fsck may have
* changed the on-disk data since we mounted r/o
@@ -459,7 +414,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
dquot_resume(sb, -1);
return ret;
}
- if (!sb_rdonly(sb) && (*flags & SB_RDONLY)) {
+ if (!sb_rdonly(sb) && readonly) {
rc = dquot_suspend(sb, -1);
if (rc < 0)
return rc;
@@ -467,7 +422,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
JFS_SBI(sb)->flag = flag;
return rc;
}
- if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
+ if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) {
if (!sb_rdonly(sb)) {
rc = jfs_umount_rw(sb);
if (rc)
@@ -477,18 +432,20 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
ret = jfs_mount_rw(sb, 1);
return ret;
}
+ }
JFS_SBI(sb)->flag = flag;
return 0;
}
-static int jfs_fill_super(struct super_block *sb, void *data, int silent)
+static int jfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct jfs_context *ctx = fc->fs_private;
+ int silent = fc->sb_flags & SB_SILENT;
struct jfs_sb_info *sbi;
struct inode *inode;
int rc;
- s64 newLVSize = 0;
- int flag, ret = -EINVAL;
+ int ret = -EINVAL;
jfs_info("In jfs_read_super: s_flags=0x%lx", sb->s_flags);
@@ -501,24 +458,34 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_time_min = 0;
sb->s_time_max = U32_MAX;
sbi->sb = sb;
- sbi->uid = INVALID_UID;
- sbi->gid = INVALID_GID;
- sbi->umask = -1;
-
- /* initialize the mount flag and determine the default error handler */
- flag = JFS_ERR_REMOUNT_RO;
- if (!parse_options((char *) data, sb, &newLVSize, &flag))
- goto out_kfree;
- sbi->flag = flag;
+ /* Transfer results of parsing to the sbi */
+ sbi->flag = ctx->flag;
+ sbi->uid = ctx->uid;
+ sbi->gid = ctx->gid;
+ sbi->umask = ctx->umask;
+ if (ctx->nls_map != (void *) -1) {
+ unload_nls(sbi->nls_tab);
+ sbi->nls_tab = ctx->nls_map;
+ }
+ ctx->nls_map = NULL;
+
+ if (sbi->flag & JFS_DISCARD) {
+ if (!bdev_max_discard_sectors(sb->s_bdev)) {
+ pr_err("JFS: discard option not supported on device\n");
+ sbi->flag &= ~JFS_DISCARD;
+ } else {
+ sbi->minblks_trim = ctx->minblks_trim;
+ }
+ }
#ifdef CONFIG_JFS_POSIX_ACL
sb->s_flags |= SB_POSIXACL;
#endif
- if (newLVSize) {
+ if (ctx->resize) {
pr_err("resize option for remount only\n");
- goto out_kfree;
+ goto out_unload;
}
/*
@@ -608,7 +575,6 @@ out_mount_failed:
sbi->direct_inode = NULL;
out_unload:
unload_nls(sbi->nls_tab);
-out_kfree:
kfree(sbi);
return ret;
}
@@ -664,10 +630,9 @@ out:
return rc;
}
-static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int jfs_get_tree(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
+ return get_tree_bdev(fc, jfs_fill_super);
}
static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -886,7 +851,6 @@ static const struct super_operations jfs_super_operations = {
.freeze_fs = jfs_freeze,
.unfreeze_fs = jfs_unfreeze,
.statfs = jfs_statfs,
- .remount_fs = jfs_remount,
.show_options = jfs_show_options,
#ifdef CONFIG_QUOTA
.quota_read = jfs_quota_read,
@@ -902,12 +866,71 @@ static const struct export_operations jfs_export_operations = {
.get_parent = jfs_get_parent,
};
+static void jfs_init_options(struct fs_context *fc, struct jfs_context *ctx)
+{
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ struct super_block *sb = fc->root->d_sb;
+
+ /* Copy over current option values and mount flags */
+ ctx->uid = JFS_SBI(sb)->uid;
+ ctx->gid = JFS_SBI(sb)->gid;
+ ctx->umask = JFS_SBI(sb)->umask;
+ ctx->nls_map = (void *)-1;
+ ctx->minblks_trim = JFS_SBI(sb)->minblks_trim;
+ ctx->flag = JFS_SBI(sb)->flag;
+
+ } else {
+ /*
+ * Initialize the mount flag and determine the default
+ * error handler
+ */
+ ctx->flag = JFS_ERR_REMOUNT_RO;
+ ctx->uid = INVALID_UID;
+ ctx->gid = INVALID_GID;
+ ctx->umask = -1;
+ ctx->nls_map = (void *)-1;
+ }
+}
+
+static void jfs_free_fc(struct fs_context *fc)
+{
+ struct jfs_context *ctx = fc->fs_private;
+
+ if (ctx->nls_map != (void *) -1)
+ unload_nls(ctx->nls_map);
+ kfree(ctx);
+}
+
+static const struct fs_context_operations jfs_context_ops = {
+ .parse_param = jfs_parse_param,
+ .get_tree = jfs_get_tree,
+ .reconfigure = jfs_reconfigure,
+ .free = jfs_free_fc,
+};
+
+static int jfs_init_fs_context(struct fs_context *fc)
+{
+ struct jfs_context *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ jfs_init_options(fc, ctx);
+
+ fc->fs_private = ctx;
+ fc->ops = &jfs_context_ops;
+
+ return 0;
+}
+
static struct file_system_type jfs_fs_type = {
.owner = THIS_MODULE,
.name = "jfs",
- .mount = jfs_do_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
+ .init_fs_context = jfs_init_fs_context,
+ .parameters = jfs_param_spec,
};
MODULE_ALIAS_FS("jfs");
diff --git a/fs/libfs.c b/fs/libfs.c
index 46966fd8bcf9..a168ece5cc61 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -77,6 +77,10 @@ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned
return ERR_PTR(-ENAMETOOLONG);
if (!dentry->d_sb->s_d_op)
d_set_d_op(dentry, &simple_dentry_operations);
+
+ if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+ return NULL;
+
d_add(dentry, NULL);
return NULL;
}
@@ -1791,8 +1795,8 @@ bool is_empty_dir_inode(struct inode *inode)
*
* Return: 0 if names match, 1 if mismatch, or -ERRNO
*/
-static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
{
const struct dentry *parent;
const struct inode *dir;
@@ -1835,6 +1839,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr);
}
+EXPORT_SYMBOL(generic_ci_d_compare);
/**
* generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
@@ -1843,7 +1848,7 @@ static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
*
* Return: 0 if hash was successful or unchanged, and -EINVAL on error
*/
-static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
{
const struct inode *dir = READ_ONCE(dentry->d_inode);
struct super_block *sb = dentry->d_sb;
@@ -1858,6 +1863,7 @@ static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL(generic_ci_d_hash);
static const struct dentry_operations generic_ci_dentry_ops = {
.d_hash = generic_ci_d_hash,
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 1f2149db10f2..2359347c9fbd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -30,7 +30,6 @@
#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
-#include <linux/exportfs.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
@@ -481,7 +480,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_host *host, struct nlm_lock *lock, int wait,
struct nlm_cookie *cookie, int reclaim)
{
- struct inode *inode = nlmsvc_file_inode(file);
+ struct inode *inode __maybe_unused = nlmsvc_file_inode(file);
struct nlm_block *block = NULL;
int error;
int mode;
@@ -496,7 +495,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
(long long)lock->fl.fl_end,
wait);
- if (!exportfs_lock_op_is_async(inode->i_sb->s_export_op)) {
+ if (!locks_can_async_lock(nlmsvc_file_file(file)->f_op)) {
async_block = wait;
wait = 0;
}
@@ -550,7 +549,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
* requests on the underlaying ->lock() implementation but
* only one nlm_block to being granted by lm_grant().
*/
- if (exportfs_lock_op_is_async(inode->i_sb->s_export_op) &&
+ if (locks_can_async_lock(nlmsvc_file_file(file)->f_op) &&
!list_empty(&block->b_list)) {
spin_unlock(&nlm_blocked_lock);
ret = nlm_lck_blocked;
diff --git a/fs/mpage.c b/fs/mpage.c
index b5b5ddf9d513..82aecf372743 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -606,7 +606,7 @@ alloc_new:
* the confused fail path above (OOM) will be very confused when
* it finds all bh marked clean (i.e. it will not write anything)
*/
- wbc_account_cgroup_owner(wbc, &folio->page, folio_size(folio));
+ wbc_account_cgroup_owner(wbc, folio, folio_size(folio));
length = first_unmapped << blkbits;
if (!bio_add_folio(bio, folio, length, 0)) {
bio = mpage_bio_submit_write(bio);
diff --git a/fs/namei.c b/fs/namei.c
index 4a4a22a08ac2..05a8d544fb35 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -326,6 +326,25 @@ static int check_acl(struct mnt_idmap *idmap,
return -EAGAIN;
}
+/*
+ * Very quick optimistic "we know we have no ACL's" check.
+ *
+ * Note that this is purely for ACL_TYPE_ACCESS, and purely
+ * for the "we have cached that there are no ACLs" case.
+ *
+ * If this returns true, we know there are no ACLs. But if
+ * it returns false, we might still not have ACLs (it could
+ * be the is_uncached_acl() case).
+ */
+static inline bool no_acl_inode(struct inode *inode)
+{
+#ifdef CONFIG_FS_POSIX_ACL
+ return likely(!READ_ONCE(inode->i_acl));
+#else
+ return true;
+#endif
+}
+
/**
* acl_permission_check - perform basic UNIX permission checking
* @idmap: idmap of the mount the inode was found from
@@ -348,6 +367,28 @@ static int acl_permission_check(struct mnt_idmap *idmap,
unsigned int mode = inode->i_mode;
vfsuid_t vfsuid;
+ /*
+ * Common cheap case: everybody has the requested
+ * rights, and there are no ACLs to check. No need
+ * to do any owner/group checks in that case.
+ *
+ * - 'mask&7' is the requested permission bit set
+ * - multiplying by 0111 spreads them out to all of ugo
+ * - '& ~mode' looks for missing inode permission bits
+ * - the '!' is for "no missing permissions"
+ *
+ * After that, we just need to check that there are no
+ * ACL's on the inode - do the 'IS_POSIXACL()' check last
+ * because it will dereference the ->i_sb pointer and we
+ * want to avoid that if at all possible.
+ */
+ if (!((mask & 7) * 0111 & ~mode)) {
+ if (no_acl_inode(inode))
+ return 0;
+ if (!IS_POSIXACL(inode))
+ return 0;
+ }
+
/* Are we the owner? If so, ACL's don't matter */
vfsuid = i_uid_into_vfsuid(idmap, inode);
if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) {
diff --git a/fs/namespace.c b/fs/namespace.c
index d26f5e6d2ca3..206fc54feeba 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -3901,7 +3901,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
}
new_ns->ns.ops = &mntns_operations;
if (!anon)
- new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
+ new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
@@ -5006,6 +5006,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
return 0;
}
+static void statmount_fs_subtype(struct kstatmount *s, struct seq_file *seq)
+{
+ struct super_block *sb = s->mnt->mnt_sb;
+
+ if (sb->s_subtype)
+ seq_puts(seq, sb->s_subtype);
+}
+
+static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
+{
+ struct super_block *sb = s->mnt->mnt_sb;
+ struct mount *r = real_mount(s->mnt);
+
+ if (sb->s_op->show_devname) {
+ size_t start = seq->count;
+ int ret;
+
+ ret = sb->s_op->show_devname(seq, s->mnt->mnt_root);
+ if (ret)
+ return ret;
+
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
+ /* Unescape the result */
+ seq->buf[seq->count] = '\0';
+ seq->count = start;
+ seq_commit(seq, string_unescape_inplace(seq->buf + start, UNESCAPE_OCTAL));
+ } else if (r->mnt_devname) {
+ seq_puts(seq, r->mnt_devname);
+ }
+ return 0;
+}
+
static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
{
s->sm.mask |= STATMOUNT_MNT_NS_ID;
@@ -5040,35 +5074,134 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
return 0;
}
+static inline int statmount_opt_unescape(struct seq_file *seq, char *buf_start)
+{
+ char *buf_end, *opt_start, *opt_end;
+ int count = 0;
+
+ buf_end = seq->buf + seq->count;
+ *buf_end = '\0';
+ for (opt_start = buf_start + 1; opt_start < buf_end; opt_start = opt_end + 1) {
+ opt_end = strchrnul(opt_start, ',');
+ *opt_end = '\0';
+ buf_start += string_unescape(opt_start, buf_start, 0, UNESCAPE_OCTAL) + 1;
+ if (WARN_ON_ONCE(++count == INT_MAX))
+ return -EOVERFLOW;
+ }
+ seq->count = buf_start - 1 - seq->buf;
+ return count;
+}
+
+static int statmount_opt_array(struct kstatmount *s, struct seq_file *seq)
+{
+ struct vfsmount *mnt = s->mnt;
+ struct super_block *sb = mnt->mnt_sb;
+ size_t start = seq->count;
+ char *buf_start;
+ int err;
+
+ if (!sb->s_op->show_options)
+ return 0;
+
+ buf_start = seq->buf + start;
+ err = sb->s_op->show_options(seq, mnt->mnt_root);
+ if (err)
+ return err;
+
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
+ if (seq->count == start)
+ return 0;
+
+ err = statmount_opt_unescape(seq, buf_start);
+ if (err < 0)
+ return err;
+
+ s->sm.opt_num = err;
+ return 0;
+}
+
+static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
+{
+ struct vfsmount *mnt = s->mnt;
+ struct super_block *sb = mnt->mnt_sb;
+ size_t start = seq->count;
+ char *buf_start;
+ int err;
+
+ buf_start = seq->buf + start;
+
+ err = security_sb_show_options(seq, sb);
+ if (!err)
+ return err;
+
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
+ if (seq->count == start)
+ return 0;
+
+ err = statmount_opt_unescape(seq, buf_start);
+ if (err < 0)
+ return err;
+
+ s->sm.opt_sec_num = err;
+ return 0;
+}
+
static int statmount_string(struct kstatmount *s, u64 flag)
{
- int ret;
+ int ret = 0;
size_t kbufsize;
struct seq_file *seq = &s->seq;
struct statmount *sm = &s->sm;
+ u32 start = seq->count;
switch (flag) {
case STATMOUNT_FS_TYPE:
- sm->fs_type = seq->count;
+ sm->fs_type = start;
ret = statmount_fs_type(s, seq);
break;
case STATMOUNT_MNT_ROOT:
- sm->mnt_root = seq->count;
+ sm->mnt_root = start;
ret = statmount_mnt_root(s, seq);
break;
case STATMOUNT_MNT_POINT:
- sm->mnt_point = seq->count;
+ sm->mnt_point = start;
ret = statmount_mnt_point(s, seq);
break;
case STATMOUNT_MNT_OPTS:
- sm->mnt_opts = seq->count;
+ sm->mnt_opts = start;
ret = statmount_mnt_opts(s, seq);
break;
+ case STATMOUNT_OPT_ARRAY:
+ sm->opt_array = start;
+ ret = statmount_opt_array(s, seq);
+ break;
+ case STATMOUNT_OPT_SEC_ARRAY:
+ sm->opt_sec_array = start;
+ ret = statmount_opt_sec_array(s, seq);
+ break;
+ case STATMOUNT_FS_SUBTYPE:
+ sm->fs_subtype = start;
+ statmount_fs_subtype(s, seq);
+ break;
+ case STATMOUNT_SB_SOURCE:
+ sm->sb_source = start;
+ ret = statmount_sb_source(s, seq);
+ break;
default:
WARN_ON_ONCE(true);
return -EINVAL;
}
+ /*
+ * If nothing was emitted, return to avoid setting the flag
+ * and terminating the buffer.
+ */
+ if (seq->count == start)
+ return ret;
if (unlikely(check_add_overflow(sizeof(*sm), seq->count, &kbufsize)))
return -EOVERFLOW;
if (kbufsize >= s->bufsize)
@@ -5203,6 +5336,18 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
if (!err && s->mask & STATMOUNT_MNT_OPTS)
err = statmount_string(s, STATMOUNT_MNT_OPTS);
+ if (!err && s->mask & STATMOUNT_OPT_ARRAY)
+ err = statmount_string(s, STATMOUNT_OPT_ARRAY);
+
+ if (!err && s->mask & STATMOUNT_OPT_SEC_ARRAY)
+ err = statmount_string(s, STATMOUNT_OPT_SEC_ARRAY);
+
+ if (!err && s->mask & STATMOUNT_FS_SUBTYPE)
+ err = statmount_string(s, STATMOUNT_FS_SUBTYPE);
+
+ if (!err && s->mask & STATMOUNT_SB_SOURCE)
+ err = statmount_string(s, STATMOUNT_SB_SOURCE);
+
if (!err && s->mask & STATMOUNT_MNT_NS_ID)
statmount_mnt_ns_id(s, ns);
@@ -5224,7 +5369,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
}
#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
- STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS)
+ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \
+ STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \
+ STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY)
static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
struct statmount __user *buf, size_t bufsize,
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index af46a598f4d7..7ac34550c403 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -627,7 +627,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
if (unlikely(always_fill)) {
if (pos - offset + len <= i_size)
return false; /* Page entirely before EOF */
- zero_user_segment(&folio->page, 0, plen);
+ folio_zero_segment(folio, 0, plen);
folio_mark_uptodate(folio);
return true;
}
@@ -646,7 +646,7 @@ static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
return false;
zero_out:
- zero_user_segments(&folio->page, 0, offset, offset + len, plen);
+ folio_zero_segments(folio, 0, offset, offset + len, plen);
return true;
}
@@ -713,7 +713,7 @@ retry:
if (folio_test_uptodate(folio))
goto have_folio;
- /* If the page is beyond the EOF, we want to clear it - unless it's
+ /* If the folio is beyond the EOF, we want to clear it - unless it's
* within the cache granule containing the EOF, in which case we need
* to preload the granule.
*/
@@ -773,7 +773,7 @@ error:
EXPORT_SYMBOL(netfs_write_begin);
/*
- * Preload the data into a page we're proposing to write into.
+ * Preload the data into a folio we're proposing to write into.
*/
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index b3910dfcb56d..b4826360a411 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -83,13 +83,13 @@ static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
* netfs_perform_write - Copy data into the pagecache.
* @iocb: The operation parameters
* @iter: The source buffer
- * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ * @netfs_group: Grouping for dirty folios (eg. ceph snaps).
*
- * Copy data into pagecache pages attached to the inode specified by @iocb.
+ * Copy data into pagecache folios attached to the inode specified by @iocb.
* The caller must hold appropriate inode locks.
*
- * Dirty pages are tagged with a netfs_folio struct if they're not up to date
- * to indicate the range modified. Dirty pages may also be tagged with a
+ * Dirty folios are tagged with a netfs_folio struct if they're not up to date
+ * to indicate the range modified. Dirty folios may also be tagged with a
* netfs-specific grouping such that data from an old group gets flushed before
* a new one is started.
*/
@@ -223,11 +223,11 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
* we try to read it.
*/
if (fpos >= ctx->zero_point) {
- zero_user_segment(&folio->page, 0, offset);
+ folio_zero_segment(folio, 0, offset);
copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
if (unlikely(copied == 0))
goto copy_failed;
- zero_user_segment(&folio->page, offset + copied, flen);
+ folio_zero_segment(folio, offset + copied, flen);
__netfs_set_group(folio, netfs_group);
folio_mark_uptodate(folio);
trace_netfs_folio(folio, netfs_modify_and_clear);
@@ -407,7 +407,7 @@ EXPORT_SYMBOL(netfs_perform_write);
* netfs_buffered_write_iter_locked - write data to a file
* @iocb: IO state structure (file, offset, etc.)
* @from: iov_iter with data to write
- * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ * @netfs_group: Grouping for dirty folios (eg. ceph snaps).
*
* This function does all the work needed for actually writing data to a
* file. It does all basic checks, removes SUID from the file, updates
@@ -491,7 +491,9 @@ EXPORT_SYMBOL(netfs_file_write_iter);
/*
* Notification that a previously read-only page is about to become writable.
- * Note that the caller indicates a single page of a multipage folio.
+ * The caller indicates the precise page that needs to be written to, but
+ * we only track group on a per-folio basis, so we block more often than
+ * we might otherwise.
*/
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
@@ -501,7 +503,7 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
struct address_space *mapping = file->f_mapping;
struct inode *inode = file_inode(file);
struct netfs_inode *ictx = netfs_inode(inode);
- vm_fault_t ret = VM_FAULT_RETRY;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
int err;
_enter("%lx", folio->index);
@@ -510,21 +512,15 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
if (folio_lock_killable(folio) < 0)
goto out;
- if (folio->mapping != mapping) {
- folio_unlock(folio);
- ret = VM_FAULT_NOPAGE;
- goto out;
- }
-
- if (folio_wait_writeback_killable(folio)) {
- ret = VM_FAULT_LOCKED;
- goto out;
- }
+ if (folio->mapping != mapping)
+ goto unlock;
+ if (folio_wait_writeback_killable(folio) < 0)
+ goto unlock;
/* Can we see a streaming write here? */
if (WARN_ON(!folio_test_uptodate(folio))) {
- ret = VM_FAULT_SIGBUS | VM_FAULT_LOCKED;
- goto out;
+ ret = VM_FAULT_SIGBUS;
+ goto unlock;
}
group = netfs_folio_group(folio);
@@ -559,5 +555,8 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
out:
sb_end_pagefault(inode->i_sb);
return ret;
+unlock:
+ folio_unlock(folio);
+ goto out;
}
EXPORT_SYMBOL(netfs_page_mkwrite);
diff --git a/fs/netfs/fscache_volume.c b/fs/netfs/fscache_volume.c
index cb75c07b5281..ced14ac78cc1 100644
--- a/fs/netfs/fscache_volume.c
+++ b/fs/netfs/fscache_volume.c
@@ -322,8 +322,7 @@ maybe_wait:
}
return;
no_wait:
- clear_bit_unlock(FSCACHE_VOLUME_CREATING, &volume->flags);
- wake_up_bit(&volume->flags, FSCACHE_VOLUME_CREATING);
+ clear_and_wake_up_bit(FSCACHE_VOLUME_CREATING, &volume->flags);
}
/*
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 114282398716..03ecc7765615 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -181,8 +181,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
#if IS_ENABLED(CONFIG_NFS_LOCALIO)
seqlock_init(&clp->cl_boot_lock);
ktime_get_real_ts64(&clp->cl_nfssvc_boot);
- clp->cl_uuid.net = NULL;
- clp->cl_uuid.dom = NULL;
+ nfs_uuid_init(&clp->cl_uuid);
spin_lock_init(&clp->cl_localio_lock);
#endif /* CONFIG_NFS_LOCALIO */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 542c7d97b235..596f35170137 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -205,12 +205,15 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
nfs_fscache_invalidate(inode, 0);
flags &= ~NFS_INO_REVAL_FORCED;
- nfsi->cache_validity |= flags;
+ flags |= nfsi->cache_validity;
+ if (inode->i_mapping->nrpages == 0)
+ flags &= ~NFS_INO_INVALID_DATA;
- if (inode->i_mapping->nrpages == 0) {
- nfsi->cache_validity &= ~NFS_INO_INVALID_DATA;
- nfs_ooo_clear(nfsi);
- } else if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+ /* pairs with nfs_clear_invalid_mapping()'s smp_load_acquire() */
+ smp_store_release(&nfsi->cache_validity, flags);
+
+ if (inode->i_mapping->nrpages == 0 ||
+ nfsi->cache_validity & NFS_INO_INVALID_DATA) {
nfs_ooo_clear(nfsi);
}
trace_nfs_set_cache_invalid(inode, 0);
@@ -628,23 +631,35 @@ nfs_fattr_fixup_delegated(struct inode *inode, struct nfs_fattr *fattr)
}
}
+static void nfs_update_timestamps(struct inode *inode, unsigned int ia_valid)
+{
+ enum file_time_flags time_flags = 0;
+ unsigned int cache_flags = 0;
+
+ if (ia_valid & ATTR_MTIME) {
+ time_flags |= S_MTIME | S_CTIME;
+ cache_flags |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
+ }
+ if (ia_valid & ATTR_ATIME) {
+ time_flags |= S_ATIME;
+ cache_flags |= NFS_INO_INVALID_ATIME;
+ }
+ inode_update_timestamps(inode, time_flags);
+ NFS_I(inode)->cache_validity &= ~cache_flags;
+}
+
void nfs_update_delegated_atime(struct inode *inode)
{
spin_lock(&inode->i_lock);
- if (nfs_have_delegated_atime(inode)) {
- inode_update_timestamps(inode, S_ATIME);
- NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ATIME;
- }
+ if (nfs_have_delegated_atime(inode))
+ nfs_update_timestamps(inode, ATTR_ATIME);
spin_unlock(&inode->i_lock);
}
void nfs_update_delegated_mtime_locked(struct inode *inode)
{
- if (nfs_have_delegated_mtime(inode)) {
- inode_update_timestamps(inode, S_CTIME | S_MTIME);
- NFS_I(inode)->cache_validity &= ~(NFS_INO_INVALID_CTIME |
- NFS_INO_INVALID_MTIME);
- }
+ if (nfs_have_delegated_mtime(inode))
+ nfs_update_timestamps(inode, ATTR_MTIME);
}
void nfs_update_delegated_mtime(struct inode *inode)
@@ -682,15 +697,16 @@ nfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
attr->ia_valid &= ~ATTR_SIZE;
}
- if (nfs_have_delegated_mtime(inode)) {
- if (attr->ia_valid & ATTR_MTIME) {
- nfs_update_delegated_mtime(inode);
- attr->ia_valid &= ~ATTR_MTIME;
- }
- if (attr->ia_valid & ATTR_ATIME) {
- nfs_update_delegated_atime(inode);
- attr->ia_valid &= ~ATTR_ATIME;
- }
+ if (nfs_have_delegated_mtime(inode) && attr->ia_valid & ATTR_MTIME) {
+ spin_lock(&inode->i_lock);
+ nfs_update_timestamps(inode, attr->ia_valid);
+ spin_unlock(&inode->i_lock);
+ attr->ia_valid &= ~(ATTR_MTIME | ATTR_ATIME);
+ } else if (nfs_have_delegated_atime(inode) &&
+ attr->ia_valid & ATTR_ATIME &&
+ !(attr->ia_valid & ATTR_MTIME)) {
+ nfs_update_delegated_atime(inode);
+ attr->ia_valid &= ~ATTR_ATIME;
}
/* Optimization: if the end result is no change, don't RPC */
@@ -1408,6 +1424,13 @@ int nfs_clear_invalid_mapping(struct address_space *mapping)
TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);
if (ret)
goto out;
+ smp_rmb(); /* pairs with smp_wmb() below */
+ if (test_bit(NFS_INO_INVALIDATING, bitlock))
+ continue;
+ /* pairs with nfs_set_cache_invalid()'s smp_store_release() */
+ if (!(smp_load_acquire(&nfsi->cache_validity) & NFS_INO_INVALID_DATA))
+ goto out;
+ /* Slow-path that double-checks with spinlock held */
spin_lock(&inode->i_lock);
if (test_bit(NFS_INO_INVALIDATING, bitlock)) {
spin_unlock(&inode->i_lock);
@@ -1633,6 +1656,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->gencount = nfs_inc_attr_generation_counter();
fattr->owner_name = NULL;
fattr->group_name = NULL;
+ fattr->mdsthreshold = NULL;
}
EXPORT_SYMBOL_GPL(nfs_fattr_init);
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index d0aa680ec816..8f0ce82a677e 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -205,7 +205,8 @@ void nfs_local_probe(struct nfs_client *clp)
nfs_local_disable(clp);
}
- nfs_uuid_begin(&clp->cl_uuid);
+ if (!nfs_uuid_begin(&clp->cl_uuid))
+ return;
if (nfs_server_uuid_is_local(clp))
nfs_local_enable(clp);
nfs_uuid_end(&clp->cl_uuid);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cd2fbde2e6d7..9d40319e063d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3452,6 +3452,10 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
adjust_flags |= NFS_INO_INVALID_MODE;
if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
adjust_flags |= NFS_INO_INVALID_OTHER;
+ if (sattr->ia_valid & ATTR_ATIME)
+ adjust_flags |= NFS_INO_INVALID_ATIME;
+ if (sattr->ia_valid & ATTR_MTIME)
+ adjust_flags |= NFS_INO_INVALID_MTIME;
do {
nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, fattr->label),
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 9723b6c53397..ae5c5e39afa0 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -885,7 +885,15 @@ static int nfs_request_mount(struct fs_context *fc,
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ if ((request.protocol == XPRT_TRANSPORT_UDP) ==
+ !(ctx->flags & NFS_MOUNT_TCP))
+ /*
+ * NFS protocol and mount protocol are both UDP or neither UDP
+ * so timeouts are compatible. Use NFS timeouts for MOUNT
+ */
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
+ else
+ status = nfs_mount(&request, NFS_UNSPEC_TIMEO, NFS_UNSPEC_RETRANS);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
diff --git a/fs/nfs_common/nfslocalio.c b/fs/nfs_common/nfslocalio.c
index 5c8ce5066c16..09404d142d1a 100644
--- a/fs/nfs_common/nfslocalio.c
+++ b/fs/nfs_common/nfslocalio.c
@@ -5,7 +5,7 @@
*/
#include <linux/module.h>
-#include <linux/rculist.h>
+#include <linux/list.h>
#include <linux/nfslocalio.h>
#include <net/netns/generic.h>
@@ -20,15 +20,27 @@ static DEFINE_SPINLOCK(nfs_uuid_lock);
*/
static LIST_HEAD(nfs_uuids);
-void nfs_uuid_begin(nfs_uuid_t *nfs_uuid)
+void nfs_uuid_init(nfs_uuid_t *nfs_uuid)
{
nfs_uuid->net = NULL;
nfs_uuid->dom = NULL;
- uuid_gen(&nfs_uuid->uuid);
+ INIT_LIST_HEAD(&nfs_uuid->list);
+}
+EXPORT_SYMBOL_GPL(nfs_uuid_init);
+bool nfs_uuid_begin(nfs_uuid_t *nfs_uuid)
+{
spin_lock(&nfs_uuid_lock);
- list_add_tail_rcu(&nfs_uuid->list, &nfs_uuids);
+ /* Is this nfs_uuid already in use? */
+ if (!list_empty(&nfs_uuid->list)) {
+ spin_unlock(&nfs_uuid_lock);
+ return false;
+ }
+ uuid_gen(&nfs_uuid->uuid);
+ list_add_tail(&nfs_uuid->list, &nfs_uuids);
spin_unlock(&nfs_uuid_lock);
+
+ return true;
}
EXPORT_SYMBOL_GPL(nfs_uuid_begin);
@@ -36,7 +48,8 @@ void nfs_uuid_end(nfs_uuid_t *nfs_uuid)
{
if (nfs_uuid->net == NULL) {
spin_lock(&nfs_uuid_lock);
- list_del_init(&nfs_uuid->list);
+ if (nfs_uuid->net == NULL)
+ list_del_init(&nfs_uuid->list);
spin_unlock(&nfs_uuid_lock);
}
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b5a6bf4f459f..d32f2dfd148f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1841,14 +1841,12 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!async_copy)
goto out_err;
async_copy->cp_nn = nn;
+ INIT_LIST_HEAD(&async_copy->copies);
+ refcount_set(&async_copy->refcount, 1);
/* Arbitrary cap on number of pending async copy operations */
if (atomic_inc_return(&nn->pending_async_copies) >
- (int)rqstp->rq_pool->sp_nrthreads) {
- atomic_dec(&nn->pending_async_copies);
+ (int)rqstp->rq_pool->sp_nrthreads)
goto out_err;
- }
- INIT_LIST_HEAD(&async_copy->copies);
- refcount_set(&async_copy->refcount, 1);
async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
if (!async_copy->cp_src)
goto out_err;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 551d2958ec29..d80406f8b568 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -8001,9 +8001,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
spin_lock(&fp->fi_lock);
@@ -8014,9 +8011,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
- if (nfsd4_has_session(cstate) ||
- exportfs_lock_op_is_async(sb->s_export_op))
- flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
spin_lock(&fp->fi_lock);
@@ -8036,15 +8030,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
}
- /*
- * Most filesystems with their own ->lock operations will block
- * the nfsd thread waiting to acquire the lock. That leads to
- * deadlocks (we don't want every nfsd thread tied up waiting
- * for file locks), so don't attempt blocking lock notifications
- * on those filesystems:
- */
- if (!exportfs_lock_op_is_async(sb->s_export_op))
- flags &= ~FL_SLEEP;
+ if (lock->lk_type & (NFS4_READW_LT | NFS4_WRITEW_LT) &&
+ nfsd4_has_session(cstate) &&
+ locks_can_async_lock(nf->nf_file->f_op))
+ flags |= FL_SLEEP;
nbl = find_or_allocate_block(lock_sop, &fp->fi_fhandle, nn);
if (!nbl) {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 22325b590e17..d6d4f2a0e898 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -903,11 +903,6 @@ __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
goto out;
}
- if (may_flags & NFSD_MAY_64BIT_COOKIE)
- file->f_mode |= FMODE_64BITHASH;
- else
- file->f_mode |= FMODE_32BITHASH;
-
*filp = file;
out:
return host_err;
@@ -2174,13 +2169,15 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
loff_t offset = *offsetp;
int may_flags = NFSD_MAY_READ;
- if (fhp->fh_64bit_cookies)
- may_flags |= NFSD_MAY_64BIT_COOKIE;
-
err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
if (err)
goto out;
+ if (fhp->fh_64bit_cookies)
+ file->f_mode |= FMODE_64BITHASH;
+ else
+ file->f_mode |= FMODE_32BITHASH;
+
offset = vfs_llseek(file, offset, SEEK_SET);
if (offset < 0) {
err = nfserrno((int)offset);
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 57b4af5ad646..501ad7be5174 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -68,7 +68,6 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
goto failed;
}
memset(bh->b_data, 0, i_blocksize(inode));
- bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
set_buffer_uptodate(bh);
@@ -133,7 +132,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr,
goto found;
}
set_buffer_mapped(bh);
- bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = pblocknr; /* set block address for read */
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 1c9ae36a03ab..ace22253fed0 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -83,10 +83,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
goto out;
}
- if (!buffer_mapped(bh)) {
- bh->b_bdev = inode->i_sb->s_bdev;
+ if (!buffer_mapped(bh))
set_buffer_mapped(bh);
- }
bh->b_blocknr = pbn;
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index ceb7dc0b5bad..2db6350b5ac2 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -89,7 +89,6 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
if (buffer_uptodate(bh))
goto failed_bh;
- bh->b_bdev = sb->s_bdev;
err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
if (likely(!err)) {
get_bh(bh);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 4905063790c5..9b108052d9f7 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -157,6 +157,9 @@ static int nilfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
/* slow symlink */
inode->i_op = &nilfs_symlink_inode_operations;
inode_nohighmem(inode);
+ mapping_set_gfp_mask(inode->i_mapping,
+ mapping_gfp_constraint(inode->i_mapping,
+ ~__GFP_FS));
inode->i_mapping->a_ops = &nilfs_aops;
err = page_symlink(inode, symname, l);
if (err)
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 5436eb0424bd..9a849397c768 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -39,7 +39,6 @@ static struct buffer_head *__nilfs_get_folio_block(struct folio *folio,
first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
bh = get_nth_bh(bh, block - first_block);
- touch_buffer(bh);
wait_on_buffer(bh);
return bh;
}
@@ -64,6 +63,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
folio_put(folio);
return NULL;
}
+ bh->b_bdev = inode->i_sb->s_bdev;
return bh;
}
@@ -99,16 +99,16 @@ void nilfs_forget_buffer(struct buffer_head *bh)
*/
void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
{
- void *kaddr0, *kaddr1;
+ void *saddr, *daddr;
unsigned long bits;
- struct page *spage = sbh->b_page, *dpage = dbh->b_page;
+ struct folio *sfolio = sbh->b_folio, *dfolio = dbh->b_folio;
struct buffer_head *bh;
- kaddr0 = kmap_local_page(spage);
- kaddr1 = kmap_local_page(dpage);
- memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size);
- kunmap_local(kaddr1);
- kunmap_local(kaddr0);
+ saddr = kmap_local_folio(sfolio, bh_offset(sbh));
+ daddr = kmap_local_folio(dfolio, bh_offset(dbh));
+ memcpy(daddr, saddr, sbh->b_size);
+ kunmap_local(daddr);
+ kunmap_local(saddr);
dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS;
dbh->b_blocknr = sbh->b_blocknr;
@@ -122,13 +122,13 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh)
unlock_buffer(bh);
}
if (bits & BIT(BH_Uptodate))
- SetPageUptodate(dpage);
+ folio_mark_uptodate(dfolio);
else
- ClearPageUptodate(dpage);
+ folio_clear_uptodate(dfolio);
if (bits & BIT(BH_Mapped))
- SetPageMappedToDisk(dpage);
+ folio_set_mappedtodisk(dfolio);
else
- ClearPageMappedToDisk(dpage);
+ folio_clear_mappedtodisk(dfolio);
}
/**
@@ -401,6 +401,7 @@ void nilfs_clear_folio_dirty(struct folio *folio)
folio_clear_uptodate(folio);
folio_clear_mappedtodisk(folio);
+ folio_clear_checked(folio);
head = folio_buffers(folio);
if (head) {
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d5dbef7f5c95..6004dfdfdf0f 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -16,7 +16,6 @@
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
-#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
static int dir_notify_enable __read_mostly = 1;
@@ -347,9 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
new_fsn_mark = NULL;
}
- rcu_read_lock();
- f = lookup_fdget_rcu(fd);
- rcu_read_unlock();
+ f = fget_raw(fd);
/* if (f != filp) means that we lost a race and another task/thread
* actually closed the fd we are still playing with before we grabbed
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 224bccaab4cc..24c7c5df4998 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
-#include <linux/fdtable.h>
#include <linux/fsnotify_backend.h>
#include <linux/init.h>
#include <linux/jiffies.h>
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9644bc72e457..61b83039771e 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/fanotify.h>
#include <linux/fcntl.h>
-#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 96b684763b39..b95724b767e1 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -280,5 +280,4 @@ const struct export_operations ocfs2_export_ops = {
.fh_to_dentry = ocfs2_fh_to_dentry,
.fh_to_parent = ocfs2_fh_to_parent,
.get_parent = ocfs2_get_parent,
- .flags = EXPORT_OP_ASYNC_LOCK,
};
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 58887456e3c5..4fa6c840d20b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1787,6 +1787,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
+
+ if (byte_start > id_count || byte_start + byte_len > id_count) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
byte_start + byte_len, 0);
if (ret) {
@@ -2804,6 +2812,7 @@ const struct file_operations ocfs2_fops = {
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,
+ .fop_flags = FOP_ASYNC_LOCK,
};
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
@@ -2820,6 +2829,7 @@ const struct file_operations ocfs2_dops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
+ .fop_flags = FOP_ASYNC_LOCK,
};
/*
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index c4a4016d3866..b0733c08ed13 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -574,6 +574,8 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out_free_group_bh:
+ if (ret < 0)
+ ocfs2_remove_from_cache(INODE_CACHE(inode), group_bh);
brelse(group_bh);
out_unlock:
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 3d404624bb96..c79b4291777f 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -2319,6 +2319,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
struct ocfs2_blockcheck_stats *stats)
{
int status = -EAGAIN;
+ u32 blksz_bits;
if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
@@ -2333,11 +2334,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
goto out;
}
status = -EINVAL;
- if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
+ /* Acceptable block sizes are 512 bytes, 1K, 2K and 4K. */
+ blksz_bits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+ if (blksz_bits < 9 || blksz_bits > 12) {
mlog(ML_ERROR, "found superblock with incorrect block "
- "size: found %u, should be %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
- blksz);
+ "size bits: found %u, should be 9, 10, 11, or 12\n",
+ blksz_bits);
+ } else if ((1 << le32_to_cpu(blksz_bits)) != blksz) {
+ mlog(ML_ERROR, "found superblock with incorrect block "
+ "size: found %u, should be %u\n", 1 << blksz_bits, blksz);
} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
OCFS2_MAJOR_REV_LEVEL ||
le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index dd0a05365e79..73a6f6fd8a8e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -2036,8 +2036,7 @@ static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
rc = 0;
ocfs2_xa_cleanup_value_truncate(loc, "removing",
orig_clusters);
- if (rc)
- goto out;
+ goto out;
}
}
diff --git a/fs/open.c b/fs/open.c
index 5da4df2f9b18..446ab8ef04af 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1576,23 +1576,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
return retval;
}
-/**
- * sys_close_range() - Close all file descriptors in a given range.
- *
- * @fd: starting file descriptor to close
- * @max_fd: last file descriptor to close
- * @flags: reserved for future extensions
- *
- * This closes a range of file descriptors. All file descriptors
- * from @fd up to and including @max_fd are closed.
- * Currently, errors to close a given file descriptor are ignored.
- */
-SYSCALL_DEFINE3(close_range, unsigned int, fd, unsigned int, max_fd,
- unsigned int, flags)
-{
- return __close_range(fd, max_fd, flags);
-}
-
/*
* This routine simulates a hangup on the tty, to arrange that users
* are given clean terminals at login time.
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 2ed6ad641a20..ee2cbd044ce6 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -16,7 +16,6 @@
#include <linux/sched/signal.h>
#include <linux/cred.h>
#include <linux/namei.h>
-#include <linux/fdtable.h>
#include <linux/ratelimit.h>
#include <linux/exportfs.h>
#include "overlayfs.h"
diff --git a/fs/overlayfs/params.c b/fs/overlayfs/params.c
index e42546c6c5df..1115c22deca0 100644
--- a/fs/overlayfs/params.c
+++ b/fs/overlayfs/params.c
@@ -141,10 +141,10 @@ static int ovl_verity_mode_def(void)
const struct fs_parameter_spec ovl_parameter_spec[] = {
fsparam_string_empty("lowerdir", Opt_lowerdir),
- fsparam_string("lowerdir+", Opt_lowerdir_add),
- fsparam_string("datadir+", Opt_datadir_add),
- fsparam_string("upperdir", Opt_upperdir),
- fsparam_string("workdir", Opt_workdir),
+ fsparam_file_or_string("lowerdir+", Opt_lowerdir_add),
+ fsparam_file_or_string("datadir+", Opt_datadir_add),
+ fsparam_file_or_string("upperdir", Opt_upperdir),
+ fsparam_file_or_string("workdir", Opt_workdir),
fsparam_flag("default_permissions", Opt_default_permissions),
fsparam_enum("redirect_dir", Opt_redirect_dir, ovl_parameter_redirect_dir),
fsparam_enum("index", Opt_index, ovl_parameter_bool),
@@ -367,40 +367,100 @@ static void ovl_add_layer(struct fs_context *fc, enum ovl_opt layer,
}
}
-static int ovl_parse_layer(struct fs_context *fc, const char *layer_name, enum ovl_opt layer)
+static inline bool is_upper_layer(enum ovl_opt layer)
+{
+ return layer == Opt_upperdir || layer == Opt_workdir;
+}
+
+/* Handle non-file descriptor-based layer options that require path lookup. */
+static inline int ovl_kern_path(const char *layer_name, struct path *layer_path,
+ enum ovl_opt layer)
{
- char *name = kstrdup(layer_name, GFP_KERNEL);
- bool upper = (layer == Opt_upperdir || layer == Opt_workdir);
- struct path path;
int err;
+ switch (layer) {
+ case Opt_upperdir:
+ fallthrough;
+ case Opt_workdir:
+ fallthrough;
+ case Opt_lowerdir:
+ err = ovl_mount_dir(layer_name, layer_path);
+ break;
+ case Opt_lowerdir_add:
+ fallthrough;
+ case Opt_datadir_add:
+ err = ovl_mount_dir_noesc(layer_name, layer_path);
+ break;
+ default:
+ WARN_ON_ONCE(true);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int ovl_do_parse_layer(struct fs_context *fc, const char *layer_name,
+ struct path *layer_path, enum ovl_opt layer)
+{
+ char *name __free(kfree) = kstrdup(layer_name, GFP_KERNEL);
+ bool upper;
+ int err = 0;
+
if (!name)
return -ENOMEM;
- if (upper || layer == Opt_lowerdir)
- err = ovl_mount_dir(name, &path);
- else
- err = ovl_mount_dir_noesc(name, &path);
+ upper = is_upper_layer(layer);
+ err = ovl_mount_dir_check(fc, layer_path, layer, name, upper);
if (err)
- goto out_free;
-
- err = ovl_mount_dir_check(fc, &path, layer, name, upper);
- if (err)
- goto out_put;
+ return err;
if (!upper) {
err = ovl_ctx_realloc_lower(fc);
if (err)
- goto out_put;
+ return err;
}
/* Store the user provided path string in ctx to show in mountinfo */
- ovl_add_layer(fc, layer, &path, &name);
+ ovl_add_layer(fc, layer, layer_path, &name);
+ return err;
+}
+
+static int ovl_parse_layer(struct fs_context *fc, struct fs_parameter *param,
+ enum ovl_opt layer)
+{
+ struct path layer_path __free(path_put) = {};
+ int err = 0;
+
+ switch (param->type) {
+ case fs_value_is_string:
+ err = ovl_kern_path(param->string, &layer_path, layer);
+ if (err)
+ return err;
+ err = ovl_do_parse_layer(fc, param->string, &layer_path, layer);
+ break;
+ case fs_value_is_file: {
+ char *buf __free(kfree);
+ char *layer_name;
+
+ buf = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT);
+ if (!buf)
+ return -ENOMEM;
+
+ layer_path = param->file->f_path;
+ path_get(&layer_path);
+
+ layer_name = d_path(&layer_path, buf, PATH_MAX);
+ if (IS_ERR(layer_name))
+ return PTR_ERR(layer_name);
+
+ err = ovl_do_parse_layer(fc, layer_name, &layer_path, layer);
+ break;
+ }
+ default:
+ WARN_ON_ONCE(true);
+ err = -EINVAL;
+ }
-out_put:
- path_put(&path);
-out_free:
- kfree(name);
return err;
}
@@ -474,7 +534,13 @@ static int ovl_parse_param_lowerdir(const char *name, struct fs_context *fc)
iter = dup;
for (nr = 0; nr < nr_lower; nr++) {
- err = ovl_parse_layer(fc, iter, Opt_lowerdir);
+ struct path path __free(path_put) = {};
+
+ err = ovl_kern_path(iter, &path, Opt_lowerdir);
+ if (err)
+ goto out_err;
+
+ err = ovl_do_parse_layer(fc, iter, &path, Opt_lowerdir);
if (err)
goto out_err;
@@ -555,7 +621,7 @@ static int ovl_parse_param(struct fs_context *fc, struct fs_parameter *param)
case Opt_datadir_add:
case Opt_upperdir:
case Opt_workdir:
- err = ovl_parse_layer(fc, param->string, opt);
+ err = ovl_parse_layer(fc, param, opt);
break;
case Opt_default_permissions:
config->default_permissions = true;
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 80675b6bf884..618abb1fa1b8 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -2,6 +2,7 @@
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/cgroup.h>
#include <linux/magic.h>
#include <linux/mount.h>
#include <linux/pid.h>
@@ -114,6 +115,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
return poll_flags;
}
+static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
+{
+ struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
+ size_t usize = _IOC_SIZE(cmd);
+ struct pidfd_info kinfo = {};
+ struct user_namespace *user_ns;
+ const struct cred *c;
+ __u64 mask;
+#ifdef CONFIG_CGROUPS
+ struct cgroup *cgrp;
+#endif
+
+ if (!uinfo)
+ return -EINVAL;
+ if (usize < PIDFD_INFO_SIZE_VER0)
+ return -EINVAL; /* First version, no smaller struct possible */
+
+ if (copy_from_user(&mask, &uinfo->mask, sizeof(mask)))
+ return -EFAULT;
+
+ c = get_task_cred(task);
+ if (!c)
+ return -ESRCH;
+
+ /* Unconditionally return identifiers and credentials, the rest only on request */
+
+ user_ns = current_user_ns();
+ kinfo.ruid = from_kuid_munged(user_ns, c->uid);
+ kinfo.rgid = from_kgid_munged(user_ns, c->gid);
+ kinfo.euid = from_kuid_munged(user_ns, c->euid);
+ kinfo.egid = from_kgid_munged(user_ns, c->egid);
+ kinfo.suid = from_kuid_munged(user_ns, c->suid);
+ kinfo.sgid = from_kgid_munged(user_ns, c->sgid);
+ kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid);
+ kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid);
+ kinfo.mask |= PIDFD_INFO_CREDS;
+ put_cred(c);
+
+#ifdef CONFIG_CGROUPS
+ rcu_read_lock();
+ cgrp = task_dfl_cgroup(task);
+ kinfo.cgroupid = cgroup_id(cgrp);
+ kinfo.mask |= PIDFD_INFO_CGROUPID;
+ rcu_read_unlock();
+#endif
+
+ /*
+ * Copy pid/tgid last, to reduce the chances the information might be
+ * stale. Note that it is not possible to ensure it will be valid as the
+ * task might return as soon as the copy_to_user finishes, but that's ok
+ * and userspace expects that might happen and can act accordingly, so
+ * this is just best-effort. What we can do however is checking that all
+ * the fields are set correctly, or return ESRCH to avoid providing
+ * incomplete information. */
+
+ kinfo.ppid = task_ppid_nr_ns(task, NULL);
+ kinfo.tgid = task_tgid_vnr(task);
+ kinfo.pid = task_pid_vnr(task);
+ kinfo.mask |= PIDFD_INFO_PID;
+
+ if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1))
+ return -ESRCH;
+
+ /*
+ * If userspace and the kernel have the same struct size it can just
+ * be copied. If userspace provides an older struct, only the bits that
+ * userspace knows about will be copied. If userspace provides a new
+ * struct, only the bits that the kernel knows about will be copied.
+ */
+ if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo))))
+ return -EFAULT;
+
+ return 0;
+}
+
static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct task_struct *task __free(put_task) = NULL;
@@ -122,13 +198,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct ns_common *ns_common = NULL;
struct pid_namespace *pid_ns;
- if (arg)
- return -EINVAL;
-
task = get_pid_task(pid, PIDTYPE_PID);
if (!task)
return -ESRCH;
+ /* Extensible IOCTL that does not open namespace FDs, take a shortcut */
+ if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
+ return pidfd_info(task, cmd, arg);
+
+ if (arg)
+ return -EINVAL;
+
scoped_guard(task_lock, task) {
nsp = task->nsproxy;
if (nsp)
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 6c66a37522d0..4050942ab52f 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -200,11 +200,11 @@ EXPORT_SYMBOL(posix_acl_init);
* Allocate a new ACL with the specified number of entries.
*/
struct posix_acl *
-posix_acl_alloc(int count, gfp_t flags)
+posix_acl_alloc(unsigned int count, gfp_t flags)
{
- const size_t size = sizeof(struct posix_acl) +
- count * sizeof(struct posix_acl_entry);
- struct posix_acl *acl = kmalloc(size, flags);
+ struct posix_acl *acl;
+
+ acl = kmalloc(struct_size(acl, a_entries, count), flags);
if (acl)
posix_acl_init(acl, count);
return acl;
@@ -220,9 +220,8 @@ posix_acl_clone(const struct posix_acl *acl, gfp_t flags)
struct posix_acl *clone = NULL;
if (acl) {
- int size = sizeof(struct posix_acl) + acl->a_count *
- sizeof(struct posix_acl_entry);
- clone = kmemdup(acl, size, flags);
+ clone = kmemdup(acl, struct_size(acl, a_entries, acl->a_count),
+ flags);
if (clone)
refcount_set(&clone->a_refcount, 1);
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b31283d81c52..e9d7ddc52f69 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -58,7 +58,6 @@
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
-#include <linux/fdtable.h>
#include <linux/generic-radix-tree.h>
#include <linux/string.h>
#include <linux/seq_file.h>
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 5e391cbca7a3..24baf23e864f 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -116,9 +116,7 @@ static bool tid_fd_mode(struct task_struct *task, unsigned fd, fmode_t *mode)
{
struct file *file;
- rcu_read_lock();
- file = task_lookup_fdget_rcu(task, fd);
- rcu_read_unlock();
+ file = fget_task(task, fd);
if (file) {
*mode = file->f_mode;
fput(file);
@@ -258,19 +256,17 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
if (!dir_emit_dots(file, ctx))
goto out;
- rcu_read_lock();
for (fd = ctx->pos - 2;; fd++) {
struct file *f;
struct fd_data data;
char name[10 + 1];
unsigned int len;
- f = task_lookup_next_fdget_rcu(p, &fd);
+ f = fget_task_next(p, &fd);
ctx->pos = fd + 2LL;
if (!f)
break;
data.mode = f->f_mode;
- rcu_read_unlock();
fput(f);
data.fd = fd;
@@ -278,11 +274,9 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
if (!proc_fill_cache(file, ctx,
name, len, instantiate, p,
&data))
- goto out;
+ break;
cond_resched();
- rcu_read_lock();
}
- rcu_read_unlock();
out:
put_task_struct(p);
return 0;
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index f4616083faef..04bb29721419 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -20,7 +20,7 @@ static int show_softirqs(struct seq_file *p, void *v)
for (i = 0; i < NR_SOFTIRQS; i++) {
seq_printf(p, "%12s:", softirq_to_name[i]);
for_each_possible_cpu(j)
- seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
+ seq_put_decimal_ull_width(p, " ", kstat_softirqs_cpu(i, j), 10);
seq_putc(p, '\n');
}
return 0;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e52bd96137a6..7eb010de39fe 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2665,8 +2665,10 @@ static int pagemap_scan_get_args(struct pm_scan_arg *arg,
return -EFAULT;
if (!arg->vec && arg->vec_len)
return -EINVAL;
+ if (UINT_MAX == SIZE_MAX && arg->vec_len > SIZE_MAX)
+ return -EINVAL;
if (arg->vec && !access_ok((void __user *)(long)arg->vec,
- arg->vec_len * sizeof(struct page_region)))
+ size_mul(arg->vec_len, sizeof(struct page_region))))
return -EFAULT;
/* Fixup default values */
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b52d85f8ad59..b4521b096058 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -457,10 +457,6 @@ static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
#endif
}
-static const struct vm_operations_struct vmcore_mmap_ops = {
- .fault = mmap_vmcore_fault,
-};
-
/**
* vmcore_alloc_buf - allocate buffer in vmalloc memory
* @size: size of buffer
@@ -488,6 +484,11 @@ static inline char *vmcore_alloc_buf(size_t size)
* virtually contiguous user-space in ELF layout.
*/
#ifdef CONFIG_MMU
+
+static const struct vm_operations_struct vmcore_mmap_ops = {
+ .fault = mmap_vmcore_fault,
+};
+
/*
* remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
* reported as not being ram with the zero page.
diff --git a/fs/read_write.c b/fs/read_write.c
index 64dc24afdb3a..3e5dad12a5b4 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1830,18 +1830,22 @@ int generic_file_rw_checks(struct file *file_in, struct file *file_out)
return 0;
}
-bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos)
+int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter)
{
size_t len = iov_iter_count(iter);
if (!iter_is_ubuf(iter))
- return false;
+ return -EINVAL;
if (!is_power_of_2(len))
- return false;
+ return -EINVAL;
+
+ if (!IS_ALIGNED(iocb->ki_pos, len))
+ return -EINVAL;
- if (!IS_ALIGNED(pos, len))
- return false;
+ if (!(iocb->ki_flags & IOCB_DIRECT))
+ return -EOPNOTSUPP;
- return true;
+ return 0;
}
+EXPORT_SYMBOL_GPL(generic_atomic_write_valid);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index e676c8b0cf5d..8bbb1ad46335 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -343,8 +343,8 @@ EXPORT_SYMBOL(seq_lseek);
/**
* seq_release - free the structures associated with sequential file.
- * @file: file in question
* @inode: its inode
+ * @file: file in question
*
* Frees the structures associated with sequential file; can be used
* as ->f_op->release() if you don't have private data to destroy.
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 15d94ac4095e..0ce2d704b1f3 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1037,6 +1037,7 @@ clean_demultiplex_info(struct TCP_Server_Info *server)
*/
}
+ put_net(cifs_net_ns(server));
kfree(server->leaf_fullpath);
kfree(server);
@@ -1635,8 +1636,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
/* srv_count can never go negative */
WARN_ON(server->srv_count < 0);
- put_net(cifs_net_ns(server));
-
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
@@ -3070,13 +3069,22 @@ generic_ip_connect(struct TCP_Server_Info *server)
if (server->ssocket) {
socket = server->ssocket;
} else {
- rc = __sock_create(cifs_net_ns(server), sfamily, SOCK_STREAM,
+ struct net *net = cifs_net_ns(server);
+ struct sock *sk;
+
+ rc = __sock_create(net, sfamily, SOCK_STREAM,
IPPROTO_TCP, &server->ssocket, 1);
if (rc < 0) {
cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
return rc;
}
+ sk = server->ssocket->sk;
+ __netns_tracker_free(net, &sk->ns_tracker, false);
+ sk->sk_net_refcnt = 1;
+ get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
+
/* BB other socket options to set KEEPALIVE, NODELAY? */
cifs_dbg(FYI, "Socket created\n");
socket = server->ssocket;
diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c
index aa2a37a7ce84..e6a72f75ab94 100644
--- a/fs/smb/server/connection.c
+++ b/fs/smb/server/connection.c
@@ -70,6 +70,7 @@ struct ksmbd_conn *ksmbd_conn_alloc(void)
atomic_set(&conn->req_running, 0);
atomic_set(&conn->r_count, 0);
atomic_set(&conn->refcnt, 1);
+ atomic_set(&conn->mux_smb_requests, 0);
conn->total_credits = 1;
conn->outstanding_credits = 0;
diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h
index b379ae4fdcdf..8ddd5a3c7baf 100644
--- a/fs/smb/server/connection.h
+++ b/fs/smb/server/connection.h
@@ -107,6 +107,7 @@ struct ksmbd_conn {
__le16 signing_algorithm;
bool binding;
atomic_t refcnt;
+ atomic_t mux_smb_requests;
};
struct ksmbd_conn_ops {
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 1e4624e9d434..ad02fe555fda 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -90,7 +90,7 @@ static int __rpc_method(char *rpc_name)
int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
{
- struct ksmbd_session_rpc *entry;
+ struct ksmbd_session_rpc *entry, *old;
struct ksmbd_rpc_command *resp;
int method;
@@ -106,16 +106,19 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
entry->id = ksmbd_ipc_id_alloc();
if (entry->id < 0)
goto free_entry;
- xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL);
+ old = xa_store(&sess->rpc_handle_list, entry->id, entry, GFP_KERNEL);
+ if (xa_is_err(old))
+ goto free_id;
resp = ksmbd_rpc_open(sess, entry->id);
if (!resp)
- goto free_id;
+ goto erase_xa;
kvfree(resp);
return entry->id;
-free_id:
+erase_xa:
xa_erase(&sess->rpc_handle_list, entry->id);
+free_id:
ksmbd_rpc_id_free(entry->id);
free_entry:
kfree(entry);
@@ -175,6 +178,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn)
unsigned long id;
struct ksmbd_session *sess;
+ down_write(&sessions_table_lock);
down_write(&conn->session_lock);
xa_for_each(&conn->sessions, id, sess) {
if (atomic_read(&sess->refcnt) == 0 &&
@@ -188,6 +192,7 @@ static void ksmbd_expire_session(struct ksmbd_conn *conn)
}
}
up_write(&conn->session_lock);
+ up_write(&sessions_table_lock);
}
int ksmbd_session_register(struct ksmbd_conn *conn,
@@ -229,7 +234,6 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
}
}
}
- up_write(&sessions_table_lock);
down_write(&conn->session_lock);
xa_for_each(&conn->sessions, id, sess) {
@@ -249,6 +253,7 @@ void ksmbd_sessions_deregister(struct ksmbd_conn *conn)
}
}
up_write(&conn->session_lock);
+ up_write(&sessions_table_lock);
}
struct ksmbd_session *ksmbd_session_lookup(struct ksmbd_conn *conn,
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index 9670c97f14b3..e6cfedba9992 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -238,11 +238,11 @@ static void __handle_ksmbd_work(struct ksmbd_work *work,
} while (is_chained == true);
send:
- if (work->sess)
- ksmbd_user_session_put(work->sess);
if (work->tcon)
ksmbd_tree_connect_put(work->tcon);
smb3_preauth_hash_rsp(work);
+ if (work->sess)
+ ksmbd_user_session_put(work->sess);
if (work->sess && work->sess->enc && work->encrypted &&
conn->ops->encrypt_resp) {
rc = conn->ops->encrypt_resp(work);
@@ -270,6 +270,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
ksmbd_conn_try_dequeue_request(work);
ksmbd_free_work_struct(work);
+ atomic_dec(&conn->mux_smb_requests);
/*
* Checking waitqueue to dropping pending requests on
* disconnection. waitqueue_active is safe because it
@@ -291,6 +292,15 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
struct ksmbd_work *work;
int err;
+ err = ksmbd_init_smb_server(conn);
+ if (err)
+ return 0;
+
+ if (atomic_inc_return(&conn->mux_smb_requests) >= conn->vals->max_credits) {
+ atomic_dec_return(&conn->mux_smb_requests);
+ return -ENOSPC;
+ }
+
work = ksmbd_alloc_work_struct();
if (!work) {
pr_err("allocation for work failed\n");
@@ -301,12 +311,6 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
work->request_buf = conn->request_buf;
conn->request_buf = NULL;
- err = ksmbd_init_smb_server(work);
- if (err) {
- ksmbd_free_work_struct(work);
- return 0;
- }
-
ksmbd_conn_enqueue_request(work);
atomic_inc(&conn->r_count);
/* update activity on connection */
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index a2ebbe604c8c..75b4eb856d32 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -388,6 +388,10 @@ static struct smb_version_ops smb1_server_ops = {
.set_rsp_status = set_smb1_rsp_status,
};
+static struct smb_version_values smb1_server_values = {
+ .max_credits = SMB2_MAX_CREDITS,
+};
+
static int smb1_negotiate(struct ksmbd_work *work)
{
return ksmbd_smb_negotiate_common(work, SMB_COM_NEGOTIATE);
@@ -399,18 +403,18 @@ static struct smb_version_cmds smb1_server_cmds[1] = {
static int init_smb1_server(struct ksmbd_conn *conn)
{
+ conn->vals = &smb1_server_values;
conn->ops = &smb1_server_ops;
conn->cmds = smb1_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb1_server_cmds);
return 0;
}
-int ksmbd_init_smb_server(struct ksmbd_work *work)
+int ksmbd_init_smb_server(struct ksmbd_conn *conn)
{
- struct ksmbd_conn *conn = work->conn;
__le32 proto;
- proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol;
+ proto = *(__le32 *)((struct smb_hdr *)conn->request_buf)->Protocol;
if (conn->need_neg == false) {
if (proto == SMB1_PROTO_NUMBER)
return -EINVAL;
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index cc1d6dfe29d5..a3d8a905b07e 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn);
int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
-int ksmbd_init_smb_server(struct ksmbd_work *work);
+int ksmbd_init_smb_server(struct ksmbd_conn *conn);
struct ksmbd_kstat;
int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 22251743fadf..d19d4db74af8 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -30,7 +30,8 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
loff_t start_index = folio->index & ~mask;
loff_t end_index = start_index | mask;
- int i, n, pages, bytes, res = -ENOMEM;
+ loff_t index;
+ int i, pages, bytes, res = -ENOMEM;
struct page **page, *last_page;
struct squashfs_page_actor *actor;
void *pageaddr;
@@ -45,9 +46,9 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize,
return res;
/* Try to grab all the pages covered by the Squashfs block */
- for (i = 0, n = start_index; n <= end_index; n++) {
- page[i] = (n == folio->index) ? target_page :
- grab_cache_page_nowait(target_page->mapping, n);
+ for (i = 0, index = start_index; index <= end_index; index++) {
+ page[i] = (index == folio->index) ? target_page :
+ grab_cache_page_nowait(target_page->mapping, index);
if (page[i] == NULL)
continue;
diff --git a/fs/stat.c b/fs/stat.c
index 41e598376d7e..5ef213343834 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -23,10 +23,46 @@
#include <linux/uaccess.h>
#include <asm/unistd.h>
+#include <trace/events/timestamp.h>
+
#include "internal.h"
#include "mount.h"
/**
+ * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED
+ * @stat: where to store the resulting values
+ * @request_mask: STATX_* values requested
+ * @inode: inode from which to grab the c/mtime
+ *
+ * Given @inode, grab the ctime and mtime out if it and store the result
+ * in @stat. When fetching the value, flag it as QUERIED (if not already)
+ * so the next write will record a distinct timestamp.
+ *
+ * NB: The QUERIED flag is tracked in the ctime, but we set it there even
+ * if only the mtime was requested, as that ensures that the next mtime
+ * change will be distinct.
+ */
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
+{
+ atomic_t *pcn = (atomic_t *)&inode->i_ctime_nsec;
+
+ /* If neither time was requested, then don't report them */
+ if (!(request_mask & (STATX_CTIME|STATX_MTIME))) {
+ stat->result_mask &= ~(STATX_CTIME|STATX_MTIME);
+ return;
+ }
+
+ stat->mtime = inode_get_mtime(inode);
+ stat->ctime.tv_sec = inode->i_ctime_sec;
+ stat->ctime.tv_nsec = (u32)atomic_read(pcn);
+ if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED))
+ stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn));
+ stat->ctime.tv_nsec &= ~I_CTIME_QUERIED;
+ trace_fill_mg_cmtime(inode, &stat->ctime, &stat->mtime);
+}
+EXPORT_SYMBOL(fill_mg_cmtime);
+
+/**
* generic_fillattr - Fill in the basic attributes from the inode struct
* @idmap: idmap of the mount the inode was found from
* @request_mask: statx request_mask
@@ -58,8 +94,14 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
stat->rdev = inode->i_rdev;
stat->size = i_size_read(inode);
stat->atime = inode_get_atime(inode);
- stat->mtime = inode_get_mtime(inode);
- stat->ctime = inode_get_ctime(inode);
+
+ if (is_mgtime(inode)) {
+ fill_mg_cmtime(stat, request_mask, inode);
+ } else {
+ stat->ctime = inode_get_ctime(inode);
+ stat->mtime = inode_get_mtime(inode);
+ }
+
stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks;
diff --git a/fs/super.c b/fs/super.c
index 1db230432960..c9c7223bc2a2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1596,13 +1596,14 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
EXPORT_SYMBOL_GPL(setup_bdev_super);
/**
- * get_tree_bdev - Get a superblock based on a single block device
+ * get_tree_bdev_flags - Get a superblock based on a single block device
* @fc: The filesystem context holding the parameters
* @fill_super: Helper to initialise a new superblock
+ * @flags: GET_TREE_BDEV_* flags
*/
-int get_tree_bdev(struct fs_context *fc,
- int (*fill_super)(struct super_block *,
- struct fs_context *))
+int get_tree_bdev_flags(struct fs_context *fc,
+ int (*fill_super)(struct super_block *sb,
+ struct fs_context *fc), unsigned int flags)
{
struct super_block *s;
int error = 0;
@@ -1613,10 +1614,10 @@ int get_tree_bdev(struct fs_context *fc,
error = lookup_bdev(fc->source, &dev);
if (error) {
- errorf(fc, "%s: Can't lookup blockdev", fc->source);
+ if (!(flags & GET_TREE_BDEV_QUIET_LOOKUP))
+ errorf(fc, "%s: Can't lookup blockdev", fc->source);
return error;
}
-
fc->sb_flags |= SB_NOSEC;
s = sget_dev(fc, dev);
if (IS_ERR(s))
@@ -1644,6 +1645,19 @@ int get_tree_bdev(struct fs_context *fc,
fc->root = dget(s->s_root);
return 0;
}
+EXPORT_SYMBOL_GPL(get_tree_bdev_flags);
+
+/**
+ * get_tree_bdev - Get a superblock based on a single block device
+ * @fc: The filesystem context holding the parameters
+ * @fill_super: Helper to initialise a new superblock
+ */
+int get_tree_bdev(struct fs_context *fc,
+ int (*fill_super)(struct super_block *,
+ struct fs_context *))
+{
+ return get_tree_bdev_flags(fc, fill_super, 0);
+}
EXPORT_SYMBOL(get_tree_bdev);
static int test_bdev_super(struct super_block *s, void *data)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 1748dff58c3b..cfc614c638da 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -392,6 +392,9 @@ static int tracefs_reconfigure(struct fs_context *fc)
struct tracefs_fs_info *sb_opts = sb->s_fs_info;
struct tracefs_fs_info *new_opts = fc->s_fs_info;
+ if (!new_opts)
+ return 0;
+
sync_filesystem(sb);
/* structure copy of new mount options to sb */
*sb_opts = *new_opts;
@@ -478,14 +481,17 @@ static int tracefs_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_op = &tracefs_super_operations;
sb->s_d_op = &tracefs_dentry_operations;
- tracefs_apply_options(sb, false);
-
return 0;
}
static int tracefs_get_tree(struct fs_context *fc)
{
- return get_tree_single(fc, tracefs_fill_super);
+ int err = get_tree_single(fc, tracefs_fill_super);
+
+ if (err)
+ return err;
+
+ return tracefs_reconfigure(fc);
}
static void tracefs_free_fc(struct fs_context *fc)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 291583005dd1..3fb308b6e167 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -19,9 +19,9 @@
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/kthread.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/seq_file.h>
-#include <linux/mount.h>
#include <linux/math64.h>
#include <linux/writeback.h>
#include "ubifs.h"
@@ -981,177 +981,120 @@ enum {
Opt_auth_key,
Opt_auth_hash_name,
Opt_ignore,
- Opt_err,
};
-static const match_table_t tokens = {
- {Opt_fast_unmount, "fast_unmount"},
- {Opt_norm_unmount, "norm_unmount"},
- {Opt_bulk_read, "bulk_read"},
- {Opt_no_bulk_read, "no_bulk_read"},
- {Opt_chk_data_crc, "chk_data_crc"},
- {Opt_no_chk_data_crc, "no_chk_data_crc"},
- {Opt_override_compr, "compr=%s"},
- {Opt_auth_key, "auth_key=%s"},
- {Opt_auth_hash_name, "auth_hash_name=%s"},
- {Opt_ignore, "ubi=%s"},
- {Opt_ignore, "vol=%s"},
- {Opt_assert, "assert=%s"},
- {Opt_err, NULL},
+static const struct constant_table ubifs_param_compr[] = {
+ { "none", UBIFS_COMPR_NONE },
+ { "lzo", UBIFS_COMPR_LZO },
+ { "zlib", UBIFS_COMPR_ZLIB },
+ { "zstd", UBIFS_COMPR_ZSTD },
+ {}
};
-/**
- * parse_standard_option - parse a standard mount option.
- * @option: the option to parse
- *
- * Normally, standard mount options like "sync" are passed to file-systems as
- * flags. However, when a "rootflags=" kernel boot parameter is used, they may
- * be present in the options string. This function tries to deal with this
- * situation and parse standard options. Returns 0 if the option was not
- * recognized, and the corresponding integer flag if it was.
- *
- * UBIFS is only interested in the "sync" option, so do not check for anything
- * else.
- */
-static int parse_standard_option(const char *option)
-{
+static const struct constant_table ubifs_param_assert[] = {
+ { "report", ASSACT_REPORT },
+ { "read-only", ASSACT_RO },
+ { "panic", ASSACT_PANIC },
+ {}
+};
- pr_notice("UBIFS: parse %s\n", option);
- if (!strcmp(option, "sync"))
- return SB_SYNCHRONOUS;
- return 0;
-}
+static const struct fs_parameter_spec ubifs_fs_param_spec[] = {
+ fsparam_flag ("fast_unmount", Opt_fast_unmount),
+ fsparam_flag ("norm_unmount", Opt_norm_unmount),
+ fsparam_flag ("bulk_read", Opt_bulk_read),
+ fsparam_flag ("no_bulk_read", Opt_no_bulk_read),
+ fsparam_flag ("chk_data_crc", Opt_chk_data_crc),
+ fsparam_flag ("no_chk_data_crc", Opt_no_chk_data_crc),
+ fsparam_enum ("compr", Opt_override_compr, ubifs_param_compr),
+ fsparam_enum ("assert", Opt_assert, ubifs_param_assert),
+ fsparam_string ("auth_key", Opt_auth_key),
+ fsparam_string ("auth_hash_name", Opt_auth_hash_name),
+ fsparam_string ("ubi", Opt_ignore),
+ fsparam_string ("vol", Opt_ignore),
+ {}
+};
+
+struct ubifs_fs_context {
+ struct ubifs_mount_opts mount_opts;
+ char *auth_key_name;
+ char *auth_hash_name;
+ unsigned int no_chk_data_crc:1;
+ unsigned int bulk_read:1;
+ unsigned int default_compr:2;
+ unsigned int assert_action:2;
+};
/**
- * ubifs_parse_options - parse mount parameters.
- * @c: UBIFS file-system description object
- * @options: parameters to parse
- * @is_remount: non-zero if this is FS re-mount
+ * ubifs_parse_param - parse a parameter.
+ * @fc: the filesystem context
+ * @param: the parameter to parse
*
* This function parses UBIFS mount options and returns zero in case success
* and a negative error code in case of failure.
*/
-static int ubifs_parse_options(struct ubifs_info *c, char *options,
- int is_remount)
+static int ubifs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- char *p;
- substring_t args[MAX_OPT_ARGS];
-
- if (!options)
- return 0;
+ struct ubifs_fs_context *ctx = fc->fs_private;
+ struct fs_parse_result result;
+ bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE);
+ int opt;
- while ((p = strsep(&options, ","))) {
- int token;
+ opt = fs_parse(fc, ubifs_fs_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
+ switch (opt) {
/*
* %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
* We accept them in order to be backward-compatible. But this
* should be removed at some point.
*/
- case Opt_fast_unmount:
- c->mount_opts.unmount_mode = 2;
- break;
- case Opt_norm_unmount:
- c->mount_opts.unmount_mode = 1;
- break;
- case Opt_bulk_read:
- c->mount_opts.bulk_read = 2;
- c->bulk_read = 1;
- break;
- case Opt_no_bulk_read:
- c->mount_opts.bulk_read = 1;
- c->bulk_read = 0;
- break;
- case Opt_chk_data_crc:
- c->mount_opts.chk_data_crc = 2;
- c->no_chk_data_crc = 0;
- break;
- case Opt_no_chk_data_crc:
- c->mount_opts.chk_data_crc = 1;
- c->no_chk_data_crc = 1;
- break;
- case Opt_override_compr:
- {
- char *name = match_strdup(&args[0]);
-
- if (!name)
- return -ENOMEM;
- if (!strcmp(name, "none"))
- c->mount_opts.compr_type = UBIFS_COMPR_NONE;
- else if (!strcmp(name, "lzo"))
- c->mount_opts.compr_type = UBIFS_COMPR_LZO;
- else if (!strcmp(name, "zlib"))
- c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
- else if (!strcmp(name, "zstd"))
- c->mount_opts.compr_type = UBIFS_COMPR_ZSTD;
- else {
- ubifs_err(c, "unknown compressor \"%s\"", name); //FIXME: is c ready?
- kfree(name);
- return -EINVAL;
- }
- kfree(name);
- c->mount_opts.override_compr = 1;
- c->default_compr = c->mount_opts.compr_type;
- break;
- }
- case Opt_assert:
- {
- char *act = match_strdup(&args[0]);
-
- if (!act)
- return -ENOMEM;
- if (!strcmp(act, "report"))
- c->assert_action = ASSACT_REPORT;
- else if (!strcmp(act, "read-only"))
- c->assert_action = ASSACT_RO;
- else if (!strcmp(act, "panic"))
- c->assert_action = ASSACT_PANIC;
- else {
- ubifs_err(c, "unknown assert action \"%s\"", act);
- kfree(act);
- return -EINVAL;
- }
- kfree(act);
- break;
- }
- case Opt_auth_key:
- if (!is_remount) {
- c->auth_key_name = kstrdup(args[0].from,
- GFP_KERNEL);
- if (!c->auth_key_name)
- return -ENOMEM;
- }
- break;
- case Opt_auth_hash_name:
- if (!is_remount) {
- c->auth_hash_name = kstrdup(args[0].from,
- GFP_KERNEL);
- if (!c->auth_hash_name)
- return -ENOMEM;
- }
- break;
- case Opt_ignore:
- break;
- default:
- {
- unsigned long flag;
- struct super_block *sb = c->vfs_sb;
-
- flag = parse_standard_option(p);
- if (!flag) {
- ubifs_err(c, "unrecognized mount option \"%s\" or missing value",
- p);
- return -EINVAL;
- }
- sb->s_flags |= flag;
- break;
+ case Opt_fast_unmount:
+ ctx->mount_opts.unmount_mode = 2;
+ break;
+ case Opt_norm_unmount:
+ ctx->mount_opts.unmount_mode = 1;
+ break;
+ case Opt_bulk_read:
+ ctx->mount_opts.bulk_read = 2;
+ ctx->bulk_read = 1;
+ break;
+ case Opt_no_bulk_read:
+ ctx->mount_opts.bulk_read = 1;
+ ctx->bulk_read = 0;
+ break;
+ case Opt_chk_data_crc:
+ ctx->mount_opts.chk_data_crc = 2;
+ ctx->no_chk_data_crc = 0;
+ break;
+ case Opt_no_chk_data_crc:
+ ctx->mount_opts.chk_data_crc = 1;
+ ctx->no_chk_data_crc = 1;
+ break;
+ case Opt_override_compr:
+ ctx->mount_opts.compr_type = result.uint_32;
+ ctx->mount_opts.override_compr = 1;
+ ctx->default_compr = ctx->mount_opts.compr_type;
+ break;
+ case Opt_assert:
+ ctx->assert_action = result.uint_32;
+ break;
+ case Opt_auth_key:
+ if (!is_remount) {
+ kfree(ctx->auth_key_name);
+ ctx->auth_key_name = param->string;
+ param->string = NULL;
}
+ break;
+ case Opt_auth_hash_name:
+ if (!is_remount) {
+ kfree(ctx->auth_hash_name);
+ ctx->auth_hash_name = param->string;
+ param->string = NULL;
}
+ break;
+ case Opt_ignore:
+ break;
}
return 0;
@@ -2003,21 +1946,27 @@ static void ubifs_put_super(struct super_block *sb)
mutex_unlock(&c->umount_mutex);
}
-static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
+static int ubifs_reconfigure(struct fs_context *fc)
{
+ struct ubifs_fs_context *ctx = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
int err;
struct ubifs_info *c = sb->s_fs_info;
sync_filesystem(sb);
- dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
+ dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, fc->sb_flags);
- err = ubifs_parse_options(c, data, 1);
- if (err) {
- ubifs_err(c, "invalid or unknown remount parameter");
- return err;
- }
+ /*
+ * Apply the mount option changes.
+ * auth_key_name and auth_hash_name are ignored on remount.
+ */
+ c->mount_opts = ctx->mount_opts;
+ c->bulk_read = ctx->bulk_read;
+ c->no_chk_data_crc = ctx->no_chk_data_crc;
+ c->default_compr = ctx->default_compr;
+ c->assert_action = ctx->assert_action;
- if (c->ro_mount && !(*flags & SB_RDONLY)) {
+ if (c->ro_mount && !(fc->sb_flags & SB_RDONLY)) {
if (c->ro_error) {
ubifs_msg(c, "cannot re-mount R/W due to prior errors");
return -EROFS;
@@ -2029,7 +1978,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
err = ubifs_remount_rw(c);
if (err)
return err;
- } else if (!c->ro_mount && (*flags & SB_RDONLY)) {
+ } else if (!c->ro_mount && (fc->sb_flags & SB_RDONLY)) {
if (c->ro_error) {
ubifs_msg(c, "cannot re-mount R/O due to prior errors");
return -EROFS;
@@ -2062,14 +2011,13 @@ const struct super_operations ubifs_super_operations = {
.evict_inode = ubifs_evict_inode,
.statfs = ubifs_statfs,
.dirty_inode = ubifs_dirty_inode,
- .remount_fs = ubifs_remount_fs,
.show_options = ubifs_show_options,
.sync_fs = ubifs_sync_fs,
};
/**
* open_ubi - parse UBI device name string and open the UBI device.
- * @name: UBI volume name
+ * @fc: The filesystem context
* @mode: UBI volume open mode
*
* The primary method of mounting UBIFS is by specifying the UBI volume
@@ -2086,15 +2034,13 @@ const struct super_operations ubifs_super_operations = {
* returns UBI volume description object in case of success and a negative
* error code in case of failure.
*/
-static struct ubi_volume_desc *open_ubi(const char *name, int mode)
+static struct ubi_volume_desc *open_ubi(struct fs_context *fc, int mode)
{
struct ubi_volume_desc *ubi;
+ const char *name = fc->source;
int dev, vol;
char *endptr;
- if (!name || !*name)
- return ERR_PTR(-EINVAL);
-
/* First, try to open using the device node path method */
ubi = ubi_open_volume_path(name, mode);
if (!IS_ERR(ubi))
@@ -2102,14 +2048,14 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
/* Try the "nodev" method */
if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
- return ERR_PTR(-EINVAL);
+ goto invalid_source;
/* ubi:NAME method */
if ((name[3] == ':' || name[3] == '!') && name[4] != '\0')
return ubi_open_volume_nm(0, name + 4, mode);
if (!isdigit(name[3]))
- return ERR_PTR(-EINVAL);
+ goto invalid_source;
dev = simple_strtoul(name + 3, &endptr, 0);
@@ -2121,7 +2067,7 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
if (*endptr == '_' && isdigit(endptr[1])) {
vol = simple_strtoul(endptr + 1, &endptr, 0);
if (*endptr != '\0')
- return ERR_PTR(-EINVAL);
+ goto invalid_source;
return ubi_open_volume(dev, vol, mode);
}
@@ -2129,7 +2075,8 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0')
return ubi_open_volume_nm(dev, ++endptr, mode);
- return ERR_PTR(-EINVAL);
+invalid_source:
+ return ERR_PTR(invalf(fc, "Invalid source name"));
}
static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
@@ -2181,9 +2128,10 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
return c;
}
-static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
+static int ubifs_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct ubifs_info *c = sb->s_fs_info;
+ struct ubifs_fs_context *ctx = fc->fs_private;
struct inode *root;
int err;
@@ -2195,9 +2143,18 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
goto out;
}
- err = ubifs_parse_options(c, data, 0);
- if (err)
- goto out_close;
+ /* Copy in parsed mount options */
+ c->mount_opts = ctx->mount_opts;
+ c->auth_key_name = ctx->auth_key_name;
+ c->auth_hash_name = ctx->auth_hash_name;
+ c->no_chk_data_crc = ctx->no_chk_data_crc;
+ c->bulk_read = ctx->bulk_read;
+ c->default_compr = ctx->default_compr;
+ c->assert_action = ctx->assert_action;
+
+ /* ubifs_info owns auth strings now */
+ ctx->auth_key_name = NULL;
+ ctx->auth_hash_name = NULL;
/*
* UBIFS provides 'backing_dev_info' in order to disable read-ahead. For
@@ -2264,41 +2221,38 @@ out:
return err;
}
-static int sb_test(struct super_block *sb, void *data)
+static int sb_test(struct super_block *sb, struct fs_context *fc)
{
- struct ubifs_info *c1 = data;
+ struct ubifs_info *c1 = fc->s_fs_info;
struct ubifs_info *c = sb->s_fs_info;
return c->vi.cdev == c1->vi.cdev;
}
-static int sb_set(struct super_block *sb, void *data)
-{
- sb->s_fs_info = data;
- return set_anon_super(sb, NULL);
-}
-
-static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
- const char *name, void *data)
+static int ubifs_get_tree(struct fs_context *fc)
{
struct ubi_volume_desc *ubi;
struct ubifs_info *c;
struct super_block *sb;
int err;
- dbg_gen("name %s, flags %#x", name, flags);
+ if (!fc->source || !*fc->source)
+ return invalf(fc, "No source specified");
+
+ dbg_gen("name %s, flags %#x", fc->source, fc->sb_flags);
/*
* Get UBI device number and volume ID. Mount it read-only so far
* because this might be a new mount point, and UBI allows only one
* read-write user at a time.
*/
- ubi = open_ubi(name, UBI_READONLY);
+ ubi = open_ubi(fc, UBI_READONLY);
if (IS_ERR(ubi)) {
- if (!(flags & SB_SILENT))
+ err = PTR_ERR(ubi);
+ if (!(fc->sb_flags & SB_SILENT))
pr_err("UBIFS error (pid: %d): cannot open \"%s\", error %d",
- current->pid, name, (int)PTR_ERR(ubi));
- return ERR_CAST(ubi);
+ current->pid, fc->source, err);
+ return err;
}
c = alloc_ubifs_info(ubi);
@@ -2306,10 +2260,11 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
err = -ENOMEM;
goto out_close;
}
+ fc->s_fs_info = c;
dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
- sb = sget(fs_type, sb_test, sb_set, flags, c);
+ sb = sget_fc(fc, sb_test, set_anon_super_fc);
if (IS_ERR(sb)) {
err = PTR_ERR(sb);
kfree(c);
@@ -2321,12 +2276,12 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
kfree(c);
/* A new mount point for already mounted UBIFS */
dbg_gen("this ubi volume is already mounted");
- if (!!(flags & SB_RDONLY) != c1->ro_mount) {
+ if (!!(fc->sb_flags & SB_RDONLY) != c1->ro_mount) {
err = -EBUSY;
goto out_deact;
}
} else {
- err = ubifs_fill_super(sb, data, flags & SB_SILENT ? 1 : 0);
+ err = ubifs_fill_super(sb, fc);
if (err)
goto out_deact;
/* We do not support atime */
@@ -2340,13 +2295,14 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
/* 'fill_super()' opens ubi again so we must close it here */
ubi_close_volume(ubi);
- return dget(sb->s_root);
+ fc->root = dget(sb->s_root);
+ return 0;
out_deact:
deactivate_locked_super(sb);
out_close:
ubi_close_volume(ubi);
- return ERR_PTR(err);
+ return err;
}
static void kill_ubifs_super(struct super_block *s)
@@ -2356,10 +2312,61 @@ static void kill_ubifs_super(struct super_block *s)
kfree(c);
}
+static void ubifs_free_fc(struct fs_context *fc)
+{
+ struct ubifs_fs_context *ctx = fc->fs_private;
+
+ if (ctx) {
+ kfree(ctx->auth_key_name);
+ kfree(ctx->auth_hash_name);
+ kfree(ctx);
+ }
+}
+
+static const struct fs_context_operations ubifs_context_ops = {
+ .free = ubifs_free_fc,
+ .parse_param = ubifs_parse_param,
+ .get_tree = ubifs_get_tree,
+ .reconfigure = ubifs_reconfigure,
+};
+
+static int ubifs_init_fs_context(struct fs_context *fc)
+{
+ struct ubifs_fs_context *ctx;
+
+ ctx = kzalloc(sizeof(struct ubifs_fs_context), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE) {
+ /* Iniitialize for first mount */
+ ctx->no_chk_data_crc = 1;
+ ctx->assert_action = ASSACT_RO;
+ } else {
+ struct ubifs_info *c = fc->root->d_sb->s_fs_info;
+
+ /*
+ * Preserve existing options across remounts.
+ * auth_key_name and auth_hash_name are not remountable.
+ */
+ ctx->mount_opts = c->mount_opts;
+ ctx->bulk_read = c->bulk_read;
+ ctx->no_chk_data_crc = c->no_chk_data_crc;
+ ctx->default_compr = c->default_compr;
+ ctx->assert_action = c->assert_action;
+ }
+
+ fc->ops = &ubifs_context_ops;
+ fc->fs_private = ctx;
+
+ return 0;
+}
+
static struct file_system_type ubifs_fs_type = {
.name = "ubifs",
.owner = THIS_MODULE,
- .mount = ubifs_mount,
+ .init_fs_context = ubifs_init_fs_context,
+ .parameters = ubifs_fs_param_spec,
.kill_sb = kill_ubifs_super,
};
MODULE_ALIAS_FS("ubifs");
diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 8395066341a4..7f7cb14e01ce 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -214,3 +214,29 @@ void utf8_unload(struct unicode_map *um)
}
EXPORT_SYMBOL(utf8_unload);
+/**
+ * utf8_parse_version - Parse a UTF-8 version number from a string
+ *
+ * @version: input string
+ *
+ * Returns the parsed version on success, negative code on error
+ */
+int utf8_parse_version(char *version)
+{
+ substring_t args[3];
+ unsigned int maj, min, rev;
+ static const struct match_token token[] = {
+ {1, "%d.%d.%d"},
+ {0, NULL}
+ };
+
+ if (match_token(version, token, args) != 1)
+ return -EINVAL;
+
+ if (match_int(&args[0], &maj) || match_int(&args[1], &min) ||
+ match_int(&args[2], &rev))
+ return -EINVAL;
+
+ return UNICODE_AGE(maj, min, rev);
+}
+EXPORT_SYMBOL(utf8_parse_version);
diff --git a/fs/unicode/utf8-selftest.c b/fs/unicode/utf8-selftest.c
index 600e15efe9ed..5ddaf27b21a6 100644
--- a/fs/unicode/utf8-selftest.c
+++ b/fs/unicode/utf8-selftest.c
@@ -17,9 +17,6 @@
static unsigned int failed_tests;
static unsigned int total_tests;
-/* Tests will be based on this version. */
-#define UTF8_LATEST UNICODE_AGE(12, 1, 0)
-
#define _test(cond, func, line, fmt, ...) do { \
total_tests++; \
if (!cond) { \
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 68cdd89c97a3..7c0bd0b55f88 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -692,6 +692,34 @@ void dup_userfaultfd_complete(struct list_head *fcs)
}
}
+void dup_userfaultfd_fail(struct list_head *fcs)
+{
+ struct userfaultfd_fork_ctx *fctx, *n;
+
+ /*
+ * An error has occurred on fork, we will tear memory down, but have
+ * allocated memory for fctx's and raised reference counts for both the
+ * original and child contexts (and on the mm for each as a result).
+ *
+ * These would ordinarily be taken care of by a user handling the event,
+ * but we are no longer doing so, so manually clean up here.
+ *
+ * mm tear down will take care of cleaning up VMA contexts.
+ */
+ list_for_each_entry_safe(fctx, n, fcs, list) {
+ struct userfaultfd_ctx *octx = fctx->orig;
+ struct userfaultfd_ctx *ctx = fctx->new;
+
+ atomic_dec(&octx->mmap_changing);
+ VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);
+ userfaultfd_ctx_put(octx);
+ userfaultfd_ctx_put(ctx);
+
+ list_del(&fctx->list);
+ kfree(fctx);
+ }
+}
+
void mremap_userfaultfd_prep(struct vm_area_struct *vma,
struct vm_userfaultfd_ctx *vm_ctx)
{
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 04f64cf9777e..22bdbb3e9980 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -1923,7 +1923,7 @@ restart:
error = -EFSCORRUPTED;
goto error0;
}
- if (flen < bestrlen)
+ if (flen <= bestrlen)
break;
busy = xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen, &busy_gen);
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 3c40f37e82c7..c962ad64b0c1 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -62,12 +62,12 @@ xfs_trans_ichgtime(
ASSERT(tp);
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- tv = current_time(inode);
+ /* If the mtime changes, then ctime must also change */
+ ASSERT(flags & XFS_ICHGTIME_CHG);
+ tv = inode_set_ctime_current(inode);
if (flags & XFS_ICHGTIME_MOD)
inode_set_mtime_to_ts(inode, tv);
- if (flags & XFS_ICHGTIME_CHG)
- inode_set_ctime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_ACCESS)
inode_set_atime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CREATE)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa4dbda7b536..e8196f5778e2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2115,6 +2115,13 @@ xfs_alloc_buftarg(
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
mp, ops);
+ if (bdev_can_atomic_write(btp->bt_bdev)) {
+ btp->bt_bdev_awu_min = bdev_atomic_write_unit_min_bytes(
+ btp->bt_bdev);
+ btp->bt_bdev_awu_max = bdev_atomic_write_unit_max_bytes(
+ btp->bt_bdev);
+ }
+
/*
* When allocating the buftargs we have not yet read the super block and
* thus don't know the file system sector size yet.
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 209a389f2abc..3d56bc7a35cc 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -124,6 +124,10 @@ struct xfs_buftarg {
struct percpu_counter bt_io_count;
struct ratelimit_state bt_ioerror_rl;
+ /* Atomic write unit values */
+ unsigned int bt_bdev_awu_min;
+ unsigned int bt_bdev_awu_max;
+
/* built-in cache, if we're not using the perag one */
struct xfs_buf_cache bt_cache[];
};
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b19916b11fd5..ca47cae5a40a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -852,6 +852,20 @@ xfs_file_write_iter(
if (IS_DAX(inode))
return xfs_file_dax_write(iocb, from);
+ if (iocb->ki_flags & IOCB_ATOMIC) {
+ /*
+ * Currently only atomic writing of a single FS block is
+ * supported. It would be possible to atomic write smaller than
+ * a FS block, but there is no requirement to support this.
+ * Note that iomap also does not support this yet.
+ */
+ if (ocount != ip->i_mount->m_sb.sb_blocksize)
+ return -EINVAL;
+ ret = generic_atomic_write_valid(iocb, from);
+ if (ret)
+ return ret;
+ }
+
if (iocb->ki_flags & IOCB_DIRECT) {
/*
* Allow a directio write to fall back to a buffered
@@ -1239,6 +1253,8 @@ xfs_file_open(
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
+ if (xfs_inode_can_atomicwrite(XFS_I(inode)))
+ file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
return generic_file_open(inode, file);
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index e3aaa0555597..290ba8887d29 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -64,25 +64,31 @@ xfs_filestream_pick_ag(
struct xfs_perag *pag;
struct xfs_perag *max_pag = NULL;
xfs_extlen_t minlen = *longest;
- xfs_extlen_t free = 0, minfree, maxfree = 0;
+ xfs_extlen_t minfree, maxfree = 0;
xfs_agnumber_t agno;
bool first_pass = true;
- int err;
/* 2% of an AG's blocks must be free for it to be chosen. */
minfree = mp->m_sb.sb_agblocks / 50;
restart:
for_each_perag_wrap(mp, start_agno, agno, pag) {
+ int err;
+
trace_xfs_filestream_scan(pag, pino);
+
*longest = 0;
err = xfs_bmap_longest_free_extent(pag, NULL, longest);
if (err) {
- if (err != -EAGAIN)
- break;
- /* Couldn't lock the AGF, skip this AG. */
- err = 0;
- continue;
+ if (err == -EAGAIN) {
+ /* Couldn't lock the AGF, skip this AG. */
+ err = 0;
+ continue;
+ }
+ xfs_perag_rele(pag);
+ if (max_pag)
+ xfs_perag_rele(max_pag);
+ return err;
}
/* Keep track of the AG with the most free blocks. */
@@ -107,8 +113,9 @@ restart:
!(flags & XFS_PICK_USERDATA) ||
(flags & XFS_PICK_LOWSPACE))) {
/* Break out, retaining the reference on the AG. */
- free = pag->pagf_freeblks;
- break;
+ if (max_pag)
+ xfs_perag_rele(max_pag);
+ goto done;
}
}
@@ -116,57 +123,47 @@ restart:
atomic_dec(&pag->pagf_fstrms);
}
- if (err) {
- xfs_perag_rele(pag);
- if (max_pag)
- xfs_perag_rele(max_pag);
- return err;
+ /*
+ * Allow a second pass to give xfs_bmap_longest_free_extent() another
+ * attempt at locking AGFs that it might have skipped over before we
+ * fail.
+ */
+ if (first_pass) {
+ first_pass = false;
+ goto restart;
}
- if (!pag) {
- /*
- * Allow a second pass to give xfs_bmap_longest_free_extent()
- * another attempt at locking AGFs that it might have skipped
- * over before we fail.
- */
- if (first_pass) {
- first_pass = false;
- goto restart;
- }
+ /*
+ * We must be low on data space, so run a final lowspace optimised
+ * selection pass if we haven't already.
+ */
+ if (!(flags & XFS_PICK_LOWSPACE)) {
+ flags |= XFS_PICK_LOWSPACE;
+ goto restart;
+ }
- /*
- * We must be low on data space, so run a final lowspace
- * optimised selection pass if we haven't already.
- */
- if (!(flags & XFS_PICK_LOWSPACE)) {
- flags |= XFS_PICK_LOWSPACE;
- goto restart;
+ /*
+ * No unassociated AGs are available, so select the AG with the most
+ * free space, regardless of whether it's already in use by another
+ * filestream. It none suit, just use whatever AG we can grab.
+ */
+ if (!max_pag) {
+ for_each_perag_wrap(args->mp, 0, start_agno, pag) {
+ max_pag = pag;
+ break;
}
- /*
- * No unassociated AGs are available, so select the AG with the
- * most free space, regardless of whether it's already in use by
- * another filestream. It none suit, just use whatever AG we can
- * grab.
- */
- if (!max_pag) {
- for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
- break;
- atomic_inc(&args->pag->pagf_fstrms);
- *longest = 0;
- } else {
- pag = max_pag;
- free = maxfree;
- atomic_inc(&pag->pagf_fstrms);
- }
- } else if (max_pag) {
- xfs_perag_rele(max_pag);
+ /* Bail if there are no AGs at all to select from. */
+ if (!max_pag)
+ return -ENOSPC;
}
- trace_xfs_filestream_pick(pag, pino, free);
+ pag = max_pag;
+ atomic_inc(&pag->pagf_fstrms);
+done:
+ trace_xfs_filestream_pick(pag, pino);
args->pag = pag;
return 0;
-
}
static struct xfs_inode *
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index bcc277fc0a83..19dcb569a3e7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1409,7 +1409,7 @@ xfs_inactive(
if (S_ISREG(VFS_I(ip)->i_mode) &&
(ip->i_disk_size != 0 || XFS_ISIZE(ip) != 0 ||
- ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
+ xfs_inode_has_filedata(ip)))
truncate = 1;
if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 97ed912306fd..a2a6b5fd2545 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -292,6 +292,11 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
}
+static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
+{
+ return ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0;
+}
+
/*
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
@@ -327,6 +332,21 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
(XFS_IS_REALTIME_INODE(ip) ? \
(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
+static inline bool
+xfs_inode_can_atomicwrite(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+
+ if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
+ return false;
+ if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
+ return false;
+
+ return true;
+}
+
/*
* In-core inode flags.
*/
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index a20d426ef021..2567fd2a0994 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -481,7 +481,7 @@ xfs_ioctl_setattr_xflags(
if (rtflag != XFS_IS_REALTIME_INODE(ip)) {
/* Can't change realtime flag if any extents are allocated. */
- if (ip->i_df.if_nextents || ip->i_delayed_blks)
+ if (xfs_inode_has_filedata(ip))
return -EINVAL;
/*
@@ -602,7 +602,7 @@ xfs_ioctl_setattr_check_extsize(
if (!fa->fsx_valid)
return 0;
- if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_df.if_nextents &&
+ if (S_ISREG(VFS_I(ip)->i_mode) && xfs_inode_has_filedata(ip) &&
XFS_FSB_TO_B(mp, ip->i_extsize) != fa->fsx_extsize)
return -EINVAL;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 916531d9f83c..86da16f54be9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -707,7 +707,7 @@ imap_needs_cow(
return false;
/* when zeroing we don't have to COW holes or unwritten extents */
- if (flags & IOMAP_ZERO) {
+ if (flags & (IOMAP_UNSHARE | IOMAP_ZERO)) {
if (!nimaps ||
imap->br_startblock == HOLESTARTBLOCK ||
imap->br_state == XFS_EXT_UNWRITTEN)
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ee79cf161312..4084d26f0d78 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -570,6 +570,20 @@ xfs_stat_blksize(
return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
}
+static void
+xfs_get_atomic_write_attr(
+ struct xfs_inode *ip,
+ unsigned int *unit_min,
+ unsigned int *unit_max)
+{
+ if (!xfs_inode_can_atomicwrite(ip)) {
+ *unit_min = *unit_max = 0;
+ return;
+ }
+
+ *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+}
+
STATIC int
xfs_vn_getattr(
struct mnt_idmap *idmap,
@@ -597,8 +611,9 @@ xfs_vn_getattr(
stat->gid = vfsgid_into_kgid(vfsgid);
stat->ino = ip->i_ino;
stat->atime = inode_get_atime(inode);
- stat->mtime = inode_get_mtime(inode);
- stat->ctime = inode_get_ctime(inode);
+
+ fill_mg_cmtime(stat, request_mask, inode);
+
stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
if (xfs_has_v3inodes(mp)) {
@@ -608,11 +623,6 @@ xfs_vn_getattr(
}
}
- if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
- stat->change_cookie = inode_query_iversion(inode);
- stat->result_mask |= STATX_CHANGE_COOKIE;
- }
-
/*
* Note: If you add another clause to set an attribute flag, please
* update attributes_mask below.
@@ -643,6 +653,14 @@ xfs_vn_getattr(
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
stat->dio_offset_align = bdev_logical_block_size(bdev);
}
+ if (request_mask & STATX_WRITE_ATOMIC) {
+ unsigned int unit_min, unit_max;
+
+ xfs_get_atomic_write_attr(ip, &unit_min,
+ &unit_max);
+ generic_fill_statx_atomic_writes(stat,
+ unit_min, unit_max);
+ }
fallthrough;
default:
stat->blksize = xfs_stat_blksize(ip);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index fbb3a1594c0d..fda75db739b1 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -2063,7 +2063,7 @@ static struct file_system_type xfs_fs_type = {
.init_fs_context = xfs_init_fs_context,
.parameters = xfs_fs_parameters,
.kill_sb = xfs_kill_sb,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("xfs");
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ee9f0b1f548d..fcb2bad4f76e 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
TRACE_EVENT(xfs_filestream_pick,
- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
- TP_ARGS(pag, ino, free),
+ TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
+ TP_ARGS(pag, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick,
TP_fast_assign(
__entry->dev = pag->pag_mount->m_super->s_dev;
__entry->ino = ino;
- if (pag) {
- __entry->agno = pag->pag_agno;
- __entry->streams = atomic_read(&pag->pagf_fstrms);
- } else {
- __entry->agno = NULLAGNUMBER;
- __entry->streams = 0;
- }
- __entry->free = free;
+ __entry->agno = pag->pag_agno;
+ __entry->streams = atomic_read(&pag->pagf_fstrms);
+ __entry->free = pag->pagf_freeblks;
),
TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d",
MAJOR(__entry->dev), MINOR(__entry->dev),