Diffstat (limited to 'mm/shmem.c')
-rw-r--r--	mm/shmem.c | 622
1 file changed, 513 insertions(+), 109 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4f11b5506363..ccb9629a0f70 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -40,6 +40,7 @@
 #include <linux/fs_parser.h>
 #include <linux/swapfile.h>
 #include <linux/iversion.h>
+#include <linux/unicode.h>
 #include "swap.h"
 
 static struct vfsmount *shm_mnt __ro_after_init;
@@ -123,6 +124,10 @@ struct shmem_options {
 	bool noswap;
 	unsigned short quota_types;
 	struct shmem_quota_limits qlimits;
+#if IS_ENABLED(CONFIG_UNICODE)
+	struct unicode_map *encoding;
+	bool strict_encoding;
+#endif
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
@@ -136,6 +141,7 @@ static unsigned long huge_shmem_orders_always __read_mostly;
 static unsigned long huge_shmem_orders_madvise __read_mostly;
 static unsigned long huge_shmem_orders_inherit __read_mostly;
 static unsigned long huge_shmem_orders_within_size __read_mostly;
+static bool shmem_orders_configured __initdata;
 #endif
 
 #ifdef CONFIG_TMPFS
@@ -548,17 +554,15 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
 
-static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-					loff_t write_end, bool shmem_huge_force,
-					struct vm_area_struct *vma,
-					unsigned long vm_flags)
+static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
+				      loff_t write_end, bool shmem_huge_force,
+				      unsigned long vm_flags)
 {
-	struct mm_struct *mm = vma ? vma->vm_mm : NULL;
 	loff_t i_size;
 
-	if (!S_ISREG(inode->i_mode))
+	if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
 		return false;
-	if (mm && ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &mm->flags)))
+	if (!S_ISREG(inode->i_mode))
 		return false;
 	if (shmem_huge == SHMEM_HUGE_DENY)
 		return false;
@@ -576,7 +580,7 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
 			return true;
 		fallthrough;
 	case SHMEM_HUGE_ADVISE:
-		if (mm && (vm_flags & VM_HUGEPAGE))
+		if (vm_flags & VM_HUGEPAGE)
 			return true;
 		fallthrough;
 	default:
@@ -584,35 +588,39 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
 	}
 }
 
-static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-		loff_t write_end, bool shmem_huge_force,
-		struct vm_area_struct *vma, unsigned long vm_flags)
+static int shmem_parse_huge(const char *str)
 {
-	if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
-		return false;
+	int huge;
 
-	return __shmem_huge_global_enabled(inode, index, write_end,
-					   shmem_huge_force, vma, vm_flags);
-}
+	if (!str)
+		return -EINVAL;
 
-#if defined(CONFIG_SYSFS)
-static int shmem_parse_huge(const char *str)
-{
 	if (!strcmp(str, "never"))
-		return SHMEM_HUGE_NEVER;
-	if (!strcmp(str, "always"))
-		return SHMEM_HUGE_ALWAYS;
-	if (!strcmp(str, "within_size"))
-		return SHMEM_HUGE_WITHIN_SIZE;
-	if (!strcmp(str, "advise"))
-		return SHMEM_HUGE_ADVISE;
-	if (!strcmp(str, "deny"))
-		return SHMEM_HUGE_DENY;
-	if (!strcmp(str, "force"))
-		return SHMEM_HUGE_FORCE;
-	return -EINVAL;
+		huge = SHMEM_HUGE_NEVER;
+	else if (!strcmp(str, "always"))
+		huge = SHMEM_HUGE_ALWAYS;
+	else if (!strcmp(str, "within_size"))
+		huge = SHMEM_HUGE_WITHIN_SIZE;
+	else if (!strcmp(str, "advise"))
+		huge = SHMEM_HUGE_ADVISE;
+	else if (!strcmp(str, "deny"))
+		huge = SHMEM_HUGE_DENY;
+	else if (!strcmp(str, "force"))
+		huge = SHMEM_HUGE_FORCE;
+	else
+		return -EINVAL;
+
+	if (!has_transparent_hugepage() &&
+	    huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
+		return -EINVAL;
+
+	/* Do not override huge allocation policy with non-PMD sized mTHP */
+	if (huge == SHMEM_HUGE_FORCE &&
+	    huge_shmem_orders_inherit != BIT(HPAGE_PMD_ORDER))
+		return -EINVAL;
+
+	return huge;
 }
-#endif
 
 #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS)
 static const char *shmem_format_huge(int huge)
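With validation folded into shmem_parse_huge() above, the sysfs knob and the boot-time parsing added later in this patch share a single parser. As an illustrative exercise of the shared keyword set (not part of the patch), the existing sysfs interface accepts the same strings:

    echo within_size > /sys/kernel/mm/transparent_hugepage/shmem_enabled

The keywords "never", "always", "within_size", "advise", "deny" and "force" are likewise what the new transparent_hugepage_shmem= boot parameter, added further down, expects.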
@@ -772,8 +780,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 }
 
 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-		loff_t write_end, bool shmem_huge_force,
-		struct vm_area_struct *vma, unsigned long vm_flags)
+		loff_t write_end, bool shmem_huge_force,
+		unsigned long vm_flags)
 {
 	return false;
 }
@@ -1168,7 +1176,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
 			 STATX_ATTR_NODUMP);
 	generic_fillattr(idmap, request_mask, inode, stat);
 
-	if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0))
+	if (shmem_huge_global_enabled(inode, 0, 0, false, 0))
 		stat->blksize = HPAGE_PMD_SIZE;
 
 	if (request_mask & STATX_BTIME) {
@@ -1653,6 +1661,23 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+bool shmem_hpage_pmd_enabled(void)
+{
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return false;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_always))
+		return true;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_madvise))
+		return true;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_within_size))
+		return true;
+	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_inherit) &&
+	    shmem_huge != SHMEM_HUGE_NEVER)
+		return true;
+
+	return false;
+}
+
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
 				loff_t write_end, bool shmem_huge_force)
@@ -1664,16 +1689,11 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 	loff_t i_size;
 	int order;
 
-	if (vma && ((vm_flags & VM_NOHUGEPAGE) ||
-	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
-		return 0;
-
-	/* If the hardware/firmware marked hugepage support disabled. */
-	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
+	if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
 		return 0;
 
 	global_huge = shmem_huge_global_enabled(inode, index, write_end,
-					shmem_huge_force, vma, vm_flags);
+					shmem_huge_force, vm_flags);
 	if (!vma || !vma_is_anon_shmem(vma)) {
 		/*
 		 * For tmpfs, we now only support PMD sized THP if huge page
@@ -2736,9 +2756,6 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 	if (ret)
 		return ret;
 
-	/* arm64 - allow memory tagging on RAM-based files */
-	vm_flags_set(vma, VM_MTE_ALLOWED);
-
 	file_accessed(file);
 	/* This is anonymous shared memory if it is unlinked at the time of mmap */
 	if (inode->i_nlink)
@@ -2757,13 +2774,62 @@ static int shmem_file_open(struct inode *inode, struct file *file)
 
 #ifdef CONFIG_TMPFS_XATTR
 static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
 
+#if IS_ENABLED(CONFIG_UNICODE)
+/*
+ * shmem_inode_casefold_flags - Deal with casefold file attribute flag
+ *
+ * The casefold file attribute needs some special checks. It can only be added
+ * to an empty dir, and can't be removed from a non-empty dir.
+ */
+static int shmem_inode_casefold_flags(struct inode *inode, unsigned int fsflags,
+				      struct dentry *dentry, unsigned int *i_flags)
+{
+	unsigned int old = inode->i_flags;
+	struct super_block *sb = inode->i_sb;
+
+	if (fsflags & FS_CASEFOLD_FL) {
+		if (!(old & S_CASEFOLD)) {
+			if (!sb->s_encoding)
+				return -EOPNOTSUPP;
+
+			if (!S_ISDIR(inode->i_mode))
+				return -ENOTDIR;
+
+			if (dentry && !simple_empty(dentry))
+				return -ENOTEMPTY;
+		}
+
+		*i_flags = *i_flags | S_CASEFOLD;
+	} else if (old & S_CASEFOLD) {
+		if (dentry && !simple_empty(dentry))
+			return -ENOTEMPTY;
+	}
+
+	return 0;
+}
+#else
+static int shmem_inode_casefold_flags(struct inode *inode, unsigned int fsflags,
+				      struct dentry *dentry, unsigned int *i_flags)
+{
+	if (fsflags & FS_CASEFOLD_FL)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+#endif
+
 /*
  * chattr's fsflags are unrelated to extended attributes,
  * but tmpfs has chosen to enable them under the same config option.
  */
-static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
+static int shmem_set_inode_flags(struct inode *inode, unsigned int fsflags, struct dentry *dentry)
 {
 	unsigned int i_flags = 0;
+	int ret;
+
+	ret = shmem_inode_casefold_flags(inode, fsflags, dentry, &i_flags);
+	if (ret)
+		return ret;
 
 	if (fsflags & FS_NOATIME_FL)
 		i_flags |= S_NOATIME;
@@ -2774,10 +2840,12 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
 	/*
 	 * But FS_NODUMP_FL does not require any action in i_flags.
 	 */
-	inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE);
+	inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE | S_CASEFOLD);
+
+	return 0;
 }
 #else
-static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
+static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags, struct dentry *dentry)
 {
 }
 #define shmem_initxattrs NULL
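The checks above guard the FS_CASEFOLD_FL chattr flag. A minimal userspace sketch of exercising this path (illustrative only, not part of the patch; assumes a tmpfs instance mounted with an encoding and a hypothetical empty directory /mnt/ci/dir):

    /* Illustrative: toggle the casefold attribute, as chattr +F would. */
    #include <fcntl.h>
    #include <linux/fs.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    int main(void)
    {
            int attr;
            int fd = open("/mnt/ci/dir", O_RDONLY);   /* hypothetical path */

            if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &attr) < 0)
                    return 1;
            attr |= FS_CASEFOLD_FL;
            /* Per the checks above, this fails with EOPNOTSUPP (no encoding
             * on the superblock), ENOTDIR (not a directory) or ENOTEMPTY. */
            if (ioctl(fd, FS_IOC_SETFLAGS, &attr) < 0)
                    perror("FS_IOC_SETFLAGS");
            return 0;
    }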
@@ -2824,14 +2892,17 @@ static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
 	info->fsflags = (dir == NULL) ? 0 : SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
 	if (info->fsflags)
-		shmem_set_inode_flags(inode, info->fsflags);
+		shmem_set_inode_flags(inode, info->fsflags, NULL);
 	INIT_LIST_HEAD(&info->shrinklist);
 	INIT_LIST_HEAD(&info->swaplist);
 	simple_xattrs_init(&info->xattrs);
 	cache_no_acl(inode);
 	if (sbinfo->noswap)
 		mapping_set_unevictable(inode->i_mapping);
-	mapping_set_large_folios(inode->i_mapping);
+
+	/* Don't consider 'deny' for emergencies and 'force' for testing */
+	if (sbinfo->huge)
+		mapping_set_large_folios(inode->i_mapping);
 
 	switch (mode & S_IFMT) {
 	default:
@@ -3092,27 +3163,19 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	unsigned long offset;
 	int error = 0;
 	ssize_t retval = 0;
-	loff_t *ppos = &iocb->ki_pos;
-
-	index = *ppos >> PAGE_SHIFT;
-	offset = *ppos & ~PAGE_MASK;
 
 	for (;;) {
 		struct folio *folio = NULL;
 		struct page *page = NULL;
-		pgoff_t end_index;
 		unsigned long nr, ret;
-		loff_t i_size = i_size_read(inode);
+		loff_t end_offset, i_size = i_size_read(inode);
+		bool fallback_page_copy = false;
+		size_t fsize;
 
-		end_index = i_size >> PAGE_SHIFT;
-		if (index > end_index)
+		if (unlikely(iocb->ki_pos >= i_size))
 			break;
-		if (index == end_index) {
-			nr = i_size & ~PAGE_MASK;
-			if (nr <= offset)
-				break;
-		}
 
+		index = iocb->ki_pos >> PAGE_SHIFT;
 		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
 		if (error) {
 			if (error == -EINVAL)
@@ -3128,24 +3191,29 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 				error = -EIO;
 				break;
 			}
+
+			if (folio_test_large(folio) &&
+			    folio_test_has_hwpoisoned(folio))
+				fallback_page_copy = true;
 		}
 
 		/*
 		 * We must evaluate after, since reads (unlike writes)
 		 * are called without i_rwsem protection against truncate
 		 */
-		nr = PAGE_SIZE;
 		i_size = i_size_read(inode);
-		end_index = i_size >> PAGE_SHIFT;
-		if (index == end_index) {
-			nr = i_size & ~PAGE_MASK;
-			if (nr <= offset) {
-				if (folio)
-					folio_put(folio);
-				break;
-			}
+		if (unlikely(iocb->ki_pos >= i_size)) {
+			if (folio)
+				folio_put(folio);
+			break;
 		}
-		nr -= offset;
+
+		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
+		if (folio && likely(!fallback_page_copy))
+			fsize = folio_size(folio);
+		else
+			fsize = PAGE_SIZE;
+		offset = iocb->ki_pos & (fsize - 1);
+		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);
 
 		if (folio) {
 			/*
@@ -3153,10 +3221,15 @@
 			 * If users can be writing to this page using arbitrary
 			 * virtual addresses, take care about potential aliasing
 			 * before reading the page on the kernel side.
 			 */
-			if (mapping_writably_mapped(mapping))
-				flush_dcache_page(page);
+			if (mapping_writably_mapped(mapping)) {
+				if (likely(!fallback_page_copy))
+					flush_dcache_folio(folio);
+				else
+					flush_dcache_page(page);
+			}
+
 			/*
-			 * Mark the page accessed if we read the beginning.
+			 * Mark the folio accessed if we read the beginning.
 			 */
 			if (!offset)
 				folio_mark_accessed(folio);
@@ -3164,9 +3237,11 @@
 			 * Ok, we have the page, and it's up-to-date, so
 			 * now we can copy it to user space...
 			 */
-			ret = copy_page_to_iter(page, offset, nr, to);
+			if (likely(!fallback_page_copy))
+				ret = copy_folio_to_iter(folio, offset, nr, to);
+			else
+				ret = copy_page_to_iter(page, offset, nr, to);
 			folio_put(folio);
-
 		} else if (user_backed_iter(to)) {
 			/*
 			 * Copy to user tends to be so well optimized, but
@@ -3184,9 +3259,7 @@
 		}
 
 		retval += ret;
-		offset += ret;
-		index += offset >> PAGE_SHIFT;
-		offset &= ~PAGE_MASK;
+		iocb->ki_pos += ret;
 
 		if (!iov_iter_count(to))
 			break;
@@ -3197,7 +3270,6 @@
 		cond_resched();
 	}
 
-	*ppos = ((loff_t) index << PAGE_SHIFT) + offset;
 	file_accessed(file);
 	return retval ? retval : error;
 }
@@ -3286,11 +3358,16 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 		len = min_t(size_t, len, npages * PAGE_SIZE);
 
 	do {
+		bool fallback_page_splice = false;
+		struct page *page = NULL;
+		pgoff_t index;
+		size_t size;
+
 		if (*ppos >= i_size_read(inode))
 			break;
 
-		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, 0, &folio,
-					SGP_READ);
+		index = *ppos >> PAGE_SHIFT;
+		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
 		if (error) {
 			if (error == -EINVAL)
 				error = 0;
@@ -3299,12 +3376,15 @@
 
 		if (folio) {
 			folio_unlock(folio);
-			if (folio_test_hwpoison(folio) ||
-			    (folio_test_large(folio) &&
-			     folio_test_has_hwpoisoned(folio))) {
+			page = folio_file_page(folio, index);
+			if (PageHWPoison(page)) {
 				error = -EIO;
 				break;
 			}
+
+			if (folio_test_large(folio) &&
+			    folio_test_has_hwpoisoned(folio))
+				fallback_page_splice = true;
 		}
 
 		/*
@@ -3318,7 +3398,17 @@
 		isize = i_size_read(inode);
 		if (unlikely(*ppos >= isize))
 			break;
-		part = min_t(loff_t, isize - *ppos, len);
+		/*
+		 * Fallback to PAGE_SIZE splice if the large folio has hwpoisoned
+		 * pages.
+		 */
+		size = len;
+		if (unlikely(fallback_page_splice)) {
+			size_t offset = *ppos & ~PAGE_MASK;
+
+			size = umin(size, PAGE_SIZE - offset);
+		}
+		part = min_t(loff_t, isize - *ppos, size);
 
 		if (folio) {
 			/*
@@ -3326,8 +3416,12 @@
 			 * If users can be writing to this page using arbitrary
 			 * virtual addresses, take care about potential aliasing
 			 * before reading the page on the kernel side.
 			 */
-			if (mapping_writably_mapped(mapping))
-				flush_dcache_folio(folio);
+			if (mapping_writably_mapped(mapping)) {
+				if (likely(!fallback_page_splice))
+					flush_dcache_folio(folio);
+				else
+					flush_dcache_page(page);
+			}
 			folio_mark_accessed(folio);
 			/*
 			 * Ok, we have the page, and it's up-to-date, so we can
@@ -3570,6 +3664,9 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	struct inode *inode;
 	int error;
 
+	if (!generic_ci_validate_strict_name(dir, &dentry->d_name))
+		return -EINVAL;
+
 	inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -3589,7 +3686,12 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
 	inode_inc_iversion(dir);
-	d_instantiate(dentry, inode);
+
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
+
 	dget(dentry); /* Extra count - pin the dentry in core */
 	return error;
@@ -3680,7 +3782,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir,
 	inc_nlink(inode);
 	ihold(inode);	/* New dentry reference */
 	dget(dentry);	/* Extra pinning count for the created dentry */
-	d_instantiate(dentry, inode);
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
 out:
 	return ret;
 }
@@ -3700,6 +3805,14 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 	inode_inc_iversion(dir);
 	drop_nlink(inode);
 	dput(dentry);	/* Undo the count from "create" - does all the work */
+
+	/*
+	 * For now, VFS can't deal with case-insensitive negative dentries, so
+	 * we invalidate them
+	 */
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_invalidate(dentry);
+
 	return 0;
 }
@@ -3844,7 +3957,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	dir->i_size += BOGO_DIRENT_SIZE;
 	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
 	inode_inc_iversion(dir);
-	d_instantiate(dentry, inode);
+	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
+		d_add(dentry, inode);
+	else
+		d_instantiate(dentry, inode);
 	dget(dentry);
 	return 0;
@@ -3909,16 +4025,23 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap,
 {
 	struct inode *inode = d_inode(dentry);
 	struct shmem_inode_info *info = SHMEM_I(inode);
+	int ret, flags;
 
 	if (fileattr_has_fsx(fa))
 		return -EOPNOTSUPP;
 	if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE)
 		return -EOPNOTSUPP;
 
-	info->fsflags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) |
+	flags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) |
 		(fa->flags & SHMEM_FL_USER_MODIFIABLE);
 
-	shmem_set_inode_flags(inode, info->fsflags);
+	ret = shmem_set_inode_flags(inode, flags, dentry);
+
+	if (ret)
+		return ret;
+
+	info->fsflags = flags;
+
 	inode_set_ctime_current(inode);
 	inode_inc_iversion(inode);
 	return 0;
@@ -4197,6 +4320,9 @@ enum shmem_param {
 	Opt_usrquota_inode_hardlimit,
 	Opt_grpquota_block_hardlimit,
 	Opt_grpquota_inode_hardlimit,
+	Opt_casefold_version,
+	Opt_casefold,
+	Opt_strict_encoding,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -4228,9 +4354,54 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
 	fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
 	fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
 #endif
+	fsparam_string("casefold", Opt_casefold_version),
+	fsparam_flag  ("casefold", Opt_casefold),
+	fsparam_flag  ("strict_encoding", Opt_strict_encoding),
 	{}
 };
 
+#if IS_ENABLED(CONFIG_UNICODE)
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
+				    bool latest_version)
+{
+	struct shmem_options *ctx = fc->fs_private;
+	int version = UTF8_LATEST;
+	struct unicode_map *encoding;
+	char *version_str = param->string + 5;
+
+	if (!latest_version) {
+		if (strncmp(param->string, "utf8-", 5))
+			return invalfc(fc, "Only UTF-8 encodings are supported "
+				       "in the format: utf8-<version number>");
+
+		version = utf8_parse_version(version_str);
+		if (version < 0)
+			return invalfc(fc, "Invalid UTF-8 version: %s", version_str);
+	}
+
+	encoding = utf8_load(version);
+
+	if (IS_ERR(encoding)) {
+		return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n",
+			       unicode_major(version), unicode_minor(version),
+			       unicode_rev(version));
+	}
+
+	pr_info("tmpfs: Using encoding: utf8-%u.%u.%u\n",
+		unicode_major(version), unicode_minor(version), unicode_rev(version));
+
+	ctx->encoding = encoding;
+
+	return 0;
+}
+#else
+static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param,
+				    bool latest_version)
+{
+	return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
+}
+#endif
+
 static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct shmem_options *ctx = fc->fs_private;
@@ -4389,6 +4560,17 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
 				     "Group quota inode hardlimit too large.");
 		ctx->qlimits.grpquota_ihardlimit = size;
 		break;
+	case Opt_casefold_version:
+		return shmem_parse_opt_casefold(fc, param, false);
+	case Opt_casefold:
+		return shmem_parse_opt_casefold(fc, param, true);
+	case Opt_strict_encoding:
+#if IS_ENABLED(CONFIG_UNICODE)
+		ctx->strict_encoding = true;
+		break;
+#else
+		return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n");
+#endif
 	}
 
 	return 0;
@@ -4618,6 +4800,11 @@ static void shmem_put_super(struct super_block *sb)
 {
 	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (sb->s_encoding)
+		utf8_unload(sb->s_encoding);
+#endif
+
 #ifdef CONFIG_TMPFS_QUOTA
 	shmem_disable_quotas(sb);
 #endif
@@ -4628,6 +4815,14 @@ static void shmem_put_super(struct super_block *sb)
 	sb->s_fs_info = NULL;
 }
 
+#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_TMPFS)
+static const struct dentry_operations shmem_ci_dentry_ops = {
+	.d_hash = generic_ci_d_hash,
+	.d_compare = generic_ci_d_compare,
+	.d_delete = always_delete_dentry,
+};
+#endif
+
 static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 {
 	struct shmem_options *ctx = fc->fs_private;
@@ -4662,9 +4857,25 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 	}
 	sb->s_export_op = &shmem_export_ops;
 	sb->s_flags |= SB_NOSEC | SB_I_VERSION;
+
+#if IS_ENABLED(CONFIG_UNICODE)
+	if (!ctx->encoding && ctx->strict_encoding) {
+		pr_err("tmpfs: strict_encoding option without encoding is forbidden\n");
+		error = -EINVAL;
+		goto failed;
+	}
+
+	if (ctx->encoding) {
+		sb->s_encoding = ctx->encoding;
+		sb->s_d_op = &shmem_ci_dentry_ops;
+		if (ctx->strict_encoding)
+			sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL;
+	}
+#endif
+
 #else
 	sb->s_flags |= SB_NOUSER;
-#endif
+#endif /* CONFIG_TMPFS */
 	sbinfo->max_blocks = ctx->blocks;
 	sbinfo->max_inodes = ctx->inodes;
 	sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
@@ -4938,6 +5149,10 @@ int shmem_init_fs_context(struct fs_context *fc)
 	ctx->uid = current_fsuid();
 	ctx->gid = current_fsgid();
 
+#if IS_ENABLED(CONFIG_UNICODE)
+	ctx->encoding = NULL;
+#endif
+
 	fc->fs_private = ctx;
 	fc->ops = &shmem_fs_context_ops;
 	return 0;
 }
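Tying the option parsing together, mounting with the new options might look like the following sketch (illustrative only, not part of the patch; /mnt/ci is a hypothetical mount point, and a bare "casefold" flag would load the latest supported Unicode version instead of pinning one):

    /* Illustrative: mount tmpfs with casefold support via mount(2). */
    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            if (mount("tmpfs", "/mnt/ci", "tmpfs", 0,
                      "casefold=utf8-12.1.0,strict_encoding") != 0) {
                    perror("mount");
                    return 1;
            }
            /* Directories flagged with FS_CASEFOLD_FL under /mnt/ci now
             * match names case-insensitively; strict_encoding additionally
             * rejects names that are not valid UTF-8. */
            return 0;
    }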
@@ -4951,9 +5166,69 @@ static struct file_system_type shmem_fs_type = {
 	.parameters	= shmem_fs_parameters,
 #endif
 	.kill_sb	= kill_litter_super,
-	.fs_flags	= FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+	.fs_flags	= FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME,
 };
 
+#if defined(CONFIG_SYSFS) && defined(CONFIG_TMPFS)
+
+#define __INIT_KOBJ_ATTR(_name, _mode, _show, _store)			\
+{									\
+	.attr = { .name = __stringify(_name), .mode = _mode },		\
+	.show = _show,							\
+	.store = _store,						\
+}
+
+#define TMPFS_ATTR_W(_name, _store)				\
+	static struct kobj_attribute tmpfs_attr_##_name =	\
+			__INIT_KOBJ_ATTR(_name, 0200, NULL, _store)
+
+#define TMPFS_ATTR_RW(_name, _show, _store)			\
+	static struct kobj_attribute tmpfs_attr_##_name =	\
+			__INIT_KOBJ_ATTR(_name, 0644, _show, _store)
+
+#define TMPFS_ATTR_RO(_name, _show)				\
+	static struct kobj_attribute tmpfs_attr_##_name =	\
+			__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)
+
+#if IS_ENABLED(CONFIG_UNICODE)
+static ssize_t casefold_show(struct kobject *kobj, struct kobj_attribute *a,
+			     char *buf)
+{
+	return sysfs_emit(buf, "supported\n");
+}
+TMPFS_ATTR_RO(casefold, casefold_show);
+#endif
+
+static struct attribute *tmpfs_attributes[] = {
+#if IS_ENABLED(CONFIG_UNICODE)
+	&tmpfs_attr_casefold.attr,
+#endif
+	NULL
+};
+
+static const struct attribute_group tmpfs_attribute_group = {
+	.attrs = tmpfs_attributes,
+	.name = "features"
+};
+
+static struct kobject *tmpfs_kobj;
+
+static int __init tmpfs_sysfs_init(void)
+{
+	int ret;
+
+	tmpfs_kobj = kobject_create_and_add("tmpfs", fs_kobj);
+	if (!tmpfs_kobj)
+		return -ENOMEM;
+
+	ret = sysfs_create_group(tmpfs_kobj, &tmpfs_attribute_group);
+	if (ret)
+		kobject_put(tmpfs_kobj);
+
+	return ret;
+}
+#endif /* CONFIG_SYSFS && CONFIG_TMPFS */
+
 void __init shmem_init(void)
 {
 	int error;
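Given the kobject and attribute group registered above ("tmpfs" under fs_kobj, group name "features"), userspace can probe for the capability before attempting a casefold mount, along these lines:

    $ cat /sys/fs/tmpfs/features/casefold
    supported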
@@ -4977,6 +5252,14 @@ void __init shmem_init(void)
 		goto out1;
 	}
 
+#if defined(CONFIG_SYSFS) && defined(CONFIG_TMPFS)
+	error = tmpfs_sysfs_init();
+	if (error) {
+		pr_err("Could not init tmpfs sysfs\n");
+		goto out1;
+	}
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
 		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
@@ -4987,7 +5270,8 @@ void __init shmem_init(void)
 	 * Default to setting PMD-sized THP to inherit the global setting and
 	 * disable all other multi-size THPs.
 	 */
-	huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
+	if (!shmem_orders_configured)
+		huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
 #endif
 	return;
 
@@ -5030,7 +5314,7 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 		struct kobj_attribute *attr, const char *buf, size_t count)
 {
 	char tmp[16];
-	int huge;
+	int huge, err;
 
 	if (count + 1 > sizeof(tmp))
 		return -EINVAL;
@@ -5041,20 +5325,14 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
 
 	huge = shmem_parse_huge(tmp);
 	if (huge == -EINVAL)
-		return -EINVAL;
-	if (!has_transparent_hugepage() &&
-	    huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
-		return -EINVAL;
-
-	/* Do not override huge allocation policy with non-PMD sized mTHP */
-	if (huge == SHMEM_HUGE_FORCE &&
-	    huge_shmem_orders_inherit != BIT(HPAGE_PMD_ORDER))
-		return -EINVAL;
+		return huge;
 
 	shmem_huge = huge;
 	if (shmem_huge > SHMEM_HUGE_DENY)
 		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
-	return count;
+
+	err = start_stop_khugepaged();
+	return err ? err : count;
 }
 
 struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
@@ -5131,6 +5409,12 @@ static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
 		ret = -EINVAL;
 	}
 
+	if (ret > 0) {
+		int err = start_stop_khugepaged();
+
+		if (err)
+			ret = err;
+	}
 	return ret;
 }
 
@@ -5138,6 +5422,126 @@ struct kobj_attribute thpsize_shmem_enabled_attr =
 	__ATTR(shmem_enabled, 0644, thpsize_shmem_enabled_show, thpsize_shmem_enabled_store);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
 
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE)
+
+static int __init setup_transparent_hugepage_shmem(char *str)
+{
+	int huge;
+
+	huge = shmem_parse_huge(str);
+	if (huge == -EINVAL) {
+		pr_warn("transparent_hugepage_shmem= cannot parse, ignored\n");
+		return huge;
+	}
+
+	shmem_huge = huge;
+	return 1;
+}
+__setup("transparent_hugepage_shmem=", setup_transparent_hugepage_shmem);
+
+static char str_dup[PAGE_SIZE] __initdata;
+static int __init setup_thp_shmem(char *str)
+{
+	char *token, *range, *policy, *subtoken;
+	unsigned long always, inherit, madvise, within_size;
+	char *start_size, *end_size;
+	int start, end, nr;
+	char *p;
+
+	if (!str || strlen(str) + 1 > PAGE_SIZE)
+		goto err;
+	strscpy(str_dup, str);
+
+	always = huge_shmem_orders_always;
+	inherit = huge_shmem_orders_inherit;
+	madvise = huge_shmem_orders_madvise;
+	within_size = huge_shmem_orders_within_size;
+	p = str_dup;
+	while ((token = strsep(&p, ";")) != NULL) {
+		range = strsep(&token, ":");
+		policy = token;
+
+		if (!policy)
+			goto err;
+
+		while ((subtoken = strsep(&range, ",")) != NULL) {
+			if (strchr(subtoken, '-')) {
+				start_size = strsep(&subtoken, "-");
+				end_size = subtoken;
+
+				start = get_order_from_str(start_size,
+							   THP_ORDERS_ALL_FILE_DEFAULT);
+				end = get_order_from_str(end_size,
+							 THP_ORDERS_ALL_FILE_DEFAULT);
+			} else {
+				start_size = end_size = subtoken;
+				start = end = get_order_from_str(subtoken,
+								 THP_ORDERS_ALL_FILE_DEFAULT);
+			}
+
+			if (start == -EINVAL) {
+				pr_err("invalid size %s in thp_shmem boot parameter\n",
+				       start_size);
+				goto err;
+			}
+
+			if (end == -EINVAL) {
+				pr_err("invalid size %s in thp_shmem boot parameter\n",
+				       end_size);
+				goto err;
+			}
+
+			if (start < 0 || end < 0 || start > end)
+				goto err;
+
+			nr = end - start + 1;
+			if (!strcmp(policy, "always")) {
+				bitmap_set(&always, start, nr);
+				bitmap_clear(&inherit, start, nr);
+				bitmap_clear(&madvise, start, nr);
+				bitmap_clear(&within_size, start, nr);
+			} else if (!strcmp(policy, "advise")) {
+				bitmap_set(&madvise, start, nr);
+				bitmap_clear(&inherit, start, nr);
+				bitmap_clear(&always, start, nr);
+				bitmap_clear(&within_size, start, nr);
+			} else if (!strcmp(policy, "inherit")) {
+				bitmap_set(&inherit, start, nr);
+				bitmap_clear(&madvise, start, nr);
+				bitmap_clear(&always, start, nr);
+				bitmap_clear(&within_size, start, nr);
+			} else if (!strcmp(policy, "within_size")) {
+				bitmap_set(&within_size, start, nr);
+				bitmap_clear(&inherit, start, nr);
+				bitmap_clear(&madvise, start, nr);
+				bitmap_clear(&always, start, nr);
+			} else if (!strcmp(policy, "never")) {
+				bitmap_clear(&inherit, start, nr);
+				bitmap_clear(&madvise, start, nr);
+				bitmap_clear(&always, start, nr);
+				bitmap_clear(&within_size, start, nr);
+			} else {
+				pr_err("invalid policy %s in thp_shmem boot parameter\n", policy);
+				goto err;
+			}
+		}
+	}
+
+	huge_shmem_orders_always = always;
+	huge_shmem_orders_madvise = madvise;
+	huge_shmem_orders_inherit = inherit;
+	huge_shmem_orders_within_size = within_size;
+	shmem_orders_configured = true;
+	return 1;
+
+err:
+	pr_warn("thp_shmem=%s: error parsing string, ignoring setting\n", str);
+	return 0;
+}
+__setup("thp_shmem=", setup_thp_shmem);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 #else /* !CONFIG_SHMEM */
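For reference, the two boot parameters added above compose as in this illustrative command line (the sizes must correspond to THP orders valid for file mappings on the running kernel):

    transparent_hugepage_shmem=within_size thp_shmem=64K,128K:always;256K-1M:inherit;2M:advise

transparent_hugepage_shmem= takes the same keywords as shmem_parse_huge(), while each thp_shmem= entry maps comma-separated sizes or dash-separated size ranges to one of always, advise, inherit, within_size or never, with entries separated by semicolons.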