
 22 hotfixes. 13 are cc:stable and the remainder address post-6.14 issues
 or aren't considered necessary for -stable kernels.
 
 About half are for MM.  Five OCFS2 fixes and a few MAINTAINERS updates.
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaB/D0AAKCRDdBJ7gKXxA
 jk1lAPwNV14Sra7MJpVsLGip2BaJLgG+9vQ/Fg3pntEhwX4u0gD/fXEzTog/A73O
 xD7jQQStJYxHwu0K8CXIDUniZAXSSQw=
 =US5c
 -----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2025-05-10-14-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc hotfixes from Andrew Morton:
 "22 hotfixes. 13 are cc:stable and the remainder address post-6.14
  issues or aren't considered necessary for -stable kernels.

  About half are for MM. Five OCFS2 fixes and a few MAINTAINERS updates"

* tag 'mm-hotfixes-stable-2025-05-10-14-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (22 commits)
  mm: fix folio_pte_batch() on XEN PV
  nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()
  mm/hugetlb: copy the CMA flag when demoting
  mm, swap: fix false warning for large allocation with !THP_SWAP
  selftests/mm: fix a build failure on powerpc
  selftests/mm: fix build break when compiling pkey_util.c
  mm: vmalloc: support more granular vrealloc() sizing
  tools/testing/selftests: fix guard region test tmpfs assumption
  ocfs2: stop quota recovery before disabling quotas
  ocfs2: implement handshaking with ocfs2 recovery thread
  ocfs2: switch osb->disable_recovery to enum
  mailmap: map Uwe's BayLibre addresses to a single one
  MAINTAINERS: add mm THP section
  mm/userfaultfd: fix uninitialized output field for -EAGAIN race
  selftests/mm: compaction_test: support platform with huge mount of memory
  MAINTAINERS: add core mm section
  ocfs2: fix panic in failed foilio allocation
  mm/huge_memory: fix dereferencing invalid pmd migration entry
  MAINTAINERS: add reverse mapping section
  x86: disable image size check for test builds
  ...
Linus Torvalds 2025-05-10 15:50:56 -07:00
commit 3ce9925823
23 changed files with 314 additions and 95 deletions

.mailmap

@@ -447,6 +447,8 @@ Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
Luca Weiss <luca@lucaweiss.eu> <luca@z3ntu.xyz>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
Lance Yang <lance.yang@linux.dev> <ioworker0@gmail.com>
Lance Yang <lance.yang@linux.dev> <mingzhe.yang@ly.com>
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Maciej W. Rozycki <macro@orcam.me.uk> <macro@linux-mips.org>
Maharaja Kennadyrajan <quic_mkenna@quicinc.com> <mkenna@codeaurora.org>
@@ -749,6 +751,7 @@ Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko@ursulin.net>
Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws>
Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com>
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
Uwe Kleine-König <u.kleine-koenig@baylibre.com> <ukleinek@baylibre.com>
Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Uwe Kleine-König <ukleinek@strlen.de>
Uwe Kleine-König <ukl@pengutronix.de>

MAINTAINERS

@@ -15495,24 +15495,45 @@ F: Documentation/mm/
F: include/linux/gfp.h
F: include/linux/gfp_types.h
F: include/linux/memfd.h
F: include/linux/memory.h
F: include/linux/memory_hotplug.h
F: include/linux/memory-tiers.h
F: include/linux/mempolicy.h
F: include/linux/mempool.h
F: include/linux/memremap.h
F: include/linux/mm.h
F: include/linux/mm_*.h
F: include/linux/mmzone.h
F: include/linux/mmu_notifier.h
F: include/linux/pagewalk.h
F: include/linux/rmap.h
F: include/trace/events/ksm.h
F: mm/
F: tools/mm/
F: tools/testing/selftests/mm/
N: include/linux/page[-_]*
MEMORY MANAGEMENT - CORE
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Mike Rapoport <rppt@kernel.org>
R: Suren Baghdasaryan <surenb@google.com>
R: Michal Hocko <mhocko@suse.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
F: include/linux/memory.h
F: include/linux/mm.h
F: include/linux/mm_*.h
F: include/linux/mmdebug.h
F: include/linux/pagewalk.h
F: mm/Kconfig
F: mm/debug.c
F: mm/init-mm.c
F: mm/memory.c
F: mm/pagewalk.c
F: mm/util.c
MEMORY MANAGEMENT - EXECMEM
M: Andrew Morton <akpm@linux-foundation.org>
M: Mike Rapoport <rppt@kernel.org>
@@ -15546,6 +15567,19 @@ F: mm/page_alloc.c
F: include/linux/gfp.h
F: include/linux/compaction.h
MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
M: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Rik van Riel <riel@surriel.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Vlastimil Babka <vbabka@suse.cz>
R: Harry Yoo <harry.yoo@oracle.com>
L: linux-mm@kvack.org
S: Maintained
F: include/linux/rmap.h
F: mm/rmap.c
MEMORY MANAGEMENT - SECRETMEM
M: Andrew Morton <akpm@linux-foundation.org>
M: Mike Rapoport <rppt@kernel.org>
@@ -15554,6 +15588,30 @@ S: Maintained
F: include/linux/secretmem.h
F: mm/secretmem.c
MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
M: Andrew Morton <akpm@linux-foundation.org>
M: David Hildenbrand <david@redhat.com>
R: Zi Yan <ziy@nvidia.com>
R: Baolin Wang <baolin.wang@linux.alibaba.com>
R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
R: Liam R. Howlett <Liam.Howlett@oracle.com>
R: Nico Pache <npache@redhat.com>
R: Ryan Roberts <ryan.roberts@arm.com>
R: Dev Jain <dev.jain@arm.com>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
F: Documentation/admin-guide/mm/transhuge.rst
F: include/linux/huge_mm.h
F: include/linux/khugepaged.h
F: include/trace/events/huge_memory.h
F: mm/huge_memory.c
F: mm/khugepaged.c
F: tools/testing/selftests/mm/khugepaged.c
F: tools/testing/selftests/mm/split_huge_page_test.c
F: tools/testing/selftests/mm/transhuge-stress.c
MEMORY MANAGEMENT - USERFAULTFD
M: Andrew Morton <akpm@linux-foundation.org>
R: Peter Xu <peterx@redhat.com>

arch/x86/kernel/vmlinux.lds.S

@@ -466,10 +466,18 @@ SECTIONS
}
/*
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
* COMPILE_TEST kernels can be large - CONFIG_KASAN, for example, can cause
* this. Let's assume that nobody will be running a COMPILE_TEST kernel and
* let's assert that fuller build coverage is more valuable than being able to
* run a COMPILE_TEST kernel.
*/
#ifndef CONFIG_COMPILE_TEST
/*
* The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
*/
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
#endif
/* needed for Clang - see arch/x86/entry/entry.S */
PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);

fs/nilfs2/the_nilfs.c

@@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
int blocksize;
int err;
down_write(&nilfs->ns_sem);
blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
if (!blocksize) {
nilfs_err(sb, "unable to set blocksize");
@@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
set_nilfs_init(nilfs);
err = 0;
out:
up_write(&nilfs->ns_sem);
return err;
failed_sbh:

fs/ocfs2/alloc.c

@@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end,
if (IS_ERR(folios[numfolios])) {
ret = PTR_ERR(folios[numfolios]);
mlog_errno(ret);
folios[numfolios] = NULL;
goto out;
}

fs/ocfs2/journal.c

@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
struct ocfs2_recovery_map *rm;
mutex_init(&osb->recovery_lock);
osb->disable_recovery = 0;
osb->recovery_state = OCFS2_REC_ENABLED;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
@@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
return 0;
}
/* we can't grab the goofy sem lock from inside wait_event, so we use
* memory barriers to make sure that we'll see the null task before
* being woken up */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
mb();
return osb->recovery_thread_task != NULL;
}
static void ocfs2_recovery_disable(struct ocfs2_super *osb,
enum ocfs2_recovery_state state)
{
mutex_lock(&osb->recovery_lock);
/*
* If recovery thread is not running, we can directly transition to
* final state.
*/
if (!ocfs2_recovery_thread_running(osb)) {
osb->recovery_state = state + 1;
goto out_lock;
}
osb->recovery_state = state;
/* Wait for recovery thread to acknowledge state transition */
wait_event_cmd(osb->recovery_event,
!ocfs2_recovery_thread_running(osb) ||
osb->recovery_state >= state + 1,
mutex_unlock(&osb->recovery_lock),
mutex_lock(&osb->recovery_lock));
out_lock:
mutex_unlock(&osb->recovery_lock);
/*
* At this point we know that no more recovery work can be queued so
* wait for any recovery completion work to complete.
*/
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
}
void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
{
ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
}
void ocfs2_recovery_exit(struct ocfs2_super *osb)
{
struct ocfs2_recovery_map *rm;
/* disable any new recovery threads and wait for any currently
* running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
osb->disable_recovery = 1;
mutex_unlock(&osb->recovery_lock);
wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
/* At this point, we know that no more recovery threads can be
* launched, so wait for any recovery completion work to
* complete. */
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg)
}
}
restart:
if (quota_enabled) {
mutex_lock(&osb->recovery_lock);
/* Confirm that recovery thread will no longer recover quotas */
if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
wake_up(&osb->recovery_event);
}
if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
quota_enabled = 0;
mutex_unlock(&osb->recovery_lock);
}
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
@@ -1569,12 +1603,12 @@ bail:
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
mb(); /* sync with ocfs2_recovery_thread_running */
if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
osb->recovery_state = OCFS2_REC_DISABLED;
wake_up(&osb->recovery_event);
mutex_unlock(&osb->recovery_lock);
if (quota_enabled)
kfree(rm_quota);
return status;
@@ -1582,14 +1616,16 @@ bail:
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
int was_set = -1;
mutex_lock(&osb->recovery_lock);
if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
osb->disable_recovery, osb->recovery_thread_task,
osb->disable_recovery ?
-1 : ocfs2_recovery_map_set(osb, node_num));
osb->recovery_state, osb->recovery_thread_task, was_set);
if (osb->disable_recovery)
if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
goto out;
if (osb->recovery_thread_task)

fs/ocfs2/journal.h

@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
void ocfs2_free_replay_slots(struct ocfs2_super *osb);

fs/ocfs2/ocfs2.h

@@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type {
void ocfs2_initialize_journal_triggers(struct super_block *sb,
struct ocfs2_triggers triggers[]);
enum ocfs2_recovery_state {
OCFS2_REC_ENABLED = 0,
OCFS2_REC_QUOTA_WANT_DISABLE,
/*
* Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
* ocfs2_recovery_disable_quota() to work.
*/
OCFS2_REC_QUOTA_DISABLED,
OCFS2_REC_WANT_DISABLE,
/*
* Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
*/
OCFS2_REC_DISABLED,
};
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
@@ -370,7 +385,7 @@ struct ocfs2_super
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
int disable_recovery;
enum ocfs2_recovery_state recovery_state;
wait_queue_head_t checkpoint_event;
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
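The two "Must be ... + 1" comments above encode a small handshake: ocfs2_recovery_disable() publishes a *_WANT_DISABLE state and sleeps, and whichever side shuts recovery down last advances the state by one to acknowledge, which is why each DISABLED value must sit directly after its WANT_DISABLE counterpart. Below is a minimal userspace sketch of the same pattern, with pthreads standing in for the kernel's mutex, waitqueue and wait_event_cmd(); the names and the pthread mapping are illustrative assumptions, not kernel code.

#include <pthread.h>
#include <stdio.h>

enum rec_state { REC_ENABLED, REC_WANT_DISABLE, REC_DISABLED /* WANT + 1 */ };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t event = PTHREAD_COND_INITIALIZER;
static enum rec_state state = REC_ENABLED;
static int thread_running = 1;

static void *recovery_thread(void *arg)
{
	pthread_mutex_lock(&lock);
	/* ... recovery work would happen here ... */
	if (state == REC_WANT_DISABLE)
		state = REC_WANT_DISABLE + 1;	/* ack: advance to REC_DISABLED */
	thread_running = 0;
	pthread_cond_broadcast(&event);		/* wake_up(&osb->recovery_event) */
	pthread_mutex_unlock(&lock);
	return NULL;
}

static void recovery_disable(enum rec_state want)
{
	pthread_mutex_lock(&lock);
	if (!thread_running) {
		/* No thread running: transition straight to the final state. */
		state = want + 1;
	} else {
		state = want;
		/* wait_event_cmd() analogue: sleep unlocked, wake relocked. */
		while (thread_running && state < want + 1)
			pthread_cond_wait(&event, &lock);
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, recovery_thread, NULL);
	recovery_disable(REC_WANT_DISABLE);
	pthread_join(t, NULL);
	printf("state=%d (expect %d)\n", state, REC_DISABLED);
	return 0;
}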

fs/ocfs2/quota_local.c

@@ -453,8 +453,7 @@ out:
/* Sync changes in local quota file into global quota file and
* reinitialize local quota file.
* The function expects local quota file to be already locked and
* s_umount locked in shared mode. */
* The function expects local quota file to be already locked. */
static int ocfs2_recover_local_quota_file(struct inode *lqinode,
int type,
struct ocfs2_quota_recovery *rec)
@@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
{
unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE };
struct super_block *sb = osb->sb;
struct ocfs2_local_disk_dqinfo *ldinfo;
struct buffer_head *bh;
handle_t *handle;
@@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
"slot %u\n", osb->dev_str, slot_num);
down_read(&sb->s_umount);
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (list_empty(&(rec->r_list[type])))
continue;
@@ -677,7 +674,6 @@ out_put:
break;
}
out:
up_read(&sb->s_umount);
kfree(rec);
return status;
}
@@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
/*
* s_umount held in exclusive mode protects us against racing with
* recovery thread...
* ocfs2_dismount_volume() has already aborted quota recovery...
*/
if (oinfo->dqi_rec) {
ocfs2_free_quota_recovery(oinfo->dqi_rec);

fs/ocfs2/suballoc.c

@@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
ac, cl);
if (PTR_ERR(bg_bh) == -ENOSPC)
if (PTR_ERR(bg_bh) == -ENOSPC) {
ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
bg_bh = ocfs2_block_group_alloc_discontig(handle,
alloc_inode,
ac, cl);
}
if (IS_ERR(bg_bh)) {
status = PTR_ERR(bg_bh);
bg_bh = NULL;
@@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
{
int status;
u16 chain;
u32 contig_bits;
u64 next_group;
struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL;
@@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use
* the 1st group with any empty bits. */
while ((status = ac->ac_group_search(alloc_inode, group_bh,
while (1) {
if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) {
contig_bits = le16_to_cpu(bg->bg_contig_free_bits);
if (!contig_bits)
contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
le16_to_cpu(bg->bg_bits), 0);
if (bits_wanted > contig_bits && contig_bits >= min_bits)
bits_wanted = contig_bits;
}
status = ac->ac_group_search(alloc_inode, group_bh,
bits_wanted, min_bits,
ac->ac_max_block,
res)) == -ENOSPC) {
ac->ac_max_block, res);
if (status != -ENOSPC)
break;
if (!bg->bg_next_group)
break;
@@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
search:
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
if (!status) {
@@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
}
}
/* Chains can't supply the bits_wanted contiguous space.
* We should switch to using every single bit when allocating
* from the global bitmap. */
if (i == le16_to_cpu(cl->cl_next_free_rec) &&
status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) {
ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
ac->ac_chain = victim;
goto search;
}
set_hint:
if (status != -ENOSPC) {
/* If the next search of this group is not likely to
@@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
&& ac->ac_which != OCFS2_AC_USE_MAIN);
&& ac->ac_which != OCFS2_AC_USE_MAIN
&& ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
WARN_ON(min_clusters > 1);
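The discontiguous fallback above leans on one idea: when the chains cannot hand out bits_wanted contiguous bits, cap the request at the group's longest free run (bg_contig_free_bits, recomputed via ocfs2_find_max_contig_free_bits() when the cached value is zero), provided that run still satisfies min_bits. A rough standalone sketch of that clamp over a plain byte-array bitmap; this is a simplified stand-in under assumed types, not the kernel helper.

#include <stdint.h>

/* Longest run of free (zero) bits -- simplified analogue of
 * ocfs2_find_max_contig_free_bits(). */
static unsigned int max_contig_free_bits(const uint8_t *bitmap, unsigned int nbits)
{
	unsigned int i, run = 0, best = 0;

	for (i = 0; i < nbits; i++) {
		if (bitmap[i / 8] & (1u << (i % 8)))
			run = 0;		/* bit set: block in use */
		else if (++run > best)
			best = run;
	}
	return best;
}

/* The clamp performed in ocfs2_search_chain() for the DISCONTIG case:
 * never ask a group for more than its longest free run. */
static unsigned int clamp_bits_wanted(unsigned int bits_wanted, unsigned int min_bits,
				      const uint8_t *bitmap, unsigned int nbits)
{
	unsigned int contig = max_contig_free_bits(bitmap, nbits);

	if (bits_wanted > contig && contig >= min_bits)
		bits_wanted = contig;
	return bits_wanted;
}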

fs/ocfs2/suballoc.h

@@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
#define OCFS2_AC_USE_MAIN 2
#define OCFS2_AC_USE_INODE 3
#define OCFS2_AC_USE_META 4
#define OCFS2_AC_USE_MAIN_DISCONTIG 5
u32 ac_which;
/* these are used by the chain search */

fs/ocfs2/super.c

@@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
/* Stop quota recovery so that we can disable quotas */
ocfs2_recovery_disable_quota(osb);
ocfs2_disable_quotas(osb);
/* All dquots should be freed by now */

fs/userfaultfd.c

@@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
user_uffdio_copy = (struct uffdio_copy __user *) arg;
ret = -EAGAIN;
if (atomic_read(&ctx->mmap_changing))
if (unlikely(atomic_read(&ctx->mmap_changing))) {
if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
return -EFAULT;
goto out;
}
ret = -EFAULT;
if (copy_from_user(&uffdio_copy, user_uffdio_copy,
@@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
ret = -EAGAIN;
if (atomic_read(&ctx->mmap_changing))
if (unlikely(atomic_read(&ctx->mmap_changing))) {
if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
return -EFAULT;
goto out;
}
ret = -EFAULT;
if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage,
@@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
user_uffdio_continue = (struct uffdio_continue __user *)arg;
ret = -EAGAIN;
if (atomic_read(&ctx->mmap_changing))
if (unlikely(atomic_read(&ctx->mmap_changing))) {
if (unlikely(put_user(ret, &user_uffdio_continue->mapped)))
return -EFAULT;
goto out;
}
ret = -EFAULT;
if (copy_from_user(&uffdio_continue, user_uffdio_continue,
@@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long
user_uffdio_poison = (struct uffdio_poison __user *)arg;
ret = -EAGAIN;
if (atomic_read(&ctx->mmap_changing))
if (unlikely(atomic_read(&ctx->mmap_changing))) {
if (unlikely(put_user(ret, &user_uffdio_poison->updated)))
return -EFAULT;
goto out;
}
ret = -EFAULT;
if (copy_from_user(&uffdio_poison, user_uffdio_poison,
@@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx,
user_uffdio_move = (struct uffdio_move __user *) arg;
if (atomic_read(&ctx->mmap_changing))
return -EAGAIN;
ret = -EAGAIN;
if (unlikely(atomic_read(&ctx->mmap_changing))) {
if (unlikely(put_user(ret, &user_uffdio_move->move)))
return -EFAULT;
goto out;
}
if (copy_from_user(&uffdio_move, user_uffdio_move,
/* don't copy "move" last field */
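Each hunk above makes the early mmap_changing exit write -EAGAIN into the request's output field (copy, zeropage, mapped, updated, move) before returning, so a caller that inspects the field rather than errno no longer reads an uninitialized value. A sketch of the usual userspace retry loop for UFFDIO_COPY under that contract; error handling is trimmed and the helper name is made up.

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <stddef.h>

static int uffd_copy_retry(int uffd, void *dst, void *src, size_t len)
{
	struct uffdio_copy copy = {
		.dst = (uintptr_t)dst,
		.src = (uintptr_t)src,
		.len = len,
		.mode = 0,
	};

	for (;;) {
		copy.copy = 0;
		if (ioctl(uffd, UFFDIO_COPY, &copy) == 0)
			return 0;		/* copy.copy == len on success */
		/* mmap_changing race: the fix guarantees the output field
		 * itself reports -EAGAIN, not just errno. */
		if (copy.copy != -EAGAIN)
			return -1;
	}
}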

include/linux/vmalloc.h

@@ -61,6 +61,7 @@ struct vm_struct {
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
unsigned long requested_size;
};
struct vmap_area {

mm/huge_memory.c

@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, bool freeze, struct folio *folio)
{
bool pmd_migration = is_pmd_migration_entry(*pmd);
VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));
@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
is_pmd_migration_entry(*pmd)) {
if (folio && folio != pmd_folio(*pmd))
if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
/*
* Do not apply pmd_folio() to a migration entry; and folio lock
* guarantees that it must be of the wrong folio anyway.
*/
if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
}

mm/hugetlb.c

@@ -4034,10 +4034,13 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
list_for_each_entry_safe(folio, next, src_list, lru) {
int i;
bool cma;
if (folio_test_hugetlb_vmemmap_optimized(folio))
continue;
cma = folio_test_hugetlb_cma(folio);
list_del(&folio->lru);
split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));
@@ -4053,6 +4056,9 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
new_folio->mapping = NULL;
init_new_hugetlb_folio(dst, new_folio);
/* Copy the CMA flag so that it is freed correctly */
if (cma)
folio_set_hugetlb_cma(new_folio);
list_add(&new_folio->lru, &dst_list);
}
}

mm/internal.h

@@ -248,11 +248,9 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
bool *any_writable, bool *any_young, bool *any_dirty)
{
unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
const pte_t *end_ptep = start_ptep + max_nr;
pte_t expected_pte, *ptep;
bool writable, young, dirty;
int nr;
int nr, cur_nr;
if (any_writable)
*any_writable = false;
@@ -265,11 +263,15 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);
/* Limit max_nr to the actual remaining PFNs in the folio we could batch. */
max_nr = min_t(unsigned long, max_nr,
folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte));
nr = pte_batch_hint(start_ptep, pte);
expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
ptep = start_ptep + nr;
while (ptep < end_ptep) {
while (nr < max_nr) {
pte = ptep_get(ptep);
if (any_writable)
writable = !!pte_write(pte);
@@ -282,14 +284,6 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
if (!pte_same(pte, expected_pte))
break;
/*
* Stop immediately once we reached the end of the folio. In
* corner cases the next PFN might fall into a different
* folio.
*/
if (pte_pfn(pte) >= folio_end_pfn)
break;
if (any_writable)
*any_writable |= writable;
if (any_young)
@@ -297,12 +291,13 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
if (any_dirty)
*any_dirty |= dirty;
nr = pte_batch_hint(ptep, pte);
expected_pte = pte_advance_pfn(expected_pte, nr);
ptep += nr;
cur_nr = pte_batch_hint(ptep, pte);
expected_pte = pte_advance_pfn(expected_pte, cur_nr);
ptep += cur_nr;
nr += cur_nr;
}
return min(ptep - start_ptep, max_nr);
return min(nr, max_nr);
}
/**

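The rewrite above trades the per-iteration folio_end_pfn check for one up-front clamp: a batch can never cover more PFNs than remain in the folio, so max_nr is reduced before the loop and PTEs past the folio (which on XEN PV may point at special, non-folio pages) are never examined. The arithmetic in isolation, as a plain C sketch with made-up parameter names:

#include <stdio.h>

static unsigned long clamp_batch(unsigned long max_nr, unsigned long folio_pfn,
				 unsigned long folio_nr_pages, unsigned long pte_pfn)
{
	unsigned long remaining = folio_pfn + folio_nr_pages - pte_pfn;

	return max_nr < remaining ? max_nr : remaining;
}

int main(void)
{
	/* Folio spans PFNs 1000..1015; the first PTE maps PFN 1012, so at
	 * most 4 PTEs can be batched even if the caller allowed 16. */
	printf("%lu\n", clamp_batch(16, 1000, 16, 1012));	/* prints 4 */
	return 0;
}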
mm/swapfile.c

@@ -1272,14 +1272,23 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp)
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
if (order) {
/*
* Should not even be attempting large allocations when huge
* page swap is disabled. Warn and fail the allocation.
* Reject large allocation when THP_SWAP is disabled,
* the caller should split the folio and try again.
*/
if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) {
if (!IS_ENABLED(CONFIG_THP_SWAP))
return -EAGAIN;
/*
* Allocation size should never exceed cluster size
* (HPAGE_PMD_SIZE).
*/
if (size > SWAPFILE_CLUSTER) {
VM_WARN_ON_ONCE(1);
return -EINVAL;
}
}
local_lock(&percpu_swap_cluster.lock);
if (!swap_alloc_fast(&entry, order))

mm/vmalloc.c

@@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm,
{
vm->flags = flags;
vm->addr = (void *)va->va_start;
vm->size = va_size(va);
vm->size = vm->requested_size = va_size(va);
vm->caller = caller;
va->vm = vm;
}
@@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size,
area->flags = flags;
area->caller = caller;
area->requested_size = requested_size;
va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
if (IS_ERR(va)) {
@@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof);
*/
void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
{
struct vm_struct *vm = NULL;
size_t alloced_size = 0;
size_t old_size = 0;
void *n;
@@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
}
if (p) {
struct vm_struct *vm;
vm = find_vm_area(p);
if (unlikely(!vm)) {
WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p);
return NULL;
}
old_size = get_vm_area_size(vm);
alloced_size = get_vm_area_size(vm);
old_size = vm->requested_size;
if (WARN(alloced_size < old_size,
"vrealloc() has mismatched area vs requested sizes (%p)\n", p))
return NULL;
}
/*
@@ -4088,14 +4093,26 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
* would be a good heuristic for when to shrink the vm_area?
*/
if (size <= old_size) {
/* Zero out spare memory. */
if (want_init_on_alloc(flags))
/* Zero out "freed" memory. */
if (want_init_on_free())
memset((void *)p + size, 0, old_size - size);
vm->requested_size = size;
kasan_poison_vmalloc(p + size, old_size - size);
kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL);
return (void *)p;
}
/*
* We already have the bytes available in the allocation; use them.
*/
if (size <= alloced_size) {
kasan_unpoison_vmalloc(p + old_size, size - old_size,
KASAN_VMALLOC_PROT_NORMAL);
/* Zero out "alloced" memory. */
if (want_init_on_alloc(flags))
memset((void *)p + old_size, 0, size - old_size);
vm->requested_size = size;
}
/* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
n = __vmalloc_noprof(size, flags);
if (!n)
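Taken together, the vmalloc changes let vrealloc() track the caller's requested size separately from the capacity actually allocated: shrinking only zeroes (and, under KASAN, poisons) the discarded tail, and growing back within the original capacity unpoisons and zeroes the spare bytes instead of reallocating. A userspace analogue of those two in-place paths; field names mirror the patch, but this is an illustration, not the kernel allocator.

#include <stddef.h>
#include <string.h>

struct varea {
	size_t alloced_size;	/* capacity, like get_vm_area_size(vm) */
	size_t requested_size;	/* what the caller last asked for */
	void *p;
};

static void *varea_realloc(struct varea *vm, size_t size)
{
	if (size <= vm->requested_size) {
		/* Shrink in place: zero the "freed" tail. */
		memset((char *)vm->p + size, 0, vm->requested_size - size);
		vm->requested_size = size;
		return vm->p;
	}
	if (size <= vm->alloced_size) {
		/* Grow back into spare capacity: zero the "new" bytes. */
		memset((char *)vm->p + vm->requested_size, 0,
		       size - vm->requested_size);
		vm->requested_size = size;
		return vm->p;
	}
	return NULL;	/* would need a real reallocation + copy */
}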

tools/testing/selftests/mm/compaction_test.c

@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
char target_nr_hugepages[24] = {0};
int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@
goto out;
}
/* Request a large number of huge pages. The Kernel will allocate
as much as it can */
if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
strerror(errno));
/*
* Request huge pages for about half of the free memory. The Kernel
* will allocate as much as it can, and we expect it will get at least 1/3
*/
nr_hugepages_ul = mem_free / hugepage_size / 2;
snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
"%lu", nr_hugepages_ul);
slen = strlen(target_nr_hugepages);
if (write(fd, target_nr_hugepages, slen) != slen) {
ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
nr_hugepages_ul, strerror(errno));
goto close_fd;
}

tools/testing/selftests/mm/guard-regions.c

@@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions)
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
setup_sighandler();
if (variant->backing == ANON_BACKED)
switch (variant->backing) {
case ANON_BACKED:
return;
self->fd = open_file(
variant->backing == SHMEM_BACKED ? "/tmp/" : "",
self->path);
case LOCAL_FILE_BACKED:
self->fd = open_file("", self->path);
break;
case SHMEM_BACKED:
self->fd = memfd_create(self->path, 0);
break;
}
/* We truncate file to at least 100 pages, tests can modify as needed. */
ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);
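The setup change above drops the assumption that tmpfs is mounted at /tmp: for SHMEM-backed runs the fixture now asks the kernel directly for a shmem file via memfd_create(). A minimal usage sketch of that call (glibc exposes it through <sys/mman.h> with _GNU_SOURCE; the name string and helper are illustrative):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

static int make_shmem_fd(long pages)
{
	long page_size = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("guard-regions-test", 0);

	if (fd < 0)
		return -1;
	/* Size it like the fixture does; tests can ftruncate() again later. */
	if (ftruncate(fd, pages * page_size) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}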
@@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file)
char *ptr;
int i;
if (variant->backing == ANON_BACKED)
if (variant->backing != LOCAL_FILE_BACKED)
SKIP(return, "Read-only test specific to file-backed");
/* Map shared so we can populate with pattern, populate it, unmap. */

tools/testing/selftests/mm/pkey-powerpc.h

@@ -3,6 +3,8 @@
#ifndef _PKEYS_POWERPC_H
#define _PKEYS_POWERPC_H
#include <sys/stat.h>
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 384
# define SYS_pkey_free 385
@@ -102,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
return;
}
#define REPEAT_8(s) s s s s s s s s
#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)
#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \
REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s)
#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \
REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s)
#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \
REPEAT_4096(s) REPEAT_4096(s)
/* 4-byte instructions * 16384 = 64K page */
#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
#define __page_o_noops() asm(REPEAT_16384("nop\n"))
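The nested REPEAT_* macros above replace the assembler-level ".rept" loop with preprocessor repetition. Their expansion count is easy to sanity-check at compile time by applying the same pattern to a one-character string literal, since adjacent C string literals concatenate; this is a quick illustrative test, not part of the selftest.

#define REPEAT_8(s) s s s s s s s s
#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
		     REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)
#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \
		      REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s)
#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \
		       REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s)
#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \
			REPEAT_4096(s) REPEAT_4096(s)

/* 16384 'x' characters plus the terminating NUL. */
_Static_assert(sizeof(REPEAT_16384("x")) == 16384 + 1,
	       "REPEAT_16384 must expand its argument 16384 times");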
static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{

tools/testing/selftests/mm/pkey_util.c

@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
#define __SANE_USERSPACE_TYPES__
#include <sys/syscall.h>
#include <unistd.h>