mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-06-08 08:17:45 +09:00

Merge tag 'mm-hotfixes-stable-2025-05-10-14-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc hotfixes from Andrew Morton:
 "22 hotfixes. 13 are cc:stable and the remainder address post-6.14
  issues or aren't considered necessary for -stable kernels. About half
  are for MM. Five OCFS2 fixes and a few MAINTAINERS updates."

-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCaB/D0AAKCRDdBJ7gKXxA
jk1lAPwNV14Sra7MJpVsLGip2BaJLgG+9vQ/Fg3pntEhwX4u0gD/fXEzTog/A73O
xD7jQQStJYxHwu0K8CXIDUniZAXSSQw=
=US5c
-----END PGP SIGNATURE-----

* tag 'mm-hotfixes-stable-2025-05-10-14-23' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (22 commits)
  mm: fix folio_pte_batch() on XEN PV
  nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()
  mm/hugetlb: copy the CMA flag when demoting
  mm, swap: fix false warning for large allocation with !THP_SWAP
  selftests/mm: fix a build failure on powerpc
  selftests/mm: fix build break when compiling pkey_util.c
  mm: vmalloc: support more granular vrealloc() sizing
  tools/testing/selftests: fix guard region test tmpfs assumption
  ocfs2: stop quota recovery before disabling quotas
  ocfs2: implement handshaking with ocfs2 recovery thread
  ocfs2: switch osb->disable_recovery to enum
  mailmap: map Uwe's BayLibre addresses to a single one
  MAINTAINERS: add mm THP section
  mm/userfaultfd: fix uninitialized output field for -EAGAIN race
  selftests/mm: compaction_test: support platform with huge amount of memory
  MAINTAINERS: add core mm section
  ocfs2: fix panic in failed folio allocation
  mm/huge_memory: fix dereferencing invalid pmd migration entry
  MAINTAINERS: add reverse mapping section
  x86: disable image size check for test builds
  ...
commit 3ce9925823

23 changed files with 314 additions and 95 deletions

.mailmap | 3
@@ -447,6 +447,8 @@ Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
 Luca Weiss <luca@lucaweiss.eu> <luca@z3ntu.xyz>
 Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
 Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
+Lance Yang <lance.yang@linux.dev> <ioworker0@gmail.com>
+Lance Yang <lance.yang@linux.dev> <mingzhe.yang@ly.com>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Maciej W. Rozycki <macro@orcam.me.uk> <macro@linux-mips.org>
 Maharaja Kennadyrajan <quic_mkenna@quicinc.com> <mkenna@codeaurora.org>

@@ -749,6 +751,7 @@ Tvrtko Ursulin <tursulin@ursulin.net> <tvrtko@ursulin.net>
 Tycho Andersen <tycho@tycho.pizza> <tycho@tycho.ws>
 Tzung-Bi Shih <tzungbi@kernel.org> <tzungbi@google.com>
 Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
+Uwe Kleine-König <u.kleine-koenig@baylibre.com> <ukleinek@baylibre.com>
 Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
 Uwe Kleine-König <ukleinek@strlen.de>
 Uwe Kleine-König <ukl@pengutronix.de>
MAINTAINERS | 66
@@ -15495,24 +15495,45 @@ F:  Documentation/mm/
 F:  include/linux/gfp.h
 F:  include/linux/gfp_types.h
 F:  include/linux/memfd.h
-F:  include/linux/memory.h
 F:  include/linux/memory_hotplug.h
 F:  include/linux/memory-tiers.h
 F:  include/linux/mempolicy.h
 F:  include/linux/mempool.h
 F:  include/linux/memremap.h
-F:  include/linux/mm.h
-F:  include/linux/mm_*.h
 F:  include/linux/mmzone.h
 F:  include/linux/mmu_notifier.h
 F:  include/linux/pagewalk.h
-F:  include/linux/rmap.h
 F:  include/trace/events/ksm.h
 F:  mm/
 F:  tools/mm/
 F:  tools/testing/selftests/mm/
 N:  include/linux/page[-_]*
 
+MEMORY MANAGEMENT - CORE
+M:  Andrew Morton <akpm@linux-foundation.org>
+M:  David Hildenbrand <david@redhat.com>
+R:  Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+R:  Liam R. Howlett <Liam.Howlett@oracle.com>
+R:  Vlastimil Babka <vbabka@suse.cz>
+R:  Mike Rapoport <rppt@kernel.org>
+R:  Suren Baghdasaryan <surenb@google.com>
+R:  Michal Hocko <mhocko@suse.com>
+L:  linux-mm@kvack.org
+S:  Maintained
+W:  http://www.linux-mm.org
+T:  git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:  include/linux/memory.h
+F:  include/linux/mm.h
+F:  include/linux/mm_*.h
+F:  include/linux/mmdebug.h
+F:  include/linux/pagewalk.h
+F:  mm/Kconfig
+F:  mm/debug.c
+F:  mm/init-mm.c
+F:  mm/memory.c
+F:  mm/pagewalk.c
+F:  mm/util.c
+
 MEMORY MANAGEMENT - EXECMEM
 M:  Andrew Morton <akpm@linux-foundation.org>
 M:  Mike Rapoport <rppt@kernel.org>

@@ -15546,6 +15567,19 @@ F:  mm/page_alloc.c
 F:  include/linux/gfp.h
 F:  include/linux/compaction.h
 
+MEMORY MANAGEMENT - RMAP (REVERSE MAPPING)
+M:  Andrew Morton <akpm@linux-foundation.org>
+M:  David Hildenbrand <david@redhat.com>
+M:  Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+R:  Rik van Riel <riel@surriel.com>
+R:  Liam R. Howlett <Liam.Howlett@oracle.com>
+R:  Vlastimil Babka <vbabka@suse.cz>
+R:  Harry Yoo <harry.yoo@oracle.com>
+L:  linux-mm@kvack.org
+S:  Maintained
+F:  include/linux/rmap.h
+F:  mm/rmap.c
+
 MEMORY MANAGEMENT - SECRETMEM
 M:  Andrew Morton <akpm@linux-foundation.org>
 M:  Mike Rapoport <rppt@kernel.org>

@@ -15554,6 +15588,30 @@ S:  Maintained
 F:  include/linux/secretmem.h
 F:  mm/secretmem.c
 
+MEMORY MANAGEMENT - THP (TRANSPARENT HUGE PAGE)
+M:  Andrew Morton <akpm@linux-foundation.org>
+M:  David Hildenbrand <david@redhat.com>
+R:  Zi Yan <ziy@nvidia.com>
+R:  Baolin Wang <baolin.wang@linux.alibaba.com>
+R:  Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+R:  Liam R. Howlett <Liam.Howlett@oracle.com>
+R:  Nico Pache <npache@redhat.com>
+R:  Ryan Roberts <ryan.roberts@arm.com>
+R:  Dev Jain <dev.jain@arm.com>
+L:  linux-mm@kvack.org
+S:  Maintained
+W:  http://www.linux-mm.org
+T:  git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:  Documentation/admin-guide/mm/transhuge.rst
+F:  include/linux/huge_mm.h
+F:  include/linux/khugepaged.h
+F:  include/trace/events/huge_memory.h
+F:  mm/huge_memory.c
+F:  mm/khugepaged.c
+F:  tools/testing/selftests/mm/khugepaged.c
+F:  tools/testing/selftests/mm/split_huge_page_test.c
+F:  tools/testing/selftests/mm/transhuge-stress.c
+
 MEMORY MANAGEMENT - USERFAULTFD
 M:  Andrew Morton <akpm@linux-foundation.org>
 R:  Peter Xu <peterx@redhat.com>
@@ -466,10 +466,18 @@ SECTIONS
 	}
 
 	/*
-	 * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+	 * COMPILE_TEST kernels can be large - CONFIG_KASAN, for example, can cause
+	 * this.  Let's assume that nobody will be running a COMPILE_TEST kernel and
+	 * let's assert that fuller build coverage is more valuable than being able to
+	 * run a COMPILE_TEST kernel.
 	 */
+#ifndef CONFIG_COMPILE_TEST
+	/*
+	 * The ASSERT() sync to . is intentional, for binutils 2.14 compatibility:
+	 */
 	. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
 		   "kernel image bigger than KERNEL_IMAGE_SIZE");
+#endif
 
 /* needed for Clang - see arch/x86/entry/entry.S */
 PROVIDE(__ref_stack_chk_guard = __stack_chk_guard);
@@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
 	int blocksize;
 	int err;
 
-	down_write(&nilfs->ns_sem);
-
 	blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
 	if (!blocksize) {
 		nilfs_err(sb, "unable to set blocksize");

@@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
 	set_nilfs_init(nilfs);
 	err = 0;
  out:
-	up_write(&nilfs->ns_sem);
 	return err;
 
  failed_sbh:
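The nilfs2 hunks above drop the ns_sem write-lock around the early mount-time initialization; the commit message attributes the lockdep splat to a lock-dependency cycle involving that lock. As a generic illustration only (pthread mutexes standing in for the kernel locks, not the actual nilfs2 call chain), the classic AB-BA ordering that lockdep reports looks like this:

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static void path_one(void)   /* e.g. a mount-time initialisation path */
{
    pthread_mutex_lock(&lock_a);
    pthread_mutex_lock(&lock_b);   /* order: A then B */
    pthread_mutex_unlock(&lock_b);
    pthread_mutex_unlock(&lock_a);
}

static void path_two(void)   /* e.g. some other path taking the same locks */
{
    pthread_mutex_lock(&lock_b);
    pthread_mutex_lock(&lock_a);   /* order: B then A - inverted */
    pthread_mutex_unlock(&lock_a);
    pthread_mutex_unlock(&lock_b);
}

int main(void)
{
    /* Called sequentially here, so nothing deadlocks; run concurrently
     * from two threads, each path can block forever on the other's lock. */
    path_one();
    path_two();
    return 0;
}

Removing one of the lock acquisitions from one path, as the hunk above does with ns_sem in init_nilfs(), breaks such a cycle.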
@@ -6918,6 +6918,7 @@ static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end,
 		if (IS_ERR(folios[numfolios])) {
 			ret = PTR_ERR(folios[numfolios]);
 			mlog_errno(ret);
+			folios[numfolios] = NULL;
 			goto out;
 		}
 
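The one-line fix above clears the array slot after a failed folio allocation, so a later cleanup pass that treats every non-NULL entry as a valid folio never operates on an ERR_PTR value. A minimal userspace sketch of the same pattern (alloc_slot, ERR_SLOT and NSLOTS are made-up names, not ocfs2 code):

#include <stdlib.h>
#include <errno.h>

#define NSLOTS   4
#define ERR_SLOT ((int *)(long)-ENOMEM)   /* stand-in for an ERR_PTR() value */

static int *alloc_slot(int i)
{
    return (i == 2) ? ERR_SLOT : malloc(sizeof(int));   /* slot 2 "fails" */
}

int main(void)
{
    int *slots[NSLOTS] = { 0 };
    int i;

    for (i = 0; i < NSLOTS; i++) {
        slots[i] = alloc_slot(i);
        if (slots[i] == ERR_SLOT) {
            slots[i] = NULL;   /* the analogue of folios[numfolios] = NULL */
            break;
        }
    }

    /* Cleanup treats every non-NULL entry as a valid allocation; without
     * the reset above it would call free() on the error sentinel. */
    for (i = 0; i < NSLOTS; i++)
        free(slots[i]);        /* free(NULL) is a no-op */
    return 0;
}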
@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
 	struct ocfs2_recovery_map *rm;
 
 	mutex_init(&osb->recovery_lock);
-	osb->disable_recovery = 0;
+	osb->recovery_state = OCFS2_REC_ENABLED;
 	osb->recovery_thread_task = NULL;
 	init_waitqueue_head(&osb->recovery_event);
 
@@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
 	return 0;
 }
 
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
 static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
 {
-	mb();
 	return osb->recovery_thread_task != NULL;
 }
 
+static void ocfs2_recovery_disable(struct ocfs2_super *osb,
+				   enum ocfs2_recovery_state state)
+{
+	mutex_lock(&osb->recovery_lock);
+	/*
+	 * If recovery thread is not running, we can directly transition to
+	 * final state.
+	 */
+	if (!ocfs2_recovery_thread_running(osb)) {
+		osb->recovery_state = state + 1;
+		goto out_lock;
+	}
+	osb->recovery_state = state;
+	/* Wait for recovery thread to acknowledge state transition */
+	wait_event_cmd(osb->recovery_event,
+		       !ocfs2_recovery_thread_running(osb) ||
+				osb->recovery_state >= state + 1,
+		       mutex_unlock(&osb->recovery_lock),
+		       mutex_lock(&osb->recovery_lock));
+out_lock:
+	mutex_unlock(&osb->recovery_lock);
+
+	/*
+	 * At this point we know that no more recovery work can be queued so
+	 * wait for any recovery completion work to complete.
+	 */
+	if (osb->ocfs2_wq)
+		flush_workqueue(osb->ocfs2_wq);
+}
+
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
+{
+	ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
+}
+
 void ocfs2_recovery_exit(struct ocfs2_super *osb)
 {
 	struct ocfs2_recovery_map *rm;
 
 	/* disable any new recovery threads and wait for any currently
 	 * running ones to exit. Do this before setting the vol_state. */
-	mutex_lock(&osb->recovery_lock);
-	osb->disable_recovery = 1;
-	mutex_unlock(&osb->recovery_lock);
-	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
-
-	/* At this point, we know that no more recovery threads can be
-	 * launched, so wait for any recovery completion work to
-	 * complete. */
-	if (osb->ocfs2_wq)
-		flush_workqueue(osb->ocfs2_wq);
+	ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
 
 	/*
 	 * Now that recovery is shut down, and the osb is about to be
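The new ocfs2_recovery_disable() helper above must sleep until the recovery thread acknowledges the requested state, but the thread itself needs recovery_lock to make progress; wait_event_cmd() resolves that by running its third and fourth arguments around every sleep. A condensed sketch of that shape (not the ocfs2 code itself; acknowledged() is a hypothetical stand-in for the `recovery_state >= state + 1` check):

/* Sketch of the drop-the-lock-while-sleeping handshake used above. */
mutex_lock(&osb->recovery_lock);
osb->recovery_state = state;                       /* publish the request */
wait_event_cmd(osb->recovery_event,
	       acknowledged(osb),                  /* re-checked after each wakeup */
	       mutex_unlock(&osb->recovery_lock),  /* run before sleeping */
	       mutex_lock(&osb->recovery_lock));   /* run after waking */
mutex_unlock(&osb->recovery_lock);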
@@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg)
 		}
 	}
 restart:
+	if (quota_enabled) {
+		mutex_lock(&osb->recovery_lock);
+		/* Confirm that recovery thread will no longer recover quotas */
+		if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
+			osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
+			wake_up(&osb->recovery_event);
+		}
+		if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
+			quota_enabled = 0;
+		mutex_unlock(&osb->recovery_lock);
+	}
+
 	status = ocfs2_super_lock(osb, 1);
 	if (status < 0) {
 		mlog_errno(status);
@@ -1569,27 +1603,29 @@ bail:
 
 	ocfs2_free_replay_slots(osb);
 	osb->recovery_thread_task = NULL;
-	mb(); /* sync with ocfs2_recovery_thread_running */
+	if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
+		osb->recovery_state = OCFS2_REC_DISABLED;
 	wake_up(&osb->recovery_event);
 
 	mutex_unlock(&osb->recovery_lock);
 
-	if (quota_enabled)
-		kfree(rm_quota);
+	kfree(rm_quota);
 
 	return status;
 }
 
 void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
 {
+	int was_set = -1;
+
 	mutex_lock(&osb->recovery_lock);
+	if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
+		was_set = ocfs2_recovery_map_set(osb, node_num);
 
 	trace_ocfs2_recovery_thread(node_num, osb->node_num,
-			osb->disable_recovery, osb->recovery_thread_task,
-			osb->disable_recovery ?
-			-1 : ocfs2_recovery_map_set(osb, node_num));
+			osb->recovery_state, osb->recovery_thread_task, was_set);
 
-	if (osb->disable_recovery)
+	if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
 		goto out;
 
 	if (osb->recovery_thread_task)
@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
 
 int ocfs2_recovery_init(struct ocfs2_super *osb);
 void ocfs2_recovery_exit(struct ocfs2_super *osb);
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
 
 int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
 void ocfs2_free_replay_slots(struct ocfs2_super *osb);
@@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type {
 void ocfs2_initialize_journal_triggers(struct super_block *sb,
 				       struct ocfs2_triggers triggers[]);
 
+enum ocfs2_recovery_state {
+	OCFS2_REC_ENABLED = 0,
+	OCFS2_REC_QUOTA_WANT_DISABLE,
+	/*
+	 * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
+	 * ocfs2_recovery_disable_quota() to work.
+	 */
+	OCFS2_REC_QUOTA_DISABLED,
+	OCFS2_REC_WANT_DISABLE,
+	/*
+	 * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
+	 */
+	OCFS2_REC_DISABLED,
+};
+
 struct ocfs2_journal;
 struct ocfs2_slot_info;
 struct ocfs2_recovery_map;
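The ordering comments in this enum are load-bearing: ocfs2_recovery_disable() jumps straight to `state + 1` when no recovery thread is running, so each WANT_DISABLE value must be immediately followed by its DISABLED value. If one wanted to enforce that invariant at compile time, a check along these lines would do (an illustrative addition, not part of the patch):

#include <linux/build_bug.h>

static_assert(OCFS2_REC_QUOTA_DISABLED == OCFS2_REC_QUOTA_WANT_DISABLE + 1,
	      "ocfs2_recovery_disable_quota() relies on WANT_DISABLE + 1 == DISABLED");
static_assert(OCFS2_REC_DISABLED == OCFS2_REC_WANT_DISABLE + 1,
	      "ocfs2_recovery_exit() relies on WANT_DISABLE + 1 == DISABLED");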
@@ -370,7 +385,7 @@ struct ocfs2_super
 	struct ocfs2_recovery_map *recovery_map;
 	struct ocfs2_replay_map *replay_map;
 	struct task_struct *recovery_thread_task;
-	int disable_recovery;
+	enum ocfs2_recovery_state recovery_state;
 	wait_queue_head_t checkpoint_event;
 	struct ocfs2_journal *journal;
 	unsigned long osb_commit_interval;
@@ -453,8 +453,7 @@ out:
 
 /* Sync changes in local quota file into global quota file and
  * reinitialize local quota file.
- * The function expects local quota file to be already locked and
- * s_umount locked in shared mode. */
+ * The function expects local quota file to be already locked. */
 static int ocfs2_recover_local_quota_file(struct inode *lqinode,
 					  int type,
 					  struct ocfs2_quota_recovery *rec)

@@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 {
 	unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
 					      LOCAL_GROUP_QUOTA_SYSTEM_INODE };
-	struct super_block *sb = osb->sb;
 	struct ocfs2_local_disk_dqinfo *ldinfo;
 	struct buffer_head *bh;
 	handle_t *handle;

@@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
 	printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
 	       "slot %u\n", osb->dev_str, slot_num);
 
-	down_read(&sb->s_umount);
 	for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
 		if (list_empty(&(rec->r_list[type])))
 			continue;

@@ -677,7 +674,6 @@ out_put:
 		break;
 	}
 out:
-	up_read(&sb->s_umount);
 	kfree(rec);
 	return status;
 }

@@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
 	ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
 
 	/*
-	 * s_umount held in exclusive mode protects us against racing with
-	 * recovery thread...
+	 * ocfs2_dismount_volume() has already aborted quota recovery...
 	 */
 	if (oinfo->dqi_rec) {
 		ocfs2_free_quota_recovery(oinfo->dqi_rec);
@@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
 	bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
 					       ac, cl);
-	if (PTR_ERR(bg_bh) == -ENOSPC)
+	if (PTR_ERR(bg_bh) == -ENOSPC) {
+		ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
 		bg_bh = ocfs2_block_group_alloc_discontig(handle,
 							  alloc_inode,
 							  ac, cl);
+	}
 	if (IS_ERR(bg_bh)) {
 		status = PTR_ERR(bg_bh);
 		bg_bh = NULL;

@@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 {
 	int status;
 	u16 chain;
+	u32 contig_bits;
 	u64 next_group;
 	struct inode *alloc_inode = ac->ac_inode;
 	struct buffer_head *group_bh = NULL;

@@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
 	status = -ENOSPC;
 	/* for now, the chain search is a bit simplistic. We just use
 	 * the 1st group with any empty bits. */
-	while ((status = ac->ac_group_search(alloc_inode, group_bh,
-					     bits_wanted, min_bits,
-					     ac->ac_max_block,
-					     res)) == -ENOSPC) {
+	while (1) {
+		if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) {
+			contig_bits = le16_to_cpu(bg->bg_contig_free_bits);
+			if (!contig_bits)
+				contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
+						le16_to_cpu(bg->bg_bits), 0);
+			if (bits_wanted > contig_bits && contig_bits >= min_bits)
+				bits_wanted = contig_bits;
+		}
+
+		status = ac->ac_group_search(alloc_inode, group_bh,
+					     bits_wanted, min_bits,
+					     ac->ac_max_block, res);
+		if (status != -ENOSPC)
+			break;
 		if (!bg->bg_next_group)
 			break;

@@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 	victim = ocfs2_find_victim_chain(cl);
 	ac->ac_chain = victim;
 
+search:
 	status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
 				    res, &bits_left);
 	if (!status) {

@@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
 		}
 	}
 
+	/* Chains can't supply the bits_wanted contiguous space.
+	 * We should switch to using every single bit when allocating
+	 * from the global bitmap. */
+	if (i == le16_to_cpu(cl->cl_next_free_rec) &&
+	    status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) {
+		ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
+		ac->ac_chain = victim;
+		goto search;
+	}
+
 set_hint:
 	if (status != -ENOSPC) {
 		/* If the next search of this group is not likely to

@@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
 	BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
 
 	BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
-	       && ac->ac_which != OCFS2_AC_USE_MAIN);
+	       && ac->ac_which != OCFS2_AC_USE_MAIN
+	       && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
 
 	if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
 		WARN_ON(min_clusters > 1);
@@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
 #define OCFS2_AC_USE_MAIN  2
 #define OCFS2_AC_USE_INODE 3
 #define OCFS2_AC_USE_META  4
+#define OCFS2_AC_USE_MAIN_DISCONTIG  5
 	u32    ac_which;
 
 	/* these are used by the chain search */
@@ -1812,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 	/* Orphan scan should be stopped as early as possible */
 	ocfs2_orphan_scan_stop(osb);
 
+	/* Stop quota recovery so that we can disable quotas */
+	ocfs2_recovery_disable_quota(osb);
+
 	ocfs2_disable_quotas(osb);
 
 	/* All dquots should be freed by now */
@@ -1585,8 +1585,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
 	ret = -EAGAIN;
-	if (atomic_read(&ctx->mmap_changing))
+	if (unlikely(atomic_read(&ctx->mmap_changing))) {
+		if (unlikely(put_user(ret, &user_uffdio_copy->copy)))
+			return -EFAULT;
 		goto out;
+	}
 
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_copy, user_uffdio_copy,
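With this change (repeated below for the zeropage, continue, poison and move ioctls) the kernel writes -EAGAIN into the ioctl's output field before bailing out on the mmap_changing race, so userspace that consults that field afterwards no longer reads uninitialized data. A hedged sketch of a UFFDIO_COPY caller checking that field (the ioctl and struct uffdio_copy fields are from the userfaultfd UAPI; the surrounding setup is assumed):

#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <errno.h>
#include <stdint.h>

/* Resolve one fault: copy 'len' bytes from 'src' into the registered
 * range at 'dst'.  Returns 0, or a negative errno-style value. */
static long uffd_copy_once(int uffd, uint64_t dst, uint64_t src, uint64_t len)
{
	struct uffdio_copy copy = {
		.dst = dst, .src = src, .len = len, .mode = 0, .copy = 0,
	};

	if (ioctl(uffd, UFFDIO_COPY, &copy) == 0)
		return 0;
	/* The kernel also reports the outcome in copy.copy (bytes copied or a
	 * negative error).  Before this fix, the mmap_changing early exit left
	 * the field untouched; now it reads back as -EAGAIN. */
	return copy.copy < 0 ? copy.copy : -errno;
}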
@@ -1641,8 +1644,11 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
 	ret = -EAGAIN;
-	if (atomic_read(&ctx->mmap_changing))
+	if (unlikely(atomic_read(&ctx->mmap_changing))) {
+		if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
+			return -EFAULT;
 		goto out;
+	}
 
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_zeropage, user_uffdio_zeropage,

@@ -1744,8 +1750,11 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 	user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
 	ret = -EAGAIN;
-	if (atomic_read(&ctx->mmap_changing))
+	if (unlikely(atomic_read(&ctx->mmap_changing))) {
+		if (unlikely(put_user(ret, &user_uffdio_continue->mapped)))
+			return -EFAULT;
 		goto out;
+	}
 
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_continue, user_uffdio_continue,

@@ -1801,8 +1810,11 @@ static inline int userfaultfd_poison(struct userfaultfd_ctx *ctx, unsigned long
 	user_uffdio_poison = (struct uffdio_poison __user *)arg;
 
 	ret = -EAGAIN;
-	if (atomic_read(&ctx->mmap_changing))
+	if (unlikely(atomic_read(&ctx->mmap_changing))) {
+		if (unlikely(put_user(ret, &user_uffdio_poison->updated)))
+			return -EFAULT;
 		goto out;
+	}
 
 	ret = -EFAULT;
 	if (copy_from_user(&uffdio_poison, user_uffdio_poison,

@@ -1870,8 +1882,12 @@ static int userfaultfd_move(struct userfaultfd_ctx *ctx,
 
 	user_uffdio_move = (struct uffdio_move __user *) arg;
 
-	if (atomic_read(&ctx->mmap_changing))
-		return -EAGAIN;
+	ret = -EAGAIN;
+	if (unlikely(atomic_read(&ctx->mmap_changing))) {
+		if (unlikely(put_user(ret, &user_uffdio_move->move)))
+			return -EFAULT;
+		goto out;
+	}
 
 	if (copy_from_user(&uffdio_move, user_uffdio_move,
 			   /* don't copy "move" last field */
@@ -61,6 +61,7 @@ struct vm_struct {
 	unsigned int		nr_pages;
 	phys_addr_t		phys_addr;
 	const void		*caller;
+	unsigned long		requested_size;
 };
 
 struct vmap_area {
@@ -3075,6 +3075,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
 			   pmd_t *pmd, bool freeze, struct folio *folio)
 {
+	bool pmd_migration = is_pmd_migration_entry(*pmd);
+
 	VM_WARN_ON_ONCE(folio && !folio_test_pmd_mappable(folio));
 	VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE));
 	VM_WARN_ON_ONCE(folio && !folio_test_locked(folio));

@@ -3085,9 +3087,12 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
 	 * require a folio to check the PMD against. Otherwise, there
 	 * is a risk of replacing the wrong folio.
 	 */
-	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
-	    is_pmd_migration_entry(*pmd)) {
-		if (folio && folio != pmd_folio(*pmd))
+	if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) || pmd_migration) {
+		/*
+		 * Do not apply pmd_folio() to a migration entry; and folio lock
+		 * guarantees that it must be of the wrong folio anyway.
+		 */
+		if (folio && (pmd_migration || folio != pmd_folio(*pmd)))
 			return;
 		__split_huge_pmd_locked(vma, pmd, address, freeze);
 	}
@@ -4034,10 +4034,13 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
 
 	list_for_each_entry_safe(folio, next, src_list, lru) {
 		int i;
+		bool cma;
 
 		if (folio_test_hugetlb_vmemmap_optimized(folio))
 			continue;
 
+		cma = folio_test_hugetlb_cma(folio);
+
 		list_del(&folio->lru);
 
 		split_page_owner(&folio->page, huge_page_order(src), huge_page_order(dst));

@@ -4053,6 +4056,9 @@ static long demote_free_hugetlb_folios(struct hstate *src, struct hstate *dst,
 
 			new_folio->mapping = NULL;
 			init_new_hugetlb_folio(dst, new_folio);
+			/* Copy the CMA flag so that it is freed correctly */
+			if (cma)
+				folio_set_hugetlb_cma(new_folio);
 			list_add(&new_folio->lru, &dst_list);
 		}
 	}
@@ -248,11 +248,9 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 		pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
 		bool *any_writable, bool *any_young, bool *any_dirty)
 {
-	unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
-	const pte_t *end_ptep = start_ptep + max_nr;
 	pte_t expected_pte, *ptep;
 	bool writable, young, dirty;
-	int nr;
+	int nr, cur_nr;
 
 	if (any_writable)
 		*any_writable = false;

@@ -265,11 +263,15 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 	VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
 	VM_WARN_ON_FOLIO(page_folio(pfn_to_page(pte_pfn(pte))) != folio, folio);
 
+	/* Limit max_nr to the actual remaining PFNs in the folio we could batch. */
+	max_nr = min_t(unsigned long, max_nr,
+		       folio_pfn(folio) + folio_nr_pages(folio) - pte_pfn(pte));
+
 	nr = pte_batch_hint(start_ptep, pte);
 	expected_pte = __pte_batch_clear_ignored(pte_advance_pfn(pte, nr), flags);
 	ptep = start_ptep + nr;
 
-	while (ptep < end_ptep) {
+	while (nr < max_nr) {
 		pte = ptep_get(ptep);
 		if (any_writable)
 			writable = !!pte_write(pte);

@@ -282,14 +284,6 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 		if (!pte_same(pte, expected_pte))
 			break;
 
-		/*
-		 * Stop immediately once we reached the end of the folio. In
-		 * corner cases the next PFN might fall into a different
-		 * folio.
-		 */
-		if (pte_pfn(pte) >= folio_end_pfn)
-			break;
-
 		if (any_writable)
 			*any_writable |= writable;
 		if (any_young)

@@ -297,12 +291,13 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 		if (any_dirty)
 			*any_dirty |= dirty;
 
-		nr = pte_batch_hint(ptep, pte);
-		expected_pte = pte_advance_pfn(expected_pte, nr);
-		ptep += nr;
+		cur_nr = pte_batch_hint(ptep, pte);
+		expected_pte = pte_advance_pfn(expected_pte, cur_nr);
+		ptep += cur_nr;
+		nr += cur_nr;
 	}
 
-	return min(ptep - start_ptep, max_nr);
+	return min(nr, max_nr);
 }
 
 /**
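The rewritten loop above counts PTEs (`nr`) instead of comparing pointers, after first clamping `max_nr` to the PFNs that remain in the folio; this is the folio_pte_batch() XEN PV fix from the shortlog. A simplified userspace model of the counting logic (plain integers stand in for PTEs; batch_count and its parameters are invented for illustration):

#include <stdio.h>

/* Model: pfns[] plays the role of the PTE array; a batch is a run of
 * consecutive PFNs.  remaining_in_folio mirrors the new clamp on max_nr. */
static int batch_count(const unsigned long *pfns, int max_nr,
		       int remaining_in_folio)
{
	int nr = 1, cur_nr;

	if (max_nr > remaining_in_folio)
		max_nr = remaining_in_folio;

	while (nr < max_nr) {
		if (pfns[nr] != pfns[nr - 1] + 1)
			break;		/* next PFN is not consecutive */
		cur_nr = 1;		/* pte_batch_hint() analogue */
		nr += cur_nr;
	}
	/* The kernel returns min(nr, max_nr) because batch hints can overshoot. */
	return nr < max_nr ? nr : max_nr;
}

int main(void)
{
	/* Five consecutive PFNs, but only three pages left in the folio. */
	unsigned long pfns[] = { 100, 101, 102, 103, 104 };

	printf("%d\n", batch_count(pfns, 5, 3));	/* prints 3 */
	return 0;
}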
@@ -1272,13 +1272,22 @@ int folio_alloc_swap(struct folio *folio, gfp_t gfp)
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
 
-	/*
-	 * Should not even be attempting large allocations when huge
-	 * page swap is disabled. Warn and fail the allocation.
-	 */
-	if (order && (!IS_ENABLED(CONFIG_THP_SWAP) || size > SWAPFILE_CLUSTER)) {
-		VM_WARN_ON_ONCE(1);
-		return -EINVAL;
+	if (order) {
+		/*
+		 * Reject large allocation when THP_SWAP is disabled,
+		 * the caller should split the folio and try again.
+		 */
+		if (!IS_ENABLED(CONFIG_THP_SWAP))
+			return -EAGAIN;
+
+		/*
+		 * Allocation size should never exceed cluster size
+		 * (HPAGE_PMD_SIZE).
+		 */
+		if (size > SWAPFILE_CLUSTER) {
+			VM_WARN_ON_ONCE(1);
+			return -EINVAL;
+		}
 	}
 
 	local_lock(&percpu_swap_cluster.lock);
mm/vmalloc.c | 31
@@ -1940,7 +1940,7 @@ static inline void setup_vmalloc_vm(struct vm_struct *vm,
 {
 	vm->flags = flags;
 	vm->addr = (void *)va->va_start;
-	vm->size = va_size(va);
+	vm->size = vm->requested_size = va_size(va);
 	vm->caller = caller;
 	va->vm = vm;
 }

@@ -3133,6 +3133,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size,
 
 	area->flags = flags;
 	area->caller = caller;
+	area->requested_size = requested_size;
 
 	va = alloc_vmap_area(size, align, start, end, node, gfp_mask, 0, area);
 	if (IS_ERR(va)) {

@@ -4063,6 +4064,8 @@ EXPORT_SYMBOL(vzalloc_node_noprof);
  */
 void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
 {
+	struct vm_struct *vm = NULL;
+	size_t alloced_size = 0;
 	size_t old_size = 0;
 	void *n;

@@ -4072,15 +4075,17 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
 	}
 
 	if (p) {
-		struct vm_struct *vm;
-
 		vm = find_vm_area(p);
 		if (unlikely(!vm)) {
 			WARN(1, "Trying to vrealloc() nonexistent vm area (%p)\n", p);
 			return NULL;
 		}
 
-		old_size = get_vm_area_size(vm);
+		alloced_size = get_vm_area_size(vm);
+		old_size = vm->requested_size;
+		if (WARN(alloced_size < old_size,
+			 "vrealloc() has mismatched area vs requested sizes (%p)\n", p))
+			return NULL;
 	}
 
 	/*

@@ -4088,14 +4093,26 @@ void *vrealloc_noprof(const void *p, size_t size, gfp_t flags)
 	 * would be a good heuristic for when to shrink the vm_area?
 	 */
 	if (size <= old_size) {
-		/* Zero out spare memory. */
-		if (want_init_on_alloc(flags))
+		/* Zero out "freed" memory. */
+		if (want_init_on_free())
 			memset((void *)p + size, 0, old_size - size);
+		vm->requested_size = size;
 		kasan_poison_vmalloc(p + size, old_size - size);
-		kasan_unpoison_vmalloc(p, size, KASAN_VMALLOC_PROT_NORMAL);
 		return (void *)p;
 	}
 
+	/*
+	 * We already have the bytes available in the allocation; use them.
+	 */
+	if (size <= alloced_size) {
+		kasan_unpoison_vmalloc(p + old_size, size - old_size,
+				       KASAN_VMALLOC_PROT_NORMAL);
+		/* Zero out "alloced" memory. */
+		if (want_init_on_alloc(flags))
+			memset((void *)p + old_size, 0, size - old_size);
+		vm->requested_size = size;
+	}
+
 	/* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
 	n = __vmalloc_noprof(size, flags);
 	if (!n)
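The new requested_size field separates the size the caller asked for from the page-granular size the vm_area actually maps, which is what lets vrealloc() shrink in place (re-poisoning the tail) and recognize a grow request that still fits inside the pages it already has. A hedged usage sketch of the API contract (kernel-style; resize_buf and its names are made up):

#include <linux/vmalloc.h>

/* Hypothetical helper: resize a vmalloc'ed buffer, preserving it on failure. */
static void *resize_buf(void *buf, size_t new_len)
{
	void *n = vrealloc(buf, new_len, GFP_KERNEL);

	/* On failure vrealloc() returns NULL and the old buffer stays valid,
	 * so the caller still owns 'buf' and must eventually vfree() it. */
	return n;
}

For example, a buffer originally requested at 1000 bytes occupies a full 4 KiB page, so a later request for 1500 bytes stays within alloced_size and can be satisfied from the existing mapping rather than forcing a fresh allocation.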
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
 	int compaction_index = 0;
 	char nr_hugepages[20] = {0};
 	char init_nr_hugepages[24] = {0};
+	char target_nr_hugepages[24] = {0};
+	int slen;
 
 	snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
 		 "%lu", initial_nr_hugepages);

@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
 		goto out;
 	}
 
-	/* Request a large number of huge pages. The Kernel will allocate
-	   as much as it can */
-	if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
-		ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
-			       strerror(errno));
+	/*
+	 * Request huge pages for about half of the free memory. The Kernel
+	 * will allocate as much as it can, and we expect it will get at least 1/3
+	 */
+	nr_hugepages_ul = mem_free / hugepage_size / 2;
+	snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+		 "%lu", nr_hugepages_ul);
+
+	slen = strlen(target_nr_hugepages);
+	if (write(fd, target_nr_hugepages, slen) != slen) {
+		ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+			       nr_hugepages_ul, strerror(errno));
 		goto close_fd;
 	}
 
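The rewritten request scales with the machine: the test now asks for mem_free / hugepage_size / 2 huge pages instead of a fixed 100000, which avoids absurd targets on hosts with a huge amount of memory. With assumed figures of 16 GiB free and 2 MiB huge pages that works out to 4096 pages; a quick check of the arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long mem_free = 16UL << 30;		/* 16 GiB, assumed */
	unsigned long hugepage_size = 2UL << 20;	/* 2 MiB huge pages */
	unsigned long target = mem_free / hugepage_size / 2;

	printf("%lu\n", target);			/* prints 4096 */
	return 0;
}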
@@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions)
 	self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
 	setup_sighandler();
 
-	if (variant->backing == ANON_BACKED)
+	switch (variant->backing) {
+	case ANON_BACKED:
 		return;
-
-	self->fd = open_file(
-		variant->backing == SHMEM_BACKED ? "/tmp/" : "",
-		self->path);
+	case LOCAL_FILE_BACKED:
+		self->fd = open_file("", self->path);
+		break;
+	case SHMEM_BACKED:
+		self->fd = memfd_create(self->path, 0);
+		break;
+	}
 
 	/* We truncate file to at least 100 pages, tests can modify as needed. */
 	ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);

@@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file)
 	char *ptr;
 	int i;
 
-	if (variant->backing == ANON_BACKED)
+	if (variant->backing != LOCAL_FILE_BACKED)
 		SKIP(return, "Read-only test specific to file-backed");
 
 	/* Map shared so we can populate with pattern, populate it, unmap. */
@@ -3,6 +3,8 @@
 #ifndef _PKEYS_POWERPC_H
 #define _PKEYS_POWERPC_H
 
+#include <sys/stat.h>
+
 #ifndef SYS_pkey_alloc
 # define SYS_pkey_alloc		384
 # define SYS_pkey_free		385

@@ -102,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
 	return;
 }
 
+#define REPEAT_8(s) s s s s s s s s
+#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
+		     REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)
+#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \
+		      REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s)
+#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \
+		       REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s)
+#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \
+			REPEAT_4096(s) REPEAT_4096(s)
+
 /* 4-byte instructions * 16384 = 64K page */
-#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+#define __page_o_noops() asm(REPEAT_16384("nop\n"))
 
 static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
 {
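The REPEAT_* macros above replace the assembler-level `.rept` directive with plain C string-literal concatenation: REPEAT_16384("nop\n") expands to one literal containing 16384 nop instructions, which any compiler front end accepts. A small runnable check of how the repetition composes (using only the smaller macros, with sizeof counting the trailing NUL):

#include <stdio.h>

#define REPEAT_8(s)  s s s s s s s s
#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
		     REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)

int main(void)
{
	/* Adjacent string literals are concatenated, so REPEAT_64("nop\n")
	 * is a single 64 * 4 = 256 character literal plus the trailing NUL. */
	static const char insns[] = REPEAT_64("nop\n");

	printf("%zu\n", sizeof(insns));	/* prints 257 */
	return 0;
}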
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
+#define __SANE_USERSPACE_TYPES__
 #include <sys/syscall.h>
 #include <unistd.h>
 