Skip to content

Commit 7814442

Browse files
mingnus and vijay-suman
authored and committed
dm cache: prevent BUG_ON by blocking retries on failed device resumes
[ Upstream commit 5da692e ]

A cache device failing to resume due to mapping errors should not be
retried, as the failure leaves a partially initialized policy object.
Repeating the resume operation risks triggering BUG_ON when reloading
cache mappings into the incomplete policy object.

Reproduce steps:

1. create a cache metadata consisting of 512 or more cache blocks,
   with some mappings stored in the first array block of the mapping
   array. Here we use cache_restore v1.0 to build the metadata.

cat <<EOF >> cmeta.xml
<superblock uuid="" block_size="128" nr_cache_blocks="512" \
policy="smq" hint_width="4">
  <mappings>
    <mapping cache_block="0" origin_block="0" dirty="false"/>
  </mappings>
</superblock>
EOF
dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
cache_restore -i cmeta.xml -o /dev/mapper/cmeta --metadata-version=2
dmsetup remove cmeta

2. wipe the second array block of the mapping array to simulate
   data degradation.

mapping_root=$(dd if=/dev/sdc bs=1c count=8 skip=192 \
2>/dev/null | hexdump -e '1/8 "%u\n"')
ablock=$(dd if=/dev/sdc bs=1c count=8 skip=$((4096*mapping_root+2056)) \
2>/dev/null | hexdump -e '1/8 "%u\n"')
dd if=/dev/zero of=/dev/sdc bs=4k count=1 seek=$ablock

3. try bringing up the cache device. The resume is expected to fail
   due to the broken array block.

dmsetup create cmeta --table "0 8192 linear /dev/sdc 0"
dmsetup create cdata --table "0 65536 linear /dev/sdc 8192"
dmsetup create corig --table "0 524288 linear /dev/sdc 262144"
dmsetup create cache --notable
dmsetup load cache --table "0 524288 cache /dev/mapper/cmeta \
/dev/mapper/cdata /dev/mapper/corig 128 2 metadata2 writethrough smq 0"
dmsetup resume cache

4. try resuming the cache again. An unexpected BUG_ON is triggered
   while loading cache mappings.

dmsetup resume cache

Kernel logs:

(snip)
------------[ cut here ]------------
kernel BUG at drivers/md/dm-cache-policy-smq.c:752!
Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI
CPU: 0 UID: 0 PID: 332 Comm: dmsetup Not tainted 6.13.4 #3
RIP: 0010:smq_load_mapping+0x3e5/0x570

Fix by disallowing resume operations for devices that failed the
initial attempt.

Signed-off-by: Ming-Hung Tsai <[email protected]>
Signed-off-by: Mikulas Patocka <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
(cherry picked from commit 025c8f477625eb39006ded650e7d027bcfb20e79)
Signed-off-by: Vijayendra Suman <[email protected]>
1 parent e953d41 commit 7814442

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

drivers/md/dm-cache-target.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2883,6 +2883,27 @@ static dm_cblock_t get_cache_dev_size(struct cache *cache)
28832883
return to_cblock(size);
28842884
}
28852885

2886+
static bool can_resume(struct cache *cache)
2887+
{
2888+
/*
2889+
* Disallow retrying the resume operation for devices that failed the
2890+
* first resume attempt, as the failure leaves the policy object partially
2891+
* initialized. Retrying could trigger BUG_ON when loading cache mappings
2892+
* into the incomplete policy object.
2893+
*/
2894+
if (cache->sized && !cache->loaded_mappings) {
2895+
if (get_cache_mode(cache) != CM_WRITE)
2896+
DMERR("%s: unable to resume a failed-loaded cache, please check metadata.",
2897+
cache_device_name(cache));
2898+
else
2899+
DMERR("%s: unable to resume cache due to missing proper cache table reload",
2900+
cache_device_name(cache));
2901+
return false;
2902+
}
2903+
2904+
return true;
2905+
}
2906+
28862907
static bool can_resize(struct cache *cache, dm_cblock_t new_size)
28872908
{
28882909
if (from_cblock(new_size) > from_cblock(cache->cache_size)) {
@@ -2931,6 +2952,9 @@ static int cache_preresume(struct dm_target *ti)
29312952
struct cache *cache = ti->private;
29322953
dm_cblock_t csize = get_cache_dev_size(cache);
29332954

2955+
if (!can_resume(cache))
2956+
return -EINVAL;
2957+
29342958
/*
29352959
* Check to see if the cache has resized.
29362960
*/

0 commit comments

Comments
 (0)