* [mdadm PATCH] bcache: add bcache superblock
@ 2012-05-11 20:39 Dan Williams
[not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Dan Williams @ 2012-05-11 20:39 UTC (permalink / raw)
To: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA
Cc: linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA
This is a hybrid proposal for supporting bcache as a md device.
Somewhat similar to the v1.x metadata format, where array assembly is
handled in userspace, but managed in the kernel. In the bcache case it
is an "external" metadata format, but then the expectation is that the
kernel "bcache" personality takes over runtime maintenance of the
metadata.
The container id for bcache is the "cache_set". The subvolume is the
backing device identifier.
This initial version only supports the runtime static portion of the
superblock, it will need to grow the ability to read the journal to
report the backing devices associated with a given cache set (i.e. in
the superblock backing devices know their cache_set container, but cache
devices need to look elsewhere to find their backing devices).
Cc: Kent Overstreet <koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
Assemble.c | 1
Makefile | 11 +
bcache.h | 98 +++++++++
crc64.c | 129 +++++++++++
maps.c | 2
mdadm.h | 2
super-bcache.c | 634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
util.c | 2
8 files changed, 873 insertions(+), 6 deletions(-)
create mode 100644 bcache.h
create mode 100644 crc64.c
create mode 100644 super-bcache.c
diff --git a/Assemble.c b/Assemble.c
index fd94461..267a2ce 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
} else switch(content->array.level) {
case LEVEL_LINEAR:
case LEVEL_MULTIPATH:
+ case LEVEL_BCACHE:
case 0:
err = sysfs_set_str(content, NULL, "array_state",
"active");
diff --git a/Makefile b/Makefile
index b8d363f..7886d13 100644
--- a/Makefile
+++ b/Makefile
@@ -103,8 +103,8 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \
Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
Incremental.o \
mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
- super-mbr.o super-gpt.o \
- restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
+ super-mbr.o super-gpt.o super-bcache.o \
+ restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \
platform-intel.o probe_roms.o
CHECK_OBJS = restripe.o sysfs.o maps.o lib.o
@@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h
MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
config.o policy.o lib.o \
Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
- super-mbr.o super-gpt.o \
- super-ddf.o sha1.o crc32.o msg.o bitmap.o \
+ super-mbr.o super-gpt.o super-bcache.o \
+ super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \
platform-intel.o probe_roms.o
MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
@@ -128,7 +128,8 @@ STATICOBJS = pwgr.o
ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
maps.c lib.c \
super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
- platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
+ platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \
+ super-bcache.c crc64.c
ASSEMBLE_AUTO_SRCS := mdopen.c
ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
ifdef MDASSEMBLE_AUTO
diff --git a/bcache.h b/bcache.h
new file mode 100644
index 0000000..765e369
--- /dev/null
+++ b/bcache.h
@@ -0,0 +1,98 @@
+#ifndef _BCACHE_H
+#define _BCACHE_H
+
+#include <stdint.h>
+
+#define BITMASK(name, type, field, offset, size) \
+static inline uint64_t name(const type *k) \
+{ \
+ uint64_t field = __le64_to_cpu(k->field); \
+ return (field >> offset) & ~(((uint64_t) ~0) << size); \
+} \
+ \
+static inline void SET_##name(type *k, uint64_t v) \
+{ \
+ uint64_t field = __le64_to_cpu(k->field); \
+ field &= ~(~((uint64_t) ~0 << size) << offset); \
+ field |= v << offset; \
+ k->field = __cpu_to_le64(field); \
+}
+
+static const char bcache_magic[] = {
+ 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
+ 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
+
+/* Version 1: Backing dev
+ * Version 2: Seed pointer into btree node checksum
+ * Version 3: Backing dev superblock has offset of start of data
+ */
+
+#define BCACHE_SB_BDEV_VERSION 3
+#define BCACHE_SB_MAX_VERSION 3
+
+#define SB_SECTOR 8
+#define SB_SIZE 16 /* default data_offset in bcache-tools (?) */
+#define SB_LABEL_SIZE 32
+
+struct cache_sb {
+ uint64_t csum;
+ uint64_t offset; /* sector where this sb was written */
+ uint64_t version;
+#define CACHE_BACKING_DEV 1
+
+ uint8_t magic[16];
+
+ uint8_t uuid[16];
+ union {
+ uint8_t set_uuid[16];
+ uint64_t set_magic;
+ };
+ uint8_t label[SB_LABEL_SIZE];
+
+ uint64_t flags;
+ uint64_t seq;
+ uint64_t pad[8];
+
+ uint64_t nbuckets; /* device size */
+ uint16_t block_size; /* sectors */
+ uint16_t bucket_size; /* sectors */
+
+ uint16_t nr_in_set;
+ uint16_t nr_this_dev;
+
+ uint32_t last_mount; /* time_t */
+
+ uint16_t first_bucket;
+ uint16_t keys; /* number of journal buckets */
+ uint64_t d[]; /* journal buckets */
+};
+
+static inline int SB_BDEV(struct cache_sb *c)
+{
+ return __le64_to_cpu(c->version) == CACHE_BACKING_DEV;
+}
+
+BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
+BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
+BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
+
+BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
+#define CACHE_MODE_WRITETHROUGH 0U
+#define CACHE_MODE_WRITEBACK 1U
+#define CACHE_MODE_WRITEAROUND 2U
+#define CACHE_MODE_NONE 3U
+BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
+#define BDEV_STATE_NONE 0U
+#define BDEV_STATE_CLEAN 1U
+#define BDEV_STATE_DIRTY 2U
+#define BDEV_STATE_STALE 3U
+
+inline uint64_t crc64(const void *_data, size_t len);
+
+#define node(i, j) ((void *) ((i)->d + (j)))
+#define end(i) node(i, (i)->keys)
+
+#define csum_set(i) \
+ crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
+
+#endif
diff --git a/crc64.c b/crc64.c
new file mode 100644
index 0000000..8f37445
--- /dev/null
+++ b/crc64.c
@@ -0,0 +1,129 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+
+/*
+ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
+ * use permitted, subject to terms of PostgreSQL license; see.)
+
+ * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
+ * usual sort of implementation. (See Ross Williams' excellent introduction
+ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
+ * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
+ * If we have no working 64-bit type, then fake it with two 32-bit registers.
+ *
+ * The present implementation is a normal (not "reflected", in Williams'
+ * terms) 64-bit CRC, using initial all-ones register contents and a final
+ * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
+ * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+*/
+
+static const uint64_t crc_table[256] = {
+ 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
+ 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
+ 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
+ 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
+ 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
+ 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
+ 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
+ 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
+ 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
+ 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
+ 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
+ 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
+ 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
+ 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
+ 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
+ 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
+ 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
+ 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
+ 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
+ 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
+ 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
+ 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
+ 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
+ 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
+ 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
+ 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
+ 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
+ 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
+ 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
+ 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
+ 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
+ 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
+ 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
+ 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
+ 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
+ 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
+ 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
+ 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
+ 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
+ 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
+ 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
+ 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
+ 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
+ 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
+ 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
+ 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
+ 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
+ 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
+ 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
+ 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
+ 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
+ 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
+ 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
+ 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
+ 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
+ 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
+ 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
+ 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
+ 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
+ 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
+ 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
+ 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
+ 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
+ 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
+ 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
+ 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
+ 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
+ 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
+ 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
+ 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
+ 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
+ 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
+ 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
+ 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
+ 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
+ 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
+ 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
+ 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
+ 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
+ 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
+ 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
+ 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
+ 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
+ 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
+ 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
+ 0x9AFCE626CE85B507ULL
+};
+
+inline uint64_t crc64(const void *_data, size_t len)
+{
+ uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
+ const unsigned char *data = _data;
+
+ while (len--) {
+ int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
+ crc = crc_table[i] ^ (crc << 8);
+ }
+
+ return crc ^ 0xFFFFFFFFFFFFFFFFULL;
+}
diff --git a/maps.c b/maps.c
index f2ba9a7..cedf548 100644
--- a/maps.c
+++ b/maps.c
@@ -94,6 +94,8 @@ mapping_t pers[] = {
{ "10", 10},
{ "faulty", LEVEL_FAULTY},
{ "container", LEVEL_CONTAINER},
+ { "bcache", LEVEL_BCACHE},
+ { "11", LEVEL_BCACHE},
{ NULL, 0}
};
diff --git a/mdadm.h b/mdadm.h
index 3bcd052..a0ccff6 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -816,6 +816,7 @@ extern struct superswitch {
extern struct superswitch super0, super1;
extern struct superswitch super_imsm, super_ddf;
extern struct superswitch mbr, gpt;
+extern struct superswitch super_bcache;
struct metadata_update {
int len;
@@ -1296,6 +1297,7 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
+#define LEVEL_BCACHE (0xb)
/* kernel module doesn't know about these */
#define LEVEL_CONTAINER (-100)
diff --git a/super-bcache.c b/super-bcache.c
new file mode 100644
index 0000000..ec8f3db
--- /dev/null
+++ b/super-bcache.c
@@ -0,0 +1,634 @@
+/*
+ * mdadm - bcache support
+ *
+ * Copyright (C) 2012 Intel Corporation
+ *
+ * bcache definitions copied from bcache-tools:
+ * git://evilpiepirate.org/~kent/bcache-tools.git
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define HAVE_STDINT_H 1
+#include "mdadm.h"
+#include "bcache.h"
+
+struct bcache_super {
+ union {
+ struct cache_sb *sb;
+ void *buf;
+ };
+ struct dl {
+ int major, minor;
+ char *devname;
+ int fd;
+ } *disk;
+ int vol;
+ struct bcache_super *next;
+};
+
+enum {
+ /* FIXME this is a function of the bucket size */
+ BCACHE_MAX_DEVICES = 2,
+};
+
+static int load_cache_sb(struct bcache_super *super, int keep_fd)
+{
+ struct dl *d = super->disk;
+ int rc, fd = d->fd;
+ struct cache_sb *c;
+ struct stat s;
+
+ if (!keep_fd)
+ d->fd = -1;
+
+ rc = fstat(fd, &s);
+ if (rc)
+ return rc;
+ d->major = major(s.st_rdev);
+ d->minor = minor(s.st_rdev);
+
+ rc = posix_memalign(&super->buf, 4096, 4096);
+ if (rc)
+ return rc;
+ c = super->sb;
+
+ if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096)
+ return errno;
+
+ if (csum_set(c) != __le64_to_cpu(c->csum))
+ return ENODEV;
+
+ if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
+ return ENODEV;
+
+ return 0;
+}
+
+static void __free_bcache(struct bcache_super *super)
+{
+ if (!super)
+ return;
+
+ while (super) {
+ struct bcache_super *next = super->next;
+ struct dl *d = super->disk;
+
+ d = super->disk;
+ if (d->fd >= 0)
+ close(d->fd);
+ free(d->devname);
+ free(d);
+ free(super->sb);
+ free(super);
+ super = next;
+ }
+}
+
+static void free_bcache(struct supertype *st)
+{
+ struct bcache_super *super = st->sb;
+
+ __free_bcache(super);
+ st->sb = NULL;
+}
+
+#ifndef MDASSEMBLE
+static void examine_bcache(struct supertype *st, char *homehost)
+{
+ const char *const cache_policies[] = { "lru", "fifo", "random", "" };
+ const char *const bdev_states[] = { "none", "clean", "dirty", "stale" };
+ const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" };
+ struct bcache_super *super = st->sb;
+ uint16_t first_bucket, bucket_size;
+ struct cache_sb *c = super->sb;
+ uint64_t nbuckets, csum;
+ unsigned long long sz;
+ char nbuf[64];
+
+ printf(" Magic : %s\n",
+ memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>");
+ printf(" Version : %d\n", (int) c->version);
+ printf(" Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache");
+ __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+ printf(" Set UUID : %s\n", nbuf + 5);
+ __fname_from_uuid((int *) c->uuid, 0, nbuf, ':');
+ printf(" Cache Devs : %u\n", c->nr_in_set);
+ /* FIXME: list all cache dev uuids in the load_container case */
+ printf(" Device UUID : %s\n", nbuf + 5);
+ printf(" Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "",
+ CACHE_SYNC(c) ? " sync" : "");
+ if (SB_BDEV(c)) {
+ printf(" State : %s\n", bdev_states[BDEV_STATE(c)]);
+ printf(" Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]);
+ } else {
+ printf(" Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]);
+ /* FIXME: add reporting of backing device uuids in the cache caase */
+ }
+ printf(" Label : %.32s\n", c->label);
+ csum = __le64_to_cpu(c->csum);
+ nbuckets = __le64_to_cpu(c->nbuckets);
+ bucket_size = __le16_to_cpu(c->bucket_size);
+ first_bucket = __le16_to_cpu(c->first_bucket);
+ sz = (nbuckets - first_bucket) * bucket_size;
+ printf(" Device Size : %llu%s\n", sz, human_size(sz * 512));
+ printf(" Bucket Size : %u\n", bucket_size);
+ printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets);
+ printf(" this dev : %u\n", __le16_to_cpu(c->nr_this_dev));
+ printf("First Bucket : %u\n", first_bucket);
+ printf(" Checksum : %llx %s\n", (unsigned long long) csum,
+ csum == csum_set(c) ? "correct" : "incorrect");
+}
+
+static void brief_examine_bcache(struct supertype *st, int verbose)
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+ char nbuf[64];
+
+ __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+ printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5);
+}
+
+static void brief_examine_subarrays_bcache(struct supertype *st, int verbose)
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+ char nbuf[64], nbuf1[64];
+
+ /* FIXME this needs to parse the cache device journal to find
+ * and report the backing dev uuid list
+ */
+ if (!SB_BDEV(c))
+ return;
+
+ __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+ __fname_from_uuid((int *) c->uuid, 0, nbuf1, ':');
+
+ printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5);
+}
+
+static void export_examine_bcache(struct supertype *st)
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+ char nbuf[64];
+
+ __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+ printf("MD_METADATA=bcache\n");
+ printf("MD_LEVEL=container\n");
+ printf("MD_UUID=%s\n", nbuf+5);
+ printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1);
+}
+
+static void detail_bcache(struct supertype *st, char *homehost)
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+ char nbuf[64];
+
+ __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+ printf("\n UUID : %s\n", nbuf + 5);
+}
+
+static void brief_detail_bcache(struct supertype *st)
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+ char nbuf[64];
+
+ __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+ printf(" UUID=%s", nbuf + 5);
+}
+
+static struct bcache_super *alloc_super(const char *func)
+{
+ struct bcache_super *super = calloc(1, sizeof(*super));
+ struct dl *d = calloc(1, sizeof(*d));
+
+ if (!super || !d) {
+ fprintf(stderr, Name "%s: %s failed\n", func, __func__);
+ free(super);
+ free(d);
+ return NULL;
+ }
+
+ super->vol = -1;
+ super->disk = d;
+
+ return super;
+}
+
+static int load_container_bcache(struct supertype *st, int fd, char *devname)
+{
+ struct bcache_super *list = NULL;
+ int rc, i, cdev = 0, bdev = 0;
+ int devnum = fd2devnum(fd);
+ struct mdinfo *sra, *sd;
+
+ sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+ if (!sra)
+ return 1;
+
+ if (sra->array.major_version != -1 ||
+ sra->array.minor_version != -2 ||
+ strcmp(sra->text_version, "bcache") != 0) {
+ rc = 1;
+ goto error;
+ }
+
+ for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
+ struct bcache_super *super = alloc_super(__func__);
+ struct cache_sb *c;
+ char nm[32];
+ int fd;
+
+ rc = 1;
+ if (!super)
+ goto error;
+ super->next = list;
+ list = super;
+
+ rc = 2;
+ sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+ fd = dev_open(nm, O_RDWR);
+ if (fd < 0)
+ goto error;
+
+ super->disk->fd = fd;
+ rc = load_cache_sb(super, 1);
+ if (rc)
+ goto error;
+ c = super->sb;
+ if (SB_BDEV(c))
+ bdev++;
+ else
+ cdev++;
+ }
+ rc = 0;
+
+ /* FIXME disambiguate multiple bdevs per set, support multiple
+ * cache devices
+ */
+ if (bdev > 1) {
+ fprintf(stderr, Name ": %d backing devices detected\n", bdev);
+ rc = 3;
+ }
+ if (cdev > 1) {
+ fprintf(stderr, Name ": %d cache devices detected\n", cdev);
+ rc = 3;
+ }
+ if (rc)
+ goto error;
+ st->sb = list;
+ list = NULL;
+
+error:
+ if (list)
+ __free_bcache(list);
+ sysfs_free(sra);
+
+ st->container_dev = devnum;
+ if (rc == 0 && st->ss == NULL) {
+ st->ss = &super_bcache;
+ st->minor_version = 0;
+ st->max_devs = BCACHE_MAX_DEVICES;
+ }
+ return rc;
+}
+#endif
+
+static int load_bcache(struct supertype *st, int fd, char *devname)
+{
+ struct bcache_super *super;
+ struct dl *d;
+ int rc;
+
+ free_bcache(st);
+
+ super = alloc_super(__func__);
+ if (!super)
+ return 1;
+
+ st->sb = super;
+ d = super->disk;
+ d->devname = devname ? strdup(devname) : NULL;
+ d->fd = fd;
+ rc = load_cache_sb(super, 0);
+ if (rc) {
+ free_bcache(st);
+ if (!devname)
+ return rc;
+ fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__,
+ devname, strerror(rc));
+ return rc;
+ }
+
+ if (st->ss == NULL) {
+ st->ss = &super_bcache;
+ st->minor_version = 0;
+ st->max_devs = BCACHE_MAX_DEVICES;
+ }
+
+ return 0;
+}
+
+static int store_bcache(struct supertype *st, int fd)
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+
+ if (!c)
+ return 1;
+
+ if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c))
+ return 1;
+
+ return 0;
+}
+
+static int compare_bcache(struct supertype *st, struct supertype *tst)
+{
+ struct bcache_super *a = st->sb;
+ struct bcache_super *b = tst->sb;
+
+ if (!st->sb) {
+ st->sb = tst->sb;
+ tst->sb = NULL;
+ return 0;
+ }
+
+ if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0)
+ return 2;
+
+ return 0;
+}
+
+static __u64 avail_size_bcache(struct supertype *st, __u64 devsize)
+{
+ /* 4k from start, 8k min data offset */
+ const uint32_t reserved_sectors = (4+8) * 2;
+
+ if (devsize < reserved_sectors)
+ return 0;
+
+ return devsize - reserved_sectors;
+}
+
+static struct supertype *match_metadata_desc_bcache(char *arg)
+{
+ struct supertype *st;
+
+ if (strcmp(arg, "bcache") != 0 &&
+ strcmp(arg, "default") != 0)
+ return NULL;
+
+ st = calloc(1, sizeof(*st));
+ if (!st)
+ return NULL;
+ st->container_dev = NoMdDev;
+ st->ss = &super_bcache;
+ st->max_devs = BCACHE_MAX_DEVICES;
+ st->minor_version = 0;
+ st->sb = NULL;
+
+ return st;
+}
+
+static int match_home_bcache(struct supertype *st, char *homehost)
+{
+ /* the bcache superblock does not specify any host
+ * identification information. maybe it should...
+ */
+
+ return -1;
+}
+
+static void uuid_from_bcache(struct supertype *st, int uuid[4])
+{
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+
+ memcpy(uuid, c->set_uuid, sizeof(c->set_uuid));
+}
+
+static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap)
+{
+ char *name = devnum2devname(st->container_dev);
+ struct bcache_super *super = st->sb;
+ uint16_t bucket_size, first_bucket;
+ struct cache_sb *c = super->sb;
+ unsigned long long sz;
+ uint64_t nbuckets;
+
+ nbuckets = __le64_to_cpu(c->nbuckets);
+ bucket_size = __le16_to_cpu(c->bucket_size);
+ first_bucket = __le16_to_cpu(c->first_bucket);
+ sz = (nbuckets - first_bucket) * bucket_size;
+
+ info->container_member = super->vol;
+ info->custom_array_size = sz;
+ info->component_size = sz;
+ info->recovery_start = MaxSector;
+ info->data_offset = SB_SECTOR + SB_SIZE;
+ sprintf(info->text_version, "/%s/%d", name, super->vol);
+ snprintf(info->name, sizeof(info->name), "%s", c->label);
+ memcpy(info->uuid, c->uuid, sizeof(c->uuid));
+
+ info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1;
+ info->array.level = LEVEL_BCACHE;
+ info->array.layout = 0;
+ info->array.md_minor = -1;
+ info->array.ctime = 0;
+ info->array.utime = 0;
+ info->array.chunk_size = bucket_size * 512;
+ info->array.major_version = -1;
+ info->array.minor_version = -2;
+
+ info->disk.major = 0;
+ info->disk.minor = 0;
+ info->disk.raid_disk = SB_BDEV(c);
+ info->disk.number = SB_BDEV(c);
+ info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
+}
+
+static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap)
+{
+ int i, cset, bdev, map_disks = info->array.raid_disks;
+ struct bcache_super *super = st->sb;
+ struct cache_sb *c = super->sb;
+
+ memset(info, 0, sizeof(*info));
+
+ if (super->vol >= 0)
+ return getinfo_bcache_volume(st, info, map_disks, dmap);
+
+ /* make Assemble choose the cache target */
+ info->events = SB_BDEV(c);
+ info->recovery_start = MaxSector;
+ info->data_offset = SB_SECTOR;
+ info->component_size = SB_SIZE;
+ strcpy(info->text_version, "bcache");
+ memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid));
+
+ info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1;
+ info->array.level = LEVEL_CONTAINER;
+ info->array.layout = 0;
+ info->array.md_minor = -1;
+ info->array.ctime = 0;
+ info->array.utime = 0;
+ info->array.chunk_size = __le16_to_cpu(c->bucket_size) * 512;
+ info->array.major_version = -1;
+ info->array.minor_version = -2;
+
+ info->disk.major = 0;
+ info->disk.minor = 0;
+ info->disk.raid_disk = SB_BDEV(c);
+ info->disk.number = SB_BDEV(c);
+ /* FIXME: need bcache superblock to identify failed devices */
+ info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
+
+ /* FIXME need to parse the journal uuid_bucket to understand
+ * which cache devs are consistent with the set
+ */
+ for (i = 0; dmap && i < map_disks; i++)
+ dmap[i] = 1;
+
+ cset = 0;
+ bdev = 0;
+ while (super) {
+ c = super->sb;
+
+ /* FIXME filter out-of-sync devices */
+ if (SB_BDEV(c))
+ bdev++;
+ else
+ cset++;
+ super = super->next;
+ }
+
+ if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1)
+ info->container_enough = 1;
+ else
+ info->container_enough = -1;
+}
+
+static int update_bcache(struct supertype *st, struct mdinfo *i, char *update,
+ char *devname, int verbose, int uuid_set, char *homehost)
+{
+ /* FIXME */
+ if (strcmp(update, "grow") == 0) {
+ return 0;
+ } else if (strcmp(update, "resync") == 0) {
+ return 0;
+ } else if (strcmp(update, "homehost") == 0) {
+ return -1;
+ } else if (strcmp(update, "name") == 0) {
+ return -1;
+ } else if (strcmp(update, "_reshape_progress") == 0) {
+ return 0;
+ } else if (strcmp(update, "assemble") == 0 ) {
+ return 0;
+ } else {
+ return -1;
+ }
+}
+
+static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray)
+{
+ struct bcache_super *super = st->sb;
+ struct mdinfo *info, *disk = NULL;
+ char *ep;
+
+ info = calloc(1, sizeof(*info));
+ if (!info) {
+ fprintf(stderr, Name ": failed to allocate %zu bytes\n",
+ sizeof(*info));
+ return NULL;
+ }
+
+ /* don't support multiple backing disks per cache set */
+ if (subarray && (strtoul(subarray, &ep, 10) > 0 || *ep != '\0'))
+ goto error;
+
+ super->vol = 0;
+ getinfo_bcache(st, info, NULL);
+
+ for (; super; super = super->next) {
+ struct dl *d = super->disk;
+ struct cache_sb *c = super->sb;
+
+ disk = calloc(1, sizeof(*disk));
+ if (!disk) {
+ fprintf(stderr, Name ": failed to allocate disk\n");
+ goto error;
+ }
+ disk->next = info->devs;
+ info->devs = disk;
+
+ disk->disk.number = SB_BDEV(c);
+ disk->disk.raid_disk = SB_BDEV(c);
+ disk->disk.major = d->major;
+ disk->disk.minor = d->minor;
+ disk->recovery_start = MaxSector;
+ disk->disk.state = 1 << MD_DISK_ACTIVE;
+ disk->data_offset = info->data_offset;
+ disk->component_size = info->component_size;
+
+ info->array.working_disks++;
+ }
+
+ return info;
+
+ error:
+ disk = info->devs;
+ while (disk) {
+ struct mdinfo *next = disk->next;
+
+ free(disk);
+ disk = next;
+ }
+
+ free(info);
+ return NULL;
+}
+
+
+struct superswitch super_bcache = {
+#ifndef MDASSEMBLE
+ .examine_super = examine_bcache,
+ .brief_examine_super = brief_examine_bcache,
+ .brief_examine_subarrays = brief_examine_subarrays_bcache,
+ .export_examine_super = export_examine_bcache,
+ .detail_super = detail_bcache,
+ .brief_detail_super = brief_detail_bcache,
+ .load_container = load_container_bcache,
+#endif
+ .match_home = match_home_bcache,
+ .uuid_from_super = uuid_from_bcache,
+ .getinfo_super = getinfo_bcache,
+ .update_super = update_bcache,
+
+ .avail_size = avail_size_bcache,
+
+ .compare_super = compare_bcache,
+
+ .load_super = load_bcache,
+ .store_super = store_bcache,
+ .free_super = free_bcache,
+ .match_metadata_desc = match_metadata_desc_bcache,
+ .container_content = container_content_bcache,
+
+ .external = 1,
+ .name = "bcache",
+};
diff --git a/util.c b/util.c
index 6985a70..d9e49cf 100644
--- a/util.c
+++ b/util.c
@@ -919,7 +919,7 @@ struct superswitch *superlist[] =
{
&super0, &super1,
&super_ddf, &super_imsm,
- &mbr, &gpt,
+ &mbr, &gpt, &super_bcache,
NULL };
#if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [mdadm PATCH] bcache: add bcache superblock
[not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
@ 2012-05-12 7:38 ` Jack Wang
2012-05-15 0:04 ` Mark Hills
1 sibling, 0 replies; 6+ messages in thread
From: Jack Wang @ 2012-05-12 7:38 UTC (permalink / raw)
To: Dan Williams
Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-bcache-u79uwXL29TY76Z2rM5mHXA
Hi Dan,
So this is the alternate interface for bcache tools using mdadm to
manage bcache?
If so, could you give a example of how to using this.
Best regards.
Jack
2012/5/12 Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>:
> This is a hybrid proposal for supporting bcache as a md device.
> Somewhat similar to the v1.x metadata format, where array assembly is
> handled in userspace, but managed in the kernel. In the bcache case it
> is an "external" metadata format, but then the expectation is that the
> kernel "bcache" personality takes over runtime maintenance of the
> metadata.
>
> The container id for bcache is the "cache_set". The subvolume is the
> backing device identifier.
>
> This initial version only supports the runtime static portion of the
> superblock, it will need to grow the ability to read the journal to
> report the backing devices associated with a given cache set (i.e. in
> the superblock backing devices know their cache_set container, but cache
> devices need to look elsewhere to find their backing devices).
>
> Cc: Kent Overstreet <koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
> Assemble.c | 1
> Makefile | 11 +
> bcache.h | 98 +++++++++
> crc64.c | 129 +++++++++++
> maps.c | 2
> mdadm.h | 2
> super-bcache.c | 634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> util.c | 2
> 8 files changed, 873 insertions(+), 6 deletions(-)
> create mode 100644 bcache.h
> create mode 100644 crc64.c
> create mode 100644 super-bcache.c
>
> diff --git a/Assemble.c b/Assemble.c
> index fd94461..267a2ce 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
> } else switch(content->array.level) {
> case LEVEL_LINEAR:
> case LEVEL_MULTIPATH:
> + case LEVEL_BCACHE:
> case 0:
> err = sysfs_set_str(content, NULL, "array_state",
> "active");
> diff --git a/Makefile b/Makefile
> index b8d363f..7886d13 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -103,8 +103,8 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \
> Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
> Incremental.o \
> mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
> - super-mbr.o super-gpt.o \
> - restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
> + super-mbr.o super-gpt.o super-bcache.o \
> + restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \
> platform-intel.o probe_roms.o
>
> CHECK_OBJS = restripe.o sysfs.o maps.o lib.o
> @@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h
> MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
> config.o policy.o lib.o \
> Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
> - super-mbr.o super-gpt.o \
> - super-ddf.o sha1.o crc32.o msg.o bitmap.o \
> + super-mbr.o super-gpt.o super-bcache.o \
> + super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \
> platform-intel.o probe_roms.o
>
> MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
> @@ -128,7 +128,8 @@ STATICOBJS = pwgr.o
> ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
> maps.c lib.c \
> super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
> - platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
> + platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \
> + super-bcache.c crc64.c
> ASSEMBLE_AUTO_SRCS := mdopen.c
> ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
> ifdef MDASSEMBLE_AUTO
> diff --git a/bcache.h b/bcache.h
> new file mode 100644
> index 0000000..765e369
> --- /dev/null
> +++ b/bcache.h
> @@ -0,0 +1,98 @@
> +#ifndef _BCACHE_H
> +#define _BCACHE_H
> +
> +#include <stdint.h>
> +
> +#define BITMASK(name, type, field, offset, size) \
> +static inline uint64_t name(const type *k) \
> +{ \
> + uint64_t field = __le64_to_cpu(k->field); \
> + return (field >> offset) & ~(((uint64_t) ~0) << size); \
> +} \
> + \
> +static inline void SET_##name(type *k, uint64_t v) \
> +{ \
> + uint64_t field = __le64_to_cpu(k->field); \
> + field &= ~(~((uint64_t) ~0 << size) << offset); \
> + field |= v << offset; \
> + k->field = __cpu_to_le64(field); \
> +}
> +
> +static const char bcache_magic[] = {
> + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
> + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
> +
> +/* Version 1: Backing dev
> + * Version 2: Seed pointer into btree node checksum
> + * Version 3: Backing dev superblock has offset of start of data
> + */
> +
> +#define BCACHE_SB_BDEV_VERSION 3
> +#define BCACHE_SB_MAX_VERSION 3
> +
> +#define SB_SECTOR 8
> +#define SB_SIZE 16 /* default data_offset in bcache-tools (?) */
> +#define SB_LABEL_SIZE 32
> +
> +struct cache_sb {
> + uint64_t csum;
> + uint64_t offset; /* sector where this sb was written */
> + uint64_t version;
> +#define CACHE_BACKING_DEV 1
> +
> + uint8_t magic[16];
> +
> + uint8_t uuid[16];
> + union {
> + uint8_t set_uuid[16];
> + uint64_t set_magic;
> + };
> + uint8_t label[SB_LABEL_SIZE];
> +
> + uint64_t flags;
> + uint64_t seq;
> + uint64_t pad[8];
> +
> + uint64_t nbuckets; /* device size */
> + uint16_t block_size; /* sectors */
> + uint16_t bucket_size; /* sectors */
> +
> + uint16_t nr_in_set;
> + uint16_t nr_this_dev;
> +
> + uint32_t last_mount; /* time_t */
> +
> + uint16_t first_bucket;
> + uint16_t keys; /* number of journal buckets */
> + uint64_t d[]; /* journal buckets */
> +};
> +
> +static inline int SB_BDEV(struct cache_sb *c)
> +{
> + return __le64_to_cpu(c->version) == CACHE_BACKING_DEV;
> +}
> +
> +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1);
> +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
> +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
> +
> +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4);
> +#define CACHE_MODE_WRITETHROUGH 0U
> +#define CACHE_MODE_WRITEBACK 1U
> +#define CACHE_MODE_WRITEAROUND 2U
> +#define CACHE_MODE_NONE 3U
> +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2);
> +#define BDEV_STATE_NONE 0U
> +#define BDEV_STATE_CLEAN 1U
> +#define BDEV_STATE_DIRTY 2U
> +#define BDEV_STATE_STALE 3U
> +
> +inline uint64_t crc64(const void *_data, size_t len);
> +
> +#define node(i, j) ((void *) ((i)->d + (j)))
> +#define end(i) node(i, (i)->keys)
> +
> +#define csum_set(i) \
> + crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
> +
> +#endif
> diff --git a/crc64.c b/crc64.c
> new file mode 100644
> index 0000000..8f37445
> --- /dev/null
> +++ b/crc64.c
> @@ -0,0 +1,129 @@
> +#define _GNU_SOURCE
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <unistd.h>
> +
> +/*
> + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
> + * use permitted, subject to terms of PostgreSQL license; see.)
> +
> + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
> + * usual sort of implementation. (See Ross Williams' excellent introduction
> + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
> + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
> + * If we have no working 64-bit type, then fake it with two 32-bit registers.
> + *
> + * The present implementation is a normal (not "reflected", in Williams'
> + * terms) 64-bit CRC, using initial all-ones register contents and a final
> + * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
> + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
> + *
> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
> + * x^7 + x^4 + x + 1
> +*/
> +
> +static const uint64_t crc_table[256] = {
> + 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
> + 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
> + 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
> + 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
> + 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
> + 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
> + 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
> + 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
> + 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
> + 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
> + 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
> + 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
> + 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
> + 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
> + 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
> + 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
> + 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
> + 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
> + 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
> + 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
> + 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
> + 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
> + 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
> + 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
> + 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
> + 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
> + 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
> + 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
> + 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
> + 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
> + 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
> + 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
> + 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
> + 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
> + 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
> + 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
> + 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
> + 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
> + 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
> + 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
> + 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
> + 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
> + 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
> + 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
> + 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
> + 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
> + 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
> + 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
> + 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
> + 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
> + 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
> + 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
> + 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
> + 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
> + 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
> + 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
> + 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
> + 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
> + 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
> + 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
> + 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
> + 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
> + 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
> + 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
> + 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
> + 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
> + 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
> + 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
> + 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
> + 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
> + 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
> + 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
> + 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
> + 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
> + 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
> + 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
> + 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
> + 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
> + 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
> + 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
> + 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
> + 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
> + 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
> + 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
> + 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
> + 0x9AFCE626CE85B507ULL
> +};
> +
> +inline uint64_t crc64(const void *_data, size_t len)
> +{
> + uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
> + const unsigned char *data = _data;
> +
> + while (len--) {
> + int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
> + crc = crc_table[i] ^ (crc << 8);
> + }
> +
> + return crc ^ 0xFFFFFFFFFFFFFFFFULL;
> +}
> diff --git a/maps.c b/maps.c
> index f2ba9a7..cedf548 100644
> --- a/maps.c
> +++ b/maps.c
> @@ -94,6 +94,8 @@ mapping_t pers[] = {
> { "10", 10},
> { "faulty", LEVEL_FAULTY},
> { "container", LEVEL_CONTAINER},
> + { "bcache", LEVEL_BCACHE},
> + { "11", LEVEL_BCACHE},
> { NULL, 0}
> };
>
> diff --git a/mdadm.h b/mdadm.h
> index 3bcd052..a0ccff6 100644
> --- a/mdadm.h
> +++ b/mdadm.h
> @@ -816,6 +816,7 @@ extern struct superswitch {
> extern struct superswitch super0, super1;
> extern struct superswitch super_imsm, super_ddf;
> extern struct superswitch mbr, gpt;
> +extern struct superswitch super_bcache;
>
> struct metadata_update {
> int len;
> @@ -1296,6 +1297,7 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
> #define LEVEL_MULTIPATH (-4)
> #define LEVEL_LINEAR (-1)
> #define LEVEL_FAULTY (-5)
> +#define LEVEL_BCACHE (0xb)
>
> /* kernel module doesn't know about these */
> #define LEVEL_CONTAINER (-100)
> diff --git a/super-bcache.c b/super-bcache.c
> new file mode 100644
> index 0000000..ec8f3db
> --- /dev/null
> +++ b/super-bcache.c
> @@ -0,0 +1,634 @@
> +/*
> + * mdadm - bcache support
> + *
> + * Copyright (C) 2012 Intel Corporation
> + *
> + * bcache definitions copied from bcache-tools:
> + * git://evilpiepirate.org/~kent/bcache-tools.git
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +#define HAVE_STDINT_H 1
> +#include "mdadm.h"
> +#include "bcache.h"
> +
> +struct bcache_super {
> + union {
> + struct cache_sb *sb;
> + void *buf;
> + };
> + struct dl {
> + int major, minor;
> + char *devname;
> + int fd;
> + } *disk;
> + int vol;
> + struct bcache_super *next;
> +};
> +
> +enum {
> + /* FIXME this is a function of the bucket size */
> + BCACHE_MAX_DEVICES = 2,
> +};
> +
> +static int load_cache_sb(struct bcache_super *super, int keep_fd)
> +{
> + struct dl *d = super->disk;
> + int rc, fd = d->fd;
> + struct cache_sb *c;
> + struct stat s;
> +
> + if (!keep_fd)
> + d->fd = -1;
> +
> + rc = fstat(fd, &s);
> + if (rc)
> + return rc;
> + d->major = major(s.st_rdev);
> + d->minor = minor(s.st_rdev);
> +
> + rc = posix_memalign(&super->buf, 4096, 4096);
> + if (rc)
> + return rc;
> + c = super->sb;
> +
> + if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096)
> + return errno;
> +
> + if (csum_set(c) != __le64_to_cpu(c->csum))
> + return ENODEV;
> +
> + if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
> + return ENODEV;
> +
> + return 0;
> +}
> +
> +static void __free_bcache(struct bcache_super *super)
> +{
> + if (!super)
> + return;
> +
> + while (super) {
> + struct bcache_super *next = super->next;
> + struct dl *d = super->disk;
> +
> + d = super->disk;
> + if (d->fd >= 0)
> + close(d->fd);
> + free(d->devname);
> + free(d);
> + free(super->sb);
> + free(super);
> + super = next;
> + }
> +}
> +
> +static void free_bcache(struct supertype *st)
> +{
> + struct bcache_super *super = st->sb;
> +
> + __free_bcache(super);
> + st->sb = NULL;
> +}
> +
> +#ifndef MDASSEMBLE
> +static void examine_bcache(struct supertype *st, char *homehost)
> +{
> + const char *const cache_policies[] = { "lru", "fifo", "random", "" };
> + const char *const bdev_states[] = { "none", "clean", "dirty", "stale" };
> + const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" };
> + struct bcache_super *super = st->sb;
> + uint16_t first_bucket, bucket_size;
> + struct cache_sb *c = super->sb;
> + uint64_t nbuckets, csum;
> + unsigned long long sz;
> + char nbuf[64];
> +
> + printf(" Magic : %s\n",
> + memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>");
> + printf(" Version : %d\n", (int) c->version);
> + printf(" Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache");
> + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> + printf(" Set UUID : %s\n", nbuf + 5);
> + __fname_from_uuid((int *) c->uuid, 0, nbuf, ':');
> + printf(" Cache Devs : %u\n", c->nr_in_set);
> + /* FIXME: list all cache dev uuids in the load_container case */
> + printf(" Device UUID : %s\n", nbuf + 5);
> + printf(" Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "",
> + CACHE_SYNC(c) ? " sync" : "");
> + if (SB_BDEV(c)) {
> + printf(" State : %s\n", bdev_states[BDEV_STATE(c)]);
> + printf(" Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]);
> + } else {
> + printf(" Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]);
> + /* FIXME: add reporting of backing device uuids in the cache caase */
> + }
> + printf(" Label : %.32s\n", c->label);
> + csum = __le64_to_cpu(c->csum);
> + nbuckets = __le64_to_cpu(c->nbuckets);
> + bucket_size = __le16_to_cpu(c->bucket_size);
> + first_bucket = __le16_to_cpu(c->first_bucket);
> + sz = (nbuckets - first_bucket) * bucket_size;
> + printf(" Device Size : %llu%s\n", sz, human_size(sz * 512));
> + printf(" Bucket Size : %u\n", bucket_size);
> + printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets);
> + printf(" this dev : %u\n", __le16_to_cpu(c->nr_this_dev));
> + printf("First Bucket : %u\n", first_bucket);
> + printf(" Checksum : %llx %s\n", (unsigned long long) csum,
> + csum == csum_set(c) ? "correct" : "incorrect");
> +}
> +
> +static void brief_examine_bcache(struct supertype *st, int verbose)
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> + char nbuf[64];
> +
> + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> + printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5);
> +}
> +
> +static void brief_examine_subarrays_bcache(struct supertype *st, int verbose)
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> + char nbuf[64], nbuf1[64];
> +
> + /* FIXME this needs to parse the cache device journal to find
> + * and report the backing dev uuid list
> + */
> + if (!SB_BDEV(c))
> + return;
> +
> + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> + __fname_from_uuid((int *) c->uuid, 0, nbuf1, ':');
> +
> + printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5);
> +}
> +
> +static void export_examine_bcache(struct supertype *st)
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> + char nbuf[64];
> +
> + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> + printf("MD_METADATA=bcache\n");
> + printf("MD_LEVEL=container\n");
> + printf("MD_UUID=%s\n", nbuf+5);
> + printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1);
> +}
> +
> +static void detail_bcache(struct supertype *st, char *homehost)
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> + char nbuf[64];
> +
> + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> + printf("\n UUID : %s\n", nbuf + 5);
> +}
> +
> +static void brief_detail_bcache(struct supertype *st)
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> + char nbuf[64];
> +
> + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> + printf(" UUID=%s", nbuf + 5);
> +}
> +
> +static struct bcache_super *alloc_super(const char *func)
> +{
> + struct bcache_super *super = calloc(1, sizeof(*super));
> + struct dl *d = calloc(1, sizeof(*d));
> +
> + if (!super || !d) {
> + fprintf(stderr, Name "%s: %s failed\n", func, __func__);
> + free(super);
> + free(d);
> + return NULL;
> + }
> +
> + super->vol = -1;
> + super->disk = d;
> +
> + return super;
> +}
> +
> +static int load_container_bcache(struct supertype *st, int fd, char *devname)
> +{
> + struct bcache_super *list = NULL;
> + int rc, i, cdev = 0, bdev = 0;
> + int devnum = fd2devnum(fd);
> + struct mdinfo *sra, *sd;
> +
> + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
> + if (!sra)
> + return 1;
> +
> + if (sra->array.major_version != -1 ||
> + sra->array.minor_version != -2 ||
> + strcmp(sra->text_version, "bcache") != 0) {
> + rc = 1;
> + goto error;
> + }
> +
> + for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
> + struct bcache_super *super = alloc_super(__func__);
> + struct cache_sb *c;
> + char nm[32];
> + int fd;
> +
> + rc = 1;
> + if (!super)
> + goto error;
> + super->next = list;
> + list = super;
> +
> + rc = 2;
> + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
> + fd = dev_open(nm, O_RDWR);
> + if (fd < 0)
> + goto error;
> +
> + super->disk->fd = fd;
> + rc = load_cache_sb(super, 1);
> + if (rc)
> + goto error;
> + c = super->sb;
> + if (SB_BDEV(c))
> + bdev++;
> + else
> + cdev++;
> + }
> + rc = 0;
> +
> + /* FIXME disambiguate multiple bdevs per set, support multiple
> + * cache devices
> + */
> + if (bdev > 1) {
> + fprintf(stderr, Name ": %d backing devices detected\n", bdev);
> + rc = 3;
> + }
> + if (cdev > 1) {
> + fprintf(stderr, Name ": %d cache devices detected\n", cdev);
> + rc = 3;
> + }
> + if (rc)
> + goto error;
> + st->sb = list;
> + list = NULL;
> +
> +error:
> + if (list)
> + __free_bcache(list);
> + sysfs_free(sra);
> +
> + st->container_dev = devnum;
> + if (rc == 0 && st->ss == NULL) {
> + st->ss = &super_bcache;
> + st->minor_version = 0;
> + st->max_devs = BCACHE_MAX_DEVICES;
> + }
> + return rc;
> +}
> +#endif
> +
> +static int load_bcache(struct supertype *st, int fd, char *devname)
> +{
> + struct bcache_super *super;
> + struct dl *d;
> + int rc;
> +
> + free_bcache(st);
> +
> + super = alloc_super(__func__);
> + if (!super)
> + return 1;
> +
> + st->sb = super;
> + d = super->disk;
> + d->devname = devname ? strdup(devname) : NULL;
> + d->fd = fd;
> + rc = load_cache_sb(super, 0);
> + if (rc) {
> + free_bcache(st);
> + if (!devname)
> + return rc;
> + fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__,
> + devname, strerror(rc));
> + return rc;
> + }
> +
> + if (st->ss == NULL) {
> + st->ss = &super_bcache;
> + st->minor_version = 0;
> + st->max_devs = BCACHE_MAX_DEVICES;
> + }
> +
> + return 0;
> +}
> +
> +static int store_bcache(struct supertype *st, int fd)
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> +
> + if (!c)
> + return 1;
> +
> + if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c))
> + return 1;
> +
> + return 0;
> +}
> +
> +static int compare_bcache(struct supertype *st, struct supertype *tst)
> +{
> + struct bcache_super *a = st->sb;
> + struct bcache_super *b = tst->sb;
> +
> + if (!st->sb) {
> + st->sb = tst->sb;
> + tst->sb = NULL;
> + return 0;
> + }
> +
> + if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0)
> + return 2;
> +
> + return 0;
> +}
> +
> +static __u64 avail_size_bcache(struct supertype *st, __u64 devsize)
> +{
> + /* 4k from start, 8k min data offset */
> + const uint32_t reserved_sectors = (4+8) * 2;
> +
> + if (devsize < reserved_sectors)
> + return 0;
> +
> + return devsize - reserved_sectors;
> +}
> +
> +static struct supertype *match_metadata_desc_bcache(char *arg)
> +{
> + struct supertype *st;
> +
> + if (strcmp(arg, "bcache") != 0 &&
> + strcmp(arg, "default") != 0)
> + return NULL;
> +
> + st = calloc(1, sizeof(*st));
> + if (!st)
> + return NULL;
> + st->container_dev = NoMdDev;
> + st->ss = &super_bcache;
> + st->max_devs = BCACHE_MAX_DEVICES;
> + st->minor_version = 0;
> + st->sb = NULL;
> +
> + return st;
> +}
> +
> +static int match_home_bcache(struct supertype *st, char *homehost)
> +{
> + /* the bcache superblock does not specify any host
> + * identification information. maybe it should...
> + */
> +
> + return -1;
> +}
> +
> +static void uuid_from_bcache(struct supertype *st, int uuid[4])
> +{
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> +
> + memcpy(uuid, c->set_uuid, sizeof(c->set_uuid));
> +}
> +
> +static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap)
> +{
> + char *name = devnum2devname(st->container_dev);
> + struct bcache_super *super = st->sb;
> + uint16_t bucket_size, first_bucket;
> + struct cache_sb *c = super->sb;
> + unsigned long long sz;
> + uint64_t nbuckets;
> +
> + nbuckets = __le64_to_cpu(c->nbuckets);
> + bucket_size = __le16_to_cpu(c->bucket_size);
> + first_bucket = __le16_to_cpu(c->first_bucket);
> + sz = (nbuckets - first_bucket) * bucket_size;
> +
> + info->container_member = super->vol;
> + info->custom_array_size = sz;
> + info->component_size = sz;
> + info->recovery_start = MaxSector;
> + info->data_offset = SB_SECTOR + SB_SIZE;
> + sprintf(info->text_version, "/%s/%d", name, super->vol);
> + snprintf(info->name, sizeof(info->name), "%s", c->label);
> + memcpy(info->uuid, c->uuid, sizeof(c->uuid));
> +
> + info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1;
> + info->array.level = LEVEL_BCACHE;
> + info->array.layout = 0;
> + info->array.md_minor = -1;
> + info->array.ctime = 0;
> + info->array.utime = 0;
> + info->array.chunk_size = bucket_size * 512;
> + info->array.major_version = -1;
> + info->array.minor_version = -2;
> +
> + info->disk.major = 0;
> + info->disk.minor = 0;
> + info->disk.raid_disk = SB_BDEV(c);
> + info->disk.number = SB_BDEV(c);
> + info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
> +}
> +
> +static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap)
> +{
> + int i, cset, bdev, map_disks = info->array.raid_disks;
> + struct bcache_super *super = st->sb;
> + struct cache_sb *c = super->sb;
> +
> + memset(info, 0, sizeof(*info));
> +
> + if (super->vol >= 0)
> + return getinfo_bcache_volume(st, info, map_disks, dmap);
> +
> + /* make Assemble choose the cache target */
> + info->events = SB_BDEV(c);
> + info->recovery_start = MaxSector;
> + info->data_offset = SB_SECTOR;
> + info->component_size = SB_SIZE;
> + strcpy(info->text_version, "bcache");
> + memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid));
> +
> + info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1;
> + info->array.level = LEVEL_CONTAINER;
> + info->array.layout = 0;
> + info->array.md_minor = -1;
> + info->array.ctime = 0;
> + info->array.utime = 0;
> + info->array.chunk_size = __le16_to_cpu(c->bucket_size) * 512;
> + info->array.major_version = -1;
> + info->array.minor_version = -2;
> +
> + info->disk.major = 0;
> + info->disk.minor = 0;
> + info->disk.raid_disk = SB_BDEV(c);
> + info->disk.number = SB_BDEV(c);
> + /* FIXME: need bcache superblock to identify failed devices */
> + info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
> +
> + /* FIXME need to parse the journal uuid_bucket to understand
> + * which cache devs are consistent with the set
> + */
> + for (i = 0; dmap && i < map_disks; i++)
> + dmap[i] = 1;
> +
> + cset = 0;
> + bdev = 0;
> + while (super) {
> + c = super->sb;
> +
> + /* FIXME filter out-of-sync devices */
> + if (SB_BDEV(c))
> + bdev++;
> + else
> + cset++;
> + super = super->next;
> + }
> +
> + if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1)
> + info->container_enough = 1;
> + else
> + info->container_enough = -1;
> +}
> +
> +static int update_bcache(struct supertype *st, struct mdinfo *i, char *update,
> + char *devname, int verbose, int uuid_set, char *homehost)
> +{
> + /* FIXME */
> + if (strcmp(update, "grow") == 0) {
> + return 0;
> + } else if (strcmp(update, "resync") == 0) {
> + return 0;
> + } else if (strcmp(update, "homehost") == 0) {
> + return -1;
> + } else if (strcmp(update, "name") == 0) {
> + return -1;
> + } else if (strcmp(update, "_reshape_progress") == 0) {
> + return 0;
> + } else if (strcmp(update, "assemble") == 0 ) {
> + return 0;
> + } else {
> + return -1;
> + }
> +}
> +
> +static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray)
> +{
> + struct bcache_super *super = st->sb;
> + struct mdinfo *info, *disk = NULL;
> + char *ep;
> +
> + info = calloc(1, sizeof(*info));
> + if (!info) {
> + fprintf(stderr, Name ": failed to allocate %zu bytes\n",
> + sizeof(*info));
> + return NULL;
> + }
> +
> + /* don't support multiple backing disks per cache set */
> + if (subarray && (strtoul(subarray, &ep, 10) > 0 || *ep != '\0'))
> + goto error;
> +
> + super->vol = 0;
> + getinfo_bcache(st, info, NULL);
> +
> + for (; super; super = super->next) {
> + struct dl *d = super->disk;
> + struct cache_sb *c = super->sb;
> +
> + disk = calloc(1, sizeof(*disk));
> + if (!disk) {
> + fprintf(stderr, Name ": failed to allocate disk\n");
> + goto error;
> + }
> + disk->next = info->devs;
> + info->devs = disk;
> +
> + disk->disk.number = SB_BDEV(c);
> + disk->disk.raid_disk = SB_BDEV(c);
> + disk->disk.major = d->major;
> + disk->disk.minor = d->minor;
> + disk->recovery_start = MaxSector;
> + disk->disk.state = 1 << MD_DISK_ACTIVE;
> + disk->data_offset = info->data_offset;
> + disk->component_size = info->component_size;
> +
> + info->array.working_disks++;
> + }
> +
> + return info;
> +
> + error:
> + disk = info->devs;
> + while (disk) {
> + struct mdinfo *next = disk->next;
> +
> + free(disk);
> + disk = next;
> + }
> +
> + free(info);
> + return NULL;
> +}
> +
> +
> +struct superswitch super_bcache = {
> +#ifndef MDASSEMBLE
> + .examine_super = examine_bcache,
> + .brief_examine_super = brief_examine_bcache,
> + .brief_examine_subarrays = brief_examine_subarrays_bcache,
> + .export_examine_super = export_examine_bcache,
> + .detail_super = detail_bcache,
> + .brief_detail_super = brief_detail_bcache,
> + .load_container = load_container_bcache,
> +#endif
> + .match_home = match_home_bcache,
> + .uuid_from_super = uuid_from_bcache,
> + .getinfo_super = getinfo_bcache,
> + .update_super = update_bcache,
> +
> + .avail_size = avail_size_bcache,
> +
> + .compare_super = compare_bcache,
> +
> + .load_super = load_bcache,
> + .store_super = store_bcache,
> + .free_super = free_bcache,
> + .match_metadata_desc = match_metadata_desc_bcache,
> + .container_content = container_content_bcache,
> +
> + .external = 1,
> + .name = "bcache",
> +};
> diff --git a/util.c b/util.c
> index 6985a70..d9e49cf 100644
> --- a/util.c
> +++ b/util.c
> @@ -919,7 +919,7 @@ struct superswitch *superlist[] =
> {
> &super0, &super1,
> &super_ddf, &super_imsm,
> - &mbr, &gpt,
> + &mbr, &gpt, &super_bcache,
> NULL };
>
> #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [mdadm PATCH] bcache: add bcache superblock
[not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
2012-05-12 7:38 ` Jack Wang
@ 2012-05-15 0:04 ` Mark Hills
[not found] ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
1 sibling, 1 reply; 6+ messages in thread
From: Mark Hills @ 2012-05-15 0:04 UTC (permalink / raw)
To: Dan Williams
Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-bcache-u79uwXL29TY76Z2rM5mHXA
On Fri, 11 May 2012, Dan Williams wrote:
> This is a hybrid proposal for supporting bcache as a md device.
> Somewhat similar to the v1.x metadata format, where array assembly is
> handled in userspace, but managed in the kernel. In the bcache case it
> is an "external" metadata format, but then the expectation is that the
> kernel "bcache" personality takes over runtime maintenance of the
> metadata.
I am having some trouble with this, can you clarify (perhaps by example)
how to create a pairing of a cache and backing device?
I tried creating directly:
# mdadm --create /dev/md0 --level=11 --raid-devices=2 /dev/sdb /dev/sdc
mdadm: unknown level 11
But as the code in Assemble.c is patched (and not Create.c), I had a hunch
to format the devices as before then assemble the array:
# make-bcache -C /dev/sdb
# make-bcache -B /dev/sdc
# mdadm -A /dev/md0 /dev/sdb /dev/sdc
mdadm: Cannot assemble mbr metadata on /dev/sdb
mdadm: /dev/sdb has no superblock - assembly aborted
For a hack, I changed Create.c to accept case 11 and leave the chosen
chunk size. It got further, but then failed with:
# mdadm --create /dev/md/bcache --level=11 --raid-devices=2 /dev/sdb /dev/sdc
mdadm: /dev/sdb appears to be part of a raid array:
level=raid0 devices=0 ctime=Thu Jan 1 01:00:00 1970
mdadm: partition table exists on /dev/sdb but will be lost or
meaningless after creating array
mdadm: /dev/sdc appears to be part of a raid array:
level=raid0 devices=0 ctime=Thu Jan 1 01:00:00 1970
mdadm: partition table exists on /dev/sdc but will be lost or
meaningless after creating array
mdadm: largest drive (/dev/sdc) exceeds size (117155216K) by more than 1%
Continue creating array? y
mdadm: Defaulting to version 1.2 metadata
mdadm: RUN_ARRAY failed: Cannot allocate memory
Maybe I missed something basic here, but I'm afraid can't see what? If
not, hopefully this information is useful.
I use bcache v13 patches and the MD conversion, and this patch to mdadm.
$ lsmod | grep bcache
md_bcache 3202 0
bcache 138187 1 md_bcache
md_mod 88671 5 md_bcache,raid456,raid1
$ uname -a
Linux stax 3.4.0-rc7-mh+ #190 SMP PREEMPT Sun May 13 22:36:17 BST 2012 i686 Intel(R) Pentium(R) D CPU 2.80GHz GenuineIntel GNU/Linux
Thanks
--
Mark
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [mdadm PATCH] bcache: add bcache superblock
[not found] ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
@ 2012-05-15 16:59 ` Dan Williams
[not found] ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Dan Williams @ 2012-05-15 16:59 UTC (permalink / raw)
To: Mark Hills
Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-bcache-u79uwXL29TY76Z2rM5mHXA
On Mon, May 14, 2012 at 5:04 PM, Mark Hills <mark-UrrBsZIrrsb10XsdtD+oqA@public.gmane.org> wrote:
> On Fri, 11 May 2012, Dan Williams wrote:
>
>> This is a hybrid proposal for supporting bcache as a md device.
>> Somewhat similar to the v1.x metadata format, where array assembly is
>> handled in userspace, but managed in the kernel. In the bcache case it
>> is an "external" metadata format, but then the expectation is that the
>> kernel "bcache" personality takes over runtime maintenance of the
>> metadata.
>
> I am having some trouble with this, can you clarify (perhaps by example)
> how to create a pairing of a cache and backing device?
>
> I tried creating directly:
>
> # mdadm --create /dev/md0 --level=11 --raid-devices=2 /dev/sdb /dev/sdc
> mdadm: unknown level 11
>
> But as the code in Assemble.c is patched (and not Create.c), I had a hunch
> to format the devices as before then assemble the array:
Yeah, "create" support is not there yet.
> # make-bcache -C /dev/sdb
> # make-bcache -B /dev/sdc
> # mdadm -A /dev/md0 /dev/sdb /dev/sdc
> mdadm: Cannot assemble mbr metadata on /dev/sdb
I should have been more explicit in the changelog. This current patch
was only tested to assemble an existing bcache configuration. I.e. it
assumes the backing device has been attached to the cache set at least
once. By default make-bcache always creates a new cache-set id per
invocation. So in the above example it won't find sdb and sdc belong
to the same md device because the cache-set id's differ. You can
verify this with mdadm -E.
[..]
>
> Maybe I missed something basic here, but I'm afraid can't see what? If
> not, hopefully this information is useful.
>
> I use bcache v13 patches and the MD conversion, and this patch to mdadm.
For reference:
[root@fedora-virt ~]# mdadm -E /dev/vd[bc]
/dev/vdb:
Magic : <bcache>
Version : 3
Role : cache
Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
Cache Devs : 1
Device UUID : a1b25e26:5a4c4db6:2716c1aa:eba44ec7
Flags : sync
Policy : lru
Label :
Device Size : 16776192 (8.00 GiB 8.59 GB)
Bucket Size : 1024
Num Buckets : 16384
this dev : 0
First Bucket : 1
Checksum : 45fa97cda53b0b6 correct
/dev/vdc:
Magic : <bcache>
Version : 1
Role : backing-device
Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
Cache Devs : 1
Device UUID : 7045ff87:aa4b13c4:670d8bb2:0b4ad628
Flags : sync
State : clean
Mode : writeback
Label :
Device Size : 16776192 (8.00 GiB 8.59 GB)
Bucket Size : 1024
Num Buckets : 16384
this dev : 0
First Bucket : 1
Checksum : f36910e51451d91c correct
[root@fedora-virt ~]# mdadm -Avvv /dev/md/bcache /dev/vd[bc]
mdadm: looking for devices for /dev/md/bcache
mdadm: /dev/vdb is identified as a member of /dev/md/bcache, slot 0.
mdadm: /dev/vdc is identified as a member of /dev/md/bcache, slot 1.
mdadm: added /dev/vdb to /dev/md/bcache as 0
mdadm: added /dev/vdc to /dev/md/bcache as 1
mdadm: Container /dev/md/bcache has been assembled with 2 drives
[root@fedora-virt ~]# cat /proc/mdstat
Personalities : [bcache]
md126 : active bcache vdb[1] vdc[0]
8388096 blocks super external:/md127/0 8388096k cache-blocks
md127 : inactive vdc[1](S) vdb[0](S)
16 blocks super external:bcache
unused devices: <none>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [mdadm PATCH] bcache: add bcache superblock
[not found] ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2012-05-15 21:16 ` Mark Hills
[not found] ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Mark Hills @ 2012-05-15 21:16 UTC (permalink / raw)
To: Dan Williams
Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-bcache-u79uwXL29TY76Z2rM5mHXA
[-- Attachment #1: Type: TEXT/PLAIN, Size: 3396 bytes --]
On Tue, 15 May 2012, Dan Williams wrote:
> On Mon, May 14, 2012 at 5:04 PM, Mark Hills <mark-UrrBsZIrrsb10XsdtD+oqA@public.gmane.org> wrote:
> > On Fri, 11 May 2012, Dan Williams wrote:
> >
> >> This is a hybrid proposal for supporting bcache as a md device.
> >> Somewhat similar to the v1.x metadata format, where array assembly is
> >> handled in userspace, but managed in the kernel. In the bcache case it
> >> is an "external" metadata format, but then the expectation is that the
> >> kernel "bcache" personality takes over runtime maintenance of the
> >> metadata.
> >
> > I am having some trouble with this, can you clarify (perhaps by example)
> > how to create a pairing of a cache and backing device?
[...]
> > # make-bcache -C /dev/sdb
> > # make-bcache -B /dev/sdc
> > # mdadm -A /dev/md0 /dev/sdb /dev/sdc
> > mdadm: Cannot assemble mbr metadata on /dev/sdb
>
> I should have been more explicit in the changelog. This current patch
> was only tested to assemble an existing bcache configuration. I.e. it
> assumes the backing device has been attached to the cache set at least
> once. By default make-bcache always creates a new cache-set id per
> invocation. So in the above example it won't find sdb and sdc belong
> to the same md device because the cache-set id's differ. You can
> verify this with mdadm -E.
Thanks for explaining. I suppose this is just expecially awkward at the
moment because attaching the devices can't be done without a different
kernel.
So it will be good to see the "create" support, when it is ready.
> [..]
> >
> > Maybe I missed something basic here, but I'm afraid can't see what? If
> > not, hopefully this information is useful.
> >
> > I use bcache v13 patches and the MD conversion, and this patch to mdadm.
>
> For reference:
>
> [root@fedora-virt ~]# mdadm -E /dev/vd[bc]
> /dev/vdb:
> Magic : <bcache>
> Version : 3
> Role : cache
> Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
> Cache Devs : 1
> Device UUID : a1b25e26:5a4c4db6:2716c1aa:eba44ec7
> Flags : sync
> Policy : lru
> Label :
> Device Size : 16776192 (8.00 GiB 8.59 GB)
> Bucket Size : 1024
> Num Buckets : 16384
> this dev : 0
> First Bucket : 1
> Checksum : 45fa97cda53b0b6 correct
> /dev/vdc:
> Magic : <bcache>
> Version : 1
> Role : backing-device
> Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
> Cache Devs : 1
> Device UUID : 7045ff87:aa4b13c4:670d8bb2:0b4ad628
> Flags : sync
> State : clean
> Mode : writeback
> Label :
> Device Size : 16776192 (8.00 GiB 8.59 GB)
> Bucket Size : 1024
> Num Buckets : 16384
> this dev : 0
> First Bucket : 1
> Checksum : f36910e51451d91c correct
Presumably there's a little more too this, as I can't even investigate the
UIDs in use:
# make-bcache -C /dev/sdb
[...]
# make-bcache -B /dev/sdc
[...]
# mdadm -E /dev/sd[bc]
/dev/sdb:
MBR Magic : aa55
/dev/sdc:
MBR Magic : aa55
I confirmed that I'm using the patched mdadm, but it still doesn't seem to
recognise the magic.
But, of course, I understand this is all work in progress. So this isn't a
complaint at all, I am just looking forward to code in this area and happy
to test where I can.
Thanks
--
Mark
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [mdadm PATCH] bcache: add bcache superblock
[not found] ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
@ 2012-07-12 15:03 ` Jacek Danecki
0 siblings, 0 replies; 6+ messages in thread
From: Jacek Danecki @ 2012-07-12 15:03 UTC (permalink / raw)
To: Mark Hills
Cc: Dan Williams, neilb-l3A5Bk7waGM,
koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
linux-raid-u79uwXL29TY76Z2rM5mHXA,
linux-bcache-u79uwXL29TY76Z2rM5mHXA
On 05/15/12 23:16, Mark Hills wrote:
> Presumably there's a little more too this, as I can't even investigate the
> UIDs in use:
>
> # make-bcache -C /dev/sdb
> [...]
> # make-bcache -B /dev/sdc
> [...]
> # mdadm -E /dev/sd[bc]
> /dev/sdb:
> MBR Magic : aa55
> /dev/sdc:
> MBR Magic : aa55
>
> I confirmed that I'm using the patched mdadm, but it still doesn't seem to
> recognise the magic.
As I can see the problem could be in mdadm which can recognize mbr superblock
if partition table exists on disk, and then uses load_super_mbr() instead of load_bcache().
You can look at guess_super_type() function in util.c file. See log below.
I've attached small patch, which I used to find out where issue is.
Btw, I've not idea why getinfo_super() for imsm and bcache set info.array.ctime always to 0.
Probably it should be fixed also because this value is used by guess_super_type() function.
rv = ss->load_super(st, fd, NULL);
if (rv == 0) {
struct mdinfo info;
st->ss->getinfo_super(st, &info, NULL);
if (bestsuper == -1 ||
besttime < info.array.ctime) {
bestsuper = i;
besttime = info.array.ctime;
}
ss->free_super(st);
}
./mdadm -E /dev/sdb
check 0.90 0x685a60
load super
check 1.x 0x685be0
load super
check ddf 0x686060
load super
check imsm 0x686240
load super
check mbr 0x686400
load super
getinfo_super for mbr bestsuper=-1
set bestsuper to 4
check gpt 0x686580
load super
check bcache 0x686700
load super
load_cache_sb
getinfo_super for bcache bestsuper=4
load super again: mbr 0x45a3c1
load_super: 0x686400 mbr 0x45a3c1
/dev/sdb: 0x686400 mbr 0x45a3c1
MBR Magic : aa55
To workaround this issue I've destroyed partition table on /dev/sdb,
and now mdadm can examine bcache superblock correctly.
./mdadm -E /dev/sdb
check 0.90 0x685a60
load super
check 1.x 0x685be0
load super
check ddf 0x686060
load super
check imsm 0x686240
load super
check mbr 0x686400
load super
check gpt 0x686580
load super
check bcache 0x686700
load super
load_cache_sb
getinfo_super for bcache bestsuper=-1
set bestsuper to 6
load super again: bcache 0x45bb6b
load_cache_sb
load_super: 0x686700 bcache 0x45bb6b
load_cache_sb
/dev/sdb: 0x686700 bcache 0x45bb6b
Magic : <bcache>
Version : 0
Role : cache
Set UUID : 1eefbc0e:e4405d53:e6c630a7:83b523c0
Cache Devs : 1
Device UUID : 4a9e3223:cb403537:b05209b0:26948206
Flags :
Policy : lru
Label :
Device Size : 234439680 (111.79 GiB 120.03 GB)
Bucket Size : 1024
Num Buckets : 228946
this dev : 0
First Bucket : 1
Checksum : 6730f38d517c4db2 correct
diff --git a/Examine.c b/Examine.c
index 5d71e53..5173f4a 100644
--- a/Examine.c
+++ b/Examine.c
@@ -88,10 +88,12 @@ int Examine(struct mddev_dev *devlist, int brief, int export, int scan,
if (st) {
err = 1;
st->ignore_hw_compat = 1;
- if (!container)
+ if (!container) {
+ printf("load_super: %p %s %p\n", st->ss, st->ss->name, st->ss->load_super);
err = st->ss->load_super(st, fd,
(brief||scan) ? NULL
:devlist->devname);
+ }
if (err && st->ss->load_container) {
err = st->ss->load_container(st, fd,
(brief||scan) ? NULL
@@ -149,7 +151,7 @@ int Examine(struct mddev_dev *devlist, int brief, int export, int scan,
st->ss->export_examine_super(st);
st->ss->free_super(st);
} else {
- printf("%s:\n",devlist->devname);
+ printf("%s: %p %s %p\n",devlist->devname, st->ss, st->ss->name, st->ss->load_super);
st->ss->examine_super(st, homehost);
st->ss->free_super(st);
}
diff --git a/super-bcache.c b/super-bcache.c
index ec8f3db..ebee248 100644
--- a/super-bcache.c
+++ b/super-bcache.c
@@ -72,6 +72,7 @@ static int load_cache_sb(struct bcache_super *super, int keep_fd)
if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
return ENODEV;
+ printf("load_cache_sb\n");
return 0;
}
diff --git a/util.c b/util.c
index d9e49cf..5aaa986 100644
--- a/util.c
+++ b/util.c
@@ -1043,19 +1043,23 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
for (i=0 ; superlist[i]; i++) {
int rv;
ss = superlist[i];
+ printf("check %s %p\n", ss->name, ss);
if (guess_type == guess_array && ss->add_to_super == NULL)
continue;
if (guess_type == guess_partitions && ss->add_to_super != NULL)
continue;
memset(st, 0, sizeof(*st));
st->ignore_hw_compat = 1;
+ printf("load super\n");
rv = ss->load_super(st, fd, NULL);
if (rv == 0) {
struct mdinfo info;
+ printf("getinfo_super for %s bestsuper=%d\n", st->ss->name, bestsuper);
st->ss->getinfo_super(st, &info, NULL);
if (bestsuper == -1 ||
besttime < info.array.ctime) {
bestsuper = i;
+ printf("set bestsuper to %d\n", i);
besttime = info.array.ctime;
}
ss->free_super(st);
@@ -1065,6 +1069,7 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
int rv;
memset(st, 0, sizeof(*st));
st->ignore_hw_compat = 1;
+ printf("load super again: %s %p\n", superlist[bestsuper]->name, superlist[bestsuper]->load_super);
rv = superlist[bestsuper]->load_super(st, fd, NULL);
if (rv == 0) {
superlist[bestsuper]->free_super(st);
--
Jacek
^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-07-12 15:03 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-11 20:39 [mdadm PATCH] bcache: add bcache superblock Dan Williams
[not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
2012-05-12 7:38 ` Jack Wang
2012-05-15 0:04 ` Mark Hills
[not found] ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
2012-05-15 16:59 ` Dan Williams
[not found] ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2012-05-15 21:16 ` Mark Hills
[not found] ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
2012-07-12 15:03 ` Jacek Danecki
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.