All of lore.kernel.org
 help / color / mirror / Atom feed
* [mdadm PATCH] bcache: add bcache superblock
@ 2012-05-11 20:39 Dan Williams
       [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Dan Williams @ 2012-05-11 20:39 UTC (permalink / raw)
  To: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA
  Cc: linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA

This is a hybrid proposal for supporting bcache as a md device.
Somewhat similar to the v1.x metadata format, where array assembly is
handled in userspace, but managed in the kernel.  In the bcache case it
is an "external" metadata format, but then the expectation is that the
kernel "bcache" personality takes over runtime maintenance of the
metadata.

The container id for bcache is the "cache_set".  The subvolume is the
backing device identifier.

This initial version only supports the runtime static portion of the
superblock, it will need to grow the ability to read the journal to
report the backing devices associated with a given cache set (i.e. in
the superblock backing devices know their cache_set container, but cache
devices need to look elsewhere to find their backing devices).

Cc: Kent Overstreet <koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
 Assemble.c     |    1 
 Makefile       |   11 +
 bcache.h       |   98 +++++++++
 crc64.c        |  129 +++++++++++
 maps.c         |    2 
 mdadm.h        |    2 
 super-bcache.c |  634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 util.c         |    2 
 8 files changed, 873 insertions(+), 6 deletions(-)
 create mode 100644 bcache.h
 create mode 100644 crc64.c
 create mode 100644 super-bcache.c

diff --git a/Assemble.c b/Assemble.c
index fd94461..267a2ce 100644
--- a/Assemble.c
+++ b/Assemble.c
@@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
 		} else switch(content->array.level) {
 		case LEVEL_LINEAR:
 		case LEVEL_MULTIPATH:
+		case LEVEL_BCACHE:
 		case 0:
 			err = sysfs_set_str(content, NULL, "array_state",
 					    "active");
diff --git a/Makefile b/Makefile
index b8d363f..7886d13 100644
--- a/Makefile
+++ b/Makefile
@@ -103,8 +103,8 @@ OBJS =  mdadm.o config.o policy.o mdstat.o  ReadMe.o util.o maps.o lib.o \
 	Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
 	Incremental.o \
 	mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
-	super-mbr.o super-gpt.o \
-	restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
+	super-mbr.o super-gpt.o super-bcache.o \
+	restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \
 	platform-intel.o probe_roms.o
 
 CHECK_OBJS = restripe.o sysfs.o maps.o lib.o
@@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h
 MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
 	config.o policy.o lib.o \
 	Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
-	super-mbr.o super-gpt.o \
-	super-ddf.o sha1.o crc32.o msg.o bitmap.o \
+	super-mbr.o super-gpt.o super-bcache.o \
+	super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \
 	platform-intel.o probe_roms.o
 
 MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
@@ -128,7 +128,8 @@ STATICOBJS = pwgr.o
 ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
 	maps.c lib.c \
 	super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
-	platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
+	platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \
+	super-bcache.c crc64.c
 ASSEMBLE_AUTO_SRCS := mdopen.c
 ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
 ifdef MDASSEMBLE_AUTO
diff --git a/bcache.h b/bcache.h
new file mode 100644
index 0000000..765e369
--- /dev/null
+++ b/bcache.h
@@ -0,0 +1,98 @@
+#ifndef _BCACHE_H
+#define _BCACHE_H
+
+#include <stdint.h>
+
+#define BITMASK(name, type, field, offset, size)		\
+static inline uint64_t name(const type *k)			\
+{								\
+	uint64_t field = __le64_to_cpu(k->field);		\
+	return (field >> offset) & ~(((uint64_t) ~0) << size);	\
+}								\
+								\
+static inline void SET_##name(type *k, uint64_t v)		\
+{								\
+	uint64_t field = __le64_to_cpu(k->field);		\
+	field &= ~(~((uint64_t) ~0 << size) << offset);		\
+	field |= v << offset;					\
+	k->field = __cpu_to_le64(field);			\
+}
+
+static const char bcache_magic[] = {
+	0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
+	0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
+
+/* Version 1: Backing dev
+ * Version 2: Seed pointer into btree node checksum
+ * Version 3: Backing dev superblock has offset of start of data
+ */
+
+#define BCACHE_SB_BDEV_VERSION	3
+#define BCACHE_SB_MAX_VERSION	3
+
+#define SB_SECTOR		8
+#define SB_SIZE			16 /* default data_offset in bcache-tools (?) */
+#define SB_LABEL_SIZE		32
+
+struct cache_sb {
+	uint64_t		csum;
+	uint64_t		offset;	/* sector where this sb was written */
+	uint64_t		version;
+#define CACHE_BACKING_DEV	1
+
+	uint8_t			magic[16];
+
+	uint8_t			uuid[16];
+	union {
+		uint8_t		set_uuid[16];
+		uint64_t	set_magic;
+	};
+	uint8_t			label[SB_LABEL_SIZE];
+
+	uint64_t		flags;
+	uint64_t		seq;
+	uint64_t		pad[8];
+
+	uint64_t		nbuckets;	/* device size */
+	uint16_t		block_size;	/* sectors */
+	uint16_t		bucket_size;	/* sectors */
+
+	uint16_t		nr_in_set;
+	uint16_t		nr_this_dev;
+
+	uint32_t		last_mount;	/* time_t */
+
+	uint16_t		first_bucket;
+	uint16_t		keys;		/* number of journal buckets */
+	uint64_t		d[];		/* journal buckets */
+};
+
+static inline int SB_BDEV(struct cache_sb *c)
+{
+	return __le64_to_cpu(c->version) == CACHE_BACKING_DEV;
+}
+
+BITMASK(CACHE_SYNC,     struct cache_sb, flags, 0, 1);
+BITMASK(CACHE_DISCARD,	struct cache_sb, flags, 1, 1);
+BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
+
+BITMASK(BDEV_CACHE_MODE,        struct cache_sb, flags, 0, 4);
+#define CACHE_MODE_WRITETHROUGH 0U
+#define CACHE_MODE_WRITEBACK    1U
+#define CACHE_MODE_WRITEAROUND  2U
+#define CACHE_MODE_NONE         3U
+BITMASK(BDEV_STATE,             struct cache_sb, flags, 61, 2);
+#define BDEV_STATE_NONE         0U
+#define BDEV_STATE_CLEAN        1U
+#define BDEV_STATE_DIRTY        2U
+#define BDEV_STATE_STALE        3U
+
+inline uint64_t crc64(const void *_data, size_t len);
+
+#define node(i, j)		((void *) ((i)->d + (j)))
+#define end(i)			node(i, (i)->keys)
+
+#define csum_set(i)							\
+	crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
+
+#endif
diff --git a/crc64.c b/crc64.c
new file mode 100644
index 0000000..8f37445
--- /dev/null
+++ b/crc64.c
@@ -0,0 +1,129 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+
+/*
+ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
+ * use permitted, subject to terms of PostgreSQL license; see.)
+
+ * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
+ * usual sort of implementation. (See Ross Williams' excellent introduction
+ * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
+ * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
+ * If we have no working 64-bit type, then fake it with two 32-bit registers.
+ *
+ * The present implementation is a normal (not "reflected", in Williams'
+ * terms) 64-bit CRC, using initial all-ones register contents and a final
+ * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
+ * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
+ *
+ * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ * x^7 + x^4 + x + 1
+*/
+
+static const uint64_t crc_table[256] = {
+	0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
+	0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
+	0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
+	0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
+	0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
+	0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
+	0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
+	0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
+	0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
+	0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
+	0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
+	0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
+	0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
+	0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
+	0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
+	0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
+	0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
+	0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
+	0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
+	0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
+	0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
+	0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
+	0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
+	0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
+	0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
+	0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
+	0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
+	0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
+	0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
+	0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
+	0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
+	0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
+	0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
+	0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
+	0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
+	0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
+	0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
+	0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
+	0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
+	0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
+	0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
+	0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
+	0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
+	0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
+	0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
+	0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
+	0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
+	0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
+	0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
+	0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
+	0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
+	0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
+	0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
+	0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
+	0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
+	0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
+	0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
+	0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
+	0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
+	0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
+	0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
+	0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
+	0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
+	0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
+	0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
+	0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
+	0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
+	0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
+	0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
+	0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
+	0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
+	0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
+	0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
+	0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
+	0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
+	0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
+	0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
+	0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
+	0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
+	0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
+	0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
+	0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
+	0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
+	0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
+	0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
+	0x9AFCE626CE85B507ULL
+};
+
+inline uint64_t crc64(const void *_data, size_t len)
+{
+	uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
+	const unsigned char *data = _data;
+
+	while (len--) {
+		int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
+		crc = crc_table[i] ^ (crc << 8);
+	}
+
+	return crc ^ 0xFFFFFFFFFFFFFFFFULL;
+}
diff --git a/maps.c b/maps.c
index f2ba9a7..cedf548 100644
--- a/maps.c
+++ b/maps.c
@@ -94,6 +94,8 @@ mapping_t pers[] = {
 	{ "10", 10},
 	{ "faulty", LEVEL_FAULTY},
 	{ "container", LEVEL_CONTAINER},
+	{ "bcache", LEVEL_BCACHE},
+	{ "11", LEVEL_BCACHE},
 	{ NULL, 0}
 };
 
diff --git a/mdadm.h b/mdadm.h
index 3bcd052..a0ccff6 100644
--- a/mdadm.h
+++ b/mdadm.h
@@ -816,6 +816,7 @@ extern struct superswitch {
 extern struct superswitch super0, super1;
 extern struct superswitch super_imsm, super_ddf;
 extern struct superswitch mbr, gpt;
+extern struct superswitch super_bcache;
 
 struct metadata_update {
 	int	len;
@@ -1296,6 +1297,7 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
 #define	LEVEL_MULTIPATH		(-4)
 #define	LEVEL_LINEAR		(-1)
 #define	LEVEL_FAULTY		(-5)
+#define LEVEL_BCACHE		(0xb)
 
 /* kernel module doesn't know about these */
 #define LEVEL_CONTAINER		(-100)
diff --git a/super-bcache.c b/super-bcache.c
new file mode 100644
index 0000000..ec8f3db
--- /dev/null
+++ b/super-bcache.c
@@ -0,0 +1,634 @@
+/*
+ * mdadm - bcache support
+ *
+ * Copyright (C) 2012 Intel Corporation
+ *
+ * bcache definitions copied from bcache-tools:
+ * git://evilpiepirate.org/~kent/bcache-tools.git
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define HAVE_STDINT_H 1
+#include "mdadm.h"
+#include "bcache.h"
+
+struct bcache_super {
+	union {
+		struct cache_sb *sb;
+		void *buf;
+	};
+	struct dl {
+		int major, minor;
+		char *devname;
+		int fd;
+	} *disk;
+	int vol;
+	struct bcache_super *next;
+};
+
+enum {
+	/* FIXME this is a function of the bucket size */
+	BCACHE_MAX_DEVICES = 2,
+};
+
+static int load_cache_sb(struct bcache_super *super, int keep_fd)
+{
+	struct dl *d = super->disk;
+	int rc, fd = d->fd;
+	struct cache_sb *c;
+	struct stat s;
+
+	if (!keep_fd)
+		d->fd = -1;
+
+	rc = fstat(fd, &s);
+	if (rc)
+		return rc;
+	d->major = major(s.st_rdev);
+	d->minor = minor(s.st_rdev);
+
+	rc = posix_memalign(&super->buf, 4096, 4096);
+	if (rc)
+		return rc;
+	c = super->sb;
+
+	if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096)
+		return errno;
+
+	if (csum_set(c) != __le64_to_cpu(c->csum))
+		return ENODEV;
+
+	if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
+		return ENODEV;
+
+	return 0;
+}
+
+static void __free_bcache(struct bcache_super *super)
+{
+	if (!super)
+		return;
+
+	while (super) {
+		struct bcache_super *next = super->next;
+		struct dl *d = super->disk;
+
+		d = super->disk;
+		if (d->fd >= 0)
+			close(d->fd);
+		free(d->devname);
+		free(d);
+		free(super->sb);
+		free(super);
+		super = next;
+	}
+}
+
+static void free_bcache(struct supertype *st)
+{
+	struct bcache_super *super = st->sb;
+
+	__free_bcache(super);
+	st->sb = NULL;
+}
+
+#ifndef MDASSEMBLE
+static void examine_bcache(struct supertype *st, char *homehost)
+{
+	const char *const cache_policies[] = { "lru", "fifo", "random", "" };
+	const char *const bdev_states[] = { "none", "clean", "dirty", "stale" };
+	const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" };
+	struct bcache_super *super = st->sb;
+	uint16_t first_bucket, bucket_size;
+	struct cache_sb *c = super->sb;
+	uint64_t nbuckets, csum;
+	unsigned long long sz;
+	char nbuf[64];
+
+	printf("       Magic : %s\n",
+	       memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>");
+	printf("     Version : %d\n", (int) c->version);
+	printf("        Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache");
+	__fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+	printf("    Set UUID : %s\n", nbuf + 5);
+	__fname_from_uuid((int *) c->uuid, 0, nbuf, ':');
+	printf("  Cache Devs : %u\n", c->nr_in_set);
+	/* FIXME: list all cache dev uuids in the load_container case */
+	printf(" Device UUID : %s\n", nbuf + 5);
+	printf("       Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "",
+				       CACHE_SYNC(c) ? " sync" : "");
+	if (SB_BDEV(c)) {
+		printf("       State : %s\n", bdev_states[BDEV_STATE(c)]);
+		printf("        Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]);
+	} else {
+		printf("      Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]);
+		/* FIXME: add reporting of backing device uuids in the cache caase */
+	}
+	printf("       Label : %.32s\n", c->label);
+	csum = __le64_to_cpu(c->csum);
+	nbuckets = __le64_to_cpu(c->nbuckets);
+	bucket_size = __le16_to_cpu(c->bucket_size);
+	first_bucket = __le16_to_cpu(c->first_bucket);
+	sz = (nbuckets - first_bucket) * bucket_size;
+	printf(" Device Size : %llu%s\n", sz, human_size(sz * 512));
+	printf(" Bucket Size : %u\n", bucket_size);
+	printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets);
+	printf("    this dev : %u\n", __le16_to_cpu(c->nr_this_dev));
+	printf("First Bucket : %u\n", first_bucket);
+	printf("    Checksum : %llx %s\n", (unsigned long long) csum,
+	       csum == csum_set(c) ? "correct" : "incorrect");
+}
+
+static void brief_examine_bcache(struct supertype *st, int verbose)
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+	char nbuf[64];
+
+	__fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+	printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5);
+}
+
+static void brief_examine_subarrays_bcache(struct supertype *st, int verbose)
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+	char nbuf[64], nbuf1[64];
+
+	/* FIXME this needs to parse the cache device journal to find
+	 * and report the backing dev uuid list
+	 */
+	if (!SB_BDEV(c))
+		return;
+
+	__fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+	__fname_from_uuid((int *) c->uuid, 0, nbuf1, ':');
+
+	printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5);
+}
+
+static void export_examine_bcache(struct supertype *st)
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+	char nbuf[64];
+
+	__fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+	printf("MD_METADATA=bcache\n");
+	printf("MD_LEVEL=container\n");
+	printf("MD_UUID=%s\n", nbuf+5);
+	printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1);
+}
+
+static void detail_bcache(struct supertype *st, char *homehost)
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+	char nbuf[64];
+
+	__fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+	printf("\n           UUID : %s\n", nbuf + 5);
+}
+
+static void brief_detail_bcache(struct supertype *st)
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+	char nbuf[64];
+
+	__fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
+	printf(" UUID=%s", nbuf + 5);
+}
+
+static struct bcache_super *alloc_super(const char *func)
+{
+	struct bcache_super *super = calloc(1, sizeof(*super));
+	struct dl *d = calloc(1, sizeof(*d));
+
+	if (!super || !d) {
+		fprintf(stderr, Name "%s: %s failed\n", func, __func__);
+		free(super);
+		free(d);
+		return NULL;
+	}
+
+	super->vol = -1;
+	super->disk = d;
+
+	return super;
+}
+
+static int load_container_bcache(struct supertype *st, int fd, char *devname)
+{
+	struct bcache_super *list = NULL;
+	int rc, i, cdev = 0, bdev = 0;
+	int devnum = fd2devnum(fd);
+	struct mdinfo *sra, *sd;
+
+	sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
+	if (!sra)
+		return 1;
+
+	if (sra->array.major_version != -1 ||
+	    sra->array.minor_version != -2 ||
+	    strcmp(sra->text_version, "bcache") != 0) {
+		rc = 1;
+		goto error;
+	}
+
+	for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
+		struct bcache_super *super = alloc_super(__func__);
+		struct cache_sb *c;
+		char nm[32];
+		int fd;
+
+		rc = 1;
+		if (!super)
+			goto error;
+		super->next = list;
+		list = super;
+
+		rc = 2;
+		sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
+		fd = dev_open(nm, O_RDWR);
+		if (fd < 0)
+			goto error;
+
+		super->disk->fd = fd;
+		rc = load_cache_sb(super, 1);
+		if (rc)
+			goto error;
+		c = super->sb;
+		if (SB_BDEV(c))
+			bdev++;
+		else
+			cdev++;
+	}
+	rc = 0;
+
+	/* FIXME disambiguate multiple bdevs per set, support multiple
+	 * cache devices
+	 */
+	if (bdev > 1) {
+		fprintf(stderr, Name ": %d backing devices detected\n", bdev);
+		rc = 3;
+	}
+	if (cdev > 1) {
+		fprintf(stderr, Name ": %d cache devices detected\n", cdev);
+		rc = 3;
+	}
+	if (rc)
+		goto error;
+	st->sb = list;
+	list = NULL;
+
+error:
+	if (list)
+		__free_bcache(list);
+	sysfs_free(sra);
+
+	st->container_dev = devnum;
+	if (rc == 0 && st->ss == NULL) {
+		st->ss = &super_bcache;
+		st->minor_version = 0;
+		st->max_devs = BCACHE_MAX_DEVICES;
+	}
+	return rc;
+}
+#endif
+
+static int load_bcache(struct supertype *st, int fd, char *devname)
+{
+	struct bcache_super *super;
+	struct dl *d;
+	int rc;
+
+	free_bcache(st);
+
+	super = alloc_super(__func__);
+	if (!super)
+		return 1;
+
+	st->sb = super;
+	d = super->disk;
+	d->devname = devname ? strdup(devname) : NULL;
+	d->fd = fd;
+	rc = load_cache_sb(super, 0);
+	if (rc) {
+		free_bcache(st);
+		if (!devname)
+			return rc;
+		fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__,
+			devname, strerror(rc));
+		return rc;
+	}
+
+	if (st->ss == NULL) {
+		st->ss = &super_bcache;
+		st->minor_version = 0;
+		st->max_devs = BCACHE_MAX_DEVICES;
+	}
+
+	return 0;
+}
+
+static int store_bcache(struct supertype *st, int fd)
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+
+	if (!c)
+		return 1;
+
+	if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c))
+		return 1;
+
+	return 0;
+}
+
+static int compare_bcache(struct supertype *st, struct supertype *tst)
+{
+	struct bcache_super *a = st->sb;
+	struct bcache_super *b = tst->sb;
+
+        if (!st->sb) {
+                st->sb = tst->sb;
+                tst->sb = NULL;
+                return 0;
+        }
+
+	if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0)
+		return 2;
+
+	return 0;
+}
+
+static __u64 avail_size_bcache(struct supertype *st, __u64 devsize)
+{
+	/* 4k from start, 8k min data offset */
+	const uint32_t reserved_sectors = (4+8) * 2; 
+
+	if (devsize < reserved_sectors)
+		return 0;
+
+	return devsize - reserved_sectors;
+}
+
+static struct supertype *match_metadata_desc_bcache(char *arg)
+{
+	struct supertype *st;
+
+	if (strcmp(arg, "bcache") != 0 &&
+	    strcmp(arg, "default") != 0)
+		return NULL;
+
+	st = calloc(1, sizeof(*st));
+	if (!st)
+		return NULL;
+	st->container_dev = NoMdDev;
+	st->ss = &super_bcache;
+	st->max_devs = BCACHE_MAX_DEVICES;
+	st->minor_version = 0;
+	st->sb = NULL;
+
+	return st;
+}
+
+static int match_home_bcache(struct supertype *st, char *homehost)
+{
+	/* the bcache superblock does not specify any host
+	 * identification information.  maybe it should...
+	 */
+
+	return -1;
+}
+
+static void uuid_from_bcache(struct supertype *st, int uuid[4])
+{
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+
+	memcpy(uuid, c->set_uuid, sizeof(c->set_uuid));
+}
+
+static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap)
+{
+	char *name = devnum2devname(st->container_dev);
+	struct bcache_super *super = st->sb;
+	uint16_t bucket_size, first_bucket;
+	struct cache_sb *c = super->sb;
+	unsigned long long sz;
+	uint64_t nbuckets;
+
+	nbuckets = __le64_to_cpu(c->nbuckets);
+	bucket_size = __le16_to_cpu(c->bucket_size);
+	first_bucket = __le16_to_cpu(c->first_bucket);
+	sz = (nbuckets - first_bucket) * bucket_size;
+
+	info->container_member	  = super->vol;
+	info->custom_array_size   = sz;
+	info->component_size	  = sz;
+	info->recovery_start	  = MaxSector;
+	info->data_offset	  = SB_SECTOR + SB_SIZE;
+	sprintf(info->text_version, "/%s/%d", name, super->vol);
+	snprintf(info->name, sizeof(info->name), "%s", c->label);
+	memcpy(info->uuid, c->uuid, sizeof(c->uuid));
+
+	info->array.raid_disks    = __le16_to_cpu(c->nr_in_set) + 1;
+	info->array.level         = LEVEL_BCACHE;
+	info->array.layout        = 0;
+	info->array.md_minor      = -1;
+	info->array.ctime         = 0;
+	info->array.utime         = 0;
+	info->array.chunk_size    = bucket_size * 512;
+	info->array.major_version = -1;
+	info->array.minor_version = -2;
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	info->disk.raid_disk = SB_BDEV(c);
+	info->disk.number = SB_BDEV(c);
+	info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
+}
+
+static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap)
+{
+	int i, cset, bdev, map_disks = info->array.raid_disks;
+	struct bcache_super *super = st->sb;
+	struct cache_sb *c = super->sb;
+
+	memset(info, 0, sizeof(*info));
+
+	if (super->vol >= 0)
+		return getinfo_bcache_volume(st, info, map_disks, dmap);
+
+	/* make Assemble choose the cache target */
+	info->events = SB_BDEV(c);
+	info->recovery_start = MaxSector;
+	info->data_offset = SB_SECTOR;
+	info->component_size = SB_SIZE;
+	strcpy(info->text_version, "bcache");
+	memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid));
+
+	info->array.raid_disks    = __le16_to_cpu(c->nr_in_set) + 1;
+	info->array.level         = LEVEL_CONTAINER;
+	info->array.layout        = 0;
+	info->array.md_minor      = -1;
+	info->array.ctime         = 0;
+	info->array.utime         = 0;
+	info->array.chunk_size    = __le16_to_cpu(c->bucket_size) * 512;
+	info->array.major_version = -1;
+	info->array.minor_version = -2;
+
+	info->disk.major = 0;
+	info->disk.minor = 0;
+	info->disk.raid_disk = SB_BDEV(c);
+	info->disk.number = SB_BDEV(c);
+	/* FIXME: need bcache superblock to identify failed devices */
+	info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
+
+	/* FIXME need to parse the journal uuid_bucket to understand
+	 * which cache devs are consistent with the set
+	 */
+	for (i = 0; dmap && i < map_disks; i++)
+		dmap[i] = 1;
+
+	cset = 0;
+	bdev = 0;
+	while (super) {
+		c = super->sb;
+
+		/* FIXME filter out-of-sync devices */
+		if (SB_BDEV(c))
+			bdev++;
+		else
+			cset++;
+		super = super->next;
+	}
+
+	if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1)
+		info->container_enough = 1;
+	else
+		info->container_enough = -1;
+}
+
+static int update_bcache(struct supertype *st, struct mdinfo *i, char *update,
+			 char *devname, int verbose, int uuid_set, char *homehost)
+{
+	/* FIXME */
+	if (strcmp(update, "grow") == 0) {
+		return 0;
+	} else if (strcmp(update, "resync") == 0) {
+		return 0;
+	} else if (strcmp(update, "homehost") == 0) {
+		return -1;
+	} else if (strcmp(update, "name") == 0) {
+		return -1;
+	} else if (strcmp(update, "_reshape_progress") == 0) {
+		return 0;
+	} else if (strcmp(update, "assemble") == 0 ) {
+		return 0;
+	} else {
+		return -1;
+	}
+}
+
+static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray)
+{
+	struct bcache_super *super = st->sb;
+	struct mdinfo *info, *disk = NULL;
+	char *ep;
+
+	info = calloc(1, sizeof(*info));
+	if (!info) {
+		fprintf(stderr, Name ": failed to allocate %zu bytes\n",
+			sizeof(*info));
+		return NULL;
+	}
+
+	/* don't support multiple backing disks per cache set */
+	if (subarray && (strtoul(subarray, &ep, 10) > 0 || *ep != '\0'))
+		goto error;
+
+	super->vol = 0;
+	getinfo_bcache(st, info, NULL);
+
+	for (; super; super = super->next) {
+		struct dl *d = super->disk;
+		struct cache_sb *c = super->sb;
+
+		disk = calloc(1, sizeof(*disk));
+		if (!disk) {
+			fprintf(stderr, Name ": failed to allocate disk\n");
+			goto error;
+		}
+		disk->next = info->devs;
+		info->devs = disk;
+
+		disk->disk.number = SB_BDEV(c);
+		disk->disk.raid_disk = SB_BDEV(c);
+		disk->disk.major = d->major;
+		disk->disk.minor = d->minor;
+		disk->recovery_start = MaxSector;
+		disk->disk.state = 1 << MD_DISK_ACTIVE;
+		disk->data_offset = info->data_offset;
+		disk->component_size = info->component_size;
+
+		info->array.working_disks++;
+	}
+
+	return info;
+
+ error:
+	disk = info->devs;
+	while (disk) {
+		struct mdinfo *next = disk->next;
+
+		free(disk);
+		disk = next;
+	}
+
+	free(info);
+	return NULL;
+}
+
+
+struct superswitch super_bcache = {
+#ifndef	MDASSEMBLE
+	.examine_super	         = examine_bcache,
+	.brief_examine_super     = brief_examine_bcache,
+	.brief_examine_subarrays = brief_examine_subarrays_bcache,
+	.export_examine_super    = export_examine_bcache,
+	.detail_super	         = detail_bcache,
+	.brief_detail_super      = brief_detail_bcache,
+	.load_container	         = load_container_bcache,
+#endif
+	.match_home              = match_home_bcache,
+	.uuid_from_super         = uuid_from_bcache,
+	.getinfo_super           = getinfo_bcache,
+	.update_super		 = update_bcache,
+
+	.avail_size              = avail_size_bcache,
+
+	.compare_super           = compare_bcache,
+
+	.load_super	         = load_bcache,
+	.store_super	         = store_bcache,
+	.free_super	         = free_bcache,
+	.match_metadata_desc     = match_metadata_desc_bcache,
+	.container_content       = container_content_bcache,
+
+	.external                = 1,
+	.name		         = "bcache",
+};
diff --git a/util.c b/util.c
index 6985a70..d9e49cf 100644
--- a/util.c
+++ b/util.c
@@ -919,7 +919,7 @@ struct superswitch *superlist[] =
 {
 	&super0, &super1,
 	&super_ddf, &super_imsm,
-	&mbr, &gpt,
+	&mbr, &gpt, &super_bcache,
 	NULL };
 
 #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [mdadm PATCH] bcache: add bcache superblock
       [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
@ 2012-05-12  7:38   ` Jack Wang
  2012-05-15  0:04   ` Mark Hills
  1 sibling, 0 replies; 6+ messages in thread
From: Jack Wang @ 2012-05-12  7:38 UTC (permalink / raw)
  To: Dan Williams
  Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
	linux-raid-u79uwXL29TY76Z2rM5mHXA,
	linux-bcache-u79uwXL29TY76Z2rM5mHXA

Hi Dan,

So this is the alternate interface for bcache tools using mdadm to
manage bcache?
If so, could you give a example of how to using this.

Best regards.

Jack

2012/5/12 Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>:
> This is a hybrid proposal for supporting bcache as a md device.
> Somewhat similar to the v1.x metadata format, where array assembly is
> handled in userspace, but managed in the kernel.  In the bcache case it
> is an "external" metadata format, but then the expectation is that the
> kernel "bcache" personality takes over runtime maintenance of the
> metadata.
>
> The container id for bcache is the "cache_set".  The subvolume is the
> backing device identifier.
>
> This initial version only supports the runtime static portion of the
> superblock, it will need to grow the ability to read the journal to
> report the backing devices associated with a given cache set (i.e. in
> the superblock backing devices know their cache_set container, but cache
> devices need to look elsewhere to find their backing devices).
>
> Cc: Kent Overstreet <koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
> Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
> ---
>  Assemble.c     |    1
>  Makefile       |   11 +
>  bcache.h       |   98 +++++++++
>  crc64.c        |  129 +++++++++++
>  maps.c         |    2
>  mdadm.h        |    2
>  super-bcache.c |  634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  util.c         |    2
>  8 files changed, 873 insertions(+), 6 deletions(-)
>  create mode 100644 bcache.h
>  create mode 100644 crc64.c
>  create mode 100644 super-bcache.c
>
> diff --git a/Assemble.c b/Assemble.c
> index fd94461..267a2ce 100644
> --- a/Assemble.c
> +++ b/Assemble.c
> @@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd,
>                } else switch(content->array.level) {
>                case LEVEL_LINEAR:
>                case LEVEL_MULTIPATH:
> +               case LEVEL_BCACHE:
>                case 0:
>                        err = sysfs_set_str(content, NULL, "array_state",
>                                            "active");
> diff --git a/Makefile b/Makefile
> index b8d363f..7886d13 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -103,8 +103,8 @@ OBJS =  mdadm.o config.o policy.o mdstat.o  ReadMe.o util.o maps.o lib.o \
>        Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \
>        Incremental.o \
>        mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \
> -       super-mbr.o super-gpt.o \
> -       restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \
> +       super-mbr.o super-gpt.o super-bcache.o \
> +       restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \
>        platform-intel.o probe_roms.o
>
>  CHECK_OBJS = restripe.o sysfs.o maps.o lib.o
> @@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h
>  MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \
>        config.o policy.o lib.o \
>        Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \
> -       super-mbr.o super-gpt.o \
> -       super-ddf.o sha1.o crc32.o msg.o bitmap.o \
> +       super-mbr.o super-gpt.o super-bcache.o \
> +       super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \
>        platform-intel.o probe_roms.o
>
>  MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS))
> @@ -128,7 +128,8 @@ STATICOBJS = pwgr.o
>  ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \
>        maps.c lib.c \
>        super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \
> -       platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c
> +       platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \
> +       super-bcache.c crc64.c
>  ASSEMBLE_AUTO_SRCS := mdopen.c
>  ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE
>  ifdef MDASSEMBLE_AUTO
> diff --git a/bcache.h b/bcache.h
> new file mode 100644
> index 0000000..765e369
> --- /dev/null
> +++ b/bcache.h
> @@ -0,0 +1,98 @@
> +#ifndef _BCACHE_H
> +#define _BCACHE_H
> +
> +#include <stdint.h>
> +
> +#define BITMASK(name, type, field, offset, size)               \
> +static inline uint64_t name(const type *k)                     \
> +{                                                              \
> +       uint64_t field = __le64_to_cpu(k->field);               \
> +       return (field >> offset) & ~(((uint64_t) ~0) << size);  \
> +}                                                              \
> +                                                               \
> +static inline void SET_##name(type *k, uint64_t v)             \
> +{                                                              \
> +       uint64_t field = __le64_to_cpu(k->field);               \
> +       field &= ~(~((uint64_t) ~0 << size) << offset);         \
> +       field |= v << offset;                                   \
> +       k->field = __cpu_to_le64(field);                        \
> +}
> +
> +static const char bcache_magic[] = {
> +       0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca,
> +       0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 };
> +
> +/* Version 1: Backing dev
> + * Version 2: Seed pointer into btree node checksum
> + * Version 3: Backing dev superblock has offset of start of data
> + */
> +
> +#define BCACHE_SB_BDEV_VERSION 3
> +#define BCACHE_SB_MAX_VERSION  3
> +
> +#define SB_SECTOR              8
> +#define SB_SIZE                        16 /* default data_offset in bcache-tools (?) */
> +#define SB_LABEL_SIZE          32
> +
> +struct cache_sb {
> +       uint64_t                csum;
> +       uint64_t                offset; /* sector where this sb was written */
> +       uint64_t                version;
> +#define CACHE_BACKING_DEV      1
> +
> +       uint8_t                 magic[16];
> +
> +       uint8_t                 uuid[16];
> +       union {
> +               uint8_t         set_uuid[16];
> +               uint64_t        set_magic;
> +       };
> +       uint8_t                 label[SB_LABEL_SIZE];
> +
> +       uint64_t                flags;
> +       uint64_t                seq;
> +       uint64_t                pad[8];
> +
> +       uint64_t                nbuckets;       /* device size */
> +       uint16_t                block_size;     /* sectors */
> +       uint16_t                bucket_size;    /* sectors */
> +
> +       uint16_t                nr_in_set;
> +       uint16_t                nr_this_dev;
> +
> +       uint32_t                last_mount;     /* time_t */
> +
> +       uint16_t                first_bucket;
> +       uint16_t                keys;           /* number of journal buckets */
> +       uint64_t                d[];            /* journal buckets */
> +};
> +
> +static inline int SB_BDEV(struct cache_sb *c)
> +{
> +       return __le64_to_cpu(c->version) == CACHE_BACKING_DEV;
> +}
> +
> +BITMASK(CACHE_SYNC,     struct cache_sb, flags, 0, 1);
> +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1);
> +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3);
> +
> +BITMASK(BDEV_CACHE_MODE,        struct cache_sb, flags, 0, 4);
> +#define CACHE_MODE_WRITETHROUGH 0U
> +#define CACHE_MODE_WRITEBACK    1U
> +#define CACHE_MODE_WRITEAROUND  2U
> +#define CACHE_MODE_NONE         3U
> +BITMASK(BDEV_STATE,             struct cache_sb, flags, 61, 2);
> +#define BDEV_STATE_NONE         0U
> +#define BDEV_STATE_CLEAN        1U
> +#define BDEV_STATE_DIRTY        2U
> +#define BDEV_STATE_STALE        3U
> +
> +inline uint64_t crc64(const void *_data, size_t len);
> +
> +#define node(i, j)             ((void *) ((i)->d + (j)))
> +#define end(i)                 node(i, (i)->keys)
> +
> +#define csum_set(i)                                                    \
> +       crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8))
> +
> +#endif
> diff --git a/crc64.c b/crc64.c
> new file mode 100644
> index 0000000..8f37445
> --- /dev/null
> +++ b/crc64.c
> @@ -0,0 +1,129 @@
> +#define _GNU_SOURCE
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <unistd.h>
> +
> +/*
> + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
> + * use permitted, subject to terms of PostgreSQL license; see.)
> +
> + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the
> + * usual sort of implementation. (See Ross Williams' excellent introduction
> + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from
> + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.)
> + * If we have no working 64-bit type, then fake it with two 32-bit registers.
> + *
> + * The present implementation is a normal (not "reflected", in Williams'
> + * terms) 64-bit CRC, using initial all-ones register contents and a final
> + * bit inversion. The chosen polynomial is borrowed from the DLT1 spec
> + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM):
> + *
> + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
> + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
> + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
> + * x^7 + x^4 + x + 1
> +*/
> +
> +static const uint64_t crc_table[256] = {
> +       0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL,
> +       0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL,
> +       0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL,
> +       0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL,
> +       0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL,
> +       0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL,
> +       0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL,
> +       0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL,
> +       0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL,
> +       0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL,
> +       0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL,
> +       0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL,
> +       0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL,
> +       0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL,
> +       0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL,
> +       0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL,
> +       0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL,
> +       0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL,
> +       0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL,
> +       0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL,
> +       0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL,
> +       0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL,
> +       0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL,
> +       0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL,
> +       0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL,
> +       0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL,
> +       0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL,
> +       0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL,
> +       0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL,
> +       0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL,
> +       0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL,
> +       0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL,
> +       0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL,
> +       0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL,
> +       0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL,
> +       0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL,
> +       0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL,
> +       0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL,
> +       0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL,
> +       0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL,
> +       0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL,
> +       0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL,
> +       0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL,
> +       0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL,
> +       0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL,
> +       0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL,
> +       0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL,
> +       0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL,
> +       0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL,
> +       0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL,
> +       0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL,
> +       0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL,
> +       0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL,
> +       0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL,
> +       0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL,
> +       0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL,
> +       0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL,
> +       0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL,
> +       0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL,
> +       0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL,
> +       0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL,
> +       0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL,
> +       0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL,
> +       0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL,
> +       0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL,
> +       0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL,
> +       0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL,
> +       0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL,
> +       0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL,
> +       0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL,
> +       0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL,
> +       0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL,
> +       0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL,
> +       0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL,
> +       0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL,
> +       0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL,
> +       0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL,
> +       0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL,
> +       0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL,
> +       0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL,
> +       0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL,
> +       0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL,
> +       0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL,
> +       0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL,
> +       0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL,
> +       0x9AFCE626CE85B507ULL
> +};
> +
> +inline uint64_t crc64(const void *_data, size_t len)
> +{
> +       uint64_t crc = 0xFFFFFFFFFFFFFFFFULL;
> +       const unsigned char *data = _data;
> +
> +       while (len--) {
> +               int i = ((int) (crc >> 56) ^ *data++) & 0xFF;
> +               crc = crc_table[i] ^ (crc << 8);
> +       }
> +
> +       return crc ^ 0xFFFFFFFFFFFFFFFFULL;
> +}
> diff --git a/maps.c b/maps.c
> index f2ba9a7..cedf548 100644
> --- a/maps.c
> +++ b/maps.c
> @@ -94,6 +94,8 @@ mapping_t pers[] = {
>        { "10", 10},
>        { "faulty", LEVEL_FAULTY},
>        { "container", LEVEL_CONTAINER},
> +       { "bcache", LEVEL_BCACHE},
> +       { "11", LEVEL_BCACHE},
>        { NULL, 0}
>  };
>
> diff --git a/mdadm.h b/mdadm.h
> index 3bcd052..a0ccff6 100644
> --- a/mdadm.h
> +++ b/mdadm.h
> @@ -816,6 +816,7 @@ extern struct superswitch {
>  extern struct superswitch super0, super1;
>  extern struct superswitch super_imsm, super_ddf;
>  extern struct superswitch mbr, gpt;
> +extern struct superswitch super_bcache;
>
>  struct metadata_update {
>        int     len;
> @@ -1296,6 +1297,7 @@ static inline int xasprintf(char **strp, const char *fmt, ...) {
>  #define        LEVEL_MULTIPATH         (-4)
>  #define        LEVEL_LINEAR            (-1)
>  #define        LEVEL_FAULTY            (-5)
> +#define LEVEL_BCACHE           (0xb)
>
>  /* kernel module doesn't know about these */
>  #define LEVEL_CONTAINER                (-100)
> diff --git a/super-bcache.c b/super-bcache.c
> new file mode 100644
> index 0000000..ec8f3db
> --- /dev/null
> +++ b/super-bcache.c
> @@ -0,0 +1,634 @@
> +/*
> + * mdadm - bcache support
> + *
> + * Copyright (C) 2012 Intel Corporation
> + *
> + * bcache definitions copied from bcache-tools:
> + * git://evilpiepirate.org/~kent/bcache-tools.git
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc.,
> + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
> + */
> +#define HAVE_STDINT_H 1
> +#include "mdadm.h"
> +#include "bcache.h"
> +
> +struct bcache_super {
> +       union {
> +               struct cache_sb *sb;
> +               void *buf;
> +       };
> +       struct dl {
> +               int major, minor;
> +               char *devname;
> +               int fd;
> +       } *disk;
> +       int vol;
> +       struct bcache_super *next;
> +};
> +
> +enum {
> +       /* FIXME this is a function of the bucket size */
> +       BCACHE_MAX_DEVICES = 2,
> +};
> +
> +static int load_cache_sb(struct bcache_super *super, int keep_fd)
> +{
> +       struct dl *d = super->disk;
> +       int rc, fd = d->fd;
> +       struct cache_sb *c;
> +       struct stat s;
> +
> +       if (!keep_fd)
> +               d->fd = -1;
> +
> +       rc = fstat(fd, &s);
> +       if (rc)
> +               return rc;
> +       d->major = major(s.st_rdev);
> +       d->minor = minor(s.st_rdev);
> +
> +       rc = posix_memalign(&super->buf, 4096, 4096);
> +       if (rc)
> +               return rc;
> +       c = super->sb;
> +
> +       if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096)
> +               return errno;
> +
> +       if (csum_set(c) != __le64_to_cpu(c->csum))
> +               return ENODEV;
> +
> +       if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
> +               return ENODEV;
> +
> +       return 0;
> +}
> +
> +static void __free_bcache(struct bcache_super *super)
> +{
> +       if (!super)
> +               return;
> +
> +       while (super) {
> +               struct bcache_super *next = super->next;
> +               struct dl *d = super->disk;
> +
> +               d = super->disk;
> +               if (d->fd >= 0)
> +                       close(d->fd);
> +               free(d->devname);
> +               free(d);
> +               free(super->sb);
> +               free(super);
> +               super = next;
> +       }
> +}
> +
> +static void free_bcache(struct supertype *st)
> +{
> +       struct bcache_super *super = st->sb;
> +
> +       __free_bcache(super);
> +       st->sb = NULL;
> +}
> +
> +#ifndef MDASSEMBLE
> +static void examine_bcache(struct supertype *st, char *homehost)
> +{
> +       const char *const cache_policies[] = { "lru", "fifo", "random", "" };
> +       const char *const bdev_states[] = { "none", "clean", "dirty", "stale" };
> +       const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" };
> +       struct bcache_super *super = st->sb;
> +       uint16_t first_bucket, bucket_size;
> +       struct cache_sb *c = super->sb;
> +       uint64_t nbuckets, csum;
> +       unsigned long long sz;
> +       char nbuf[64];
> +
> +       printf("       Magic : %s\n",
> +              memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>");
> +       printf("     Version : %d\n", (int) c->version);
> +       printf("        Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache");
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("    Set UUID : %s\n", nbuf + 5);
> +       __fname_from_uuid((int *) c->uuid, 0, nbuf, ':');
> +       printf("  Cache Devs : %u\n", c->nr_in_set);
> +       /* FIXME: list all cache dev uuids in the load_container case */
> +       printf(" Device UUID : %s\n", nbuf + 5);
> +       printf("       Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "",
> +                                      CACHE_SYNC(c) ? " sync" : "");
> +       if (SB_BDEV(c)) {
> +               printf("       State : %s\n", bdev_states[BDEV_STATE(c)]);
> +               printf("        Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]);
> +       } else {
> +               printf("      Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]);
> +               /* FIXME: add reporting of backing device uuids in the cache caase */
> +       }
> +       printf("       Label : %.32s\n", c->label);
> +       csum = __le64_to_cpu(c->csum);
> +       nbuckets = __le64_to_cpu(c->nbuckets);
> +       bucket_size = __le16_to_cpu(c->bucket_size);
> +       first_bucket = __le16_to_cpu(c->first_bucket);
> +       sz = (nbuckets - first_bucket) * bucket_size;
> +       printf(" Device Size : %llu%s\n", sz, human_size(sz * 512));
> +       printf(" Bucket Size : %u\n", bucket_size);
> +       printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets);
> +       printf("    this dev : %u\n", __le16_to_cpu(c->nr_this_dev));
> +       printf("First Bucket : %u\n", first_bucket);
> +       printf("    Checksum : %llx %s\n", (unsigned long long) csum,
> +              csum == csum_set(c) ? "correct" : "incorrect");
> +}
> +
> +static void brief_examine_bcache(struct supertype *st, int verbose)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5);
> +}
> +
> +static void brief_examine_subarrays_bcache(struct supertype *st, int verbose)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64], nbuf1[64];
> +
> +       /* FIXME this needs to parse the cache device journal to find
> +        * and report the backing dev uuid list
> +        */
> +       if (!SB_BDEV(c))
> +               return;
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       __fname_from_uuid((int *) c->uuid, 0, nbuf1, ':');
> +
> +       printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5);
> +}
> +
> +static void export_examine_bcache(struct supertype *st)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("MD_METADATA=bcache\n");
> +       printf("MD_LEVEL=container\n");
> +       printf("MD_UUID=%s\n", nbuf+5);
> +       printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1);
> +}
> +
> +static void detail_bcache(struct supertype *st, char *homehost)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf("\n           UUID : %s\n", nbuf + 5);
> +}
> +
> +static void brief_detail_bcache(struct supertype *st)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +       char nbuf[64];
> +
> +       __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':');
> +       printf(" UUID=%s", nbuf + 5);
> +}
> +
> +static struct bcache_super *alloc_super(const char *func)
> +{
> +       struct bcache_super *super = calloc(1, sizeof(*super));
> +       struct dl *d = calloc(1, sizeof(*d));
> +
> +       if (!super || !d) {
> +               fprintf(stderr, Name "%s: %s failed\n", func, __func__);
> +               free(super);
> +               free(d);
> +               return NULL;
> +       }
> +
> +       super->vol = -1;
> +       super->disk = d;
> +
> +       return super;
> +}
> +
> +static int load_container_bcache(struct supertype *st, int fd, char *devname)
> +{
> +       struct bcache_super *list = NULL;
> +       int rc, i, cdev = 0, bdev = 0;
> +       int devnum = fd2devnum(fd);
> +       struct mdinfo *sra, *sd;
> +
> +       sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE);
> +       if (!sra)
> +               return 1;
> +
> +       if (sra->array.major_version != -1 ||
> +           sra->array.minor_version != -2 ||
> +           strcmp(sra->text_version, "bcache") != 0) {
> +               rc = 1;
> +               goto error;
> +       }
> +
> +       for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) {
> +               struct bcache_super *super = alloc_super(__func__);
> +               struct cache_sb *c;
> +               char nm[32];
> +               int fd;
> +
> +               rc = 1;
> +               if (!super)
> +                       goto error;
> +               super->next = list;
> +               list = super;
> +
> +               rc = 2;
> +               sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor);
> +               fd = dev_open(nm, O_RDWR);
> +               if (fd < 0)
> +                       goto error;
> +
> +               super->disk->fd = fd;
> +               rc = load_cache_sb(super, 1);
> +               if (rc)
> +                       goto error;
> +               c = super->sb;
> +               if (SB_BDEV(c))
> +                       bdev++;
> +               else
> +                       cdev++;
> +       }
> +       rc = 0;
> +
> +       /* FIXME disambiguate multiple bdevs per set, support multiple
> +        * cache devices
> +        */
> +       if (bdev > 1) {
> +               fprintf(stderr, Name ": %d backing devices detected\n", bdev);
> +               rc = 3;
> +       }
> +       if (cdev > 1) {
> +               fprintf(stderr, Name ": %d cache devices detected\n", cdev);
> +               rc = 3;
> +       }
> +       if (rc)
> +               goto error;
> +       st->sb = list;
> +       list = NULL;
> +
> +error:
> +       if (list)
> +               __free_bcache(list);
> +       sysfs_free(sra);
> +
> +       st->container_dev = devnum;
> +       if (rc == 0 && st->ss == NULL) {
> +               st->ss = &super_bcache;
> +               st->minor_version = 0;
> +               st->max_devs = BCACHE_MAX_DEVICES;
> +       }
> +       return rc;
> +}
> +#endif
> +
> +static int load_bcache(struct supertype *st, int fd, char *devname)
> +{
> +       struct bcache_super *super;
> +       struct dl *d;
> +       int rc;
> +
> +       free_bcache(st);
> +
> +       super = alloc_super(__func__);
> +       if (!super)
> +               return 1;
> +
> +       st->sb = super;
> +       d = super->disk;
> +       d->devname = devname ? strdup(devname) : NULL;
> +       d->fd = fd;
> +       rc = load_cache_sb(super, 0);
> +       if (rc) {
> +               free_bcache(st);
> +               if (!devname)
> +                       return rc;
> +               fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__,
> +                       devname, strerror(rc));
> +               return rc;
> +       }
> +
> +       if (st->ss == NULL) {
> +               st->ss = &super_bcache;
> +               st->minor_version = 0;
> +               st->max_devs = BCACHE_MAX_DEVICES;
> +       }
> +
> +       return 0;
> +}
> +
> +static int store_bcache(struct supertype *st, int fd)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +
> +       if (!c)
> +               return 1;
> +
> +       if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c))
> +               return 1;
> +
> +       return 0;
> +}
> +
> +static int compare_bcache(struct supertype *st, struct supertype *tst)
> +{
> +       struct bcache_super *a = st->sb;
> +       struct bcache_super *b = tst->sb;
> +
> +        if (!st->sb) {
> +                st->sb = tst->sb;
> +                tst->sb = NULL;
> +                return 0;
> +        }
> +
> +       if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0)
> +               return 2;
> +
> +       return 0;
> +}
> +
> +static __u64 avail_size_bcache(struct supertype *st, __u64 devsize)
> +{
> +       /* 4k from start, 8k min data offset */
> +       const uint32_t reserved_sectors = (4+8) * 2;
> +
> +       if (devsize < reserved_sectors)
> +               return 0;
> +
> +       return devsize - reserved_sectors;
> +}
> +
> +static struct supertype *match_metadata_desc_bcache(char *arg)
> +{
> +       struct supertype *st;
> +
> +       if (strcmp(arg, "bcache") != 0 &&
> +           strcmp(arg, "default") != 0)
> +               return NULL;
> +
> +       st = calloc(1, sizeof(*st));
> +       if (!st)
> +               return NULL;
> +       st->container_dev = NoMdDev;
> +       st->ss = &super_bcache;
> +       st->max_devs = BCACHE_MAX_DEVICES;
> +       st->minor_version = 0;
> +       st->sb = NULL;
> +
> +       return st;
> +}
> +
> +static int match_home_bcache(struct supertype *st, char *homehost)
> +{
> +       /* the bcache superblock does not specify any host
> +        * identification information.  maybe it should...
> +        */
> +
> +       return -1;
> +}
> +
> +static void uuid_from_bcache(struct supertype *st, int uuid[4])
> +{
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +
> +       memcpy(uuid, c->set_uuid, sizeof(c->set_uuid));
> +}
> +
> +static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap)
> +{
> +       char *name = devnum2devname(st->container_dev);
> +       struct bcache_super *super = st->sb;
> +       uint16_t bucket_size, first_bucket;
> +       struct cache_sb *c = super->sb;
> +       unsigned long long sz;
> +       uint64_t nbuckets;
> +
> +       nbuckets = __le64_to_cpu(c->nbuckets);
> +       bucket_size = __le16_to_cpu(c->bucket_size);
> +       first_bucket = __le16_to_cpu(c->first_bucket);
> +       sz = (nbuckets - first_bucket) * bucket_size;
> +
> +       info->container_member    = super->vol;
> +       info->custom_array_size   = sz;
> +       info->component_size      = sz;
> +       info->recovery_start      = MaxSector;
> +       info->data_offset         = SB_SECTOR + SB_SIZE;
> +       sprintf(info->text_version, "/%s/%d", name, super->vol);
> +       snprintf(info->name, sizeof(info->name), "%s", c->label);
> +       memcpy(info->uuid, c->uuid, sizeof(c->uuid));
> +
> +       info->array.raid_disks    = __le16_to_cpu(c->nr_in_set) + 1;
> +       info->array.level         = LEVEL_BCACHE;
> +       info->array.layout        = 0;
> +       info->array.md_minor      = -1;
> +       info->array.ctime         = 0;
> +       info->array.utime         = 0;
> +       info->array.chunk_size    = bucket_size * 512;
> +       info->array.major_version = -1;
> +       info->array.minor_version = -2;
> +
> +       info->disk.major = 0;
> +       info->disk.minor = 0;
> +       info->disk.raid_disk = SB_BDEV(c);
> +       info->disk.number = SB_BDEV(c);
> +       info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
> +}
> +
> +static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap)
> +{
> +       int i, cset, bdev, map_disks = info->array.raid_disks;
> +       struct bcache_super *super = st->sb;
> +       struct cache_sb *c = super->sb;
> +
> +       memset(info, 0, sizeof(*info));
> +
> +       if (super->vol >= 0)
> +               return getinfo_bcache_volume(st, info, map_disks, dmap);
> +
> +       /* make Assemble choose the cache target */
> +       info->events = SB_BDEV(c);
> +       info->recovery_start = MaxSector;
> +       info->data_offset = SB_SECTOR;
> +       info->component_size = SB_SIZE;
> +       strcpy(info->text_version, "bcache");
> +       memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid));
> +
> +       info->array.raid_disks    = __le16_to_cpu(c->nr_in_set) + 1;
> +       info->array.level         = LEVEL_CONTAINER;
> +       info->array.layout        = 0;
> +       info->array.md_minor      = -1;
> +       info->array.ctime         = 0;
> +       info->array.utime         = 0;
> +       info->array.chunk_size    = __le16_to_cpu(c->bucket_size) * 512;
> +       info->array.major_version = -1;
> +       info->array.minor_version = -2;
> +
> +       info->disk.major = 0;
> +       info->disk.minor = 0;
> +       info->disk.raid_disk = SB_BDEV(c);
> +       info->disk.number = SB_BDEV(c);
> +       /* FIXME: need bcache superblock to identify failed devices */
> +       info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC;
> +
> +       /* FIXME need to parse the journal uuid_bucket to understand
> +        * which cache devs are consistent with the set
> +        */
> +       for (i = 0; dmap && i < map_disks; i++)
> +               dmap[i] = 1;
> +
> +       cset = 0;
> +       bdev = 0;
> +       while (super) {
> +               c = super->sb;
> +
> +               /* FIXME filter out-of-sync devices */
> +               if (SB_BDEV(c))
> +                       bdev++;
> +               else
> +                       cset++;
> +               super = super->next;
> +       }
> +
> +       if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1)
> +               info->container_enough = 1;
> +       else
> +               info->container_enough = -1;
> +}
> +
> +static int update_bcache(struct supertype *st, struct mdinfo *i, char *update,
> +                        char *devname, int verbose, int uuid_set, char *homehost)
> +{
> +       /* FIXME */
> +       if (strcmp(update, "grow") == 0) {
> +               return 0;
> +       } else if (strcmp(update, "resync") == 0) {
> +               return 0;
> +       } else if (strcmp(update, "homehost") == 0) {
> +               return -1;
> +       } else if (strcmp(update, "name") == 0) {
> +               return -1;
> +       } else if (strcmp(update, "_reshape_progress") == 0) {
> +               return 0;
> +       } else if (strcmp(update, "assemble") == 0 ) {
> +               return 0;
> +       } else {
> +               return -1;
> +       }
> +}
> +
> +static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray)
> +{
> +       struct bcache_super *super = st->sb;
> +       struct mdinfo *info, *disk = NULL;
> +       char *ep;
> +
> +       info = calloc(1, sizeof(*info));
> +       if (!info) {
> +               fprintf(stderr, Name ": failed to allocate %zu bytes\n",
> +                       sizeof(*info));
> +               return NULL;
> +       }
> +
> +       /* don't support multiple backing disks per cache set */
> +       if (subarray && (strtoul(subarray, &ep, 10) > 0 || *ep != '\0'))
> +               goto error;
> +
> +       super->vol = 0;
> +       getinfo_bcache(st, info, NULL);
> +
> +       for (; super; super = super->next) {
> +               struct dl *d = super->disk;
> +               struct cache_sb *c = super->sb;
> +
> +               disk = calloc(1, sizeof(*disk));
> +               if (!disk) {
> +                       fprintf(stderr, Name ": failed to allocate disk\n");
> +                       goto error;
> +               }
> +               disk->next = info->devs;
> +               info->devs = disk;
> +
> +               disk->disk.number = SB_BDEV(c);
> +               disk->disk.raid_disk = SB_BDEV(c);
> +               disk->disk.major = d->major;
> +               disk->disk.minor = d->minor;
> +               disk->recovery_start = MaxSector;
> +               disk->disk.state = 1 << MD_DISK_ACTIVE;
> +               disk->data_offset = info->data_offset;
> +               disk->component_size = info->component_size;
> +
> +               info->array.working_disks++;
> +       }
> +
> +       return info;
> +
> + error:
> +       disk = info->devs;
> +       while (disk) {
> +               struct mdinfo *next = disk->next;
> +
> +               free(disk);
> +               disk = next;
> +       }
> +
> +       free(info);
> +       return NULL;
> +}
> +
> +
> +struct superswitch super_bcache = {
> +#ifndef        MDASSEMBLE
> +       .examine_super           = examine_bcache,
> +       .brief_examine_super     = brief_examine_bcache,
> +       .brief_examine_subarrays = brief_examine_subarrays_bcache,
> +       .export_examine_super    = export_examine_bcache,
> +       .detail_super            = detail_bcache,
> +       .brief_detail_super      = brief_detail_bcache,
> +       .load_container          = load_container_bcache,
> +#endif
> +       .match_home              = match_home_bcache,
> +       .uuid_from_super         = uuid_from_bcache,
> +       .getinfo_super           = getinfo_bcache,
> +       .update_super            = update_bcache,
> +
> +       .avail_size              = avail_size_bcache,
> +
> +       .compare_super           = compare_bcache,
> +
> +       .load_super              = load_bcache,
> +       .store_super             = store_bcache,
> +       .free_super              = free_bcache,
> +       .match_metadata_desc     = match_metadata_desc_bcache,
> +       .container_content       = container_content_bcache,
> +
> +       .external                = 1,
> +       .name                    = "bcache",
> +};
> diff --git a/util.c b/util.c
> index 6985a70..d9e49cf 100644
> --- a/util.c
> +++ b/util.c
> @@ -919,7 +919,7 @@ struct superswitch *superlist[] =
>  {
>        &super0, &super1,
>        &super_ddf, &super_imsm,
> -       &mbr, &gpt,
> +       &mbr, &gpt, &super_bcache,
>        NULL };
>
>  #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-bcache" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [mdadm PATCH] bcache: add bcache superblock
       [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
  2012-05-12  7:38   ` Jack Wang
@ 2012-05-15  0:04   ` Mark Hills
       [not found]     ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
  1 sibling, 1 reply; 6+ messages in thread
From: Mark Hills @ 2012-05-15  0:04 UTC (permalink / raw)
  To: Dan Williams
  Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
	linux-raid-u79uwXL29TY76Z2rM5mHXA,
	linux-bcache-u79uwXL29TY76Z2rM5mHXA

On Fri, 11 May 2012, Dan Williams wrote:

> This is a hybrid proposal for supporting bcache as a md device.
> Somewhat similar to the v1.x metadata format, where array assembly is
> handled in userspace, but managed in the kernel.  In the bcache case it
> is an "external" metadata format, but then the expectation is that the
> kernel "bcache" personality takes over runtime maintenance of the
> metadata.

I am having some trouble with this, can you clarify (perhaps by example) 
how to create a pairing of a cache and backing device?

I tried creating directly:

  # mdadm --create /dev/md0 --level=11 --raid-devices=2 /dev/sdb /dev/sdc 
  mdadm: unknown level 11

But as the code in Assemble.c is patched (and not Create.c), I had a hunch 
to format the devices as before then assemble the array:

  # make-bcache -C /dev/sdb
  # make-bcache -B /dev/sdc
  # mdadm -A /dev/md0 /dev/sdb /dev/sdc 
  mdadm: Cannot assemble mbr metadata on /dev/sdb
  mdadm: /dev/sdb has no superblock - assembly aborted

For a hack, I changed Create.c to accept case 11 and leave the chosen 
chunk size. It got further, but then failed with:

  # mdadm --create /dev/md/bcache --level=11 --raid-devices=2 /dev/sdb /dev/sdc 
  mdadm: /dev/sdb appears to be part of a raid array:
      level=raid0 devices=0 ctime=Thu Jan  1 01:00:00 1970
  mdadm: partition table exists on /dev/sdb but will be lost or
         meaningless after creating array
  mdadm: /dev/sdc appears to be part of a raid array:
      level=raid0 devices=0 ctime=Thu Jan  1 01:00:00 1970
  mdadm: partition table exists on /dev/sdc but will be lost or
         meaningless after creating array
  mdadm: largest drive (/dev/sdc) exceeds size (117155216K) by more than 1%
  Continue creating array? y
  mdadm: Defaulting to version 1.2 metadata
  mdadm: RUN_ARRAY failed: Cannot allocate memory

Maybe I missed something basic here, but I'm afraid can't see what? If 
not, hopefully this information is useful.

I use bcache v13 patches and the MD conversion, and this patch to mdadm.

  $ lsmod | grep bcache
  md_bcache               3202  0 
  bcache                138187  1 md_bcache
  md_mod                 88671  5 md_bcache,raid456,raid1

  $ uname -a
  Linux stax 3.4.0-rc7-mh+ #190 SMP PREEMPT Sun May 13 22:36:17 BST 2012 i686 Intel(R) Pentium(R) D CPU 2.80GHz GenuineIntel GNU/Linux

Thanks

-- 
Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [mdadm PATCH] bcache: add bcache superblock
       [not found]     ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
@ 2012-05-15 16:59       ` Dan Williams
       [not found]         ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Dan Williams @ 2012-05-15 16:59 UTC (permalink / raw)
  To: Mark Hills
  Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
	linux-raid-u79uwXL29TY76Z2rM5mHXA,
	linux-bcache-u79uwXL29TY76Z2rM5mHXA

On Mon, May 14, 2012 at 5:04 PM, Mark Hills <mark-UrrBsZIrrsb10XsdtD+oqA@public.gmane.org> wrote:
> On Fri, 11 May 2012, Dan Williams wrote:
>
>> This is a hybrid proposal for supporting bcache as a md device.
>> Somewhat similar to the v1.x metadata format, where array assembly is
>> handled in userspace, but managed in the kernel.  In the bcache case it
>> is an "external" metadata format, but then the expectation is that the
>> kernel "bcache" personality takes over runtime maintenance of the
>> metadata.
>
> I am having some trouble with this, can you clarify (perhaps by example)
> how to create a pairing of a cache and backing device?
>
> I tried creating directly:
>
>  # mdadm --create /dev/md0 --level=11 --raid-devices=2 /dev/sdb /dev/sdc
>  mdadm: unknown level 11
>
> But as the code in Assemble.c is patched (and not Create.c), I had a hunch
> to format the devices as before then assemble the array:

Yeah, "create" support is not there yet.

>  # make-bcache -C /dev/sdb
>  # make-bcache -B /dev/sdc
>  # mdadm -A /dev/md0 /dev/sdb /dev/sdc
>  mdadm: Cannot assemble mbr metadata on /dev/sdb

I should have been more explicit in the changelog.  This current patch
was only tested to assemble an existing bcache configuration.  I.e. it
assumes the backing device has been attached to the cache set at least
once.  By default make-bcache always creates a new cache-set id per
invocation.  So in the above example it won't find sdb and sdc belong
to the same md device because the cache-set id's differ.  You can
verify this with mdadm -E.

[..]
>
> Maybe I missed something basic here, but I'm afraid can't see what? If
> not, hopefully this information is useful.
>
> I use bcache v13 patches and the MD conversion, and this patch to mdadm.

For reference:

[root@fedora-virt ~]# mdadm -E /dev/vd[bc]
/dev/vdb:
       Magic : <bcache>
     Version : 3
        Role : cache
    Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
  Cache Devs : 1
 Device UUID : a1b25e26:5a4c4db6:2716c1aa:eba44ec7
       Flags : sync
      Policy : lru
       Label :
 Device Size : 16776192 (8.00 GiB 8.59 GB)
 Bucket Size : 1024
 Num Buckets : 16384
    this dev : 0
First Bucket : 1
    Checksum : 45fa97cda53b0b6 correct
/dev/vdc:
       Magic : <bcache>
     Version : 1
        Role : backing-device
    Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
  Cache Devs : 1
 Device UUID : 7045ff87:aa4b13c4:670d8bb2:0b4ad628
       Flags : sync
       State : clean
        Mode : writeback
       Label :
 Device Size : 16776192 (8.00 GiB 8.59 GB)
 Bucket Size : 1024
 Num Buckets : 16384
    this dev : 0
First Bucket : 1
    Checksum : f36910e51451d91c correct
[root@fedora-virt ~]# mdadm -Avvv /dev/md/bcache /dev/vd[bc]
mdadm: looking for devices for /dev/md/bcache
mdadm: /dev/vdb is identified as a member of /dev/md/bcache, slot 0.
mdadm: /dev/vdc is identified as a member of /dev/md/bcache, slot 1.
mdadm: added /dev/vdb to /dev/md/bcache as 0
mdadm: added /dev/vdc to /dev/md/bcache as 1
mdadm: Container /dev/md/bcache has been assembled with 2 drives
[root@fedora-virt ~]# cat /proc/mdstat
Personalities : [bcache]
md126 : active bcache vdb[1] vdc[0]
      8388096 blocks super external:/md127/0 8388096k cache-blocks

md127 : inactive vdc[1](S) vdb[0](S)
      16 blocks super external:bcache

unused devices: <none>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [mdadm PATCH] bcache: add bcache superblock
       [not found]         ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2012-05-15 21:16           ` Mark Hills
       [not found]             ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Hills @ 2012-05-15 21:16 UTC (permalink / raw)
  To: Dan Williams
  Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
	linux-raid-u79uwXL29TY76Z2rM5mHXA,
	linux-bcache-u79uwXL29TY76Z2rM5mHXA

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3396 bytes --]

On Tue, 15 May 2012, Dan Williams wrote:

> On Mon, May 14, 2012 at 5:04 PM, Mark Hills <mark-UrrBsZIrrsb10XsdtD+oqA@public.gmane.org> wrote:
> > On Fri, 11 May 2012, Dan Williams wrote:
> >
> >> This is a hybrid proposal for supporting bcache as a md device.
> >> Somewhat similar to the v1.x metadata format, where array assembly is
> >> handled in userspace, but managed in the kernel.  In the bcache case it
> >> is an "external" metadata format, but then the expectation is that the
> >> kernel "bcache" personality takes over runtime maintenance of the
> >> metadata.
> >
> > I am having some trouble with this, can you clarify (perhaps by example)
> > how to create a pairing of a cache and backing device?
[...]
> >  # make-bcache -C /dev/sdb
> >  # make-bcache -B /dev/sdc
> >  # mdadm -A /dev/md0 /dev/sdb /dev/sdc
> >  mdadm: Cannot assemble mbr metadata on /dev/sdb
> 
> I should have been more explicit in the changelog.  This current patch
> was only tested to assemble an existing bcache configuration.  I.e. it
> assumes the backing device has been attached to the cache set at least
> once.  By default make-bcache always creates a new cache-set id per
> invocation.  So in the above example it won't find sdb and sdc belong
> to the same md device because the cache-set id's differ.  You can
> verify this with mdadm -E.

Thanks for explaining. I suppose this is just expecially awkward at the 
moment because attaching the devices can't be done without a different 
kernel.

So it will be good to see the "create" support, when it is ready.

> [..]
> >
> > Maybe I missed something basic here, but I'm afraid can't see what? If
> > not, hopefully this information is useful.
> >
> > I use bcache v13 patches and the MD conversion, and this patch to mdadm.
> 
> For reference:
> 
> [root@fedora-virt ~]# mdadm -E /dev/vd[bc]
> /dev/vdb:
>        Magic : <bcache>
>      Version : 3
>         Role : cache
>     Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
>   Cache Devs : 1
>  Device UUID : a1b25e26:5a4c4db6:2716c1aa:eba44ec7
>        Flags : sync
>       Policy : lru
>        Label :
>  Device Size : 16776192 (8.00 GiB 8.59 GB)
>  Bucket Size : 1024
>  Num Buckets : 16384
>     this dev : 0
> First Bucket : 1
>     Checksum : 45fa97cda53b0b6 correct
> /dev/vdc:
>        Magic : <bcache>
>      Version : 1
>         Role : backing-device
>     Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1
>   Cache Devs : 1
>  Device UUID : 7045ff87:aa4b13c4:670d8bb2:0b4ad628
>        Flags : sync
>        State : clean
>         Mode : writeback
>        Label :
>  Device Size : 16776192 (8.00 GiB 8.59 GB)
>  Bucket Size : 1024
>  Num Buckets : 16384
>     this dev : 0
> First Bucket : 1
>     Checksum : f36910e51451d91c correct

Presumably there's a little more too this, as I can't even investigate the 
UIDs in use:

  # make-bcache -C /dev/sdb
  [...]
  # make-bcache -B /dev/sdc
  [...]
  # mdadm -E /dev/sd[bc]
  /dev/sdb:
     MBR Magic : aa55
  /dev/sdc:
     MBR Magic : aa55

I confirmed that I'm using the patched mdadm, but it still doesn't seem to 
recognise the magic.

But, of course, I understand this is all work in progress. So this isn't a 
complaint at all, I am just looking forward to code in this area and happy 
to test where I can.

Thanks

-- 
Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [mdadm PATCH] bcache: add bcache superblock
       [not found]             ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
@ 2012-07-12 15:03               ` Jacek Danecki
  0 siblings, 0 replies; 6+ messages in thread
From: Jacek Danecki @ 2012-07-12 15:03 UTC (permalink / raw)
  To: Mark Hills
  Cc: Dan Williams, neilb-l3A5Bk7waGM,
	koverstreet-hpIqsD4AKlfQT0dZR+AlfA,
	linux-raid-u79uwXL29TY76Z2rM5mHXA,
	linux-bcache-u79uwXL29TY76Z2rM5mHXA

On 05/15/12 23:16, Mark Hills wrote:
> Presumably there's a little more too this, as I can't even investigate the
> UIDs in use:
>
>    # make-bcache -C /dev/sdb
>    [...]
>    # make-bcache -B /dev/sdc
>    [...]
>    # mdadm -E /dev/sd[bc]
>    /dev/sdb:
>       MBR Magic : aa55
>    /dev/sdc:
>       MBR Magic : aa55
>
> I confirmed that I'm using the patched mdadm, but it still doesn't seem to
> recognise the magic.

As I can see the problem could be in mdadm which can recognize mbr superblock
if partition table exists on disk, and then uses load_super_mbr() instead of load_bcache().
You can look at guess_super_type() function in util.c file. See log below.
I've attached small patch, which I used to find out where issue is.
Btw, I've not idea why getinfo_super() for imsm and bcache set info.array.ctime always to 0.
Probably it should be fixed also because this value is used by guess_super_type() function.

		rv = ss->load_super(st, fd, NULL);
		if (rv == 0) {
			struct mdinfo info;
			st->ss->getinfo_super(st, &info, NULL);
			if (bestsuper == -1 ||
			    besttime < info.array.ctime) {
				bestsuper = i;
				besttime = info.array.ctime;
			}
			ss->free_super(st);
		}


  ./mdadm -E /dev/sdb
check 0.90 0x685a60
load super
check 1.x 0x685be0
load super
check ddf 0x686060
load super
check imsm 0x686240
load super
check mbr 0x686400
load super
getinfo_super for mbr bestsuper=-1
set bestsuper to 4
check gpt 0x686580
load super
check bcache 0x686700
load super
load_cache_sb
getinfo_super for bcache bestsuper=4
load super again: mbr 0x45a3c1
load_super: 0x686400 mbr 0x45a3c1
/dev/sdb: 0x686400 mbr 0x45a3c1
    MBR Magic : aa55

To workaround this issue I've destroyed partition table on /dev/sdb,
and now mdadm can examine bcache superblock correctly.

./mdadm -E /dev/sdb
check 0.90 0x685a60
load super
check 1.x 0x685be0
load super
check ddf 0x686060
load super
check imsm 0x686240
load super
check mbr 0x686400
load super
check gpt 0x686580
load super
check bcache 0x686700
load super
load_cache_sb
getinfo_super for bcache bestsuper=-1
set bestsuper to 6
load super again: bcache 0x45bb6b
load_cache_sb
load_super: 0x686700 bcache 0x45bb6b
load_cache_sb
/dev/sdb: 0x686700 bcache 0x45bb6b
        Magic : <bcache>
      Version : 0
         Role : cache
     Set UUID : 1eefbc0e:e4405d53:e6c630a7:83b523c0
   Cache Devs : 1
  Device UUID : 4a9e3223:cb403537:b05209b0:26948206
        Flags :
       Policy : lru
        Label :
  Device Size : 234439680 (111.79 GiB 120.03 GB)
  Bucket Size : 1024
  Num Buckets : 228946
     this dev : 0
First Bucket : 1
     Checksum : 6730f38d517c4db2 correct


diff --git a/Examine.c b/Examine.c
index 5d71e53..5173f4a 100644
--- a/Examine.c
+++ b/Examine.c
@@ -88,10 +88,12 @@ int Examine(struct mddev_dev *devlist, int brief, int export, int scan,
                         if (st) {
                                 err = 1;
                                 st->ignore_hw_compat = 1;
-                               if (!container)
+                               if (!container) {
+                                       printf("load_super: %p %s %p\n", st->ss, st->ss->name, st->ss->load_super);
                                         err = st->ss->load_super(st, fd,
                                                                  (brief||scan) ? NULL
                                                                  :devlist->devname);
+                               }
                                 if (err && st->ss->load_container) {
                                         err = st->ss->load_container(st, fd,
                                                                  (brief||scan) ? NULL
@@ -149,7 +151,7 @@ int Examine(struct mddev_dev *devlist, int brief, int export, int scan,
                                 st->ss->export_examine_super(st);
                         st->ss->free_super(st);
                 } else {
-                       printf("%s:\n",devlist->devname);
+                       printf("%s: %p %s %p\n",devlist->devname, st->ss, st->ss->name, st->ss->load_super);
                         st->ss->examine_super(st, homehost);
                         st->ss->free_super(st);
                 }
diff --git a/super-bcache.c b/super-bcache.c
index ec8f3db..ebee248 100644
--- a/super-bcache.c
+++ b/super-bcache.c
@@ -72,6 +72,7 @@ static int load_cache_sb(struct bcache_super *super, int keep_fd)
         if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0)
                 return ENODEV;

+       printf("load_cache_sb\n");
         return 0;
  }

diff --git a/util.c b/util.c
index d9e49cf..5aaa986 100644
--- a/util.c
+++ b/util.c
@@ -1043,19 +1043,23 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
         for (i=0 ; superlist[i]; i++) {
                 int rv;
                 ss = superlist[i];
+               printf("check %s %p\n", ss->name, ss);
                 if (guess_type == guess_array && ss->add_to_super == NULL)
                         continue;
                 if (guess_type == guess_partitions && ss->add_to_super != NULL)
                         continue;
                 memset(st, 0, sizeof(*st));
                 st->ignore_hw_compat = 1;
+               printf("load super\n");
                 rv = ss->load_super(st, fd, NULL);
                 if (rv == 0) {
                         struct mdinfo info;
+                       printf("getinfo_super for %s bestsuper=%d\n", st->ss->name, bestsuper);
                         st->ss->getinfo_super(st, &info, NULL);
                         if (bestsuper == -1 ||
                             besttime < info.array.ctime) {
                                 bestsuper = i;
+                               printf("set bestsuper to %d\n", i);
                                 besttime = info.array.ctime;
                         }
                         ss->free_super(st);
@@ -1065,6 +1069,7 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type)
                 int rv;
                 memset(st, 0, sizeof(*st));
                 st->ignore_hw_compat = 1;
+               printf("load super again: %s %p\n", superlist[bestsuper]->name, superlist[bestsuper]->load_super);
                 rv = superlist[bestsuper]->load_super(st, fd, NULL);
                 if (rv == 0) {
                         superlist[bestsuper]->free_super(st);
-- 
Jacek

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-07-12 15:03 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-11 20:39 [mdadm PATCH] bcache: add bcache superblock Dan Williams
     [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>
2012-05-12  7:38   ` Jack Wang
2012-05-15  0:04   ` Mark Hills
     [not found]     ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
2012-05-15 16:59       ` Dan Williams
     [not found]         ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2012-05-15 21:16           ` Mark Hills
     [not found]             ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>
2012-07-12 15:03               ` Jacek Danecki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.