All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andreas Hindborg <nmi@metaspace.dk>
To: Jens Axboe <axboe@kernel.dk>, Christoph Hellwig <hch@lst.de>,
	Keith Busch <kbusch@kernel.org>,
	Damien Le Moal <Damien.LeMoal@wdc.com>,
	Bart Van Assche <bvanassche@acm.org>,
	Hannes Reinecke <hare@suse.de>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>
Cc: "Andreas Hindborg" <a.hindborg@samsung.com>,
	"Niklas Cassel" <Niklas.Cassel@wdc.com>,
	"Greg KH" <gregkh@linuxfoundation.org>,
	"Matthew Wilcox" <willy@infradead.org>,
	"Miguel Ojeda" <ojeda@kernel.org>,
	"Alex Gaynor" <alex.gaynor@gmail.com>,
	"Wedson Almeida Filho" <wedsonaf@gmail.com>,
	"Boqun Feng" <boqun.feng@gmail.com>,
	"Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Benno Lossin" <benno.lossin@proton.me>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Chaitanya Kulkarni" <chaitanyak@nvidia.com>,
	"Luis Chamberlain" <mcgrof@kernel.org>,
	"Yexuan Yang" <1182282462@bupt.edu.cn>,
	"Sergio González Collado" <sergio.collado@gmail.com>,
	"Joel Granados" <j.granados@samsung.com>,
	"Pankaj Raghav (Samsung)" <kernel@pankajraghav.com>,
	"Daniel Gomez" <da.gomez@samsung.com>,
	"open list" <linux-kernel@vger.kernel.org>,
	"rust-for-linux@vger.kernel.org" <rust-for-linux@vger.kernel.org>,
	"lsf-pc@lists.linux-foundation.org"
	<lsf-pc@lists.linux-foundation.org>,
	"gost.dev@samsung.com" <gost.dev@samsung.com>
Subject: [RFC PATCH 4/5] rust: block: add rnull, Rust null_blk implementation
Date: Wed, 13 Mar 2024 12:05:11 +0100	[thread overview]
Message-ID: <20240313110515.70088-5-nmi@metaspace.dk> (raw)
In-Reply-To: <20240313110515.70088-1-nmi@metaspace.dk>

From: Andreas Hindborg <a.hindborg@samsung.com>

Signed-off-by: Andreas Hindborg <a.hindborg@samsung.com>
---
 drivers/block/Kconfig  |   4 +
 drivers/block/Makefile |   3 +
 drivers/block/rnull.rs | 323 +++++++++++++++++++++++++++++++++++++++++
 rust/helpers.c         |   1 +
 scripts/Makefile.build |   2 +-
 5 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 drivers/block/rnull.rs

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 5b9d4aaebb81..fb877d4f8ddf 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -354,6 +354,10 @@ config VIRTIO_BLK
 	  This is the virtual block driver for virtio.  It can be used with
           QEMU based VMMs (like KVM or Xen).  Say Y or M.
 
+config BLK_DEV_RUST_NULL
+	tristate "Rust null block driver"
+	depends on RUST
+
 config BLK_DEV_RBD
 	tristate "Rados block device (RBD)"
 	depends on INET && BLOCK
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 101612cba303..1105a2d4fdcb 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -9,6 +9,9 @@
 # needed for trace events
 ccflags-y				+= -I$(src)
 
+obj-$(CONFIG_BLK_DEV_RUST_NULL) += rnull_mod.o
+rnull_mod-y := rnull.o
+
 obj-$(CONFIG_MAC_FLOPPY)	+= swim3.o
 obj-$(CONFIG_BLK_DEV_SWIM)	+= swim_mod.o
 obj-$(CONFIG_BLK_DEV_FD)	+= floppy.o
diff --git a/drivers/block/rnull.rs b/drivers/block/rnull.rs
new file mode 100644
index 000000000000..05fef30e910c
--- /dev/null
+++ b/drivers/block/rnull.rs
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! This is a Rust implementation of the C null block driver.
+//!
+//! Supported features:
+//!
+//! - optional memory backing
+//! - blk-mq interface
+//! - direct completion
+//! - softirq completion
+//! - timer completion
+//!
+//! The driver is configured at module load time by parameters
+//! `param_memory_backed`, `param_capacity_mib`, `param_irq_mode` and
+//! `param_completion_time_nsec!.
+
+use core::ops::Deref;
+
+use kernel::{
+    bindings,
+    block::{
+        bio::Segment,
+        mq::{self, GenDisk, Operations, RequestDataRef, TagSet},
+    },
+    error::Result,
+    folio::*,
+    hrtimer::{RawTimer, TimerCallback},
+    new_mutex, pr_info,
+    prelude::*,
+    sync::{Arc, Mutex},
+    types::{ARef, ForeignOwnable},
+    xarray::XArray,
+};
+
+use kernel::new_spinlock;
+use kernel::CacheAligned;
+use kernel::sync::SpinLock;
+
+module! {
+    type: NullBlkModule,
+    name: "rnull_mod",
+    author: "Andreas Hindborg",
+    license: "GPL v2",
+    params: {
+        param_memory_backed: bool {
+            default: true,
+            permissions: 0,
+            description: "Use memory backing",
+        },
+        // Problems with pin_init when `irq_mode`
+        param_irq_mode: u8 {
+            default: 0,
+            permissions: 0,
+            description: "IRQ Mode (0: None, 1: Soft, 2: Timer)",
+        },
+        param_capacity_mib: u64 {
+            default: 4096,
+            permissions: 0,
+            description: "Device capacity in MiB",
+        },
+        param_completion_time_nsec: u64 {
+            default: 1_000_000,
+            permissions: 0,
+            description: "Completion time in nano seconds for timer mode",
+        },
+        param_block_size: u16 {
+            default: 4096,
+            permissions: 0,
+            description: "Block size in bytes",
+        },
+    },
+}
+
+#[derive(Debug)]
+enum IRQMode {
+    None,
+    Soft,
+    Timer,
+}
+
+impl TryFrom<u8> for IRQMode {
+    type Error = kernel::error::Error;
+
+    fn try_from(value: u8) -> Result<Self> {
+        match value {
+            0 => Ok(Self::None),
+            1 => Ok(Self::Soft),
+            2 => Ok(Self::Timer),
+            _ => Err(kernel::error::code::EINVAL),
+        }
+    }
+}
+
+struct NullBlkModule {
+    _disk: Pin<Box<Mutex<GenDisk<NullBlkDevice>>>>,
+}
+
+fn add_disk(tagset: Arc<TagSet<NullBlkDevice>>) -> Result<GenDisk<NullBlkDevice>> {
+    let block_size = *param_block_size.read();
+    if block_size % 512 != 0 || !(512..=4096).contains(&block_size) {
+        return Err(kernel::error::code::EINVAL);
+    }
+
+    let irq_mode = (*param_irq_mode.read()).try_into()?;
+
+    let queue_data = Box::pin_init(pin_init!(
+        QueueData {
+            tree <- TreeContainer::new(),
+            completion_time_nsec: *param_completion_time_nsec.read(),
+            irq_mode,
+            memory_backed: *param_memory_backed.read(),
+            block_size,
+        }
+    ))?;
+
+    let block_size = queue_data.block_size;
+
+    let mut disk = GenDisk::try_new(tagset, queue_data)?;
+    disk.set_name(format_args!("rnullb{}", 0))?;
+    disk.set_capacity_sectors(*param_capacity_mib.read() << 11);
+    disk.set_queue_logical_block_size(block_size.into());
+    disk.set_queue_physical_block_size(block_size.into());
+    disk.set_rotational(false);
+    Ok(disk)
+}
+
+impl kernel::Module for NullBlkModule {
+    fn init(_module: &'static ThisModule) -> Result<Self> {
+        pr_info!("Rust null_blk loaded\n");
+        let tagset = Arc::pin_init(TagSet::try_new(1, (), 256, 1))?;
+        let disk = Box::pin_init(new_mutex!(add_disk(tagset)?, "nullb:disk"))?;
+
+        disk.lock().add()?;
+
+        Ok(Self { _disk: disk })
+    }
+}
+
+impl Drop for NullBlkModule {
+    fn drop(&mut self) {
+        pr_info!("Dropping rnullb\n");
+    }
+}
+
+struct NullBlkDevice;
+
+type Tree = XArray<Box<UniqueFolio>>;
+type TreeRef<'a> = &'a Tree;
+
+#[pin_data]
+struct TreeContainer {
+    // `XArray` is safe to use without a lock, as it applies internal locking.
+    // However, there are two reasons to use an external lock: a) cache line
+    // contention and b) we don't want to take the lock for each page we
+    // process.
+    //
+    // A: The `XArray` lock (xa_lock) is located on the same cache line as the
+    // xarray data pointer (xa_head). The effect of this arrangement is that
+    // under heavy contention, we often get a cache miss when we try to follow
+    // the data pointer after acquiring the lock. We would rather have consumers
+    // spinning on another lock, so we do not get a miss on xa_head. This issue
+    // can potentially be fixed by padding the C `struct xarray`.
+    //
+    // B: The current `XArray` Rust API requires that we take the `xa_lock` for
+    // each `XArray` operation. This is very inefficient when the lock is
+    // contended and we have many operations to perform. Eventually we should
+    // update the `XArray` API to allow multiple tree operations under a single
+    // lock acquisition. For now, serialize tree access with an external lock.
+    #[pin]
+    tree: CacheAligned<Tree>,
+    #[pin]
+    lock: CacheAligned<SpinLock<()>>,
+}
+
+impl TreeContainer {
+    fn new() -> impl PinInit<Self> {
+        pin_init!(TreeContainer {
+            tree <- CacheAligned::new_initializer(XArray::new(0)),
+            lock <- CacheAligned::new_initializer(new_spinlock!((), "rnullb:mem")),
+        })
+    }
+}
+
+#[pin_data]
+struct QueueData {
+    #[pin]
+    tree: TreeContainer,
+    completion_time_nsec: u64,
+    irq_mode: IRQMode,
+    memory_backed: bool,
+    block_size: u16,
+}
+
+impl NullBlkDevice {
+    #[inline(always)]
+    fn write(tree: TreeRef<'_>, sector: usize, segment: &Segment<'_>) -> Result {
+        let idx = sector >> bindings::PAGE_SECTORS_SHIFT;
+
+        let mut folio = if let Some(page) = tree.get_locked(idx) {
+            page
+        } else {
+            tree.set(idx, Box::try_new(Folio::try_new(0)?)?)?;
+            tree.get_locked(idx).unwrap()
+        };
+
+        segment.copy_to_folio(&mut folio)?;
+
+        Ok(())
+    }
+
+    #[inline(always)]
+    fn read(tree: TreeRef<'_>, sector: usize, segment: &mut Segment<'_>) -> Result {
+        let idx = sector >> bindings::PAGE_SECTORS_SHIFT;
+
+        if let Some(folio) = tree.get_locked(idx) {
+            segment.copy_from_folio(folio.deref())?;
+        }
+
+        Ok(())
+    }
+
+    #[inline(never)]
+    fn transfer(
+        command: bindings::req_op,
+        tree: TreeRef<'_>,
+        sector: usize,
+        segment: &mut Segment<'_>,
+    ) -> Result {
+        match command {
+            bindings::req_op_REQ_OP_WRITE => Self::write(tree, sector, segment)?,
+            bindings::req_op_REQ_OP_READ => Self::read(tree, sector, segment)?,
+            _ => (),
+        }
+        Ok(())
+    }
+}
+
+#[pin_data]
+struct Pdu {
+    #[pin]
+    timer: kernel::hrtimer::Timer<Self>,
+}
+
+impl TimerCallback for Pdu {
+    type Receiver = RequestDataRef<NullBlkDevice>;
+
+    fn run(this: Self::Receiver) {
+        this.request().end_ok();
+    }
+}
+
+kernel::impl_has_timer! {
+    impl HasTimer<Self> for Pdu { self.timer }
+}
+
+#[vtable]
+impl Operations for NullBlkDevice {
+    type RequestData = Pdu;
+    type RequestDataInit = impl PinInit<Pdu>;
+    type QueueData = Pin<Box<QueueData>>;
+    type HwData = ();
+    type TagSetData = ();
+
+    fn new_request_data(
+        _tagset_data: <Self::TagSetData as ForeignOwnable>::Borrowed<'_>,
+    ) -> Self::RequestDataInit {
+        pin_init!( Pdu {
+            timer <- kernel::hrtimer::Timer::new(),
+        })
+    }
+
+    #[inline(always)]
+    fn queue_rq(
+        _hw_data: (),
+        queue_data: &QueueData,
+        rq: ARef<mq::Request<Self>>,
+        _is_last: bool,
+    ) -> Result {
+        rq.start();
+        if queue_data.memory_backed {
+            let guard = queue_data.tree.lock.lock();
+            let tree = queue_data.tree.tree.deref();
+
+            let mut sector = rq.sector();
+            for bio in rq.bio_iter() {
+                for mut segment in bio.segment_iter() {
+                    Self::transfer(rq.command(), tree, sector, &mut segment)?;
+                    sector += segment.len() >> bindings::SECTOR_SHIFT;
+                }
+            }
+
+            drop(guard);
+        }
+
+
+        match queue_data.irq_mode {
+            IRQMode::None => rq.end_ok(),
+            IRQMode::Soft => rq.complete(),
+            IRQMode::Timer => {
+                mq::Request::owned_data_ref(rq).schedule(queue_data.completion_time_nsec)
+            }
+        }
+
+        Ok(())
+    }
+
+    fn commit_rqs(
+        _hw_data: <Self::HwData as ForeignOwnable>::Borrowed<'_>,
+        _queue_data: <Self::QueueData as ForeignOwnable>::Borrowed<'_>,
+    ) {
+    }
+
+    fn complete(rq: &mq::Request<Self>) {
+        rq.end_ok();
+    }
+
+    fn init_hctx(
+        _tagset_data: <Self::TagSetData as ForeignOwnable>::Borrowed<'_>,
+        _hctx_idx: u32,
+    ) -> Result<Self::HwData> {
+        Ok(())
+    }
+}
diff --git a/rust/helpers.c b/rust/helpers.c
index 017fa90366e6..9c8976629e90 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -200,6 +200,7 @@ struct page *rust_helper_folio_page(struct folio *folio, size_t n)
 {
 	return folio_page(folio, n);
 }
+EXPORT_SYMBOL_GPL(rust_helper_folio_page);
 
 loff_t rust_helper_folio_pos(struct folio *folio)
 {
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index dae447a1ad30..f64be2310010 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -262,7 +262,7 @@ $(obj)/%.lst: $(src)/%.c FORCE
 # Compile Rust sources (.rs)
 # ---------------------------------------------------------------------------
 
-rust_allowed_features := new_uninit,offset_of
+rust_allowed_features := new_uninit,offset_of,allocator_api,impl_trait_in_assoc_type
 
 # `--out-dir` is required to avoid temporaries being created by `rustc` in the
 # current working directory, which may be not accessible in the out-of-tree
-- 
2.44.0


  parent reply	other threads:[~2024-03-13 11:06 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-13 11:05 [RFC PATCH 0/5] Rust block device driver API and null block driver Andreas Hindborg
2024-03-13 11:05 ` [RFC PATCH 1/5] rust: block: introduce `kernel::block::mq` module Andreas Hindborg
2024-03-13 23:55   ` Boqun Feng
2024-03-14  8:58     ` Andreas Hindborg
2024-03-14 18:55       ` Miguel Ojeda
2024-03-14 19:22         ` Andreas Hindborg
2024-03-14 19:41           ` Andreas Hindborg
2024-03-14 19:41           ` Miguel Ojeda
2024-03-14 20:56             ` Miguel Ojeda
2024-03-15  7:52             ` Andreas Hindborg
2024-03-15 12:17               ` Ming Lei
2024-03-15 12:46                 ` Andreas Hindborg
2024-03-15 15:24                   ` Ming Lei
2024-03-15 17:49                     ` Andreas Hindborg
2024-03-16 14:48                       ` Ming Lei
2024-03-16 17:27                         ` Andreas Hindborg
2024-03-13 11:05 ` [RFC PATCH 2/5] rust: block: introduce `kernel::block::bio` module Andreas Hindborg
2024-03-13 11:05 ` [RFC PATCH 3/5] rust: block: allow `hrtimer::Timer` in `RequestData` Andreas Hindborg
2024-03-23 10:51   ` Benno Lossin
2024-04-02 12:43     ` Andreas Hindborg
2024-03-13 11:05 ` Andreas Hindborg [this message]
2024-03-23 11:33   ` [RFC PATCH 4/5] rust: block: add rnull, Rust null_blk implementation Benno Lossin
2024-04-02 12:52     ` Andreas Hindborg
2024-04-02 22:35       ` Benno Lossin
2024-04-03  9:47         ` Andreas Hindborg
2024-04-03 10:29           ` Benno Lossin
2024-03-13 11:05 ` [RFC PATCH 5/5] MAINTAINERS: add entry for Rust block device driver API Andreas Hindborg
2024-03-13 18:02 ` [RFC PATCH 0/5] Rust block device driver API and null block driver Bart Van Assche
2024-03-13 18:22   ` Boqun Feng
2024-03-13 19:03     ` Andreas Hindborg
2024-03-13 19:11       ` Bart Van Assche
2024-03-13 19:12   ` Matthew Wilcox
2024-03-14 12:14   ` Philipp Stanner
2024-03-14 17:03     ` Bart Van Assche
2024-03-14 17:16       ` Conor Dooley
2024-03-14 17:43       ` Andreas Hindborg
2024-03-17  2:50 ` Matthew Wilcox
2024-03-17  7:09   ` Andreas Hindborg
2024-03-17 21:34     ` Matthew Wilcox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240313110515.70088-5-nmi@metaspace.dk \
    --to=nmi@metaspace.dk \
    --cc=1182282462@bupt.edu.cn \
    --cc=Damien.LeMoal@wdc.com \
    --cc=Niklas.Cassel@wdc.com \
    --cc=a.hindborg@samsung.com \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=axboe@kernel.dk \
    --cc=benno.lossin@proton.me \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=bvanassche@acm.org \
    --cc=chaitanyak@nvidia.com \
    --cc=da.gomez@samsung.com \
    --cc=gary@garyguo.net \
    --cc=gost.dev@samsung.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=j.granados@samsung.com \
    --cc=kbusch@kernel.org \
    --cc=kernel@pankajraghav.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lsf-pc@lists.linux-foundation.org \
    --cc=mcgrof@kernel.org \
    --cc=ojeda@kernel.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=sergio.collado@gmail.com \
    --cc=wedsonaf@gmail.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.