diff options
Diffstat (limited to 'rust/kernel')
33 files changed, 4296 insertions, 116 deletions
diff --git a/rust/kernel/.gitignore b/rust/kernel/.gitignore new file mode 100644 index 000000000000..6ba39a178f30 --- /dev/null +++ b/rust/kernel/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +/generated_arch_static_branch_asm.rs diff --git a/rust/kernel/alloc/box_ext.rs b/rust/kernel/alloc/box_ext.rs index 9f1c1c489189..7009ad78d4e0 100644 --- a/rust/kernel/alloc/box_ext.rs +++ b/rust/kernel/alloc/box_ext.rs @@ -4,7 +4,7 @@ use super::{AllocError, Flags}; use alloc::boxed::Box; -use core::mem::MaybeUninit; +use core::{mem::MaybeUninit, ptr, result::Result}; /// Extensions to [`Box`]. pub trait BoxExt<T>: Sized { @@ -17,6 +17,24 @@ pub trait BoxExt<T>: Sized { /// /// The allocation may fail, in which case an error is returned. fn new_uninit(flags: Flags) -> Result<Box<MaybeUninit<T>>, AllocError>; + + /// Drops the contents, but keeps the allocation. + /// + /// # Examples + /// + /// ``` + /// use kernel::alloc::{flags, box_ext::BoxExt}; + /// let value = Box::new([0; 32], flags::GFP_KERNEL)?; + /// assert_eq!(*value, [0; 32]); + /// let mut value = Box::drop_contents(value); + /// // Now we can re-use `value`: + /// value.write([1; 32]); + /// // SAFETY: We just wrote to it. + /// let value = unsafe { value.assume_init() }; + /// assert_eq!(*value, [1; 32]); + /// # Ok::<(), Error>(()) + /// ``` + fn drop_contents(this: Self) -> Box<MaybeUninit<T>>; } impl<T> BoxExt<T> for Box<T> { @@ -55,4 +73,17 @@ impl<T> BoxExt<T> for Box<T> { // zero-sized types, we use `NonNull::dangling`. Ok(unsafe { Box::from_raw(ptr) }) } + + fn drop_contents(this: Self) -> Box<MaybeUninit<T>> { + let ptr = Box::into_raw(this); + // SAFETY: `ptr` is valid, because it came from `Box::into_raw`. + unsafe { ptr::drop_in_place(ptr) }; + + // CAST: `MaybeUninit<T>` is a transparent wrapper of `T`. 
+ let ptr = ptr.cast::<MaybeUninit<T>>(); + + // SAFETY: `ptr` is valid for writes, because it came from `Box::into_raw` and it is valid for + // reads, since the pointer came from `Box::into_raw` and the type is `MaybeUninit<T>`. + unsafe { Box::from_raw(ptr) } + } } diff --git a/rust/kernel/cred.rs b/rust/kernel/cred.rs new file mode 100644 index 000000000000..81d67789b16f --- /dev/null +++ b/rust/kernel/cred.rs @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Credentials management. +//! +//! C header: [`include/linux/cred.h`](srctree/include/linux/cred.h). +//! +//! Reference: <https://www.kernel.org/doc/html/latest/security/credentials.html> + +use crate::{ + bindings, + task::Kuid, + types::{AlwaysRefCounted, Opaque}, +}; + +/// Wraps the kernel's `struct cred`. +/// +/// Credentials are used for various security checks in the kernel. +/// +/// Most fields of credentials are immutable. When things have their credentials changed, that +/// happens by replacing the credential instead of changing an existing credential. See the [kernel +/// documentation][ref] for more info on this. +/// +/// # Invariants +/// +/// Instances of this type are always ref-counted, that is, a call to `get_cred` ensures that the +/// allocation remains valid at least until the matching call to `put_cred`. +/// +/// [ref]: https://www.kernel.org/doc/html/latest/security/credentials.html +#[repr(transparent)] +pub struct Credential(Opaque<bindings::cred>); + +// SAFETY: +// - `Credential::dec_ref` can be called from any thread. +// - It is okay to send ownership of `Credential` across thread boundaries. +unsafe impl Send for Credential {} + +// SAFETY: It's OK to access `Credential` through shared references from other threads because +// we're either accessing properties that don't change or that are properly synchronised by C code. 
+unsafe impl Sync for Credential {} + +impl Credential { + /// Creates a reference to a [`Credential`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that `ptr` is valid and remains valid for the lifetime of the + /// returned [`Credential`] reference. + pub unsafe fn from_ptr<'a>(ptr: *const bindings::cred) -> &'a Credential { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `Credential` type being transparent makes the cast ok. + unsafe { &*ptr.cast() } + } + + /// Get the id for this security context. + pub fn get_secid(&self) -> u32 { + let mut secid = 0; + // SAFETY: The invariants of this type ensure that the pointer is valid. + unsafe { bindings::security_cred_getsecid(self.0.get(), &mut secid) }; + secid + } + + /// Returns the effective UID of the given credential. + pub fn euid(&self) -> Kuid { + // SAFETY: By the type invariant, we know that `self.0` is valid. Furthermore, the `euid` + // field of a credential is never changed after initialization, so there is no potential + // for data races. + Kuid::from_raw(unsafe { (*self.0.get()).euid }) + } +} + +// SAFETY: The type invariants guarantee that `Credential` is always ref-counted. +unsafe impl AlwaysRefCounted for Credential { + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_cred(self.0.get()) }; + } + + unsafe fn dec_ref(obj: core::ptr::NonNull<Credential>) { + // SAFETY: The safety requirements guarantee that the refcount is nonzero. The cast is okay + // because `Credential` has the same representation as `struct cred`. 
+ unsafe { bindings::put_cred(obj.cast().as_ptr()) }; + } +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs index 851018eef885..c8199ee079ef 100644 --- a/rust/kernel/device.rs +++ b/rust/kernel/device.rs @@ -51,18 +51,9 @@ impl Device { /// /// It must also be ensured that `bindings::device::release` can be called from any thread. /// While not officially documented, this should be the case for any `struct device`. - pub unsafe fn from_raw(ptr: *mut bindings::device) -> ARef<Self> { - // SAFETY: By the safety requirements, ptr is valid. - // Initially increase the reference count by one to compensate for the final decrement once - // this newly created `ARef<Device>` instance is dropped. - unsafe { bindings::get_device(ptr) }; - - // CAST: `Self` is a `repr(transparent)` wrapper around `bindings::device`. - let ptr = ptr.cast::<Self>(); - - // SAFETY: `ptr` is valid by the safety requirements of this function. By the above call to - // `bindings::get_device` we also own a reference to the underlying `struct device`. - unsafe { ARef::from_raw(ptr::NonNull::new_unchecked(ptr)) } + pub unsafe fn get_device(ptr: *mut bindings::device) -> ARef<Self> { + // SAFETY: By the safety requirements ptr is valid + unsafe { Self::as_ref(ptr) }.into() } /// Obtain the raw `struct device *`. diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 145f5c397009..6f1587a2524e 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -135,8 +135,11 @@ impl Error { /// Returns the error encoded as a pointer. #[allow(dead_code)] pub(crate) fn to_ptr<T>(self) -> *mut T { + #[cfg_attr(target_pointer_width = "32", allow(clippy::useless_conversion))] // SAFETY: `self.0` is a valid error due to its invariant. - unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ } + unsafe { + bindings::ERR_PTR(self.0.into()) as *mut _ + } } /// Returns a string representing the error, if one exists. 
diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index dee5b4b18aec..13a374a5cdb7 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -44,7 +44,7 @@ impl FwFunc { /// /// # fn no_run() -> Result<(), Error> { /// # // SAFETY: *NOT* safe, just for the example to get an `ARef<Device>` instance -/// # let dev = unsafe { Device::from_raw(core::ptr::null_mut()) }; +/// # let dev = unsafe { Device::get_device(core::ptr::null_mut()) }; /// /// let fw = Firmware::request(c_str!("path/to/firmware.bin"), &dev)?; /// let blob = fw.data(); diff --git a/rust/kernel/fs.rs b/rust/kernel/fs.rs new file mode 100644 index 000000000000..0121b38c59e6 --- /dev/null +++ b/rust/kernel/fs.rs @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Kernel file systems. +//! +//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) + +pub mod file; +pub use self::file::{File, LocalFile}; diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs new file mode 100644 index 000000000000..e03dbe14d62a --- /dev/null +++ b/rust/kernel/fs/file.rs @@ -0,0 +1,461 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Files and file descriptors. +//! +//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) and +//! [`include/linux/file.h`](srctree/include/linux/file.h) + +use crate::{ + bindings, + cred::Credential, + error::{code::*, Error, Result}, + types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque}, +}; +use core::ptr; + +/// Flags associated with a [`File`]. +pub mod flags { + /// File is opened in append mode. + pub const O_APPEND: u32 = bindings::O_APPEND; + + /// Signal-driven I/O is enabled. + pub const O_ASYNC: u32 = bindings::FASYNC; + + /// Close-on-exec flag is set. + pub const O_CLOEXEC: u32 = bindings::O_CLOEXEC; + + /// File was created if it didn't already exist. + pub const O_CREAT: u32 = bindings::O_CREAT; + + /// Direct I/O is enabled for this file. 
+ pub const O_DIRECT: u32 = bindings::O_DIRECT; + + /// File must be a directory. + pub const O_DIRECTORY: u32 = bindings::O_DIRECTORY; + + /// Like [`O_SYNC`] except metadata is not synced. + pub const O_DSYNC: u32 = bindings::O_DSYNC; + + /// Ensure that this file is created with the `open(2)` call. + pub const O_EXCL: u32 = bindings::O_EXCL; + + /// Large file size enabled (`off64_t` over `off_t`). + pub const O_LARGEFILE: u32 = bindings::O_LARGEFILE; + + /// Do not update the file last access time. + pub const O_NOATIME: u32 = bindings::O_NOATIME; + + /// File should not be used as process's controlling terminal. + pub const O_NOCTTY: u32 = bindings::O_NOCTTY; + + /// If basename of path is a symbolic link, fail open. + pub const O_NOFOLLOW: u32 = bindings::O_NOFOLLOW; + + /// File is using nonblocking I/O. + pub const O_NONBLOCK: u32 = bindings::O_NONBLOCK; + + /// File is using nonblocking I/O. + /// + /// This is effectively the same flag as [`O_NONBLOCK`] on all architectures + /// except SPARC64. + pub const O_NDELAY: u32 = bindings::O_NDELAY; + + /// Used to obtain a path file descriptor. + pub const O_PATH: u32 = bindings::O_PATH; + + /// Write operations on this file will flush data and metadata. + pub const O_SYNC: u32 = bindings::O_SYNC; + + /// This file is an unnamed temporary regular file. + pub const O_TMPFILE: u32 = bindings::O_TMPFILE; + + /// File should be truncated to length 0. + pub const O_TRUNC: u32 = bindings::O_TRUNC; + + /// Bitmask for access mode flags. + /// + /// # Examples + /// + /// ``` + /// use kernel::fs::file; + /// # fn do_something() {} + /// # let flags = 0; + /// if (flags & file::flags::O_ACCMODE) == file::flags::O_RDONLY { + /// do_something(); + /// } + /// ``` + pub const O_ACCMODE: u32 = bindings::O_ACCMODE; + + /// File is read only. + pub const O_RDONLY: u32 = bindings::O_RDONLY; + + /// File is write only. + pub const O_WRONLY: u32 = bindings::O_WRONLY; + + /// File can be both read and written. 
+ pub const O_RDWR: u32 = bindings::O_RDWR; +} + +/// Wraps the kernel's `struct file`. Thread safe. +/// +/// This represents an open file rather than a file on a filesystem. Processes generally reference +/// open files using file descriptors. However, file descriptors are not the same as files. A file +/// descriptor is just an integer that corresponds to a file, and a single file may be referenced +/// by multiple file descriptors. +/// +/// # Refcounting +/// +/// Instances of this type are reference-counted. The reference count is incremented by the +/// `fget`/`get_file` functions and decremented by `fput`. The Rust type `ARef<File>` represents a +/// pointer that owns a reference count on the file. +/// +/// Whenever a process opens a file descriptor (fd), it stores a pointer to the file in its fd +/// table (`struct files_struct`). This pointer owns a reference count to the file, ensuring the +/// file isn't prematurely deleted while the file descriptor is open. In Rust terminology, the +/// pointers in `struct files_struct` are `ARef<File>` pointers. +/// +/// ## Light refcounts +/// +/// Whenever a process has an fd to a file, it may use something called a "light refcount" as a +/// performance optimization. Light refcounts are acquired by calling `fdget` and released with +/// `fdput`. The idea behind light refcounts is that if the fd is not closed between the calls to +/// `fdget` and `fdput`, then the refcount cannot hit zero during that time, as the `struct +/// files_struct` holds a reference until the fd is closed. This means that it's safe to access the +/// file even if `fdget` does not increment the refcount. +/// +/// The requirement that the fd is not closed during a light refcount applies globally across all +/// threads - not just on the thread using the light refcount. 
For this reason, light refcounts are +/// only used when the `struct files_struct` is not shared with other threads, since this ensures +/// that other unrelated threads cannot suddenly start using the fd and close it. Therefore, +/// calling `fdget` on a shared `struct files_struct` creates a normal refcount instead of a light +/// refcount. +/// +/// Light reference counts must be released with `fdput` before the system call returns to +/// userspace. This means that if you wait until the current system call returns to userspace, then +/// all light refcounts that existed at the time have gone away. +/// +/// ### The file position +/// +/// Each `struct file` has a position integer, which is protected by the `f_pos_lock` mutex. +/// However, if the `struct file` is not shared, then the kernel may avoid taking the lock as a +/// performance optimization. +/// +/// The condition for avoiding the `f_pos_lock` mutex is different from the condition for using +/// `fdget`. With `fdget`, you may avoid incrementing the refcount as long as the current fd table +/// is not shared; it is okay if there are other fd tables that also reference the same `struct +/// file`. However, `fdget_pos` can only avoid taking the `f_pos_lock` if the entire `struct file` +/// is not shared, as different processes with an fd to the same `struct file` share the same +/// position. +/// +/// To represent files that are not thread safe due to this optimization, the [`LocalFile`] type is +/// used. +/// +/// ## Rust references +/// +/// The reference type `&File` is similar to light refcounts: +/// +/// * `&File` references don't own a reference count. They can only exist as long as the reference +/// count stays positive, and can only be created when there is some mechanism in place to ensure +/// this. +/// +/// * The Rust borrow-checker normally ensures this by enforcing that the `ARef<File>` from which +/// a `&File` is created outlives the `&File`. 
+/// +/// * Using the unsafe [`File::from_raw_file`] means that it is up to the caller to ensure that the +/// `&File` only exists while the reference count is positive. +/// +/// * You can think of `fdget` as using an fd to look up an `ARef<File>` in the `struct +/// files_struct` and create an `&File` from it. The "fd cannot be closed" rule is like the Rust +/// rule "the `ARef<File>` must outlive the `&File`". +/// +/// # Invariants +/// +/// * All instances of this type are refcounted using the `f_count` field. +/// * There must not be any active calls to `fdget_pos` on this file that did not take the +/// `f_pos_lock` mutex. +#[repr(transparent)] +pub struct File { + inner: Opaque<bindings::file>, +} + +// SAFETY: This file is known to not have any active `fdget_pos` calls that did not take the +// `f_pos_lock` mutex, so it is safe to transfer it between threads. +unsafe impl Send for File {} + +// SAFETY: This file is known to not have any active `fdget_pos` calls that did not take the +// `f_pos_lock` mutex, so it is safe to access its methods from several threads in parallel. +unsafe impl Sync for File {} + +// SAFETY: The type invariants guarantee that `File` is always ref-counted. This implementation +// makes `ARef<File>` own a normal refcount. +unsafe impl AlwaysRefCounted for File { + #[inline] + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_file(self.as_ptr()) }; + } + + #[inline] + unsafe fn dec_ref(obj: ptr::NonNull<File>) { + // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we + // may drop it. The cast is okay since `File` has the same representation as `struct file`. + unsafe { bindings::fput(obj.cast().as_ptr()) } + } +} + +/// Wraps the kernel's `struct file`. Not thread safe. +/// +/// This type represents a file that is not known to be safe to transfer across thread boundaries. 
+/// To obtain a thread-safe [`File`], use the [`assume_no_fdget_pos`] conversion. +/// +/// See the documentation for [`File`] for more information. +/// +/// # Invariants +/// +/// * All instances of this type are refcounted using the `f_count` field. +/// * If there is an active call to `fdget_pos` that did not take the `f_pos_lock` mutex, then it +/// must be on the same thread as this file. +/// +/// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos +pub struct LocalFile { + inner: Opaque<bindings::file>, +} + +// SAFETY: The type invariants guarantee that `LocalFile` is always ref-counted. This implementation +// makes `ARef<LocalFile>` own a normal refcount. +unsafe impl AlwaysRefCounted for LocalFile { + #[inline] + fn inc_ref(&self) { + // SAFETY: The existence of a shared reference means that the refcount is nonzero. + unsafe { bindings::get_file(self.as_ptr()) }; + } + + #[inline] + unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) { + // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we + // may drop it. The cast relies on `LocalFile` having the same representation as `struct file`. + unsafe { bindings::fput(obj.cast().as_ptr()) } + } +} + +impl LocalFile { + /// Constructs a new `struct file` wrapper from a file descriptor. + /// + /// The file descriptor belongs to the current process, and there might be active local calls + /// to `fdget_pos` on the same file. + /// + /// To obtain an `ARef<File>`, use the [`assume_no_fdget_pos`] function to convert. + /// + /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos + #[inline] + pub fn fget(fd: u32) -> Result<ARef<LocalFile>, BadFdError> { + // SAFETY: FFI call, there are no requirements on `fd`. + let ptr = ptr::NonNull::new(unsafe { bindings::fget(fd) }).ok_or(BadFdError)?; + + // SAFETY: `bindings::fget` created a refcount, and we pass ownership of it to the `ARef`. 
+ // + // INVARIANT: This file is in the fd table on this thread, so either all `fdget_pos` calls + // are on this thread, or the file is shared, in which case `fdget_pos` calls took the + // `f_pos_lock` mutex. + Ok(unsafe { ARef::from_raw(ptr.cast()) }) + } + + /// Creates a reference to a [`LocalFile`] from a valid pointer. + /// + /// # Safety + /// + /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is + /// positive for the duration of 'a. + /// * The caller must ensure that if there is an active call to `fdget_pos` that did not take + /// the `f_pos_lock` mutex, then that call is on the current thread. + #[inline] + pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a LocalFile { + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the + // duration of 'a. The cast is okay because `File` is `repr(transparent)`. + // + // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls. + unsafe { &*ptr.cast() } + } + + /// Assume that there are no active `fdget_pos` calls that prevent us from sharing this file. + /// + /// This makes it safe to transfer this file to other threads. No checks are performed, and + /// using it incorrectly may lead to a data race on the file position if the file is shared + /// with another thread. + /// + /// This method is intended to be used together with [`LocalFile::fget`] when the caller knows + /// statically that there are no `fdget_pos` calls on the current thread. For example, you + /// might use it when calling `fget` from an ioctl, since ioctls usually do not touch the file + /// position. + /// + /// # Safety + /// + /// There must not be any active `fdget_pos` calls on the current thread. 
+ #[inline] + pub unsafe fn assume_no_fdget_pos(me: ARef<LocalFile>) -> ARef<File> { + // INVARIANT: There are no `fdget_pos` calls on the current thread, and by the type + // invariants, if there is a `fdget_pos` call on another thread, then it took the + // `f_pos_lock` mutex. + // + // SAFETY: `LocalFile` and `File` have the same layout. + unsafe { ARef::from_raw(ARef::into_raw(me).cast()) } + } + + /// Returns a raw pointer to the inner C struct. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::file { + self.inner.get() + } + + /// Returns the credentials of the task that originally opened the file. + pub fn cred(&self) -> &Credential { + // SAFETY: It's okay to read the `f_cred` field without synchronization because `f_cred` is + // never changed after initialization of the file. + let ptr = unsafe { (*self.as_ptr()).f_cred }; + + // SAFETY: The signature of this function ensures that the caller will only access the + // returned credential while the file is still valid, and the C side ensures that the + // credential stays valid at least as long as the file. + unsafe { Credential::from_ptr(ptr) } + } + + /// Returns the flags associated with the file. + /// + /// The flags are a combination of the constants in [`flags`]. + #[inline] + pub fn flags(&self) -> u32 { + // This `read_volatile` is intended to correspond to a READ_ONCE call. + // + // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount. + // + // FIXME(read_once): Replace with `read_once` when available on the Rust side. + unsafe { core::ptr::addr_of!((*self.as_ptr()).f_flags).read_volatile() } + } +} + +impl File { + /// Creates a reference to a [`File`] from a valid pointer. + /// + /// # Safety + /// + /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is + /// positive for the duration of 'a. 
+ /// * The caller must ensure that if there are active `fdget_pos` calls on this file, then they + /// took the `f_pos_lock` mutex. + #[inline] + pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a File { + // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the + // duration of 'a. The cast is okay because `File` is `repr(transparent)`. + // + // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls. + unsafe { &*ptr.cast() } + } +} + +// Make LocalFile methods available on File. +impl core::ops::Deref for File { + type Target = LocalFile; + #[inline] + fn deref(&self) -> &LocalFile { + // SAFETY: The caller provides a `&File`, and since it is a reference, it must point at a + // valid file for the desired duration. + // + // By the type invariants, there are no `fdget_pos` calls that did not take the + // `f_pos_lock` mutex. + unsafe { LocalFile::from_raw_file(self as *const File as *const bindings::file) } + } +} + +/// A file descriptor reservation. +/// +/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it, +/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran +/// out of available slots), but commit and drop never fail (and are mutually exclusive). +/// +/// Dropping the reservation happens in the destructor of this type. +/// +/// # Invariants +/// +/// The fd stored in this struct must correspond to a reserved file descriptor of the current task. +pub struct FileDescriptorReservation { + fd: u32, + /// Prevent values of this type from being moved to a different task. + /// + /// The `fd_install` and `put_unused_fd` functions assume that the value of `current` is + /// unchanged since the call to `get_unused_fd_flags`. By adding this marker to this type, we + /// prevent it from being moved across task boundaries, which ensures that `current` does not + /// change while this value exists. 
+ _not_send: NotThreadSafe, +} + +impl FileDescriptorReservation { + /// Creates a new file descriptor reservation. + pub fn get_unused_fd_flags(flags: u32) -> Result<Self> { + // SAFETY: FFI call, there are no safety requirements on `flags`. + let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) }; + if fd < 0 { + return Err(Error::from_errno(fd)); + } + Ok(Self { + fd: fd as u32, + _not_send: NotThreadSafe, + }) + } + + /// Returns the file descriptor number that was reserved. + pub fn reserved_fd(&self) -> u32 { + self.fd + } + + /// Commits the reservation. + /// + /// The previously reserved file descriptor is bound to `file`. This method consumes the + /// [`FileDescriptorReservation`], so it will not be usable after this call. + pub fn fd_install(self, file: ARef<File>) { + // SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`. We have not yet used + // the fd, so it is still valid, and `current` still refers to the same task, as this type + // cannot be moved across task boundaries. + // + // Furthermore, the file pointer is guaranteed to own a refcount by its type invariants, + // and we take ownership of that refcount by not running the destructor below. + // Additionally, the file is known to not have any non-shared `fdget_pos` calls, so even if + // this process starts using the file position, this will not result in a data race on the + // file position. + unsafe { bindings::fd_install(self.fd, file.as_ptr()) }; + + // `fd_install` consumes both the file descriptor and the file reference, so we cannot run + // the destructors. + core::mem::forget(self); + core::mem::forget(file); + } +} + +impl Drop for FileDescriptorReservation { + fn drop(&mut self) { + // SAFETY: By the type invariants of this type, `self.fd` was previously returned by + // `get_unused_fd_flags`. We have not yet used the fd, so it is still valid, and `current` + // still refers to the same task, as this type cannot be moved across task boundaries. 
+ unsafe { bindings::put_unused_fd(self.fd) }; + } +} + +/// Represents the `EBADF` error code. +/// +/// Used for methods that can only fail with `EBADF`. +#[derive(Copy, Clone, Eq, PartialEq)] +pub struct BadFdError; + +impl From<BadFdError> for Error { + #[inline] + fn from(_: BadFdError) -> Error { + EBADF + } +} + +impl core::fmt::Debug for BadFdError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.pad("EBADF") + } +} diff --git a/rust/kernel/generated_arch_static_branch_asm.rs.S b/rust/kernel/generated_arch_static_branch_asm.rs.S new file mode 100644 index 000000000000..2afb638708db --- /dev/null +++ b/rust/kernel/generated_arch_static_branch_asm.rs.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/jump_label.h> + +// Cut here. + +::kernel::concat_literals!(ARCH_STATIC_BRANCH_ASM("{symb} + {off} + {branch}", "{l_yes}")) diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 495c09ebe3a3..a17ac8762d8f 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -213,6 +213,7 @@ use crate::{ alloc::{box_ext::BoxExt, AllocError, Flags}, error::{self, Error}, + sync::Arc, sync::UniqueArc, types::{Opaque, ScopeGuard}, }; @@ -742,6 +743,74 @@ macro_rules! try_init { }; } +/// Asserts that a field on a struct using `#[pin_data]` is marked with `#[pin]`, i.e. that it is +/// structurally pinned. +/// +/// # Example +/// +/// This will succeed: +/// ``` +/// use kernel::assert_pinned; +/// #[pin_data] +/// struct MyStruct { +/// #[pin] +/// some_field: u64, +/// } +/// +/// assert_pinned!(MyStruct, some_field, u64); +/// ``` +/// +/// This will fail: +// TODO: replace with `compile_fail` when supported. +/// ```ignore +/// use kernel::assert_pinned; +/// #[pin_data] +/// struct MyStruct { +/// some_field: u64, +/// } +/// +/// assert_pinned!(MyStruct, some_field, u64); +/// ``` +/// +/// Some uses of the macro may trigger the `can't use generic parameters from outer item` error. 
To +/// work around this, you may pass the `inline` parameter to the macro. The `inline` parameter can +/// only be used when the macro is invoked from a function body. +/// ``` +/// use kernel::assert_pinned; +/// #[pin_data] +/// struct Foo<T> { +/// #[pin] +/// elem: T, +/// } +/// +/// impl<T> Foo<T> { +/// fn project(self: Pin<&mut Self>) -> Pin<&mut T> { +/// assert_pinned!(Foo<T>, elem, T, inline); +/// +/// // SAFETY: The field is structurally pinned. +/// unsafe { self.map_unchecked_mut(|me| &mut me.elem) } +/// } +/// } +/// ``` +#[macro_export] +macro_rules! assert_pinned { + ($ty:ty, $field:ident, $field_ty:ty, inline) => { + let _ = move |ptr: *mut $field_ty| { + // SAFETY: This code is unreachable. + let data = unsafe { <$ty as $crate::init::__internal::HasPinData>::__pin_data() }; + let init = $crate::init::__internal::AlwaysFail::<$field_ty>::new(); + // SAFETY: This code is unreachable. + unsafe { data.$field(ptr, init) }.ok(); + }; + }; + + ($ty:ty, $field:ident, $field_ty:ty) => { + const _: () = { + $crate::assert_pinned!($ty, $field, $field_ty, inline); + }; + }; +} + /// A pin-initializer for the type `T`. /// /// To use this initializer, you will need a suitable memory location that can hold a `T`. This can @@ -1107,11 +1176,17 @@ unsafe impl<T, E> PinInit<T, E> for T { /// Smart pointer that can initialize memory in-place. pub trait InPlaceInit<T>: Sized { + /// Pinned version of `Self`. + /// + /// If a type already implicitly pins its pointee, `Pin<Self>` is unnecessary. In this case use + /// `Self`, otherwise just use `Pin<Self>`. + type PinnedSelf; + /// Use the given pin-initializer to pin-initialize a `T` inside of a new smart pointer of this /// type. /// /// If `T: !Unpin` it will not be able to move afterwards. 
- fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E> + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E> where E: From<AllocError>; @@ -1119,7 +1194,7 @@ pub trait InPlaceInit<T>: Sized { /// type. /// /// If `T: !Unpin` it will not be able to move afterwards. - fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> error::Result<Pin<Self>> + fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> error::Result<Self::PinnedSelf> where Error: From<E>, { @@ -1148,19 +1223,35 @@ pub trait InPlaceInit<T>: Sized { } } +impl<T> InPlaceInit<T> for Arc<T> { + type PinnedSelf = Self; + + #[inline] + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E> + where + E: From<AllocError>, + { + UniqueArc::try_pin_init(init, flags).map(|u| u.into()) + } + + #[inline] + fn try_init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E> + where + E: From<AllocError>, + { + UniqueArc::try_init(init, flags).map(|u| u.into()) + } +} + impl<T> InPlaceInit<T> for Box<T> { + type PinnedSelf = Pin<Self>; + #[inline] - fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E> + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E> where E: From<AllocError>, { - let mut this = <Box<_> as BoxExt<_>>::new_uninit(flags)?; - let slot = this.as_mut_ptr(); - // SAFETY: When init errors/panics, slot will get deallocated but not dropped, - // slot is valid and will not be moved, because we pin it later. - unsafe { init.__pinned_init(slot)? }; - // SAFETY: All fields have been initialized. 
- Ok(unsafe { this.assume_init() }.into()) + <Box<_> as BoxExt<_>>::new_uninit(flags)?.write_pin_init(init) } #[inline] @@ -1168,29 +1259,19 @@ impl<T> InPlaceInit<T> for Box<T> { where E: From<AllocError>, { - let mut this = <Box<_> as BoxExt<_>>::new_uninit(flags)?; - let slot = this.as_mut_ptr(); - // SAFETY: When init errors/panics, slot will get deallocated but not dropped, - // slot is valid. - unsafe { init.__init(slot)? }; - // SAFETY: All fields have been initialized. - Ok(unsafe { this.assume_init() }) + <Box<_> as BoxExt<_>>::new_uninit(flags)?.write_init(init) } } impl<T> InPlaceInit<T> for UniqueArc<T> { + type PinnedSelf = Pin<Self>; + #[inline] - fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Pin<Self>, E> + fn try_pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self::PinnedSelf, E> where E: From<AllocError>, { - let mut this = UniqueArc::new_uninit(flags)?; - let slot = this.as_mut_ptr(); - // SAFETY: When init errors/panics, slot will get deallocated but not dropped, - // slot is valid and will not be moved, because we pin it later. - unsafe { init.__pinned_init(slot)? }; - // SAFETY: All fields have been initialized. - Ok(unsafe { this.assume_init() }.into()) + UniqueArc::new_uninit(flags)?.write_pin_init(init) } #[inline] @@ -1198,13 +1279,67 @@ impl<T> InPlaceInit<T> for UniqueArc<T> { where E: From<AllocError>, { - let mut this = UniqueArc::new_uninit(flags)?; - let slot = this.as_mut_ptr(); + UniqueArc::new_uninit(flags)?.write_init(init) + } +} + +/// Smart pointer containing uninitialized memory and that can write a value. +pub trait InPlaceWrite<T> { + /// The type `Self` turns into when the contents are initialized. + type Initialized; + + /// Use the given initializer to write a value into `self`. + /// + /// Does not drop the current value and considers it as uninitialized memory. 
+ fn write_init<E>(self, init: impl Init<T, E>) -> Result<Self::Initialized, E>; + + /// Use the given pin-initializer to write a value into `self`. + /// + /// Does not drop the current value and considers it as uninitialized memory. + fn write_pin_init<E>(self, init: impl PinInit<T, E>) -> Result<Pin<Self::Initialized>, E>; +} + +impl<T> InPlaceWrite<T> for Box<MaybeUninit<T>> { + type Initialized = Box<T>; + + fn write_init<E>(mut self, init: impl Init<T, E>) -> Result<Self::Initialized, E> { + let slot = self.as_mut_ptr(); // SAFETY: When init errors/panics, slot will get deallocated but not dropped, // slot is valid. unsafe { init.__init(slot)? }; // SAFETY: All fields have been initialized. - Ok(unsafe { this.assume_init() }) + Ok(unsafe { self.assume_init() }) + } + + fn write_pin_init<E>(mut self, init: impl PinInit<T, E>) -> Result<Pin<Self::Initialized>, E> { + let slot = self.as_mut_ptr(); + // SAFETY: When init errors/panics, slot will get deallocated but not dropped, + // slot is valid and will not be moved, because we pin it later. + unsafe { init.__pinned_init(slot)? }; + // SAFETY: All fields have been initialized. + Ok(unsafe { self.assume_init() }.into()) + } +} + +impl<T> InPlaceWrite<T> for UniqueArc<MaybeUninit<T>> { + type Initialized = UniqueArc<T>; + + fn write_init<E>(mut self, init: impl Init<T, E>) -> Result<Self::Initialized, E> { + let slot = self.as_mut_ptr(); + // SAFETY: When init errors/panics, slot will get deallocated but not dropped, + // slot is valid. + unsafe { init.__init(slot)? }; + // SAFETY: All fields have been initialized. + Ok(unsafe { self.assume_init() }) + } + + fn write_pin_init<E>(mut self, init: impl PinInit<T, E>) -> Result<Pin<Self::Initialized>, E> { + let slot = self.as_mut_ptr(); + // SAFETY: When init errors/panics, slot will get deallocated but not dropped, + // slot is valid and will not be moved, because we pin it later. + unsafe { init.__pinned_init(slot)? 
}; + // SAFETY: All fields have been initialized. + Ok(unsafe { self.assume_init() }.into()) } } diff --git a/rust/kernel/init/__internal.rs b/rust/kernel/init/__internal.rs index db3372619ecd..13cefd37512f 100644 --- a/rust/kernel/init/__internal.rs +++ b/rust/kernel/init/__internal.rs @@ -228,3 +228,32 @@ impl OnlyCallFromDrop { Self(()) } } + +/// Initializer that always fails. +/// +/// Used by [`assert_pinned!`]. +/// +/// [`assert_pinned!`]: crate::assert_pinned +pub struct AlwaysFail<T: ?Sized> { + _t: PhantomData<T>, +} + +impl<T: ?Sized> AlwaysFail<T> { + /// Creates a new initializer that always fails. + pub fn new() -> Self { + Self { _t: PhantomData } + } +} + +impl<T: ?Sized> Default for AlwaysFail<T> { + fn default() -> Self { + Self::new() + } +} + +// SAFETY: `__pinned_init` always fails, which is always okay. +unsafe impl<T: ?Sized> PinInit<T, ()> for AlwaysFail<T> { + unsafe fn __pinned_init(self, _slot: *mut T) -> Result<(), ()> { + Err(()) + } +} diff --git a/rust/kernel/jump_label.rs b/rust/kernel/jump_label.rs new file mode 100644 index 000000000000..4e974c768dbd --- /dev/null +++ b/rust/kernel/jump_label.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Logic for static keys. +//! +//! C header: [`include/linux/jump_label.h`](srctree/include/linux/jump_label.h). + +/// Branch based on a static key. +/// +/// Takes three arguments: +/// +/// * `key` - the path to the static variable containing the `static_key`. +/// * `keytyp` - the type of `key`. +/// * `field` - the name of the field of `key` that contains the `static_key`. +/// +/// # Safety +/// +/// The macro must be used with a real static key defined by C. +#[macro_export] +macro_rules! 
static_branch_unlikely { + ($key:path, $keytyp:ty, $field:ident) => {{ + let _key: *const $keytyp = ::core::ptr::addr_of!($key); + let _key: *const $crate::bindings::static_key_false = ::core::ptr::addr_of!((*_key).$field); + let _key: *const $crate::bindings::static_key = _key.cast(); + + #[cfg(not(CONFIG_JUMP_LABEL))] + { + $crate::bindings::static_key_count(_key.cast_mut()) > 0 + } + + #[cfg(CONFIG_JUMP_LABEL)] + $crate::jump_label::arch_static_branch! { $key, $keytyp, $field, false } + }}; +} +pub use static_branch_unlikely; + +/// Assert that the assembly block evaluates to a string literal. +#[cfg(CONFIG_JUMP_LABEL)] +const _: &str = include!(concat!( + env!("OBJTREE"), + "/rust/kernel/generated_arch_static_branch_asm.rs" +)); + +#[macro_export] +#[doc(hidden)] +#[cfg(CONFIG_JUMP_LABEL)] +macro_rules! arch_static_branch { + ($key:path, $keytyp:ty, $field:ident, $branch:expr) => {'my_label: { + $crate::asm!( + include!(concat!(env!("OBJTREE"), "/rust/kernel/generated_arch_static_branch_asm.rs")); + l_yes = label { + break 'my_label true; + }, + symb = sym $key, + off = const ::core::mem::offset_of!($keytyp, $field), + branch = const $crate::jump_label::bool_to_int($branch), + ); + + break 'my_label false; + }}; +} + +#[cfg(CONFIG_JUMP_LABEL)] +pub use arch_static_branch; + +/// A helper used by inline assembly to pass a boolean as a `const` parameter. +/// +/// Using this function instead of a cast lets you assert that the input is a boolean, and not some +/// other type that can also be cast to an integer. 
+#[doc(hidden)] +pub const fn bool_to_int(b: bool) -> i32 { + b as i32 +} diff --git a/rust/kernel/kunit.rs b/rust/kernel/kunit.rs index 0ba77276ae7e..824da0e9738a 100644 --- a/rust/kernel/kunit.rs +++ b/rust/kernel/kunit.rs @@ -18,7 +18,7 @@ pub fn err(args: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_printk( - b"\x013%pA\0".as_ptr() as _, + c"\x013%pA".as_ptr() as _, &args as *const _ as *const c_void, ); } @@ -34,7 +34,7 @@ pub fn info(args: fmt::Arguments<'_>) { #[cfg(CONFIG_PRINTK)] unsafe { bindings::_printk( - b"\x016%pA\0".as_ptr() as _, + c"\x016%pA".as_ptr() as _, &args as *const _ as *const c_void, ); } diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 58ed400198bf..29947bb8593b 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -30,19 +30,26 @@ pub mod alloc; #[cfg(CONFIG_BLOCK)] pub mod block; mod build_assert; +pub mod cred; pub mod device; pub mod error; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] pub mod firmware; +pub mod fs; pub mod init; pub mod ioctl; +pub mod jump_label; #[cfg(CONFIG_KUNIT)] pub mod kunit; +pub mod list; #[cfg(CONFIG_NET)] pub mod net; pub mod page; pub mod prelude; pub mod print; +pub mod rbtree; +pub mod security; +pub mod seq_file; pub mod sizes; mod static_assert; #[doc(hidden)] @@ -51,6 +58,7 @@ pub mod str; pub mod sync; pub mod task; pub mod time; +pub mod tracepoint; pub mod types; pub mod uaccess; pub mod workqueue; @@ -144,3 +152,38 @@ macro_rules! container_of { ptr.sub(offset) as *const $type }} } + +/// Helper for `.rs.S` files. +#[doc(hidden)] +#[macro_export] +macro_rules! concat_literals { + ($( $asm:literal )* ) => { + ::core::concat!($($asm),*) + }; +} + +/// Wrapper around `asm!` configured for use in the kernel. +/// +/// Uses a semicolon to avoid parsing ambiguities, even though this does not match native `asm!` +/// syntax. +// For x86, `asm!` uses intel syntax by default, but we want to use at&t syntax in the kernel. 
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[macro_export] +macro_rules! asm { + ($($asm:expr),* ; $($rest:tt)*) => { + ::core::arch::asm!( $($asm)*, options(att_syntax), $($rest)* ) + }; +} + +/// Wrapper around `asm!` configured for use in the kernel. +/// +/// Uses a semicolon to avoid parsing ambiguities, even though this does not match native `asm!` +/// syntax. +// For non-x86 arches we just pass through to `asm!`. +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +#[macro_export] +macro_rules! asm { + ($($asm:expr),* ; $($rest:tt)*) => { + ::core::arch::asm!( $($asm)*, $($rest)* ) + }; +} diff --git a/rust/kernel/list.rs b/rust/kernel/list.rs new file mode 100644 index 000000000000..5b4aec29eb67 --- /dev/null +++ b/rust/kernel/list.rs @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! A linked list implementation. + +use crate::init::PinInit; +use crate::sync::ArcBorrow; +use crate::types::Opaque; +use core::iter::{DoubleEndedIterator, FusedIterator}; +use core::marker::PhantomData; +use core::ptr; + +mod impl_list_item_mod; +pub use self::impl_list_item_mod::{ + impl_has_list_links, impl_has_list_links_self_ptr, impl_list_item, HasListLinks, HasSelfPtr, +}; + +mod arc; +pub use self::arc::{impl_list_arc_safe, AtomicTracker, ListArc, ListArcSafe, TryNewListArc}; + +mod arc_field; +pub use self::arc_field::{define_list_arc_field_getter, ListArcField}; + +/// A linked list. +/// +/// All elements in this linked list will be [`ListArc`] references to the value. Since a value can +/// only have one `ListArc` (for each pair of prev/next pointers), this ensures that the same +/// prev/next pointers are not used for several linked lists. +/// +/// # Invariants +/// +/// * If the list is empty, then `first` is null. Otherwise, `first` points at the `ListLinks` +/// field of the first element in the list. 
+/// * All prev/next pointers in `ListLinks` fields of items in the list are valid and form a cycle. +/// * For every item in the list, the list owns the associated [`ListArc`] reference and has +/// exclusive access to the `ListLinks` field. +pub struct List<T: ?Sized + ListItem<ID>, const ID: u64 = 0> { + first: *mut ListLinksFields, + _ty: PhantomData<ListArc<T, ID>>, +} + +// SAFETY: This is a container of `ListArc<T, ID>`, and access to the container allows the same +// type of access to the `ListArc<T, ID>` elements. +unsafe impl<T, const ID: u64> Send for List<T, ID> +where + ListArc<T, ID>: Send, + T: ?Sized + ListItem<ID>, +{ +} +// SAFETY: This is a container of `ListArc<T, ID>`, and access to the container allows the same +// type of access to the `ListArc<T, ID>` elements. +unsafe impl<T, const ID: u64> Sync for List<T, ID> +where + ListArc<T, ID>: Sync, + T: ?Sized + ListItem<ID>, +{ +} + +/// Implemented by types where a [`ListArc<Self>`] can be inserted into a [`List`]. +/// +/// # Safety +/// +/// Implementers must ensure that they provide the guarantees documented on methods provided by +/// this trait. +/// +/// [`ListArc<Self>`]: ListArc +pub unsafe trait ListItem<const ID: u64 = 0>: ListArcSafe<ID> { + /// Views the [`ListLinks`] for this value. + /// + /// # Guarantees + /// + /// If there is a previous call to `prepare_to_insert` and there is no call to `post_remove` + /// since the most recent such call, then this returns the same pointer as the one returned by + /// the most recent call to `prepare_to_insert`. + /// + /// Otherwise, the returned pointer points at a read-only [`ListLinks`] with two null pointers. + /// + /// # Safety + /// + /// The provided pointer must point at a valid value. (It need not be in an `Arc`.) + unsafe fn view_links(me: *const Self) -> *mut ListLinks<ID>; + + /// View the full value given its [`ListLinks`] field. + /// + /// Can only be used when the value is in a list. 
+ /// + /// # Guarantees + /// + /// * Returns the same pointer as the one passed to the most recent call to `prepare_to_insert`. + /// * The returned pointer is valid until the next call to `post_remove`. + /// + /// # Safety + /// + /// * The provided pointer must originate from the most recent call to `prepare_to_insert`, or + /// from a call to `view_links` that happened after the most recent call to + /// `prepare_to_insert`. + /// * Since the most recent call to `prepare_to_insert`, the `post_remove` method must not have + /// been called. + unsafe fn view_value(me: *mut ListLinks<ID>) -> *const Self; + + /// This is called when an item is inserted into a [`List`]. + /// + /// # Guarantees + /// + /// The caller is granted exclusive access to the returned [`ListLinks`] until `post_remove` is + /// called. + /// + /// # Safety + /// + /// * The provided pointer must point at a valid value in an [`Arc`]. + /// * Calls to `prepare_to_insert` and `post_remove` on the same value must alternate. + /// * The caller must own the [`ListArc`] for this value. + /// * The caller must not give up ownership of the [`ListArc`] unless `post_remove` has been + /// called after this call to `prepare_to_insert`. + /// + /// [`Arc`]: crate::sync::Arc + unsafe fn prepare_to_insert(me: *const Self) -> *mut ListLinks<ID>; + + /// This undoes a previous call to `prepare_to_insert`. + /// + /// # Guarantees + /// + /// The returned pointer is the pointer that was originally passed to `prepare_to_insert`. + /// + /// # Safety + /// + /// The provided pointer must be the pointer returned by the most recent call to + /// `prepare_to_insert`. + unsafe fn post_remove(me: *mut ListLinks<ID>) -> *const Self; +} + +#[repr(C)] +#[derive(Copy, Clone)] +struct ListLinksFields { + next: *mut ListLinksFields, + prev: *mut ListLinksFields, +} + +/// The prev/next pointers for an item in a linked list. +/// +/// # Invariants +/// +/// The fields are null if and only if this item is not in a list. 
+#[repr(transparent)] +pub struct ListLinks<const ID: u64 = 0> { + // This type is `!Unpin` for aliasing reasons as the pointers are part of an intrusive linked + // list. + inner: Opaque<ListLinksFields>, +} + +// SAFETY: The only way to access/modify the pointers inside of `ListLinks<ID>` is via holding the +// associated `ListArc<T, ID>`. Since that type correctly implements `Send`, it is impossible to +// move an instance of this type to a different thread if the pointees are `!Send`. +unsafe impl<const ID: u64> Send for ListLinks<ID> {} +// SAFETY: The type is opaque so immutable references to a ListLinks are useless. Therefore, it's +// okay to have immutable access to a ListLinks from several threads at once. +unsafe impl<const ID: u64> Sync for ListLinks<ID> {} + +impl<const ID: u64> ListLinks<ID> { + /// Creates a new initializer for this type. + pub fn new() -> impl PinInit<Self> { + // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will + // not be constructed in an `Arc` that already has a `ListArc`. + ListLinks { + inner: Opaque::new(ListLinksFields { + prev: ptr::null_mut(), + next: ptr::null_mut(), + }), + } + } + + /// # Safety + /// + /// `me` must be dereferenceable. + #[inline] + unsafe fn fields(me: *mut Self) -> *mut ListLinksFields { + // SAFETY: The caller promises that the pointer is valid. + unsafe { Opaque::raw_get(ptr::addr_of!((*me).inner)) } + } + + /// # Safety + /// + /// `me` must be dereferenceable. + #[inline] + unsafe fn from_fields(me: *mut ListLinksFields) -> *mut Self { + me.cast() + } +} + +/// Similar to [`ListLinks`], but also contains a pointer to the full value. +/// +/// This type can be used instead of [`ListLinks`] to support lists with trait objects. +#[repr(C)] +pub struct ListLinksSelfPtr<T: ?Sized, const ID: u64 = 0> { + /// The `ListLinks` field inside this value. + /// + /// This is public so that it can be used with `impl_has_list_links!`. 
+ pub inner: ListLinks<ID>, + // UnsafeCell is not enough here because we use `Opaque::uninit` as a dummy value, and + // `ptr::null()` doesn't work for `T: ?Sized`. + self_ptr: Opaque<*const T>, +} + +// SAFETY: The fields of a ListLinksSelfPtr can be moved across thread boundaries. +unsafe impl<T: ?Sized + Send, const ID: u64> Send for ListLinksSelfPtr<T, ID> {} +// SAFETY: The type is opaque so immutable references to a ListLinksSelfPtr are useless. Therefore, +// it's okay to have immutable access to a ListLinks from several threads at once. +// +// Note that `inner` being a public field does not prevent this type from being opaque, since +// `inner` is an opaque type. +unsafe impl<T: ?Sized + Sync, const ID: u64> Sync for ListLinksSelfPtr<T, ID> {} + +impl<T: ?Sized, const ID: u64> ListLinksSelfPtr<T, ID> { + /// The offset from the [`ListLinks`] to the self pointer field. + pub const LIST_LINKS_SELF_PTR_OFFSET: usize = core::mem::offset_of!(Self, self_ptr); + + /// Creates a new initializer for this type. + pub fn new() -> impl PinInit<Self> { + // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will + // not be constructed in an `Arc` that already has a `ListArc`. + Self { + inner: ListLinks { + inner: Opaque::new(ListLinksFields { + prev: ptr::null_mut(), + next: ptr::null_mut(), + }), + }, + self_ptr: Opaque::uninit(), + } + } +} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> List<T, ID> { + /// Creates a new empty list. + pub const fn new() -> Self { + Self { + first: ptr::null_mut(), + _ty: PhantomData, + } + } + + /// Returns whether this list is empty. + pub fn is_empty(&self) -> bool { + self.first.is_null() + } + + /// Add the provided item to the back of the list. + pub fn push_back(&mut self, item: ListArc<T, ID>) { + let raw_item = ListArc::into_raw(item); + // SAFETY: + // * We just got `raw_item` from a `ListArc`, so it's in an `Arc`. 
+ // * Since we have ownership of the `ListArc`, `post_remove` must have been called after + // the most recent call to `prepare_to_insert`, if any. + // * We own the `ListArc`. + // * Removing items from this list is always done using `remove_internal_inner`, which + // calls `post_remove` before giving up ownership. + let list_links = unsafe { T::prepare_to_insert(raw_item) }; + // SAFETY: We have not yet called `post_remove`, so `list_links` is still valid. + let item = unsafe { ListLinks::fields(list_links) }; + + if self.first.is_null() { + self.first = item; + // SAFETY: The caller just gave us ownership of these fields. + // INVARIANT: A linked list with one item should be cyclic. + unsafe { + (*item).next = item; + (*item).prev = item; + } + } else { + let next = self.first; + // SAFETY: By the type invariant, this pointer is valid or null. We just checked that + // it's not null, so it must be valid. + let prev = unsafe { (*next).prev }; + // SAFETY: Pointers in a linked list are never dangling, and the caller just gave us + // ownership of the fields on `item`. + // INVARIANT: This correctly inserts `item` between `prev` and `next`. + unsafe { + (*item).next = next; + (*item).prev = prev; + (*prev).next = item; + (*next).prev = item; + } + } + } + + /// Add the provided item to the front of the list. + pub fn push_front(&mut self, item: ListArc<T, ID>) { + let raw_item = ListArc::into_raw(item); + // SAFETY: + // * We just got `raw_item` from a `ListArc`, so it's in an `Arc`. + // * If this requirement is violated, then the previous caller of `prepare_to_insert` + // violated the safety requirement that they can't give up ownership of the `ListArc` + // until they call `post_remove`. + // * We own the `ListArc`. + // * Removing items from this list is always done using `remove_internal_inner`, which + // calls `post_remove` before giving up ownership. 
+ let list_links = unsafe { T::prepare_to_insert(raw_item) }; + // SAFETY: We have not yet called `post_remove`, so `list_links` is still valid. + let item = unsafe { ListLinks::fields(list_links) }; + + if self.first.is_null() { + // SAFETY: The caller just gave us ownership of these fields. + // INVARIANT: A linked list with one item should be cyclic. + unsafe { + (*item).next = item; + (*item).prev = item; + } + } else { + let next = self.first; + // SAFETY: We just checked that `next` is non-null. + let prev = unsafe { (*next).prev }; + // SAFETY: Pointers in a linked list are never dangling, and the caller just gave us + // ownership of the fields on `item`. + // INVARIANT: This correctly inserts `item` between `prev` and `next`. + unsafe { + (*item).next = next; + (*item).prev = prev; + (*prev).next = item; + (*next).prev = item; + } + } + self.first = item; + } + + /// Removes the last item from this list. + pub fn pop_back(&mut self) -> Option<ListArc<T, ID>> { + if self.first.is_null() { + return None; + } + + // SAFETY: We just checked that the list is not empty. + let last = unsafe { (*self.first).prev }; + // SAFETY: The last item of this list is in this list. + Some(unsafe { self.remove_internal(last) }) + } + + /// Removes the first item from this list. + pub fn pop_front(&mut self) -> Option<ListArc<T, ID>> { + if self.first.is_null() { + return None; + } + + // SAFETY: The first item of this list is in this list. + Some(unsafe { self.remove_internal(self.first) }) + } + + /// Removes the provided item from this list and returns it. + /// + /// This returns `None` if the item is not in the list. (Note that by the safety requirements, + /// this means that the item is not in any list.) + /// + /// # Safety + /// + /// `item` must not be in a different linked list (with the same id). 
+ pub unsafe fn remove(&mut self, item: &T) -> Option<ListArc<T, ID>> { + let mut item = unsafe { ListLinks::fields(T::view_links(item)) }; + // SAFETY: The user provided a reference, and references are never dangling. + // + // As for why this is not a data race, there are two cases: + // + // * If `item` is not in any list, then these fields are read-only and null. + // * If `item` is in this list, then we have exclusive access to these fields since we + // have a mutable reference to the list. + // + // In either case, there's no race. + let ListLinksFields { next, prev } = unsafe { *item }; + + debug_assert_eq!(next.is_null(), prev.is_null()); + if !next.is_null() { + // This is really a no-op, but this ensures that `item` is a raw pointer that was + // obtained without going through a pointer->reference->pointer conversion roundtrip. + // This ensures that the list is valid under the more restrictive strict provenance + // ruleset. + // + // SAFETY: We just checked that `next` is not null, and it's not dangling by the + // list invariants. + unsafe { + debug_assert_eq!(item, (*next).prev); + item = (*next).prev; + } + + // SAFETY: We just checked that `item` is in a list, so the caller guarantees that it + // is in this list. The pointers are in the right order. + Some(unsafe { self.remove_internal_inner(item, next, prev) }) + } else { + None + } + } + + /// Removes the provided item from the list. + /// + /// # Safety + /// + /// `item` must point at an item in this list. + unsafe fn remove_internal(&mut self, item: *mut ListLinksFields) -> ListArc<T, ID> { + // SAFETY: The caller promises that this pointer is not dangling, and there's no data race + // since we have a mutable reference to the list containing `item`. + let ListLinksFields { next, prev } = unsafe { *item }; + // SAFETY: The pointers are ok and in the right order. + unsafe { self.remove_internal_inner(item, next, prev) } + } + + /// Removes the provided item from the list. 
+ /// + /// # Safety + /// + /// The `item` pointer must point at an item in this list, and we must have `(*item).next == + /// next` and `(*item).prev == prev`. + unsafe fn remove_internal_inner( + &mut self, + item: *mut ListLinksFields, + next: *mut ListLinksFields, + prev: *mut ListLinksFields, + ) -> ListArc<T, ID> { + // SAFETY: We have exclusive access to the pointers of items in the list, and the prev/next + // pointers are always valid for items in a list. + // + // INVARIANT: There are three cases: + // * If the list has at least three items, then after removing the item, `prev` and `next` + // will be next to each other. + // * If the list has two items, then the remaining item will point at itself. + // * If the list has one item, then `next == prev == item`, so these writes have no + // effect. The list remains unchanged and `item` is still in the list for now. + unsafe { + (*next).prev = prev; + (*prev).next = next; + } + // SAFETY: We have exclusive access to items in the list. + // INVARIANT: `item` is being removed, so the pointers should be null. + unsafe { + (*item).prev = ptr::null_mut(); + (*item).next = ptr::null_mut(); + } + // INVARIANT: There are three cases: + // * If `item` was not the first item, then `self.first` should remain unchanged. + // * If `item` was the first item and there is another item, then we just updated + // `prev->next` to `next`, which is the new first item, and setting `item->next` to null + // did not modify `prev->next`. + // * If `item` was the only item in the list, then `prev == item`, and we just set + // `item->next` to null, so this correctly sets `first` to null now that the list is + // empty. + if self.first == item { + // SAFETY: The `prev` pointer is the value that `item->prev` had when it was in this + // list, so it must be valid. There is no race since `prev` is still in the list and we + // still have exclusive access to the list. 
+ self.first = unsafe { (*prev).next }; + } + + // SAFETY: `item` used to be in the list, so it is dereferenceable by the type invariants + // of `List`. + let list_links = unsafe { ListLinks::from_fields(item) }; + // SAFETY: Any pointer in the list originates from a `prepare_to_insert` call. + let raw_item = unsafe { T::post_remove(list_links) }; + // SAFETY: The above call to `post_remove` guarantees that we can recreate the `ListArc`. + unsafe { ListArc::from_raw(raw_item) } + } + + /// Moves all items from `other` into `self`. + /// + /// The items of `other` are added to the back of `self`, so the last item of `other` becomes + /// the last item of `self`. + pub fn push_all_back(&mut self, other: &mut List<T, ID>) { + // First, we insert the elements into `self`. At the end, we make `other` empty. + if self.is_empty() { + // INVARIANT: All of the elements in `other` become elements of `self`. + self.first = other.first; + } else if !other.is_empty() { + let other_first = other.first; + // SAFETY: The other list is not empty, so this pointer is valid. + let other_last = unsafe { (*other_first).prev }; + let self_first = self.first; + // SAFETY: The self list is not empty, so this pointer is valid. + let self_last = unsafe { (*self_first).prev }; + + // SAFETY: We have exclusive access to both lists, so we can update the pointers. + // INVARIANT: This correctly sets the pointers to merge both lists. We do not need to + // update `self.first` because the first element of `self` does not change. + unsafe { + (*self_first).prev = other_last; + (*other_last).next = self_first; + (*self_last).next = other_first; + (*other_first).prev = self_last; + } + } + + // INVARIANT: The other list is now empty, so update its pointer. + other.first = ptr::null_mut(); + } + + /// Returns a cursor to the first element of the list. + /// + /// If the list is empty, this returns `None`. 
+ pub fn cursor_front(&mut self) -> Option<Cursor<'_, T, ID>> { + if self.first.is_null() { + None + } else { + Some(Cursor { + current: self.first, + list: self, + }) + } + } + + /// Creates an iterator over the list. + pub fn iter(&self) -> Iter<'_, T, ID> { + // INVARIANT: If the list is empty, both pointers are null. Otherwise, both pointers point + // at the first element of the same list. + Iter { + current: self.first, + stop: self.first, + _ty: PhantomData, + } + } +} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> Default for List<T, ID> { + fn default() -> Self { + List::new() + } +} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> Drop for List<T, ID> { + fn drop(&mut self) { + while let Some(item) = self.pop_front() { + drop(item); + } + } +} + +/// An iterator over a [`List`]. +/// +/// # Invariants +/// +/// * There must be a [`List`] that is immutably borrowed for the duration of `'a`. +/// * The `current` pointer is null or points at a value in that [`List`]. +/// * The `stop` pointer is equal to the `first` field of that [`List`]. +#[derive(Clone)] +pub struct Iter<'a, T: ?Sized + ListItem<ID>, const ID: u64 = 0> { + current: *mut ListLinksFields, + stop: *mut ListLinksFields, + _ty: PhantomData<&'a ListArc<T, ID>>, +} + +impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> Iterator for Iter<'a, T, ID> { + type Item = ArcBorrow<'a, T>; + + fn next(&mut self) -> Option<ArcBorrow<'a, T>> { + if self.current.is_null() { + return None; + } + + let current = self.current; + + // SAFETY: We just checked that `current` is not null, so it is in a list, and hence not + // dangling. There's no race because the iterator holds an immutable borrow to the list. + let next = unsafe { (*current).next }; + // INVARIANT: If `current` was the last element of the list, then this updates it to null. + // Otherwise, we update it to the next element. 
+ self.current = if next != self.stop { + next + } else { + ptr::null_mut() + }; + + // SAFETY: The `current` pointer points at a value in the list. + let item = unsafe { T::view_value(ListLinks::from_fields(current)) }; + // SAFETY: + // * All values in a list are stored in an `Arc`. + // * The value cannot be removed from the list for the duration of the lifetime annotated + // on the returned `ArcBorrow`, because removing it from the list would require mutable + // access to the list. However, the `ArcBorrow` is annotated with the iterator's + // lifetime, and the list is immutably borrowed for that lifetime. + // * Values in a list never have a `UniqueArc` reference. + Some(unsafe { ArcBorrow::from_raw(item) }) + } +} + +/// A cursor into a [`List`]. +/// +/// # Invariants +/// +/// The `current` pointer points at a value in `list`. +pub struct Cursor<'a, T: ?Sized + ListItem<ID>, const ID: u64 = 0> { + current: *mut ListLinksFields, + list: &'a mut List<T, ID>, +} + +impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> Cursor<'a, T, ID> { + /// Access the current element of this cursor. + pub fn current(&self) -> ArcBorrow<'_, T> { + // SAFETY: The `current` pointer points at a value in the list. + let me = unsafe { T::view_value(ListLinks::from_fields(self.current)) }; + // SAFETY: + // * All values in a list are stored in an `Arc`. + // * The value cannot be removed from the list for the duration of the lifetime annotated + // on the returned `ArcBorrow`, because removing it from the list would require mutable + // access to the cursor or the list. However, the `ArcBorrow` holds an immutable borrow + // on the cursor, which in turn holds a mutable borrow on the list, so any such + // mutable access requires first releasing the immutable borrow on the cursor. + // * Values in a list never have a `UniqueArc` reference, because the list has a `ListArc` + // reference, and `UniqueArc` references must be unique. 
+ unsafe { ArcBorrow::from_raw(me) } + } + + /// Move the cursor to the next element. + pub fn next(self) -> Option<Cursor<'a, T, ID>> { + // SAFETY: The `current` field is always in a list. + let next = unsafe { (*self.current).next }; + + if next == self.list.first { + None + } else { + // INVARIANT: Since `self.current` is in the `list`, its `next` pointer is also in the + // `list`. + Some(Cursor { + current: next, + list: self.list, + }) + } + } + + /// Move the cursor to the previous element. + pub fn prev(self) -> Option<Cursor<'a, T, ID>> { + // SAFETY: The `current` field is always in a list. + let prev = unsafe { (*self.current).prev }; + + if self.current == self.list.first { + None + } else { + // INVARIANT: Since `self.current` is in the `list`, its `prev` pointer is also in the + // `list`. + Some(Cursor { + current: prev, + list: self.list, + }) + } + } + + /// Remove the current element from the list. + pub fn remove(self) -> ListArc<T, ID> { + // SAFETY: The `current` pointer always points at a member of the list. + unsafe { self.list.remove_internal(self.current) } + } +} + +impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> FusedIterator for Iter<'a, T, ID> {} + +impl<'a, T: ?Sized + ListItem<ID>, const ID: u64> IntoIterator for &'a List<T, ID> { + type IntoIter = Iter<'a, T, ID>; + type Item = ArcBorrow<'a, T>; + + fn into_iter(self) -> Iter<'a, T, ID> { + self.iter() + } +} + +/// An owning iterator into a [`List`]. 
+pub struct IntoIter<T: ?Sized + ListItem<ID>, const ID: u64 = 0> { + list: List<T, ID>, +} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> Iterator for IntoIter<T, ID> { + type Item = ListArc<T, ID>; + + fn next(&mut self) -> Option<ListArc<T, ID>> { + self.list.pop_front() + } +} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> FusedIterator for IntoIter<T, ID> {} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> DoubleEndedIterator for IntoIter<T, ID> { + fn next_back(&mut self) -> Option<ListArc<T, ID>> { + self.list.pop_back() + } +} + +impl<T: ?Sized + ListItem<ID>, const ID: u64> IntoIterator for List<T, ID> { + type IntoIter = IntoIter<T, ID>; + type Item = ListArc<T, ID>; + + fn into_iter(self) -> IntoIter<T, ID> { + IntoIter { list: self } + } +} diff --git a/rust/kernel/list/arc.rs b/rust/kernel/list/arc.rs new file mode 100644 index 000000000000..d801b9dc6291 --- /dev/null +++ b/rust/kernel/list/arc.rs @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! A wrapper around `Arc` for linked lists. + +use crate::alloc::{AllocError, Flags}; +use crate::prelude::*; +use crate::sync::{Arc, ArcBorrow, UniqueArc}; +use core::marker::{PhantomPinned, Unsize}; +use core::ops::Deref; +use core::pin::Pin; +use core::sync::atomic::{AtomicBool, Ordering}; + +/// Declares that this type has some way to ensure that there is exactly one `ListArc` instance for +/// this id. +/// +/// Types that implement this trait should include some kind of logic for keeping track of whether +/// a [`ListArc`] exists or not. We refer to this logic as "the tracking inside `T`". +/// +/// We allow the case where the tracking inside `T` thinks that a [`ListArc`] exists, but actually, +/// there isn't a [`ListArc`]. However, we do not allow the opposite situation where a [`ListArc`] +/// exists, but the tracking thinks it doesn't. 
This is because the former can at most result in us +/// failing to create a [`ListArc`] when the operation could succeed, whereas the latter can result +/// in the creation of two [`ListArc`] references. Only the latter situation can lead to memory +/// safety issues. +/// +/// A consequence of the above is that you may implement the tracking inside `T` by not actually +/// keeping track of anything. To do this, you always claim that a [`ListArc`] exists, even if +/// there isn't one. This implementation is allowed by the above rule, but it means that +/// [`ListArc`] references can only be created if you have ownership of *all* references to the +/// refcounted object, as you otherwise have no way of knowing whether a [`ListArc`] exists. +pub trait ListArcSafe<const ID: u64 = 0> { + /// Informs the tracking inside this type that it now has a [`ListArc`] reference. + /// + /// This method may be called even if the tracking inside this type thinks that a `ListArc` + /// reference exists. (But only if that's not actually the case.) + /// + /// # Safety + /// + /// Must not be called if a [`ListArc`] already exists for this value. + unsafe fn on_create_list_arc_from_unique(self: Pin<&mut Self>); + + /// Informs the tracking inside this type that there is no [`ListArc`] reference anymore. + /// + /// # Safety + /// + /// Must only be called if there is no [`ListArc`] reference, but the tracking thinks there is. + unsafe fn on_drop_list_arc(&self); +} + +/// Declares that this type is able to safely attempt to create `ListArc`s at any time. +/// +/// # Safety +/// +/// The guarantees of `try_new_list_arc` must be upheld. +pub unsafe trait TryNewListArc<const ID: u64 = 0>: ListArcSafe<ID> { + /// Attempts to convert an `Arc<Self>` into a `ListArc<Self>`. Returns `true` if the + /// conversion was successful. + /// + /// This method should not be called directly. Use [`ListArc::try_from_arc`] instead. 
+ /// + /// # Guarantees + /// + /// If this call returns `true`, then there is no [`ListArc`] pointing to this value. + /// Additionally, this call will have transitioned the tracking inside `Self` from not thinking + /// that a [`ListArc`] exists, to thinking that a [`ListArc`] exists. + fn try_new_list_arc(&self) -> bool; +} + +/// Declares that this type supports [`ListArc`]. +/// +/// This macro supports a few different strategies for implementing the tracking inside the type: +/// +/// * The `untracked` strategy does not actually keep track of whether a [`ListArc`] exists. When +/// using this strategy, the only way to create a [`ListArc`] is using a [`UniqueArc`]. +/// * The `tracked_by` strategy defers the tracking to a field of the struct. The user must specify +/// which field to defer the tracking to. The field must implement [`ListArcSafe`]. If the field +/// implements [`TryNewListArc`], then the type will also implement [`TryNewListArc`]. +/// +/// The `tracked_by` strategy is usually used by deferring to a field of type +/// [`AtomicTracker`]. However, it is also possible to defer the tracking to another struct +/// that also uses this macro. +#[macro_export] +macro_rules! impl_list_arc_safe { + (impl$({$($generics:tt)*})? ListArcSafe<$num:tt> for $t:ty { untracked; } $($rest:tt)*) => { + impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t { + unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) {} + unsafe fn on_drop_list_arc(&self) {} + } + $crate::list::impl_list_arc_safe! { $($rest)* } + }; + + (impl$({$($generics:tt)*})? ListArcSafe<$num:tt> for $t:ty { + tracked_by $field:ident : $fty:ty; + } $($rest:tt)*) => { + impl$(<$($generics)*>)? $crate::list::ListArcSafe<$num> for $t { + unsafe fn on_create_list_arc_from_unique(self: ::core::pin::Pin<&mut Self>) { + $crate::assert_pinned!($t, $field, $fty, inline); + + // SAFETY: This field is structurally pinned as per the above assertion. 
+ let field = unsafe { + ::core::pin::Pin::map_unchecked_mut(self, |me| &mut me.$field) + }; + // SAFETY: The caller promises that there is no `ListArc`. + unsafe { + <$fty as $crate::list::ListArcSafe<$num>>::on_create_list_arc_from_unique(field) + }; + } + unsafe fn on_drop_list_arc(&self) { + // SAFETY: The caller promises that there is no `ListArc` reference, and also + // promises that the tracking thinks there is a `ListArc` reference. + unsafe { <$fty as $crate::list::ListArcSafe<$num>>::on_drop_list_arc(&self.$field) }; + } + } + unsafe impl$(<$($generics)*>)? $crate::list::TryNewListArc<$num> for $t + where + $fty: TryNewListArc<$num>, + { + fn try_new_list_arc(&self) -> bool { + <$fty as $crate::list::TryNewListArc<$num>>::try_new_list_arc(&self.$field) + } + } + $crate::list::impl_list_arc_safe! { $($rest)* } + }; + + () => {}; +} +pub use impl_list_arc_safe; + +/// A wrapper around [`Arc`] that's guaranteed unique for the given id. +/// +/// The `ListArc` type can be thought of as a special reference to a refcounted object that owns the +/// permission to manipulate the `next`/`prev` pointers stored in the refcounted object. By ensuring +/// that each object has only one `ListArc` reference, the owner of that reference is assured +/// exclusive access to the `next`/`prev` pointers. When a `ListArc` is inserted into a [`List`], +/// the [`List`] takes ownership of the `ListArc` reference. +/// +/// There are various strategies to ensuring that a value has only one `ListArc` reference. The +/// simplest is to convert a [`UniqueArc`] into a `ListArc`. However, the refcounted object could +/// also keep track of whether a `ListArc` exists using a boolean, which could allow for the +/// creation of new `ListArc` references from an [`Arc`] reference. 
Whatever strategy is used, the +/// relevant tracking is referred to as "the tracking inside `T`", and the [`ListArcSafe`] trait +/// (and its subtraits) are used to update the tracking when a `ListArc` is created or destroyed. +/// +/// Note that we allow the case where the tracking inside `T` thinks that a `ListArc` exists, but +/// actually, there isn't a `ListArc`. However, we do not allow the opposite situation where a +/// `ListArc` exists, but the tracking thinks it doesn't. This is because the former can at most +/// result in us failing to create a `ListArc` when the operation could succeed, whereas the latter +/// can result in the creation of two `ListArc` references. +/// +/// While this `ListArc` is unique for the given id, there still might exist normal `Arc` +/// references to the object. +/// +/// # Invariants +/// +/// * Each reference counted object has at most one `ListArc` for each value of `ID`. +/// * The tracking inside `T` is aware that a `ListArc` reference exists. +/// +/// [`List`]: crate::list::List +#[repr(transparent)] +pub struct ListArc<T, const ID: u64 = 0> +where + T: ListArcSafe<ID> + ?Sized, +{ + arc: Arc<T>, +} + +impl<T: ListArcSafe<ID>, const ID: u64> ListArc<T, ID> { + /// Constructs a new reference counted instance of `T`. + #[inline] + pub fn new(contents: T, flags: Flags) -> Result<Self, AllocError> { + Ok(Self::from(UniqueArc::new(contents, flags)?)) + } + + /// Use the given initializer to in-place initialize a `T`. + /// + /// If `T: !Unpin` it will not be able to move afterwards. + // We don't implement `InPlaceInit` because `ListArc` is implicitly pinned. This is similar to + // what we do for `Arc`. + #[inline] + pub fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> Result<Self, E> + where + E: From<AllocError>, + { + Ok(Self::from(UniqueArc::try_pin_init(init, flags)?)) + } + + /// Use the given initializer to in-place initialize a `T`. 
+ /// + /// This is equivalent to [`ListArc<T>::pin_init`], since a [`ListArc`] is always pinned. + #[inline] + pub fn init<E>(init: impl Init<T, E>, flags: Flags) -> Result<Self, E> + where + E: From<AllocError>, + { + Ok(Self::from(UniqueArc::try_init(init, flags)?)) + } +} + +impl<T, const ID: u64> From<UniqueArc<T>> for ListArc<T, ID> +where + T: ListArcSafe<ID> + ?Sized, +{ + /// Convert a [`UniqueArc`] into a [`ListArc`]. + #[inline] + fn from(unique: UniqueArc<T>) -> Self { + Self::from(Pin::from(unique)) + } +} + +impl<T, const ID: u64> From<Pin<UniqueArc<T>>> for ListArc<T, ID> +where + T: ListArcSafe<ID> + ?Sized, +{ + /// Convert a pinned [`UniqueArc`] into a [`ListArc`]. + #[inline] + fn from(mut unique: Pin<UniqueArc<T>>) -> Self { + // SAFETY: We have a `UniqueArc`, so there is no `ListArc`. + unsafe { T::on_create_list_arc_from_unique(unique.as_mut()) }; + let arc = Arc::from(unique); + // SAFETY: We just called `on_create_list_arc_from_unique` on an arc without a `ListArc`, + // so we can create a `ListArc`. + unsafe { Self::transmute_from_arc(arc) } + } +} + +impl<T, const ID: u64> ListArc<T, ID> +where + T: ListArcSafe<ID> + ?Sized, +{ + /// Creates two `ListArc`s from a [`UniqueArc`]. + /// + /// The two ids must be different. + #[inline] + pub fn pair_from_unique<const ID2: u64>(unique: UniqueArc<T>) -> (Self, ListArc<T, ID2>) + where + T: ListArcSafe<ID2>, + { + Self::pair_from_pin_unique(Pin::from(unique)) + } + + /// Creates two `ListArc`s from a pinned [`UniqueArc`]. + /// + /// The two ids must be different. + #[inline] + pub fn pair_from_pin_unique<const ID2: u64>( + mut unique: Pin<UniqueArc<T>>, + ) -> (Self, ListArc<T, ID2>) + where + T: ListArcSafe<ID2>, + { + build_assert!(ID != ID2); + + // SAFETY: We have a `UniqueArc`, so there is no `ListArc`. + unsafe { <T as ListArcSafe<ID>>::on_create_list_arc_from_unique(unique.as_mut()) }; + // SAFETY: We have a `UniqueArc`, so there is no `ListArc`. 
+ unsafe { <T as ListArcSafe<ID2>>::on_create_list_arc_from_unique(unique.as_mut()) }; + + let arc1 = Arc::from(unique); + let arc2 = Arc::clone(&arc1); + + // SAFETY: We just called `on_create_list_arc_from_unique` on an arc without a `ListArc` + // for both IDs (which are different), so we can create two `ListArc`s. + unsafe { + ( + Self::transmute_from_arc(arc1), + ListArc::transmute_from_arc(arc2), + ) + } + } + + /// Try to create a new `ListArc`. + /// + /// This fails if this value already has a `ListArc`. + pub fn try_from_arc(arc: Arc<T>) -> Result<Self, Arc<T>> + where + T: TryNewListArc<ID>, + { + if arc.try_new_list_arc() { + // SAFETY: The `try_new_list_arc` method returned true, so we made the tracking think + // that a `ListArc` exists. This lets us create a `ListArc`. + Ok(unsafe { Self::transmute_from_arc(arc) }) + } else { + Err(arc) + } + } + + /// Try to create a new `ListArc`. + /// + /// This fails if this value already has a `ListArc`. + pub fn try_from_arc_borrow(arc: ArcBorrow<'_, T>) -> Option<Self> + where + T: TryNewListArc<ID>, + { + if arc.try_new_list_arc() { + // SAFETY: The `try_new_list_arc` method returned true, so we made the tracking think + // that a `ListArc` exists. This lets us create a `ListArc`. + Some(unsafe { Self::transmute_from_arc(Arc::from(arc)) }) + } else { + None + } + } + + /// Try to create a new `ListArc`. + /// + /// If it's not possible to create a new `ListArc`, then the `Arc` is dropped. This will never + /// run the destructor of the value. + pub fn try_from_arc_or_drop(arc: Arc<T>) -> Option<Self> + where + T: TryNewListArc<ID>, + { + match Self::try_from_arc(arc) { + Ok(list_arc) => Some(list_arc), + Err(arc) => Arc::into_unique_or_drop(arc).map(Self::from), + } + } + + /// Transmutes an [`Arc`] into a `ListArc` without updating the tracking inside `T`. + /// + /// # Safety + /// + /// * The value must not already have a `ListArc` reference. 
+ /// * The tracking inside `T` must think that there is a `ListArc` reference. + #[inline] + unsafe fn transmute_from_arc(arc: Arc<T>) -> Self { + // INVARIANT: By the safety requirements, the invariants on `ListArc` are satisfied. + Self { arc } + } + + /// Transmutes a `ListArc` into an [`Arc`] without updating the tracking inside `T`. + /// + /// After this call, the tracking inside `T` will still think that there is a `ListArc` + /// reference. + #[inline] + fn transmute_to_arc(self) -> Arc<T> { + // Use a transmute to skip destructor. + // + // SAFETY: ListArc is repr(transparent). + unsafe { core::mem::transmute(self) } + } + + /// Convert ownership of this `ListArc` into a raw pointer. + /// + /// The returned pointer is indistinguishable from pointers returned by [`Arc::into_raw`]. The + /// tracking inside `T` will still think that a `ListArc` exists after this call. + #[inline] + pub fn into_raw(self) -> *const T { + Arc::into_raw(Self::transmute_to_arc(self)) + } + + /// Take ownership of the `ListArc` from a raw pointer. + /// + /// # Safety + /// + /// * `ptr` must satisfy the safety requirements of [`Arc::from_raw`]. + /// * The value must not already have a `ListArc` reference. + /// * The tracking inside `T` must think that there is a `ListArc` reference. + #[inline] + pub unsafe fn from_raw(ptr: *const T) -> Self { + // SAFETY: The pointer satisfies the safety requirements for `Arc::from_raw`. + let arc = unsafe { Arc::from_raw(ptr) }; + // SAFETY: The value doesn't already have a `ListArc` reference, but the tracking thinks it + // does. + unsafe { Self::transmute_from_arc(arc) } + } + + /// Converts the `ListArc` into an [`Arc`]. + #[inline] + pub fn into_arc(self) -> Arc<T> { + let arc = Self::transmute_to_arc(self); + // SAFETY: There is no longer a `ListArc`, but the tracking thinks there is. + unsafe { T::on_drop_list_arc(&arc) }; + arc + } + + /// Clone a `ListArc` into an [`Arc`]. 
+ #[inline] + pub fn clone_arc(&self) -> Arc<T> { + self.arc.clone() + } + + /// Returns a reference to an [`Arc`] from the given [`ListArc`]. + /// + /// This is useful when the argument of a function call is an [`&Arc`] (e.g., in a method + /// receiver), but we have a [`ListArc`] instead. + /// + /// [`&Arc`]: Arc + #[inline] + pub fn as_arc(&self) -> &Arc<T> { + &self.arc + } + + /// Returns an [`ArcBorrow`] from the given [`ListArc`]. + /// + /// This is useful when the argument of a function call is an [`ArcBorrow`] (e.g., in a method + /// receiver), but we have an [`Arc`] instead. Getting an [`ArcBorrow`] is free when optimised. + #[inline] + pub fn as_arc_borrow(&self) -> ArcBorrow<'_, T> { + self.arc.as_arc_borrow() + } + + /// Compare whether two [`ListArc`] pointers reference the same underlying object. + #[inline] + pub fn ptr_eq(this: &Self, other: &Self) -> bool { + Arc::ptr_eq(&this.arc, &other.arc) + } +} + +impl<T, const ID: u64> Deref for ListArc<T, ID> +where + T: ListArcSafe<ID> + ?Sized, +{ + type Target = T; + + #[inline] + fn deref(&self) -> &Self::Target { + self.arc.deref() + } +} + +impl<T, const ID: u64> Drop for ListArc<T, ID> +where + T: ListArcSafe<ID> + ?Sized, +{ + #[inline] + fn drop(&mut self) { + // SAFETY: There is no longer a `ListArc`, but the tracking thinks there is by the type + // invariants on `Self`. + unsafe { T::on_drop_list_arc(&self.arc) }; + } +} + +impl<T, const ID: u64> AsRef<Arc<T>> for ListArc<T, ID> +where + T: ListArcSafe<ID> + ?Sized, +{ + #[inline] + fn as_ref(&self) -> &Arc<T> { + self.as_arc() + } +} + +// This is to allow [`ListArc`] (and variants) to be used as the type of `self`. +impl<T, const ID: u64> core::ops::Receiver for ListArc<T, ID> where T: ListArcSafe<ID> + ?Sized {} + +// This is to allow coercion from `ListArc<T>` to `ListArc<U>` if `T` can be converted to the +// dynamically-sized type (DST) `U`. 
+impl<T, U, const ID: u64> core::ops::CoerceUnsized<ListArc<U, ID>> for ListArc<T, ID> +where + T: ListArcSafe<ID> + Unsize<U> + ?Sized, + U: ListArcSafe<ID> + ?Sized, +{ +} + +// This is to allow `ListArc<U>` to be dispatched on when `ListArc<T>` can be coerced into +// `ListArc<U>`. +impl<T, U, const ID: u64> core::ops::DispatchFromDyn<ListArc<U, ID>> for ListArc<T, ID> +where + T: ListArcSafe<ID> + Unsize<U> + ?Sized, + U: ListArcSafe<ID> + ?Sized, +{ +} + +/// A utility for tracking whether a [`ListArc`] exists using an atomic. +/// +/// # Invariant +/// +/// If the boolean is `false`, then there is no [`ListArc`] for this value. +#[repr(transparent)] +pub struct AtomicTracker<const ID: u64 = 0> { + inner: AtomicBool, + // This value needs to be pinned to justify the INVARIANT: comment in `AtomicTracker::new`. + _pin: PhantomPinned, +} + +impl<const ID: u64> AtomicTracker<ID> { + /// Creates a new initializer for this type. + pub fn new() -> impl PinInit<Self> { + // INVARIANT: Pin-init initializers can't be used on an existing `Arc`, so this value will + // not be constructed in an `Arc` that already has a `ListArc`. + Self { + inner: AtomicBool::new(false), + _pin: PhantomPinned, + } + } + + fn project_inner(self: Pin<&mut Self>) -> &mut AtomicBool { + // SAFETY: The `inner` field is not structurally pinned, so we may obtain a mutable + // reference to it even if we only have a pinned reference to `self`. + unsafe { &mut Pin::into_inner_unchecked(self).inner } + } +} + +impl<const ID: u64> ListArcSafe<ID> for AtomicTracker<ID> { + unsafe fn on_create_list_arc_from_unique(self: Pin<&mut Self>) { + // INVARIANT: We just created a ListArc, so the boolean should be true. + *self.project_inner().get_mut() = true; + } + + unsafe fn on_drop_list_arc(&self) { + // INVARIANT: We just dropped a ListArc, so the boolean should be false. 
+ self.inner.store(false, Ordering::Release); + } +} + +// SAFETY: If this method returns `true`, then by the type invariant there is no `ListArc` before +// this call, so it is okay to create a new `ListArc`. +// +// The acquire ordering will synchronize with the release store from the destruction of any +// previous `ListArc`, so if there was a previous `ListArc`, then the destruction of the previous +// `ListArc` happens-before the creation of the new `ListArc`. +unsafe impl<const ID: u64> TryNewListArc<ID> for AtomicTracker<ID> { + fn try_new_list_arc(&self) -> bool { + // INVARIANT: If this method returns true, then the boolean used to be false, and is no + // longer false, so it is okay for the caller to create a new [`ListArc`]. + self.inner + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + } +} diff --git a/rust/kernel/list/arc_field.rs b/rust/kernel/list/arc_field.rs new file mode 100644 index 000000000000..2330f673427a --- /dev/null +++ b/rust/kernel/list/arc_field.rs @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! A field that is exclusively owned by a [`ListArc`]. +//! +//! This can be used to have a reference counted struct where one of the reference counted pointers +//! has exclusive access to a field of the struct. +//! +//! [`ListArc`]: crate::list::ListArc + +use core::cell::UnsafeCell; + +/// A field owned by a specific [`ListArc`]. +/// +/// [`ListArc`]: crate::list::ListArc +pub struct ListArcField<T, const ID: u64 = 0> { + value: UnsafeCell<T>, +} + +// SAFETY: If the inner type is thread-safe, then `ListArcField` may also be thread-safe. +unsafe impl<T: Send + Sync, const ID: u64> Send for ListArcField<T, ID> {} +// SAFETY: If the inner type is thread-safe, then `ListArcField` may also be thread-safe. 
+unsafe impl<T: Send + Sync, const ID: u64> Sync for ListArcField<T, ID> {} + +impl<T, const ID: u64> ListArcField<T, ID> { + /// Creates a new `ListArcField`. + pub fn new(value: T) -> Self { + Self { + value: UnsafeCell::new(value), + } + } + + /// Access the value when we have exclusive access to the `ListArcField`. + /// + /// This allows access to the field using a `UniqueArc` instead of a `ListArc`. + pub fn get_mut(&mut self) -> &mut T { + self.value.get_mut() + } + + /// Unsafely assert that you have shared access to the `ListArc` for this field. + /// + /// # Safety + /// + /// The caller must have shared access to the `ListArc<ID>` containing the struct with this + /// field for the duration of the returned reference. + pub unsafe fn assert_ref(&self) -> &T { + // SAFETY: The caller has shared access to the `ListArc`, so they also have shared access + // to this field. + unsafe { &*self.value.get() } + } + + /// Unsafely assert that you have mutable access to the `ListArc` for this field. + /// + /// # Safety + /// + /// The caller must have mutable access to the `ListArc<ID>` containing the struct with this + /// field for the duration of the returned reference. + #[allow(clippy::mut_from_ref)] + pub unsafe fn assert_mut(&self) -> &mut T { + // SAFETY: The caller has exclusive access to the `ListArc`, so they also have exclusive + // access to this field. + unsafe { &mut *self.value.get() } + } +} + +/// Defines getters for a [`ListArcField`]. +#[macro_export] +macro_rules! define_list_arc_field_getter { + ($pub:vis fn $name:ident(&self $(<$id:tt>)?) -> &$typ:ty { $field:ident } + $($rest:tt)* + ) => { + $pub fn $name<'a>(self: &'a $crate::list::ListArc<Self $(, $id)?>) -> &'a $typ { + let field = &(&**self).$field; + // SAFETY: We have a shared reference to the `ListArc`. 
+ unsafe { $crate::list::ListArcField::<$typ $(, $id)?>::assert_ref(field) } + } + + $crate::list::define_list_arc_field_getter!($($rest)*); + }; + + ($pub:vis fn $name:ident(&mut self $(<$id:tt>)?) -> &mut $typ:ty { $field:ident } + $($rest:tt)* + ) => { + $pub fn $name<'a>(self: &'a mut $crate::list::ListArc<Self $(, $id)?>) -> &'a mut $typ { + let field = &(&**self).$field; + // SAFETY: We have a mutable reference to the `ListArc`. + unsafe { $crate::list::ListArcField::<$typ $(, $id)?>::assert_mut(field) } + } + + $crate::list::define_list_arc_field_getter!($($rest)*); + }; + + () => {}; +} +pub use define_list_arc_field_getter; diff --git a/rust/kernel/list/impl_list_item_mod.rs b/rust/kernel/list/impl_list_item_mod.rs new file mode 100644 index 000000000000..a0438537cee1 --- /dev/null +++ b/rust/kernel/list/impl_list_item_mod.rs @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Helpers for implementing list traits safely. + +use crate::list::ListLinks; + +/// Declares that this type has a `ListLinks<ID>` field at a fixed offset. +/// +/// This trait is only used to help implement `ListItem` safely. If `ListItem` is implemented +/// manually, then this trait is not needed. Use the [`impl_has_list_links!`] macro to implement +/// this trait. +/// +/// # Safety +/// +/// All values of this type must have a `ListLinks<ID>` field at the given offset. +/// +/// The behavior of `raw_get_list_links` must not be changed. +pub unsafe trait HasListLinks<const ID: u64 = 0> { + /// The offset of the `ListLinks` field. + const OFFSET: usize; + + /// Returns a pointer to the [`ListLinks<T, ID>`] field. + /// + /// # Safety + /// + /// The provided pointer must point at a valid struct of type `Self`. + /// + /// [`ListLinks<T, ID>`]: ListLinks + // We don't really need this method, but it's necessary for the implementation of + // `impl_has_list_links!` to be correct. 
+ #[inline] + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut ListLinks<ID> { + // SAFETY: The caller promises that the pointer is valid. The implementer promises that the + // `OFFSET` constant is correct. + unsafe { (ptr as *mut u8).add(Self::OFFSET) as *mut ListLinks<ID> } + } +} + +/// Implements the [`HasListLinks`] trait for the given type. +#[macro_export] +macro_rules! impl_has_list_links { + ($(impl$(<$($implarg:ident),*>)? + HasListLinks$(<$id:tt>)? + for $self:ident $(<$($selfarg:ty),*>)? + { self$(.$field:ident)* } + )*) => {$( + // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the + // right type. + // + // The behavior of `raw_get_list_links` is not changed since the `addr_of_mut!` macro is + // equivalent to the pointer offset operation in the trait definition. + unsafe impl$(<$($implarg),*>)? $crate::list::HasListLinks$(<$id>)? for + $self $(<$($selfarg),*>)? + { + const OFFSET: usize = ::core::mem::offset_of!(Self, $($field).*) as usize; + + #[inline] + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { + // SAFETY: The caller promises that the pointer is not dangling. We know that this + // expression doesn't follow any pointers, as the `offset_of!` invocation above + // would otherwise not compile. + unsafe { ::core::ptr::addr_of_mut!((*ptr)$(.$field)*) } + } + } + )*}; +} +pub use impl_has_list_links; + +/// Declares that the `ListLinks<ID>` field in this struct is inside a `ListLinksSelfPtr<T, ID>`. +/// +/// # Safety +/// +/// The `ListLinks<ID>` field of this struct at the offset `HasListLinks<ID>::OFFSET` must be +/// inside a `ListLinksSelfPtr<T, ID>`. +pub unsafe trait HasSelfPtr<T: ?Sized, const ID: u64 = 0> +where + Self: HasListLinks<ID>, +{ +} + +/// Implements the [`HasListLinks`] and [`HasSelfPtr`] traits for the given type. +#[macro_export] +macro_rules! impl_has_list_links_self_ptr { + ($(impl$({$($implarg:tt)*})? 
+ HasSelfPtr<$item_type:ty $(, $id:tt)?> + for $self:ident $(<$($selfarg:ty),*>)? + { self.$field:ident } + )*) => {$( + // SAFETY: The implementation of `raw_get_list_links` only compiles if the field has the + // right type. + unsafe impl$(<$($implarg)*>)? $crate::list::HasSelfPtr<$item_type $(, $id)?> for + $self $(<$($selfarg),*>)? + {} + + unsafe impl$(<$($implarg)*>)? $crate::list::HasListLinks$(<$id>)? for + $self $(<$($selfarg),*>)? + { + const OFFSET: usize = ::core::mem::offset_of!(Self, $field) as usize; + + #[inline] + unsafe fn raw_get_list_links(ptr: *mut Self) -> *mut $crate::list::ListLinks$(<$id>)? { + // SAFETY: The caller promises that the pointer is not dangling. + let ptr: *mut $crate::list::ListLinksSelfPtr<$item_type $(, $id)?> = + unsafe { ::core::ptr::addr_of_mut!((*ptr).$field) }; + ptr.cast() + } + } + )*}; +} +pub use impl_has_list_links_self_ptr; + +/// Implements the [`ListItem`] trait for the given type. +/// +/// Requires that the type implements [`HasListLinks`]. Use the [`impl_has_list_links!`] macro to +/// implement that trait. +/// +/// [`ListItem`]: crate::list::ListItem +#[macro_export] +macro_rules! impl_list_item { + ( + $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty { + using ListLinks; + })* + ) => {$( + // SAFETY: See GUARANTEES comment on each method. + unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t { + // GUARANTEES: + // * This returns the same pointer as `prepare_to_insert` because `prepare_to_insert` + // is implemented in terms of `view_links`. + // * By the type invariants of `ListLinks`, the `ListLinks` has two null pointers when + // this value is not in a list. + unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { + // SAFETY: The caller guarantees that `me` points at a valid value of type `Self`. 
+ unsafe { + <Self as $crate::list::HasListLinks<$num>>::raw_get_list_links(me.cast_mut()) + } + } + + // GUARANTEES: + // * `me` originates from the most recent call to `prepare_to_insert`, which just added + // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts + // `offset` from `me` so it returns the pointer originally passed to + // `prepare_to_insert`. + // * The pointer remains valid until the next call to `post_remove` because the caller + // of the most recent call to `prepare_to_insert` promised to retain ownership of the + // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot + // be destroyed while a `ListArc` reference exists. + unsafe fn view_value(me: *mut $crate::list::ListLinks<$num>) -> *const Self { + let offset = <Self as $crate::list::HasListLinks<$num>>::OFFSET; + // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it + // points at the field at offset `offset` in a value of type `Self`. Thus, + // subtracting `offset` from `me` is still in-bounds of the allocation. + unsafe { (me as *const u8).sub(offset) as *const Self } + } + + // GUARANTEES: + // This implementation of `ListItem` will not give out exclusive access to the same + // `ListLinks` several times because calls to `prepare_to_insert` and `post_remove` + // must alternate and exclusive access is given up when `post_remove` is called. + // + // Other invocations of `impl_list_item!` also cannot give out exclusive access to the + // same `ListLinks` because you can only implement `ListItem` once for each value of + // `ID`, and the `ListLinks` fields only work with the specified `ID`. + unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$num> { + // SAFETY: The caller promises that `me` points at a valid value. 
+ unsafe { <Self as $crate::list::ListItem<$num>>::view_links(me) } + } + + // GUARANTEES: + // * `me` originates from the most recent call to `prepare_to_insert`, which just added + // `offset` to the pointer passed to `prepare_to_insert`. This method subtracts + // `offset` from `me` so it returns the pointer originally passed to + // `prepare_to_insert`. + unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self { + let offset = <Self as $crate::list::HasListLinks<$num>>::OFFSET; + // SAFETY: `me` originates from the most recent call to `prepare_to_insert`, so it + // points at the field at offset `offset` in a value of type `Self`. Thus, + // subtracting `offset` from `me` is still in-bounds of the allocation. + unsafe { (me as *const u8).sub(offset) as *const Self } + } + } + )*}; + + ( + $(impl$({$($generics:tt)*})? ListItem<$num:tt> for $t:ty { + using ListLinksSelfPtr; + })* + ) => {$( + // SAFETY: See GUARANTEES comment on each method. + unsafe impl$(<$($generics)*>)? $crate::list::ListItem<$num> for $t { + // GUARANTEES: + // This implementation of `ListItem` will not give out exclusive access to the same + // `ListLinks` several times because calls to `prepare_to_insert` and `post_remove` + // must alternate and exclusive access is given up when `post_remove` is called. + // + // Other invocations of `impl_list_item!` also cannot give out exclusive access to the + // same `ListLinks` because you can only implement `ListItem` once for each value of + // `ID`, and the `ListLinks` fields only work with the specified `ID`. + unsafe fn prepare_to_insert(me: *const Self) -> *mut $crate::list::ListLinks<$num> { + // SAFETY: The caller promises that `me` points at a valid value of type `Self`. + let links_field = unsafe { <Self as $crate::list::ListItem<$num>>::view_links(me) }; + + let spoff = $crate::list::ListLinksSelfPtr::<Self, $num>::LIST_LINKS_SELF_PTR_OFFSET; + // Goes via the offset as the field is private. 
+ // + // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so + // the pointer stays in bounds of the allocation. + let self_ptr = unsafe { (links_field as *const u8).add(spoff) } + as *const $crate::types::Opaque<*const Self>; + let cell_inner = $crate::types::Opaque::raw_get(self_ptr); + + // SAFETY: This value is not accessed in any other places than `prepare_to_insert`, + // `post_remove`, or `view_value`. By the safety requirements of those methods, + // none of these three methods may be called in parallel with this call to + // `prepare_to_insert`, so this write will not race with any other access to the + // value. + unsafe { ::core::ptr::write(cell_inner, me) }; + + links_field + } + + // GUARANTEES: + // * This returns the same pointer as `prepare_to_insert` because `prepare_to_insert` + // returns the return value of `view_links`. + // * By the type invariants of `ListLinks`, the `ListLinks` has two null pointers when + // this value is not in a list. + unsafe fn view_links(me: *const Self) -> *mut $crate::list::ListLinks<$num> { + // SAFETY: The caller promises that `me` points at a valid value of type `Self`. + unsafe { <Self as HasListLinks<$num>>::raw_get_list_links(me.cast_mut()) } + } + + // This function is also used as the implementation of `post_remove`, so the caller + // may choose to satisfy the safety requirements of `post_remove` instead of the safety + // requirements for `view_value`. + // + // GUARANTEES: (always) + // * This returns the same pointer as the one passed to the most recent call to + // `prepare_to_insert` since that call wrote that pointer to this location. The value + // is only modified in `prepare_to_insert`, so it has not been modified since the + // most recent call. 
+ // + // GUARANTEES: (only when using the `view_value` safety requirements) + // * The pointer remains valid until the next call to `post_remove` because the caller + // of the most recent call to `prepare_to_insert` promised to retain ownership of the + // `ListArc` containing `Self` until the next call to `post_remove`. The value cannot + // be destroyed while a `ListArc` reference exists. + unsafe fn view_value(links_field: *mut $crate::list::ListLinks<$num>) -> *const Self { + let spoff = $crate::list::ListLinksSelfPtr::<Self, $num>::LIST_LINKS_SELF_PTR_OFFSET; + // SAFETY: The constant is equal to `offset_of!(ListLinksSelfPtr, self_ptr)`, so + // the pointer stays in bounds of the allocation. + let self_ptr = unsafe { (links_field as *const u8).add(spoff) } + as *const ::core::cell::UnsafeCell<*const Self>; + let cell_inner = ::core::cell::UnsafeCell::raw_get(self_ptr); + // SAFETY: This is not a data race, because the only function that writes to this + // value is `prepare_to_insert`, but by the safety requirements the + // `prepare_to_insert` method may not be called in parallel with `view_value` or + // `post_remove`. + unsafe { ::core::ptr::read(cell_inner) } + } + + // GUARANTEES: + // The first guarantee of `view_value` is exactly what `post_remove` guarantees. + unsafe fn post_remove(me: *mut $crate::list::ListLinks<$num>) -> *const Self { + // SAFETY: This specific implementation of `view_value` allows the caller to + // promise the safety requirements of `post_remove` instead of the safety + // requirements for `view_value`. 
+ unsafe { <Self as $crate::list::ListItem<$num>>::view_value(me) } + } + } + )*}; +} +pub use impl_list_item; diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index 910ce867480a..801907fba199 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -848,9 +848,7 @@ impl DeviceMask { /// } /// }; /// -/// #[cfg(MODULE)] -/// #[no_mangle] -/// static __mod_mdio__phydev_device_table: [::kernel::bindings::mdio_device_id; 2] = [ +/// const _DEVICE_TABLE: [::kernel::bindings::mdio_device_id; 2] = [ /// ::kernel::bindings::mdio_device_id { /// phy_id: 0x00000001, /// phy_id_mask: 0xffffffff, @@ -860,6 +858,9 @@ impl DeviceMask { /// phy_id_mask: 0, /// }, /// ]; +/// #[cfg(MODULE)] +/// #[no_mangle] +/// static __mod_mdio__phydev_device_table: [::kernel::bindings::mdio_device_id; 2] = _DEVICE_TABLE; /// ``` #[macro_export] macro_rules! module_phy_driver { @@ -871,9 +872,7 @@ macro_rules! module_phy_driver { (@device_table [$($dev:expr),+]) => { // SAFETY: C will not read off the end of this constant since the last element is zero. - #[cfg(MODULE)] - #[no_mangle] - static __mod_mdio__phydev_device_table: [$crate::bindings::mdio_device_id; + const _DEVICE_TABLE: [$crate::bindings::mdio_device_id; $crate::module_phy_driver!(@count_devices $($dev),+) + 1] = [ $($dev.mdio_device_id()),+, $crate::bindings::mdio_device_id { @@ -881,6 +880,11 @@ macro_rules! 
module_phy_driver { phy_id_mask: 0 } ]; + + #[cfg(MODULE)] + #[no_mangle] + static __mod_mdio__phydev_device_table: [$crate::bindings::mdio_device_id; + $crate::module_phy_driver!(@count_devices $($dev),+) + 1] = _DEVICE_TABLE; }; (drivers: [$($driver:ident),+ $(,)?], device_table: [$($dev:expr),+ $(,)?], $($f:tt)*) => { diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index b37a0b3180fb..4571daec0961 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -37,6 +37,6 @@ pub use super::error::{code::*, Error, Result}; pub use super::{str::CStr, ThisModule}; -pub use super::init::{InPlaceInit, Init, PinInit}; +pub use super::init::{InPlaceInit, InPlaceWrite, Init, PinInit}; pub use super::current; diff --git a/rust/kernel/print.rs b/rust/kernel/print.rs index a78aa3514a0a..508b0221256c 100644 --- a/rust/kernel/print.rs +++ b/rust/kernel/print.rs @@ -4,7 +4,7 @@ //! //! C header: [`include/linux/printk.h`](srctree/include/linux/printk.h) //! -//! Reference: <https://www.kernel.org/doc/html/latest/core-api/printk-basics.html> +//! Reference: <https://docs.kernel.org/core-api/printk-basics.html> use core::{ ffi::{c_char, c_void}, @@ -197,7 +197,7 @@ macro_rules! print_macro ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. /// -/// [`pr_emerg`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_emerg +/// [`pr_emerg`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_emerg /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -221,7 +221,7 @@ macro_rules! pr_emerg ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. 
/// -/// [`pr_alert`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_alert +/// [`pr_alert`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_alert /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -245,7 +245,7 @@ macro_rules! pr_alert ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. /// -/// [`pr_crit`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_crit +/// [`pr_crit`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_crit /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -269,7 +269,7 @@ macro_rules! pr_crit ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. /// -/// [`pr_err`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_err +/// [`pr_err`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_err /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -293,7 +293,7 @@ macro_rules! pr_err ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. /// -/// [`pr_warn`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_warn +/// [`pr_warn`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_warn /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -317,7 +317,7 @@ macro_rules! pr_warn ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. 
/// -/// [`pr_notice`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_notice +/// [`pr_notice`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_notice /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -341,7 +341,7 @@ macro_rules! pr_notice ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. /// -/// [`pr_info`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_info +/// [`pr_info`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_info /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -367,7 +367,7 @@ macro_rules! pr_info ( /// Mimics the interface of [`std::print!`]. See [`core::fmt`] and /// `alloc::format!` for information about the formatting syntax. /// -/// [`pr_debug`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_debug +/// [`pr_debug`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_debug /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples @@ -395,7 +395,7 @@ macro_rules! pr_debug ( /// `alloc::format!` for information about the formatting syntax. /// /// [`pr_info!`]: crate::pr_info! -/// [`pr_cont`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html#c.pr_cont +/// [`pr_cont`]: https://docs.kernel.org/core-api/printk-basics.html#c.pr_cont /// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html /// /// # Examples diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs new file mode 100644 index 000000000000..25eb36fd1cdc --- /dev/null +++ b/rust/kernel/rbtree.rs @@ -0,0 +1,1278 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Red-black trees. +//! +//! C header: [`include/linux/rbtree.h`](srctree/include/linux/rbtree.h) +//! +//! 
Reference: <https://docs.kernel.org/core-api/rbtree.html> + +use crate::{alloc::Flags, bindings, container_of, error::Result, prelude::*}; +use alloc::boxed::Box; +use core::{ + cmp::{Ord, Ordering}, + marker::PhantomData, + mem::MaybeUninit, + ptr::{addr_of_mut, from_mut, NonNull}, +}; + +/// A red-black tree with owned nodes. +/// +/// It is backed by the kernel C red-black trees. +/// +/// # Examples +/// +/// In the example below we do several operations on a tree. We note that insertions may fail if +/// the system is out of memory. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNode, RBTreeNodeReservation}}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// // Check the nodes we just inserted. +/// { +/// assert_eq!(tree.get(&10).unwrap(), &100); +/// assert_eq!(tree.get(&20).unwrap(), &200); +/// assert_eq!(tree.get(&30).unwrap(), &300); +/// } +/// +/// // Iterate over the nodes we just inserted. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Print all elements. +/// for (key, value) in &tree { +/// pr_info!("{} = {}\n", key, value); +/// } +/// +/// // Replace one of the elements. +/// tree.try_create_and_insert(10, 1000, flags::GFP_KERNEL)?; +/// +/// // Check that the tree reflects the replacement. 
+/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &1000)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Change the value of one of the elements. +/// *tree.get_mut(&30).unwrap() = 3000; +/// +/// // Check that the tree reflects the update. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &1000)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &3000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Remove an element. +/// tree.remove(&10); +/// +/// // Check that the tree reflects the removal. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &3000)); +/// assert!(iter.next().is_none()); +/// } +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// In the example below, we first allocate a node, acquire a spinlock, then insert the node into +/// the tree. This is useful when the insertion context does not allow sleeping, for example, when +/// holding a spinlock. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNode}, sync::SpinLock}; +/// +/// fn insert_test(tree: &SpinLock<RBTree<u32, u32>>) -> Result { +/// // Pre-allocate node. This may fail (as it allocates memory). +/// let node = RBTreeNode::new(10, 100, flags::GFP_KERNEL)?; +/// +/// // Insert node while holding the lock. It is guaranteed to succeed with no allocation +/// // attempts. +/// let mut guard = tree.lock(); +/// guard.insert(node); +/// Ok(()) +/// } +/// ``` +/// +/// In the example below, we reuse an existing node allocation from an element we removed. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::{RBTree, RBTreeNodeReservation}}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. 
+/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// // Check the nodes we just inserted. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert_eq!(iter.next().unwrap(), (&30, &300)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Remove a node, getting back ownership of it. +/// let existing = tree.remove(&30).unwrap(); +/// +/// // Check that the tree reflects the removal. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert!(iter.next().is_none()); +/// } +/// +/// // Create a preallocated reservation that we can re-use later. +/// let reservation = RBTreeNodeReservation::new(flags::GFP_KERNEL)?; +/// +/// // Insert a new node into the tree, reusing the previous allocation. This is guaranteed to +/// // succeed (no memory allocations). +/// tree.insert(reservation.into_node(15, 150)); +/// +/// // Check that the tree reflects the new insertion. +/// { +/// let mut iter = tree.iter(); +/// assert_eq!(iter.next().unwrap(), (&10, &100)); +/// assert_eq!(iter.next().unwrap(), (&15, &150)); +/// assert_eq!(iter.next().unwrap(), (&20, &200)); +/// assert!(iter.next().is_none()); +/// } +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// # Invariants +/// +/// Non-null parent/children pointers stored in instances of the `rb_node` C struct are always +/// valid, and pointing to a field of our internal representation of a node. +pub struct RBTree<K, V> { + root: bindings::rb_root, + _p: PhantomData<Node<K, V>>, +} + +// SAFETY: An [`RBTree`] allows the same kinds of access to its values that a struct allows to its +// fields, so we use the same Send condition as would be used for a struct with K and V fields.
+unsafe impl<K: Send, V: Send> Send for RBTree<K, V> {} + +// SAFETY: An [`RBTree`] allows the same kinds of access to its values that a struct allows to its +// fields, so we use the same Sync condition as would be used for a struct with K and V fields. +unsafe impl<K: Sync, V: Sync> Sync for RBTree<K, V> {} + +impl<K, V> RBTree<K, V> { + /// Creates a new and empty tree. + pub fn new() -> Self { + Self { + // INVARIANT: There are no nodes in the tree, so the invariant holds vacuously. + root: bindings::rb_root::default(), + _p: PhantomData, + } + } + + /// Returns an iterator over the tree nodes, sorted by key. + pub fn iter(&self) -> Iter<'_, K, V> { + Iter { + _tree: PhantomData, + // INVARIANT: + // - `self.root` is a valid pointer to a tree root. + // - `bindings::rb_first` produces a valid pointer to a node given `root` is valid. + iter_raw: IterRaw { + // SAFETY: by the invariants, all pointers are valid. + next: unsafe { bindings::rb_first(&self.root) }, + _phantom: PhantomData, + }, + } + } + + /// Returns a mutable iterator over the tree nodes, sorted by key. + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> { + IterMut { + _tree: PhantomData, + // INVARIANT: + // - `self.root` is a valid pointer to a tree root. + // - `bindings::rb_first` produces a valid pointer to a node given `root` is valid. + iter_raw: IterRaw { + // SAFETY: by the invariants, all pointers are valid. + next: unsafe { bindings::rb_first(from_mut(&mut self.root)) }, + _phantom: PhantomData, + }, + } + } + + /// Returns an iterator over the keys of the nodes in the tree, in sorted order. + pub fn keys(&self) -> impl Iterator<Item = &'_ K> { + self.iter().map(|(k, _)| k) + } + + /// Returns an iterator over the values of the nodes in the tree, sorted by key. + pub fn values(&self) -> impl Iterator<Item = &'_ V> { + self.iter().map(|(_, v)| v) + } + + /// Returns a mutable iterator over the values of the nodes in the tree, sorted by key. 
+ pub fn values_mut(&mut self) -> impl Iterator<Item = &'_ mut V> { + self.iter_mut().map(|(_, v)| v) + } + + /// Returns a cursor over the tree nodes, starting with the smallest key. + pub fn cursor_front(&mut self) -> Option<Cursor<'_, K, V>> { + let root = addr_of_mut!(self.root); + // SAFETY: `self.root` is always a valid root node + let current = unsafe { bindings::rb_first(root) }; + NonNull::new(current).map(|current| { + // INVARIANT: + // - `current` is a valid node in the [`RBTree`] pointed to by `self`. + Cursor { + current, + tree: self, + } + }) + } + + /// Returns a cursor over the tree nodes, starting with the largest key. + pub fn cursor_back(&mut self) -> Option<Cursor<'_, K, V>> { + let root = addr_of_mut!(self.root); + // SAFETY: `self.root` is always a valid root node + let current = unsafe { bindings::rb_last(root) }; + NonNull::new(current).map(|current| { + // INVARIANT: + // - `current` is a valid node in the [`RBTree`] pointed to by `self`. + Cursor { + current, + tree: self, + } + }) + } +} + +impl<K, V> RBTree<K, V> +where + K: Ord, +{ + /// Tries to insert a new value into the tree. + /// + /// It overwrites a node if one already exists with the same key and returns it (containing the + /// key/value pair). Returns [`None`] if a node with the same key didn't already exist. + /// + /// Returns an error if it cannot allocate memory for the new node. + pub fn try_create_and_insert( + &mut self, + key: K, + value: V, + flags: Flags, + ) -> Result<Option<RBTreeNode<K, V>>> { + Ok(self.insert(RBTreeNode::new(key, value, flags)?)) + } + + /// Inserts a new node into the tree. + /// + /// It overwrites a node if one already exists with the same key and returns it (containing the + /// key/value pair). Returns [`None`] if a node with the same key didn't already exist. + /// + /// This function always succeeds. 
+ pub fn insert(&mut self, node: RBTreeNode<K, V>) -> Option<RBTreeNode<K, V>> { + match self.raw_entry(&node.node.key) { + RawEntry::Occupied(entry) => Some(entry.replace(node)), + RawEntry::Vacant(entry) => { + entry.insert(node); + None + } + } + } + + fn raw_entry(&mut self, key: &K) -> RawEntry<'_, K, V> { + let raw_self: *mut RBTree<K, V> = self; + // The returned `RawEntry` is used to call either `rb_link_node` or `rb_replace_node`. + // The parameters of `bindings::rb_link_node` are as follows: + // - `node`: A pointer to an uninitialized node being inserted. + // - `parent`: A pointer to an existing node in the tree. One of its child pointers must be + // null, and `node` will become a child of `parent` by replacing that child pointer + // with a pointer to `node`. + // - `rb_link`: A pointer to either the left-child or right-child field of `parent`. This + // specifies which child of `parent` should hold `node` after this call. The + // value of `*rb_link` must be null before the call to `rb_link_node`. If the + // red/black tree is empty, then it’s also possible for `parent` to be null. In + // this case, `rb_link` is a pointer to the `root` field of the red/black tree. + // + // We will traverse the tree looking for a node that has a null pointer as its child, + // representing an empty subtree where we can insert our new node. We need to make sure + // that we preserve the ordering of the nodes in the tree. In each iteration of the loop + // we store `parent` and `child_field_of_parent`, and the new `node` will go somewhere + // in the subtree of `parent` that `child_field_of_parent` points at. Once + // we find an empty subtree, we can insert the new node using `rb_link_node`. + let mut parent = core::ptr::null_mut(); + let mut child_field_of_parent: &mut *mut bindings::rb_node = + // SAFETY: `raw_self` is a valid pointer to the `RBTree` (created from `self` above). 
+ unsafe { &mut (*raw_self).root.rb_node }; + while !(*child_field_of_parent).is_null() { + let curr = *child_field_of_parent; + // SAFETY: All links fields we create are in a `Node<K, V>`. + let node = unsafe { container_of!(curr, Node<K, V>, links) }; + + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + match key.cmp(unsafe { &(*node).key }) { + // SAFETY: `curr` is a non-null node so it is valid by the type invariants. + Ordering::Less => child_field_of_parent = unsafe { &mut (*curr).rb_left }, + // SAFETY: `curr` is a non-null node so it is valid by the type invariants. + Ordering::Greater => child_field_of_parent = unsafe { &mut (*curr).rb_right }, + Ordering::Equal => { + return RawEntry::Occupied(OccupiedEntry { + rbtree: self, + node_links: curr, + }) + } + } + parent = curr; + } + + RawEntry::Vacant(RawVacantEntry { + rbtree: raw_self, + parent, + child_field_of_parent, + _phantom: PhantomData, + }) + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + pub fn entry(&mut self, key: K) -> Entry<'_, K, V> { + match self.raw_entry(&key) { + RawEntry::Occupied(entry) => Entry::Occupied(entry), + RawEntry::Vacant(entry) => Entry::Vacant(VacantEntry { raw: entry, key }), + } + } + + /// Used for accessing the given node, if it exists. + pub fn find_mut(&mut self, key: &K) -> Option<OccupiedEntry<'_, K, V>> { + match self.raw_entry(key) { + RawEntry::Occupied(entry) => Some(entry), + RawEntry::Vacant(_entry) => None, + } + } + + /// Returns a reference to the value corresponding to the key. + pub fn get(&self, key: &K) -> Option<&V> { + let mut node = self.root.rb_node; + while !node.is_null() { + // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` + // point to the links field of `Node<K, V>` objects. + let this = unsafe { container_of!(node, Node<K, V>, links) }; + // SAFETY: `this` is a non-null node so it is valid by the type invariants. 
+ node = match key.cmp(unsafe { &(*this).key }) { + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Less => unsafe { (*node).rb_left }, + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Greater => unsafe { (*node).rb_right }, + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + Ordering::Equal => return Some(unsafe { &(*this).value }), + } + } + None + } + + /// Returns a mutable reference to the value corresponding to the key. + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { + self.find_mut(key).map(|node| node.into_mut()) + } + + /// Removes the node with the given key from the tree. + /// + /// It returns the node that was removed if one exists, or [`None`] otherwise. + pub fn remove_node(&mut self, key: &K) -> Option<RBTreeNode<K, V>> { + self.find_mut(key).map(OccupiedEntry::remove_node) + } + + /// Removes the node with the given key from the tree. + /// + /// It returns the value that was removed if one exists, or [`None`] otherwise. + pub fn remove(&mut self, key: &K) -> Option<V> { + self.find_mut(key).map(OccupiedEntry::remove) + } + + /// Returns a cursor over the tree nodes based on the given key. + /// + /// If the given key exists, the cursor starts there. + /// Otherwise it starts with the first larger key in sort order. + /// If there is no larger key, it returns [`None`]. + pub fn cursor_lower_bound(&mut self, key: &K) -> Option<Cursor<'_, K, V>> + where + K: Ord, + { + let mut node = self.root.rb_node; + let mut best_match: Option<NonNull<Node<K, V>>> = None; + while !node.is_null() { + // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` + // point to the links field of `Node<K, V>` objects. + let this = unsafe { container_of!(node, Node<K, V>, links) }.cast_mut(); + // SAFETY: `this` is a non-null node so it is valid by the type invariants. 
+ let this_key = unsafe { &(*this).key }; + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + let left_child = unsafe { (*node).rb_left }; + // SAFETY: `node` is a non-null node so it is valid by the type invariants. + let right_child = unsafe { (*node).rb_right }; + match key.cmp(this_key) { + Ordering::Equal => { + best_match = NonNull::new(this); + break; + } + Ordering::Greater => { + node = right_child; + } + Ordering::Less => { + let is_better_match = match best_match { + None => true, + Some(best) => { + // SAFETY: `best` is a non-null node so it is valid by the type invariants. + let best_key = unsafe { &(*best.as_ptr()).key }; + best_key > this_key + } + }; + if is_better_match { + best_match = NonNull::new(this); + } + node = left_child; + } + }; + } + + let best = best_match?; + + // SAFETY: `best` is a non-null node so it is valid by the type invariants. + let links = unsafe { addr_of_mut!((*best.as_ptr()).links) }; + + NonNull::new(links).map(|current| { + // INVARIANT: + // - `current` is a valid node in the [`RBTree`] pointed to by `self`. + Cursor { + current, + tree: self, + } + }) + } +} + +impl<K, V> Default for RBTree<K, V> { + fn default() -> Self { + Self::new() + } +} + +impl<K, V> Drop for RBTree<K, V> { + fn drop(&mut self) { + // SAFETY: `root` is valid as it's embedded in `self` and we have a valid `self`. + let mut next = unsafe { bindings::rb_first_postorder(&self.root) }; + + // INVARIANT: The loop invariant is that all tree nodes from `next` in postorder are valid. + while !next.is_null() { + // SAFETY: All links fields we create are in a `Node<K, V>`. + let this = unsafe { container_of!(next, Node<K, V>, links) }; + + // Find out what the next node is before disposing of the current one. + // SAFETY: `next` and all nodes in postorder are still valid. 
+ next = unsafe { bindings::rb_next_postorder(next) }; + + // INVARIANT: This is the destructor, so we break the type invariant during clean-up, + // but it is not observable. The loop invariant is still maintained. + + // SAFETY: `this` is valid per the loop invariant. + unsafe { drop(Box::from_raw(this.cast_mut())) }; + } + } +} + +/// A bidirectional cursor over the tree nodes, sorted by key. +/// +/// # Examples +/// +/// In the following example, we obtain a cursor to the first element in the tree. +/// The cursor allows us to iterate bidirectionally over key/value pairs in the tree. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::RBTree}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// // Get a cursor to the first element. +/// let mut cursor = tree.cursor_front().unwrap(); +/// let mut current = cursor.current(); +/// assert_eq!(current, (&10, &100)); +/// +/// // Move the cursor, updating it to the 2nd element. +/// cursor = cursor.move_next().unwrap(); +/// current = cursor.current(); +/// assert_eq!(current, (&20, &200)); +/// +/// // Peek at the next element without impacting the cursor. +/// let next = cursor.peek_next().unwrap(); +/// assert_eq!(next, (&30, &300)); +/// current = cursor.current(); +/// assert_eq!(current, (&20, &200)); +/// +/// // Moving past the last element causes the cursor to return [`None`]. +/// cursor = cursor.move_next().unwrap(); +/// current = cursor.current(); +/// assert_eq!(current, (&30, &300)); +/// let cursor = cursor.move_next(); +/// assert!(cursor.is_none()); +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// A cursor can also be obtained at the last element in the tree. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::RBTree}; +/// +/// // Create a new tree. 
+/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// let mut cursor = tree.cursor_back().unwrap(); +/// let current = cursor.current(); +/// assert_eq!(current, (&30, &300)); +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// Obtaining a cursor returns [`None`] if the tree is empty. +/// +/// ``` +/// use kernel::rbtree::RBTree; +/// +/// let mut tree: RBTree<u16, u16> = RBTree::new(); +/// assert!(tree.cursor_front().is_none()); +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// [`RBTree::cursor_lower_bound`] can be used to start at an arbitrary node in the tree. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::RBTree}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert five elements. +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(40, 400, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(50, 500, flags::GFP_KERNEL)?; +/// +/// // If the provided key exists, a cursor to that key is returned. +/// let cursor = tree.cursor_lower_bound(&20).unwrap(); +/// let current = cursor.current(); +/// assert_eq!(current, (&20, &200)); +/// +/// // If the provided key doesn't exist, a cursor to the first larger element in sort order is returned. +/// let cursor = tree.cursor_lower_bound(&25).unwrap(); +/// let current = cursor.current(); +/// assert_eq!(current, (&30, &300)); +/// +/// // If there is no larger key, [`None`] is returned. +/// let cursor = tree.cursor_lower_bound(&55); +/// assert!(cursor.is_none()); +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// The cursor allows mutation of values in the tree. 
+/// +/// ``` +/// use kernel::{alloc::flags, rbtree::RBTree}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// // Retrieve a cursor. +/// let mut cursor = tree.cursor_front().unwrap(); +/// +/// // Get a mutable reference to the current value. +/// let (k, v) = cursor.current_mut(); +/// *v = 1000; +/// +/// // The updated value is reflected in the tree. +/// let updated = tree.get(&10).unwrap(); +/// assert_eq!(updated, &1000); +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// It also allows node removal. The following examples demonstrate the behavior of removing the current node. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::RBTree}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// // Remove the first element. +/// let mut cursor = tree.cursor_front().unwrap(); +/// let mut current = cursor.current(); +/// assert_eq!(current, (&10, &100)); +/// cursor = cursor.remove_current().0.unwrap(); +/// +/// // If a node exists after the current element, it is returned. +/// current = cursor.current(); +/// assert_eq!(current, (&20, &200)); +/// +/// // Get a cursor to the last element, and remove it. +/// cursor = tree.cursor_back().unwrap(); +/// current = cursor.current(); +/// assert_eq!(current, (&30, &300)); +/// +/// // Since there is no next node, the previous node is returned. 
+/// cursor = cursor.remove_current().0.unwrap(); +/// current = cursor.current(); +/// assert_eq!(current, (&20, &200)); +/// +/// // Removing the last element in the tree returns [`None`]. +/// assert!(cursor.remove_current().0.is_none()); +/// +/// # Ok::<(), Error>(()) +/// ``` +/// +/// Nodes adjacent to the current node can also be removed. +/// +/// ``` +/// use kernel::{alloc::flags, rbtree::RBTree}; +/// +/// // Create a new tree. +/// let mut tree = RBTree::new(); +/// +/// // Insert three elements. +/// tree.try_create_and_insert(10, 100, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(20, 200, flags::GFP_KERNEL)?; +/// tree.try_create_and_insert(30, 300, flags::GFP_KERNEL)?; +/// +/// // Get a cursor to the first element. +/// let mut cursor = tree.cursor_front().unwrap(); +/// let mut current = cursor.current(); +/// assert_eq!(current, (&10, &100)); +/// +/// // Calling `remove_prev` from the first element returns [`None`]. +/// assert!(cursor.remove_prev().is_none()); +/// +/// // Get a cursor to the last element. +/// cursor = tree.cursor_back().unwrap(); +/// current = cursor.current(); +/// assert_eq!(current, (&30, &300)); +/// +/// // Calling `remove_prev` removes and returns the middle element. +/// assert_eq!(cursor.remove_prev().unwrap().to_key_value(), (20, 200)); +/// +/// // Calling `remove_next` from the last element returns [`None`]. +/// assert!(cursor.remove_next().is_none()); +/// +/// // Move to the first element +/// cursor = cursor.move_prev().unwrap(); +/// current = cursor.current(); +/// assert_eq!(current, (&10, &100)); +/// +/// // Calling `remove_next` removes and returns the last element. +/// assert_eq!(cursor.remove_next().unwrap().to_key_value(), (30, 300)); +/// +/// # Ok::<(), Error>(()) +/// +/// ``` +/// +/// # Invariants +/// - `current` points to a node that is in the same [`RBTree`] as `tree`. 
+pub struct Cursor<'a, K, V> { + tree: &'a mut RBTree<K, V>, + current: NonNull<bindings::rb_node>, +} + +// SAFETY: The [`Cursor`] has exclusive access to both `K` and `V`, so it is sufficient to require them to be `Send`. +// The cursor only gives out immutable references to the keys, but since it has exclusive access to those same +// keys, `Send` is sufficient. `Sync` would be okay, but it is more restrictive to the user. +unsafe impl<'a, K: Send, V: Send> Send for Cursor<'a, K, V> {} + +// SAFETY: The [`Cursor`] gives out immutable references to K and mutable references to V, +// so it has the same thread safety requirements as mutable references. +unsafe impl<'a, K: Sync, V: Sync> Sync for Cursor<'a, K, V> {} + +impl<'a, K, V> Cursor<'a, K, V> { + /// The current node + pub fn current(&self) -> (&K, &V) { + // SAFETY: + // - `self.current` is a valid node by the type invariants. + // - We have an immutable reference by the function signature. + unsafe { Self::to_key_value(self.current) } + } + + /// The current node, with a mutable value + pub fn current_mut(&mut self) -> (&K, &mut V) { + // SAFETY: + // - `self.current` is a valid node by the type invariants. + // - We have a mutable reference by the function signature. + unsafe { Self::to_key_value_mut(self.current) } + } + + /// Remove the current node from the tree. + /// + /// Returns a tuple where the first element is a cursor to the next node, if it exists, + /// else the previous node, else [`None`] (if the tree becomes empty). The second element + /// is the removed node. + pub fn remove_current(self) -> (Option<Self>, RBTreeNode<K, V>) { + let prev = self.get_neighbor_raw(Direction::Prev); + let next = self.get_neighbor_raw(Direction::Next); + // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` + // point to the links field of `Node<K, V>` objects.
+ let this = unsafe { container_of!(self.current.as_ptr(), Node<K, V>, links) }.cast_mut(); + // SAFETY: `this` is valid by the type invariants as described above. + let node = unsafe { Box::from_raw(this) }; + let node = RBTreeNode { node }; + // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so + // the tree cannot change. By the tree invariant, all nodes are valid. + unsafe { bindings::rb_erase(&mut (*this).links, addr_of_mut!(self.tree.root)) }; + + let current = match (prev, next) { + (_, Some(next)) => next, + (Some(prev), None) => prev, + (None, None) => { + return (None, node); + } + }; + + ( + // INVARIANT: + // - `current` is a valid node in the [`RBTree`] pointed to by `self.tree`. + Some(Self { + current, + tree: self.tree, + }), + node, + ) + } + + /// Remove the previous node, returning it if it exists. + pub fn remove_prev(&mut self) -> Option<RBTreeNode<K, V>> { + self.remove_neighbor(Direction::Prev) + } + + /// Remove the next node, returning it if it exists. + pub fn remove_next(&mut self) -> Option<RBTreeNode<K, V>> { + self.remove_neighbor(Direction::Next) + } + + fn remove_neighbor(&mut self, direction: Direction) -> Option<RBTreeNode<K, V>> { + if let Some(neighbor) = self.get_neighbor_raw(direction) { + let neighbor = neighbor.as_ptr(); + // SAFETY: The reference to the tree used to create the cursor outlives the cursor, so + // the tree cannot change. By the tree invariant, all nodes are valid. + unsafe { bindings::rb_erase(neighbor, addr_of_mut!(self.tree.root)) }; + // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` + // point to the links field of `Node<K, V>` objects. + let this = unsafe { container_of!(neighbor, Node<K, V>, links) }.cast_mut(); + // SAFETY: `this` is valid by the type invariants as described above. 
+ let node = unsafe { Box::from_raw(this) }; + return Some(RBTreeNode { node }); + } + None + } + + /// Move the cursor to the previous node, returning [`None`] if it doesn't exist. + pub fn move_prev(self) -> Option<Self> { + self.mv(Direction::Prev) + } + + /// Move the cursor to the next node, returning [`None`] if it doesn't exist. + pub fn move_next(self) -> Option<Self> { + self.mv(Direction::Next) + } + + fn mv(self, direction: Direction) -> Option<Self> { + // INVARIANT: + // - `neighbor` is a valid node in the [`RBTree`] pointed to by `self.tree`. + self.get_neighbor_raw(direction).map(|neighbor| Self { + tree: self.tree, + current: neighbor, + }) + } + + /// Access the previous node without moving the cursor. + pub fn peek_prev(&self) -> Option<(&K, &V)> { + self.peek(Direction::Prev) + } + + /// Access the next node without moving the cursor. + pub fn peek_next(&self) -> Option<(&K, &V)> { + self.peek(Direction::Next) + } + + fn peek(&self, direction: Direction) -> Option<(&K, &V)> { + self.get_neighbor_raw(direction).map(|neighbor| { + // SAFETY: + // - `neighbor` is a valid tree node. + // - By the function signature, we have an immutable reference to `self`. + unsafe { Self::to_key_value(neighbor) } + }) + } + + /// Access the previous node mutably without moving the cursor. + pub fn peek_prev_mut(&mut self) -> Option<(&K, &mut V)> { + self.peek_mut(Direction::Prev) + } + + /// Access the next node mutably without moving the cursor. + pub fn peek_next_mut(&mut self) -> Option<(&K, &mut V)> { + self.peek_mut(Direction::Next) + } + + fn peek_mut(&mut self, direction: Direction) -> Option<(&K, &mut V)> { + self.get_neighbor_raw(direction).map(|neighbor| { + // SAFETY: + // - `neighbor` is a valid tree node. + // - By the function signature, we have a mutable reference to `self`.
+ unsafe { Self::to_key_value_mut(neighbor) } + }) + } + + fn get_neighbor_raw(&self, direction: Direction) -> Option<NonNull<bindings::rb_node>> { + // SAFETY: `self.current` is valid by the type invariants. + let neighbor = unsafe { + match direction { + Direction::Prev => bindings::rb_prev(self.current.as_ptr()), + Direction::Next => bindings::rb_next(self.current.as_ptr()), + } + }; + + NonNull::new(neighbor) + } + + /// SAFETY: + /// - `node` must be a valid pointer to a node in an [`RBTree`]. + /// - The caller has immutable access to `node` for the duration of 'b. + unsafe fn to_key_value<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, &'b V) { + // SAFETY: the caller guarantees that `node` is a valid pointer in an `RBTree`. + let (k, v) = unsafe { Self::to_key_value_raw(node) }; + // SAFETY: the caller guarantees immutable access to `node`. + (k, unsafe { &*v }) + } + + /// SAFETY: + /// - `node` must be a valid pointer to a node in an [`RBTree`]. + /// - The caller has mutable access to `node` for the duration of 'b. + unsafe fn to_key_value_mut<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, &'b mut V) { + // SAFETY: the caller guarantees that `node` is a valid pointer in an `RBTree`. + let (k, v) = unsafe { Self::to_key_value_raw(node) }; + // SAFETY: the caller guarantees mutable access to `node`. + (k, unsafe { &mut *v }) + } + + /// SAFETY: + /// - `node` must be a valid pointer to a node in an [`RBTree`]. + /// - The caller has immutable access to the key for the duration of 'b. + unsafe fn to_key_value_raw<'b>(node: NonNull<bindings::rb_node>) -> (&'b K, *mut V) { + // SAFETY: By the type invariant of `Self`, all non-null `rb_node` pointers stored in `self` + // point to the links field of `Node<K, V>` objects. + let this = unsafe { container_of!(node.as_ptr(), Node<K, V>, links) }.cast_mut(); + // SAFETY: The passed `node` is the current node or a non-null neighbor, + // thus `this` is valid by the type invariants. 
+ let k = unsafe { &(*this).key }; + // SAFETY: The passed `node` is the current node or a non-null neighbor, + // thus `this` is valid by the type invariants. + let v = unsafe { addr_of_mut!((*this).value) }; + (k, v) + } +} + +/// Direction for [`Cursor`] operations. +enum Direction { + /// the node immediately before, in sort order + Prev, + /// the node immediately after, in sort order + Next, +} + +impl<'a, K, V> IntoIterator for &'a RBTree<K, V> { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// An iterator over the nodes of a [`RBTree`]. +/// +/// Instances are created by calling [`RBTree::iter`]. +pub struct Iter<'a, K, V> { + _tree: PhantomData<&'a RBTree<K, V>>, + iter_raw: IterRaw<K, V>, +} + +// SAFETY: The [`Iter`] gives out immutable references to K and V, so it has the same +// thread safety requirements as immutable references. +unsafe impl<'a, K: Sync, V: Sync> Send for Iter<'a, K, V> {} + +// SAFETY: The [`Iter`] gives out immutable references to K and V, so it has the same +// thread safety requirements as immutable references. +unsafe impl<'a, K: Sync, V: Sync> Sync for Iter<'a, K, V> {} + +impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option<Self::Item> { + // SAFETY: Due to `self._tree`, `k` and `v` are valid for the lifetime of `'a`. + self.iter_raw.next().map(|(k, v)| unsafe { (&*k, &*v) }) + } +} + +impl<'a, K, V> IntoIterator for &'a mut RBTree<K, V> { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } +} + +/// A mutable iterator over the nodes of a [`RBTree`]. +/// +/// Instances are created by calling [`RBTree::iter_mut`]. 
+pub struct IterMut<'a, K, V> { + _tree: PhantomData<&'a mut RBTree<K, V>>, + iter_raw: IterRaw<K, V>, +} + +// SAFETY: The [`IterMut`] has exclusive access to both `K` and `V`, so it is sufficient to require them to be `Send`. +// The iterator only gives out immutable references to the keys, but since the iterator has exclusive access to those same +// keys, `Send` is sufficient. `Sync` would be okay, but it is more restrictive to the user. +unsafe impl<'a, K: Send, V: Send> Send for IterMut<'a, K, V> {} + +// SAFETY: The [`IterMut`] gives out immutable references to K and mutable references to V, so it has the same +// thread safety requirements as mutable references. +unsafe impl<'a, K: Sync, V: Sync> Sync for IterMut<'a, K, V> {} + +impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option<Self::Item> { + self.iter_raw.next().map(|(k, v)| + // SAFETY: Due to `&mut self`, we have exclusive access to `k` and `v`, for the lifetime of `'a`. + unsafe { (&*k, &mut *v) }) + } +} + +/// A raw iterator over the nodes of a [`RBTree`]. +/// +/// # Invariants +/// - `self.next` is a valid pointer. +/// - `self.next` points to a node stored inside of a valid `RBTree`. +struct IterRaw<K, V> { + next: *mut bindings::rb_node, + _phantom: PhantomData<fn() -> (K, V)>, +} + +impl<K, V> Iterator for IterRaw<K, V> { + type Item = (*mut K, *mut V); + + fn next(&mut self) -> Option<Self::Item> { + if self.next.is_null() { + return None; + } + + // SAFETY: By the type invariant of `IterRaw`, `self.next` is a valid node in an `RBTree`, + // and by the type invariant of `RBTree`, all nodes point to the links field of `Node<K, V>` objects. + let cur = unsafe { container_of!(self.next, Node<K, V>, links) }.cast_mut(); + + // SAFETY: `self.next` is a valid tree node by the type invariants. + self.next = unsafe { bindings::rb_next(self.next) }; + + // SAFETY: By the same reasoning above, it is safe to dereference the node.
+ Some(unsafe { (addr_of_mut!((*cur).key), addr_of_mut!((*cur).value)) }) + } +} + +/// A memory reservation for a red-black tree node. +/// +/// +/// It contains the memory needed to hold a node that can be inserted into a red-black tree. One +/// can be obtained by directly allocating it ([`RBTreeNodeReservation::new`]). +pub struct RBTreeNodeReservation<K, V> { + node: Box<MaybeUninit<Node<K, V>>>, +} + +impl<K, V> RBTreeNodeReservation<K, V> { + /// Allocates memory for a node to be eventually initialised and inserted into the tree via a + /// call to [`RBTree::insert`]. + pub fn new(flags: Flags) -> Result<RBTreeNodeReservation<K, V>> { + Ok(RBTreeNodeReservation { + node: <Box<_> as BoxExt<_>>::new_uninit(flags)?, + }) + } +} + +// SAFETY: This doesn't actually contain K or V, and is just a memory allocation. Those can always +// be moved across threads. +unsafe impl<K, V> Send for RBTreeNodeReservation<K, V> {} + +// SAFETY: This doesn't actually contain K or V, and is just a memory allocation. +unsafe impl<K, V> Sync for RBTreeNodeReservation<K, V> {} + +impl<K, V> RBTreeNodeReservation<K, V> { + /// Initialises a node reservation. + /// + /// It then becomes an [`RBTreeNode`] that can be inserted into a tree. + pub fn into_node(mut self, key: K, value: V) -> RBTreeNode<K, V> { + self.node.write(Node { + key, + value, + links: bindings::rb_node::default(), + }); + // SAFETY: We just wrote to it. + let node = unsafe { self.node.assume_init() }; + RBTreeNode { node } + } +} + +/// A red-black tree node. +/// +/// The node is fully initialised (with key and value) and can be inserted into a tree without any +/// extra allocations or failure paths. +pub struct RBTreeNode<K, V> { + node: Box<Node<K, V>>, +} + +impl<K, V> RBTreeNode<K, V> { + /// Allocates and initialises a node that can be inserted into the tree via + /// [`RBTree::insert`]. 
+ pub fn new(key: K, value: V, flags: Flags) -> Result<RBTreeNode<K, V>> { + Ok(RBTreeNodeReservation::new(flags)?.into_node(key, value)) + } + + /// Get the key and value from inside the node. + pub fn to_key_value(self) -> (K, V) { + (self.node.key, self.node.value) + } +} + +// SAFETY: If K and V can be sent across threads, then it's also okay to send [`RBTreeNode`] across +// threads. +unsafe impl<K: Send, V: Send> Send for RBTreeNode<K, V> {} + +// SAFETY: If K and V can be accessed without synchronization, then it's also okay to access +// [`RBTreeNode`] without synchronization. +unsafe impl<K: Sync, V: Sync> Sync for RBTreeNode<K, V> {} + +impl<K, V> RBTreeNode<K, V> { + /// Drop the key and value, but keep the allocation. + /// + /// It then becomes a reservation that can be re-initialised into a different node (i.e., with + /// a different key and/or value). + /// + /// The existing key and value are dropped in-place as part of this operation, that is, memory + /// may be freed (but only for the key/value; memory for the node itself is kept for reuse). + pub fn into_reservation(self) -> RBTreeNodeReservation<K, V> { + RBTreeNodeReservation { + node: Box::drop_contents(self.node), + } + } +} + +/// A view into a single entry in a map, which may either be vacant or occupied. +/// +/// This enum is constructed from the [`RBTree::entry`]. +/// +/// [`entry`]: fn@RBTree::entry +pub enum Entry<'a, K, V> { + /// This [`RBTree`] does not have a node with this key. + Vacant(VacantEntry<'a, K, V>), + /// This [`RBTree`] already has a node with this key. + Occupied(OccupiedEntry<'a, K, V>), +} + +/// Like [`Entry`], except that it doesn't have ownership of the key. +enum RawEntry<'a, K, V> { + Vacant(RawVacantEntry<'a, K, V>), + Occupied(OccupiedEntry<'a, K, V>), +} + +/// A view into a vacant entry in a [`RBTree`]. It is part of the [`Entry`] enum. 
+pub struct VacantEntry<'a, K, V> { + key: K, + raw: RawVacantEntry<'a, K, V>, +} + +/// Like [`VacantEntry`], but doesn't hold on to the key. +/// +/// # Invariants +/// - `parent` may be null if the new node becomes the root. +/// - `child_field_of_parent` is a valid pointer to the left-child or right-child of `parent`. If `parent` is +/// null, it is a pointer to the root of the [`RBTree`]. +struct RawVacantEntry<'a, K, V> { + rbtree: *mut RBTree<K, V>, + /// The node that will become the parent of the new node if we insert one. + parent: *mut bindings::rb_node, + /// This points to the left-child or right-child field of `parent`, or `root` if `parent` is + /// null. + child_field_of_parent: *mut *mut bindings::rb_node, + _phantom: PhantomData<&'a mut RBTree<K, V>>, +} + +impl<'a, K, V> RawVacantEntry<'a, K, V> { + /// Inserts the given node into the [`RBTree`] at this entry. + /// + /// The `node` must have a key such that inserting it here does not break the ordering of this + /// [`RBTree`]. + fn insert(self, node: RBTreeNode<K, V>) -> &'a mut V { + let node = Box::into_raw(node.node); + + // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when + // the node is removed or replaced. + let node_links = unsafe { addr_of_mut!((*node).links) }; + + // INVARIANT: We are linking in a new node, which is valid. It remains valid because we + // "forgot" it with `Box::into_raw`. + // SAFETY: The type invariants of `RawVacantEntry` are exactly the safety requirements of `rb_link_node`. + unsafe { bindings::rb_link_node(node_links, self.parent, self.child_field_of_parent) }; + + // SAFETY: All pointers are valid. `node` has just been inserted into the tree. + unsafe { bindings::rb_insert_color(node_links, addr_of_mut!((*self.rbtree).root)) }; + + // SAFETY: The node is valid until we remove it from the tree. 
+ unsafe { &mut (*node).value } + } +} + +impl<'a, K, V> VacantEntry<'a, K, V> { + /// Inserts the given node into the [`RBTree`] at this entry. + pub fn insert(self, value: V, reservation: RBTreeNodeReservation<K, V>) -> &'a mut V { + self.raw.insert(reservation.into_node(self.key, value)) + } +} + +/// A view into an occupied entry in a [`RBTree`]. It is part of the [`Entry`] enum. +/// +/// # Invariants +/// - `node_links` is a valid, non-null pointer to a tree node in `self.rbtree` +pub struct OccupiedEntry<'a, K, V> { + rbtree: &'a mut RBTree<K, V>, + /// The node that this entry corresponds to. + node_links: *mut bindings::rb_node, +} + +impl<'a, K, V> OccupiedEntry<'a, K, V> { + /// Gets a reference to the value in the entry. + pub fn get(&self) -> &V { + // SAFETY: + // - `self.node_links` is a valid pointer to a node in the tree. + // - We have shared access to the underlying tree, and can thus give out a shared reference. + unsafe { &(*container_of!(self.node_links, Node<K, V>, links)).value } + } + + /// Gets a mutable reference to the value in the entry. + pub fn get_mut(&mut self) -> &mut V { + // SAFETY: + // - `self.node_links` is a valid pointer to a node in the tree. + // - We have exclusive access to the underlying tree, and can thus give out a mutable reference. + unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links).cast_mut())).value } + } + + /// Converts the entry into a mutable reference to its value. + /// + /// If you need multiple references to the `OccupiedEntry`, see [`self#get_mut`]. + pub fn into_mut(self) -> &'a mut V { + // SAFETY: + // - `self.node_links` is a valid pointer to a node in the tree. + // - This consumes the `&'a mut RBTree<K, V>`, therefore it can give out a mutable reference that lives for `'a`. + unsafe { &mut (*(container_of!(self.node_links, Node<K, V>, links).cast_mut())).value } + } + + /// Remove this entry from the [`RBTree`]. 
+ pub fn remove_node(self) -> RBTreeNode<K, V> { + // SAFETY: The node is a node in the tree, so it is valid. + unsafe { bindings::rb_erase(self.node_links, &mut self.rbtree.root) }; + + // INVARIANT: The node is being returned and the caller may free it, however, it was + // removed from the tree. So the invariants still hold. + RBTreeNode { + // SAFETY: The node was a node in the tree, but we removed it, so we can convert it + // back into a box. + node: unsafe { + Box::from_raw(container_of!(self.node_links, Node<K, V>, links).cast_mut()) + }, + } + } + + /// Takes the value of the entry out of the map, and returns it. + pub fn remove(self) -> V { + self.remove_node().node.value + } + + /// Swap the current node for the provided node. + /// + /// The key of both nodes must be equal. + fn replace(self, node: RBTreeNode<K, V>) -> RBTreeNode<K, V> { + let node = Box::into_raw(node.node); + + // SAFETY: `node` is valid at least until we call `Box::from_raw`, which only happens when + // the node is removed or replaced. + let new_node_links = unsafe { addr_of_mut!((*node).links) }; + + // SAFETY: This updates the pointers so that `new_node_links` is in the tree where + // `self.node_links` used to be. + unsafe { + bindings::rb_replace_node(self.node_links, new_node_links, &mut self.rbtree.root) + }; + + // SAFETY: + // - `self.node_links` is a valid pointer to a node in the tree. + // - Now that we removed this entry from the tree, we can convert the node to a box. + let old_node = + unsafe { Box::from_raw(container_of!(self.node_links, Node<K, V>, links).cast_mut()) }; + + RBTreeNode { node: old_node } + } +} + +struct Node<K, V> { + links: bindings::rb_node, + key: K, + value: V, +} diff --git a/rust/kernel/security.rs b/rust/kernel/security.rs new file mode 100644 index 000000000000..2522868862a1 --- /dev/null +++ b/rust/kernel/security.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//!
Linux Security Modules (LSM). +//! +//! C header: [`include/linux/security.h`](srctree/include/linux/security.h). + +use crate::{ + bindings, + error::{to_result, Result}, +}; + +/// A security context string. +/// +/// # Invariants +/// +/// The `secdata` and `seclen` fields correspond to a valid security context as returned by a +/// successful call to `security_secid_to_secctx`, that has not yet been destroyed by calling +/// `security_release_secctx`. +pub struct SecurityCtx { + secdata: *mut core::ffi::c_char, + seclen: usize, +} + +impl SecurityCtx { + /// Get the security context given its id. + pub fn from_secid(secid: u32) -> Result<Self> { + let mut secdata = core::ptr::null_mut(); + let mut seclen = 0u32; + // SAFETY: Just a C FFI call. The pointers are valid for writes. + to_result(unsafe { bindings::security_secid_to_secctx(secid, &mut secdata, &mut seclen) })?; + + // INVARIANT: If the above call did not fail, then we have a valid security context. + Ok(Self { + secdata, + seclen: seclen as usize, + }) + } + + /// Returns whether the security context is empty. + pub fn is_empty(&self) -> bool { + self.seclen == 0 + } + + /// Returns the length of this security context. + pub fn len(&self) -> usize { + self.seclen + } + + /// Returns the bytes for this security context. + pub fn as_bytes(&self) -> &[u8] { + let ptr = self.secdata; + if ptr.is_null() { + debug_assert_eq!(self.seclen, 0); + // We can't pass a null pointer to `slice::from_raw_parts` even if the length is zero. + return &[]; + } + + // SAFETY: The call to `security_secid_to_secctx` guarantees that the pointer is valid for + // `seclen` bytes. Furthermore, if the length is zero, then we have ensured that the + // pointer is not null. 
+ unsafe { core::slice::from_raw_parts(ptr.cast(), self.seclen) } + } +} + +impl Drop for SecurityCtx { + fn drop(&mut self) { + // SAFETY: By the invariant of `Self`, this frees a pointer that came from a successful + // call to `security_secid_to_secctx` and has not yet been destroyed by + // `security_release_secctx`. + unsafe { bindings::security_release_secctx(self.secdata, self.seclen as u32) }; + } +} diff --git a/rust/kernel/seq_file.rs b/rust/kernel/seq_file.rs new file mode 100644 index 000000000000..6ca29d576d02 --- /dev/null +++ b/rust/kernel/seq_file.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Seq file bindings. +//! +//! C header: [`include/linux/seq_file.h`](srctree/include/linux/seq_file.h) + +use crate::{bindings, c_str, types::NotThreadSafe, types::Opaque}; + +/// A utility for generating the contents of a seq file. +#[repr(transparent)] +pub struct SeqFile { + inner: Opaque<bindings::seq_file>, + _not_send: NotThreadSafe, +} + +impl SeqFile { + /// Creates a new [`SeqFile`] from a raw pointer. + /// + /// # Safety + /// + /// The caller must ensure that for the duration of 'a the following is satisfied: + /// * The pointer points at a valid `struct seq_file`. + /// * The `struct seq_file` is not accessed from any other thread. + pub unsafe fn from_raw<'a>(ptr: *mut bindings::seq_file) -> &'a SeqFile { + // SAFETY: The caller ensures that the reference is valid for 'a. There's no way to trigger + // a data race by using the `&SeqFile` since this is the only thread accessing the seq_file. + // + // CAST: The layout of `struct seq_file` and `SeqFile` is compatible. + unsafe { &*ptr.cast() } + } + + /// Used by the [`seq_print`] macro. + pub fn call_printf(&self, args: core::fmt::Arguments<'_>) { + // SAFETY: Passing a void pointer to `Arguments` is valid for `%pA`. 
+ unsafe { + bindings::seq_printf( + self.inner.get(), + c_str!("%pA").as_char_ptr(), + &args as *const _ as *const core::ffi::c_void, + ); + } + } +} + +/// Write to a [`SeqFile`] with the ordinary Rust formatting syntax. +#[macro_export] +macro_rules! seq_print { + ($m:expr, $($arg:tt)+) => ( + $m.call_printf(format_args!($($arg)+)) + ); +} +pub use seq_print; diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs index 39679a960c1a..67bf9d37ddb5 100644 --- a/rust/kernel/std_vendor.rs +++ b/rust/kernel/std_vendor.rs @@ -136,7 +136,7 @@ /// /// [`std::dbg`]: https://doc.rust-lang.org/std/macro.dbg.html /// [`eprintln`]: https://doc.rust-lang.org/std/macro.eprintln.html -/// [`printk`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html +/// [`printk`]: https://docs.kernel.org/core-api/printk-basics.html /// [`pr_info`]: crate::pr_info! /// [`pr_debug`]: crate::pr_debug! #[macro_export] diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs index 0ab20975a3b5..bae4a5179c72 100644 --- a/rust/kernel/sync.rs +++ b/rust/kernel/sync.rs @@ -11,6 +11,7 @@ mod arc; mod condvar; pub mod lock; mod locked_by; +pub mod poll; pub use arc::{Arc, ArcBorrow, UniqueArc}; pub use condvar::{new_condvar, CondVar, CondVarTimeoutResult}; diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs index 3673496c2363..3021f30fd822 100644 --- a/rust/kernel/sync/arc.rs +++ b/rust/kernel/sync/arc.rs @@ -12,12 +12,13 @@ //! 2. It does not support weak references, which allows it to be half the size. //! 3. It saturates the reference count instead of aborting when it goes over a threshold. //! 4. It does not provide a `get_mut` method, so the ref counted object is pinned. +//! 5. The object in [`Arc`] is pinned implicitly. //! //! 
[`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html use crate::{ alloc::{box_ext::BoxExt, AllocError, Flags}, - error::{self, Error}, + bindings, init::{self, InPlaceInit, Init, PinInit}, try_init, types::{ForeignOwnable, Opaque}, @@ -209,28 +210,6 @@ impl<T> Arc<T> { // `Arc` object. Ok(unsafe { Self::from_inner(Box::leak(inner).into()) }) } - - /// Use the given initializer to in-place initialize a `T`. - /// - /// If `T: !Unpin` it will not be able to move afterwards. - #[inline] - pub fn pin_init<E>(init: impl PinInit<T, E>, flags: Flags) -> error::Result<Self> - where - Error: From<E>, - { - UniqueArc::pin_init(init, flags).map(|u| u.into()) - } - - /// Use the given initializer to in-place initialize a `T`. - /// - /// This is equivalent to [`Arc<T>::pin_init`], since an [`Arc`] is always pinned. - #[inline] - pub fn init<E>(init: impl Init<T, E>, flags: Flags) -> error::Result<Self> - where - Error: From<E>, - { - UniqueArc::init(init, flags).map(|u| u.into()) - } } impl<T: ?Sized> Arc<T> { diff --git a/rust/kernel/sync/lock.rs b/rust/kernel/sync/lock.rs index f6c34ca4d819..d6e9bab114b8 100644 --- a/rust/kernel/sync/lock.rs +++ b/rust/kernel/sync/lock.rs @@ -6,8 +6,13 @@ //! spinlocks, raw spinlocks) to be provided with minimal effort. use super::LockClassKey; -use crate::{init::PinInit, pin_init, str::CStr, types::Opaque, types::ScopeGuard}; -use core::{cell::UnsafeCell, marker::PhantomData, marker::PhantomPinned}; +use crate::{ + init::PinInit, + pin_init, + str::CStr, + types::{NotThreadSafe, Opaque, ScopeGuard}, +}; +use core::{cell::UnsafeCell, marker::PhantomPinned}; use macros::pin_data; pub mod mutex; @@ -139,7 +144,7 @@ impl<T: ?Sized, B: Backend> Lock<T, B> { pub struct Guard<'a, T: ?Sized, B: Backend> { pub(crate) lock: &'a Lock<T, B>, pub(crate) state: B::GuardState, - _not_send: PhantomData<*mut ()>, + _not_send: NotThreadSafe, } // SAFETY: `Guard` is sync when the data protected by the lock is also sync. 
@@ -191,7 +196,7 @@ impl<'a, T: ?Sized, B: Backend> Guard<'a, T, B> { Self { lock, state, - _not_send: PhantomData, + _not_send: NotThreadSafe, } } } diff --git a/rust/kernel/sync/locked_by.rs b/rust/kernel/sync/locked_by.rs index babc731bd5f6..ce2ee8d87865 100644 --- a/rust/kernel/sync/locked_by.rs +++ b/rust/kernel/sync/locked_by.rs @@ -83,8 +83,12 @@ pub struct LockedBy<T: ?Sized, U: ?Sized> { // SAFETY: `LockedBy` can be transferred across thread boundaries iff the data it protects can. unsafe impl<T: ?Sized + Send, U: ?Sized> Send for LockedBy<T, U> {} -// SAFETY: `LockedBy` serialises the interior mutability it provides, so it is `Sync` as long as the -// data it protects is `Send`. +// SAFETY: If `T` is not `Sync`, then parallel shared access to this `LockedBy` allows you to use +// `access_mut` to hand out `&mut T` on one thread at a time. The requirement that `T: Send` is +// sufficient to allow that. +// +// If `T` is `Sync`, then the `access` method also becomes available, which allows you to obtain +// several `&T` from several threads at once. However, this is okay as `T` is `Sync`. unsafe impl<T: ?Sized + Send, U: ?Sized> Sync for LockedBy<T, U> {} impl<T, U> LockedBy<T, U> { @@ -118,7 +122,10 @@ impl<T: ?Sized, U> LockedBy<T, U> { /// /// Panics if `owner` is different from the data protected by the lock used in /// [`new`](LockedBy::new). - pub fn access<'a>(&'a self, owner: &'a U) -> &'a T { + pub fn access<'a>(&'a self, owner: &'a U) -> &'a T + where + T: Sync, + { build_assert!( size_of::<U>() > 0, "`U` cannot be a ZST because `owner` wouldn't be unique" @@ -127,7 +134,10 @@ impl<T: ?Sized, U> LockedBy<T, U> { panic!("mismatched owners"); } - // SAFETY: `owner` is evidence that the owner is locked.
+ // SAFETY: `owner` is evidence that there are only shared references to the owner for the + // duration of 'a, so it's not possible to use `Self::access_mut` to obtain a mutable + // reference to the inner value that aliases with this shared reference. The type is `Sync` + // so there are no other requirements. unsafe { &*self.data.get() } } diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs new file mode 100644 index 000000000000..d5f17153b424 --- /dev/null +++ b/rust/kernel/sync/poll.rs @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Utilities for working with `struct poll_table`. + +use crate::{ + bindings, + fs::File, + prelude::*, + sync::{CondVar, LockClassKey}, + types::Opaque, +}; +use core::ops::Deref; + +/// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class. +#[macro_export] +macro_rules! new_poll_condvar { + ($($name:literal)?) => { + $crate::sync::poll::PollCondVar::new( + $crate::optional_name!($($name)?), $crate::static_lock_class!() + ) + }; +} + +/// Wraps the kernel's `struct poll_table`. +/// +/// # Invariants +/// +/// This struct contains a valid `struct poll_table`. +/// +/// For a `struct poll_table` to be valid, its `_qproc` function must follow the safety +/// requirements of `_qproc` functions: +/// +/// * The `_qproc` function is given permission to enqueue a waiter to the provided `poll_table` +/// during the call. Once the waiter is removed and an rcu grace period has passed, it must no +/// longer access the `wait_queue_head`. +#[repr(transparent)] +pub struct PollTable(Opaque<bindings::poll_table>); + +impl PollTable { + /// Creates a reference to a [`PollTable`] from a valid pointer. + /// + /// # Safety + /// + /// The caller must ensure that for the duration of 'a, the pointer will point at a valid poll + /// table (as defined in the type invariants). 
+ /// + /// The caller must also ensure that the `poll_table` is only accessed via the returned + /// reference for the duration of 'a. + pub unsafe fn from_ptr<'a>(ptr: *mut bindings::poll_table) -> &'a mut PollTable { + // SAFETY: The safety requirements guarantee the validity of the dereference, while the + // `PollTable` type being transparent makes the cast ok. + unsafe { &mut *ptr.cast() } + } + + fn get_qproc(&self) -> bindings::poll_queue_proc { + let ptr = self.0.get(); + // SAFETY: The `ptr` is valid because it originates from a reference, and the `_qproc` + // field is not modified concurrently with this call since we have an immutable reference. + unsafe { (*ptr)._qproc } + } + + /// Register this [`PollTable`] with the provided [`PollCondVar`], so that it can be notified + /// using the condition variable. + pub fn register_wait(&mut self, file: &File, cv: &PollCondVar) { + if let Some(qproc) = self.get_qproc() { + // SAFETY: The pointers to `file` and `self` need to be valid for the duration of this + // call to `qproc`, which they are because they are references. + // + // The `cv.wait_queue_head` pointer must be valid until an rcu grace period after the + // waiter is removed. The `PollCondVar` is pinned, so before `cv.wait_queue_head` can + // be destroyed, the destructor must run. That destructor first removes all waiters, + // and then waits for an rcu grace period. Therefore, `cv.wait_queue_head` is valid for + // long enough. + unsafe { qproc(file.as_ptr() as _, cv.wait_queue_head.get(), self.0.get()) }; + } + } +} + +/// A wrapper around [`CondVar`] that makes it usable with [`PollTable`]. +/// +/// [`CondVar`]: crate::sync::CondVar +#[pin_data(PinnedDrop)] +pub struct PollCondVar { + #[pin] + inner: CondVar, +} + +impl PollCondVar { + /// Constructs a new condvar initialiser. 
+ pub fn new(name: &'static CStr, key: &'static LockClassKey) -> impl PinInit<Self> { + pin_init!(Self { + inner <- CondVar::new(name, key), + }) + } +} + +// Make the `CondVar` methods callable on `PollCondVar`. +impl Deref for PollCondVar { + type Target = CondVar; + + fn deref(&self) -> &CondVar { + &self.inner + } +} + +#[pinned_drop] +impl PinnedDrop for PollCondVar { + fn drop(self: Pin<&mut Self>) { + // Clear anything registered using `register_wait`. + // + // SAFETY: The pointer points at a valid `wait_queue_head`. + unsafe { bindings::__wake_up_pollfree(self.inner.wait_queue_head.get()) }; + + // Wait for epoll items to be properly removed. + // + // SAFETY: Just an FFI call. + unsafe { bindings::synchronize_rcu() }; + } +} diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs index 55dff7e088bf..080599075875 100644 --- a/rust/kernel/task.rs +++ b/rust/kernel/task.rs @@ -4,10 +4,13 @@ //! //! C header: [`include/linux/sched.h`](srctree/include/linux/sched.h). -use crate::types::Opaque; +use crate::{ + bindings, + types::{NotThreadSafe, Opaque}, +}; use core::{ + cmp::{Eq, PartialEq}, ffi::{c_int, c_long, c_uint}, - marker::PhantomData, ops::Deref, ptr, }; @@ -94,7 +97,22 @@ unsafe impl Sync for Task {} /// The type of process identifiers (PIDs). type Pid = bindings::pid_t; +/// The type of user identifiers (UIDs). +#[derive(Copy, Clone)] +pub struct Kuid { + kuid: bindings::kuid_t, +} + impl Task { + /// Returns a raw pointer to the current task. + /// + /// It is up to the user to use the pointer correctly. + #[inline] + pub fn current_raw() -> *mut bindings::task_struct { + // SAFETY: Getting the current pointer is always safe. + unsafe { bindings::get_current() } + } + /// Returns a task reference for the currently executing task/thread. 
/// /// The recommended way to get the current task/thread is to use the @@ -106,7 +124,7 @@ impl Task { pub unsafe fn current() -> impl Deref<Target = Task> { struct TaskRef<'a> { task: &'a Task, - _not_send: PhantomData<*mut ()>, + _not_send: NotThreadSafe, } impl Deref for TaskRef<'_> { @@ -117,23 +135,27 @@ impl Task { } } - // SAFETY: Just an FFI call with no additional safety requirements. - let ptr = unsafe { bindings::get_current() }; - + let current = Task::current_raw(); TaskRef { // SAFETY: If the current thread is still running, the current task is valid. Given // that `TaskRef` is not `Send`, we know it cannot be transferred to another thread // (where it could potentially outlive the caller). - task: unsafe { &*ptr.cast() }, - _not_send: PhantomData, + task: unsafe { &*current.cast() }, + _not_send: NotThreadSafe, } } + /// Returns a raw pointer to the task. + #[inline] + pub fn as_ptr(&self) -> *mut bindings::task_struct { + self.0.get() + } + /// Returns the group leader of the given task. pub fn group_leader(&self) -> &Task { - // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always - // have a valid `group_leader`. - let ptr = unsafe { *ptr::addr_of!((*self.0.get()).group_leader) }; + // SAFETY: The group leader of a task never changes after initialization, so reading this + // field is not a data race. + let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) }; // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`, // and given that a task has a reference to its group leader, we know it must be valid for @@ -143,23 +165,41 @@ impl Task { /// Returns the PID of the given task. pub fn pid(&self) -> Pid { - // SAFETY: By the type invariant, we know that `self.0` is a valid task. Valid tasks always - // have a valid pid. 
- unsafe { *ptr::addr_of!((*self.0.get()).pid) } + // SAFETY: The pid of a task never changes after initialization, so reading this field is + // not a data race. + unsafe { *ptr::addr_of!((*self.as_ptr()).pid) } + } + + /// Returns the UID of the given task. + pub fn uid(&self) -> Kuid { + // SAFETY: It's always safe to call `task_uid` on a valid task. + Kuid::from_raw(unsafe { bindings::task_uid(self.as_ptr()) }) + } + + /// Returns the effective UID of the given task. + pub fn euid(&self) -> Kuid { + // SAFETY: It's always safe to call `task_euid` on a valid task. + Kuid::from_raw(unsafe { bindings::task_euid(self.as_ptr()) }) } /// Determines whether the given task has pending signals. pub fn signal_pending(&self) -> bool { - // SAFETY: By the type invariant, we know that `self.0` is valid. - unsafe { bindings::signal_pending(self.0.get()) != 0 } + // SAFETY: It's always safe to call `signal_pending` on a valid task. + unsafe { bindings::signal_pending(self.as_ptr()) != 0 } + } + + /// Returns the given task's pid in the current pid namespace. + pub fn pid_in_current_ns(&self) -> Pid { + // SAFETY: It's valid to pass a null pointer as the namespace (defaults to current + // namespace). The task pointer is also valid. + unsafe { bindings::task_tgid_nr_ns(self.as_ptr(), ptr::null_mut()) } } /// Wakes up the task. pub fn wake_up(&self) { - // SAFETY: By the type invariant, we know that `self.0.get()` is non-null and valid. - // And `wake_up_process` is safe to be called for any valid task, even if the task is + // SAFETY: It's always safe to call `wake_up_process` on a valid task, even if the task is // running. - unsafe { bindings::wake_up_process(self.0.get()) }; + unsafe { bindings::wake_up_process(self.as_ptr()) }; } } @@ -167,7 +207,7 @@ impl Task { unsafe impl crate::types::AlwaysRefCounted for Task { fn inc_ref(&self) { // SAFETY: The existence of a shared reference means that the refcount is nonzero.
- unsafe { bindings::get_task_struct(self.0.get()) }; + unsafe { bindings::get_task_struct(self.as_ptr()) }; } unsafe fn dec_ref(obj: ptr::NonNull<Self>) { @@ -175,3 +215,43 @@ unsafe impl crate::types::AlwaysRefCounted for Task { unsafe { bindings::put_task_struct(obj.cast().as_ptr()) } } } + +impl Kuid { + /// Get the current euid. + #[inline] + pub fn current_euid() -> Kuid { + // SAFETY: Just an FFI call. + Self::from_raw(unsafe { bindings::current_euid() }) + } + + /// Create a `Kuid` given the raw C type. + #[inline] + pub fn from_raw(kuid: bindings::kuid_t) -> Self { + Self { kuid } + } + + /// Turn this kuid into the raw C type. + #[inline] + pub fn into_raw(self) -> bindings::kuid_t { + self.kuid + } + + /// Converts this kernel UID into a userspace UID. + /// + /// Uses the namespace of the current task. + #[inline] + pub fn into_uid_in_current_ns(self) -> bindings::uid_t { + // SAFETY: Just an FFI call. + unsafe { bindings::from_kuid(bindings::current_user_ns(), self.kuid) } + } +} + +impl PartialEq for Kuid { + #[inline] + fn eq(&self, other: &Kuid) -> bool { + // SAFETY: Just an FFI call. + unsafe { bindings::uid_eq(self.kuid, other.kuid) } + } +} + +impl Eq for Kuid {} diff --git a/rust/kernel/tracepoint.rs b/rust/kernel/tracepoint.rs new file mode 100644 index 000000000000..c6e80aa99e8e --- /dev/null +++ b/rust/kernel/tracepoint.rs @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Copyright (C) 2024 Google LLC. + +//! Logic for tracepoints. + +/// Declare the Rust entry point for a tracepoint. +/// +/// This macro generates an unsafe function that calls into C, and its safety requirements will be +/// whatever the relevant C code requires. To document these safety requirements, you may add +/// doc-comments when invoking the macro. +#[macro_export] +macro_rules! 
declare_trace { + ($($(#[$attr:meta])* $pub:vis unsafe fn $name:ident($($argname:ident : $argtyp:ty),* $(,)?);)*) => {$( + $( #[$attr] )* + #[inline(always)] + $pub unsafe fn $name($($argname : $argtyp),*) { + #[cfg(CONFIG_TRACEPOINTS)] + { + // SAFETY: It's always okay to query the static key for a tracepoint. + let should_trace = unsafe { + $crate::macros::paste! { + $crate::jump_label::static_branch_unlikely!( + $crate::bindings::[< __tracepoint_ $name >], + $crate::bindings::tracepoint, + key + ) + } + }; + + if should_trace { + $crate::macros::paste! { + // SAFETY: The caller guarantees that it is okay to call this tracepoint. + unsafe { $crate::bindings::[< rust_do_trace_ $name >]($($argname),*) }; + } + } + } + + #[cfg(not(CONFIG_TRACEPOINTS))] + { + // If tracepoints are disabled, insert a trivial use of each argument + // to avoid unused argument warnings. + $( let _unused = $argname; )* + } + } + )*} +} + +pub use declare_trace; diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index bd189d646adb..3238ffaab031 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -7,8 +7,9 @@ use alloc::boxed::Box; use core::{ cell::UnsafeCell, marker::{PhantomData, PhantomPinned}, - mem::MaybeUninit, + mem::{ManuallyDrop, MaybeUninit}, ops::{Deref, DerefMut}, + pin::Pin, ptr::NonNull, }; @@ -26,7 +27,10 @@ pub trait ForeignOwnable: Sized { /// Converts a Rust-owned object to a foreign-owned one. /// - /// The foreign representation is a pointer to void. + /// The foreign representation is a pointer to void. There are no guarantees for this pointer. + /// For example, it might be invalid, dangling or pointing to uninitialized memory. Using it in + /// any way except for [`ForeignOwnable::from_foreign`], [`ForeignOwnable::borrow`], + /// [`ForeignOwnable::try_from_foreign`] can result in undefined behavior. fn into_foreign(self) -> *const core::ffi::c_void; /// Borrows a foreign-owned object. 
@@ -89,6 +93,32 @@ impl<T: 'static> ForeignOwnable for Box<T> { } } +impl<T: 'static> ForeignOwnable for Pin<Box<T>> { + type Borrowed<'a> = Pin<&'a T>; + + fn into_foreign(self) -> *const core::ffi::c_void { + // SAFETY: We are still treating the box as pinned. + Box::into_raw(unsafe { Pin::into_inner_unchecked(self) }) as _ + } + + unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Pin<&'a T> { + // SAFETY: The safety requirements for this function ensure that the object is still alive, + // so it is safe to dereference the raw pointer. + // The safety requirements of `from_foreign` also ensure that the object remains alive for + // the lifetime of the returned value. + let r = unsafe { &*ptr.cast() }; + + // SAFETY: This pointer originates from a `Pin<Box<T>>`. + unsafe { Pin::new_unchecked(r) } + } + + unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self { + // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous + // call to `Self::into_foreign`. + unsafe { Pin::new_unchecked(Box::from_raw(ptr as _)) } + } +} + impl ForeignOwnable for () { type Borrowed<'a> = (); @@ -366,6 +396,35 @@ impl<T: AlwaysRefCounted> ARef<T> { _p: PhantomData, } } + + /// Consumes the `ARef`, returning a raw pointer. + /// + /// This function does not change the refcount. After calling this function, the caller is + /// responsible for the refcount previously managed by the `ARef`. 
+ /// + /// # Examples + /// + /// ``` + /// use core::ptr::NonNull; + /// use kernel::types::{ARef, AlwaysRefCounted}; + /// + /// struct Empty {} + /// + /// unsafe impl AlwaysRefCounted for Empty { + /// fn inc_ref(&self) {} + /// unsafe fn dec_ref(_obj: NonNull<Self>) {} + /// } + /// + /// let mut data = Empty {}; + /// let ptr = NonNull::<Empty>::new(&mut data as *mut _).unwrap(); + /// let data_ref: ARef<Empty> = unsafe { ARef::from_raw(ptr) }; + /// let raw_ptr: NonNull<Empty> = ARef::into_raw(data_ref); + /// + /// assert_eq!(ptr, raw_ptr); + /// ``` + pub fn into_raw(me: Self) -> NonNull<T> { + ManuallyDrop::new(me).ptr + } } impl<T: AlwaysRefCounted> Clone for ARef<T> { @@ -473,3 +532,24 @@ unsafe impl AsBytes for str {} // does not have any uninitialized portions either. unsafe impl<T: AsBytes> AsBytes for [T] {} unsafe impl<T: AsBytes, const N: usize> AsBytes for [T; N] {} + +/// Zero-sized type to mark types not [`Send`]. +/// +/// Add this type as a field to your struct if your type should not be sent to a different task. +/// Since [`Send`] is an auto trait, adding a single field that is `!Send` will ensure that the +/// whole type is `!Send`. +/// +/// If a type is `!Send` it is impossible to give control over an instance of the type to another +/// task. This is useful to include in types that store or reference task-local information. A file +/// descriptor is an example of such task-local information. +/// +/// This type also makes the type `!Sync`, which prevents immutable access to the value from +/// several threads in parallel. +pub type NotThreadSafe = PhantomData<*mut ()>; + +/// Used to construct instances of type [`NotThreadSafe`] similar to how `PhantomData` is +/// constructed. +/// +/// [`NotThreadSafe`]: type@NotThreadSafe +#[allow(non_upper_case_globals)] +pub const NotThreadSafe: NotThreadSafe = PhantomData; |