在上一篇文章中,我们设计了 simplefs 的数据存储格式并实现了对应的格式化工具,但我们在 `fill_super` 中还没有实际读取磁盘,也就是说,我们其实还没有真正用到磁盘上存储的数据。现在,是时候让文件系统列出磁盘上的文件了。
# 准备
由于我们的文件系统此时还无法读写文件,所以我们直接用 mkfs.simplefs
在格式化文件系统时向磁盘添加几个文件用于后面测试。
struct file_block blk; | |
blk.busy = 1; | |
blk.mode = S_IFREG; | |
blk.idx = 1; | |
blk.parent_idx = 0; | |
blk.data[0] = 'a'; | |
strcpy(blk.filename, "file1"); // 普通文件 file1,位于根目录下 | |
ret = write(fd, (char *)&blk, sizeof(struct file_block)); | |
blk.busy = 1; | |
blk.mode = S_IFREG; | |
blk.idx = 2; | |
blk.parent_idx = 0; | |
blk.data[0] = 'b'; | |
strcpy(blk.filename, "file2"); // 普通文件 file2,位于根目录下 | |
ret = write(fd, (char *)&blk, sizeof(struct file_block)); | |
blk.busy = 1; | |
blk.mode = S_IFDIR; | |
blk.idx = 3; | |
blk.parent_idx = 0; | |
strcpy(blk.filename, "dir1"); // 目录 dir1,位于根目录下 | |
ret = write(fd, (char *)&blk, sizeof(struct file_block)); | |
blk.busy = 1; | |
blk.mode = S_IFREG; | |
blk.idx = 4; | |
blk.parent_idx = 3; | |
blk.data[0] = 'c'; | |
strcpy(blk.filename, "file3"); // 普通文件 file3,位于 dir1 目录下 | |
ret = write(fd, (char *)&blk, sizeof(struct file_block)); |
# 自定义 inode 数据
由于我们在创建文件系统时写入了第一个文件,也就是根目录(用于替代 superblock),所以我们在 fill_super
时要将该数据读取出来,用于填充 根inode
。
int simplefs_fill_super(struct super_block *sb, void *data, int silent) | |
{ | |
struct inode *root_inode; | |
struct buffer_head *bh; | |
bh = (struct buffer_head *)sb_bread(sb, 0); | |
/* A magic number that uniquely identifies our filesystem type */ | |
sb->s_magic = 0x20250130; | |
root_inode = simplefs_get_inode(sb, NULL, S_IFDIR, 0); | |
root_inode->i_op = &simplefs_inode_ops; | |
root_inode->i_fop = &simplefs_dir_operations; | |
root_inode->i_private = (struct file_block *)bh->b_data; | |
sb->s_root = d_make_root(root_inode); | |
if (!sb->s_root) | |
return -ENOMEM; | |
return 0; | |
} |
`sb_bread()` 是 Linux 内核中的一个文件系统接口函数,用于从块设备读取一个块(block)到内存中。它是基于超级块(super_block)的 `bread()` 函数,原型为 `struct buffer_head *sb_bread(struct super_block *sb, sector_t block);`
- 参数 `struct super_block *sb`:指向文件系统的超级块,提供块设备的上下文。
- 参数 `sector_t block`:要读取的块号(相对于文件系统的起始位置)。
- 成功:返回指向 `buffer_head` 结构的指针,包含读取的数据块。
- 失败:返回 `NULL`,表示读取失败(如设备错误)。

`buffer_head` 是内核中的一个结构,用于管理缓冲区缓存(buffer cache)。它提供了对块设备的高效访问,这里给出一些关键字段:
struct buffer_head {
char *b_data; // 指向数据块的内存地址
struct block_device *b_bdev; // 块设备结构
sector_t b_blocknr; // 逻辑块号
size_t b_size; // 数据块的大小
unsigned long b_state; // 状态标志(如已修改、锁定等)
struct buffer_head *b_this_page; // 链接到同一页面的其他块
struct page *b_page; // 指向内存页
};
- `simplefs_inode_ops` 指定了 `inode` 支持的方法,为了实现列出目录的操作,至少需要实现 `.lookup` 方法,否则会无法进入(`cd`)目录,并报错:`cd: mnt/: Not a directory`。
- `simplefs_dir_operations` 指定了 `file` 支持的方法,为了实现列出目录的操作,至少需要实现 `.iterate_shared` 方法。
- `root_inode->i_private = (struct file_block *)bh->b_data;` 用 `i_private` 字段存储了 `file_block` 的数据,即用 `i_private` 来存储我们自己定义的一些数据。
# 列出目录功能
在 VFS 中,目录文件也有 struct file
结构。在用户态,调用 open("/some/directory")
时,它会返回一个文件描述符( fd
),这表示进程成功打开了目录。这时,VFS 会创建 struct file
结构来跟踪这个打开的目录,遍历目录内容也需要依赖 file
结构。
当我们执行 ls
命令时,VFS 会先去调用 file_operations
中的 .iterate_shared
方法,该方法替代了原本的 readdir
方法,用于在较新的 Linux 内核中遍历文件目录。该方法的函数原型为 int (*iterate_shared) (struct file *, struct dir_context *);
,该方法会将 ls
后的目录对应的 file
指针作为第一个参数传递进函数,而第二个参数 struct dir_context
类型的声明如下:
struct dir_context { | |
filldir_t actor; // 回调函数,用于填充目录项 | |
loff_t pos; // 当前位置(偏移量),表示目录遍历的进度 | |
}; |
其中, filldir_t
是一个函数指针类型:
/* Callback type that hands one directory entry over to the caller. */
typedef bool (*filldir_t)(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned d_type);
该函数用于将目录项信息传递给用户空间(如 ls
命令)。而 pos
是目录遍历的当前位置,即当前 iterate_shared()
读取到的 dentry 偏移量。每次处理完一个 dentry, pos
都需要更新,确保 ls
或 getdents()
等系统调用可以继续正确读取下一个目录项。
简单梳理下实现列出目录功能的思路:
- 根据 `file` 指针找到对应的 inode,并获取其 `file_block` 数据
- 根据 `inode` 找到对应的 superblock,用于读取磁盘数据
- 遍历磁盘数据,找到所有 `busy = 1` 且 `parent_idx` 与当前 `file_block` 的 `idx` 相等的文件
- 向用户态传递目录项信息
代码如下:
static int simplefs_iterate(struct file *filep, struct dir_context *ctx) | |
{ | |
struct inode *inode = filep->f_path.dentry->d_inode; | |
struct super_block *sb = inode->i_sb; | |
struct buffer_head *bh; | |
struct file_block *blk = inode->i_private; | |
int i; | |
if (unlikely(!S_ISDIR(inode->i_mode))) { | |
printk(KERN_ERR "inode %lu not a directory", inode->i_ino); | |
return -ENOTDIR; | |
} | |
if (ctx->pos) return 0; | |
bh = (struct buffer_head *)sb_bread(sb, 0); | |
if (!bh) { | |
printk(KERN_ERR "Failed to read block 0\n"); | |
return -EIO; | |
} | |
struct file_block *block = (struct file_block *)bh->b_data; | |
for (i = 1; i < SIMPLEFS_MAX_FILES; i++) { | |
if (block[i].busy && block[i].parent_idx == blk->idx) { | |
if (!dir_emit(ctx, block[i].filename, strlen(block[i].filename), get_next_ino(), DT_UNKNOWN)) { | |
brelse(bh); | |
return -ENOMEM; | |
} | |
ctx->pos += sizeof(struct file_block); // 更新 pos,指向下一个 dentry | |
} | |
} | |
brelse(bh); | |
return 0; | |
} |
除了遍历所有 `file_block` 的逻辑外,还有三点需要额外说明:
1. 遍历应从第 2 个 `file_block` 开始,因为第一个是根目录,所以 `for` 循环的初始条件为 `i = 1`。
2. 在使用完 `sb_bread` 后应记得使用 `brelse` 来释放获取的缓冲区,防止内存泄露。
3. `dir_emit` 是 `filldir_t` 的封装,作用是向用户空间传递目录项,是 `iterate_shared()` 处理目录遍历时的标准方式,其原型如下:`bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type);`
   - 参数
     - `ctx` → `dir_context` 结构体,存储当前遍历状态
     - `name` → 目录项名称
     - `namelen` → 目录项名称长度
     - `ino` → inode 号(必须是合法的 `ino`,不能是 `0` 或无效值)
     - `type` → 目录项类型,如 `DT_REG`、`DT_DIR`
   - 返回值
     - `true` → 成功添加目录项
     - `false` → 失败(通常是用户空间缓冲区满)
实现了 `simplefs_iterate` 后,别忘了把它注册到 `file_operations` 中(`lookup` 同理,需要注册到 `inode_operations` 中):
const struct file_operations simplefs_dir_operations = { | |
.owner = THIS_MODULE, | |
.iterate_shared = simplefs_iterate, | |
}; | |
static struct inode_operations simplefs_inode_ops = { | |
.lookup = simplefs_lookup, | |
}; |
# 测试
❯ ./mkfs.simplefs simplefs.img | |
Super block written successfully | |
❯ sudo insmod simplefs.ko | |
❯ sudo mount -o loop -t simplefs simplefs.img /mnt | |
❯ cd /mnt && ls | |
ls: cannot access 'file1': No such file or directory | |
ls: cannot access 'file2': No such file or directory | |
ls: cannot access 'dir1': No such file or directory | |
dir1 file1 file2 |
这里出现 cannot access 'file1': No such file or directory
是正常的,是因为 ls
在调用 iterate_shared
后还会再去调用 lookup
方法获取指定文件的信息,而我们的 lookup
目前只返回了 NULL
,所以会出现如上报错,但还是成功列出了所有文件。
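报错:`mount: /home/hzy/mnt: /dev/loopX already mounted or mount point busy.`
出现该报错说明上一次挂载使用的回环设备仍被占用。一个很可能的原因是:如果文件系统的 `kill_sb` 没有调用 `kill_block_super()`(或等价的清理逻辑),卸载时超级块和块设备就不会被真正释放,因此执行 `losetup -D` 也无法删掉虚拟回环设备😠。笔者当时的做法是执行 `sudo rm /dev/loopX`,但这只是删除了设备节点,并没有真正释放设备,不建议作为常规手段。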
下一章我们将继续丰满 iterate_shared
的功能并实现 lookup
方法。
# 代码
整体代码如下
/* | |
* A simple Filesystem for Linux Kernel 6.8.0. | |
* | |
* Initial Author: Gality <gality369@gmail.com> | |
* License: GNU General Public License v3 - https://www.gnu.org/licenses/gpl-3.0.html | |
* Date: 2025-01-30 | |
*/ | |
#include <linux/init.h> | |
#include <linux/module.h> | |
#include <linux/fs.h> | |
#include <linux/buffer_head.h> | |
#include "simple.h" | |
struct dentry *simplefs_lookup(struct inode *parent_inode, | |
struct dentry *child_dentry, unsigned int flags); | |
/** | |
* @filep: The file structure that represents the file being iterated. | |
* @ctx: The directory context structure that is used for directory iteration. | |
* @return: 0 on success, and error code on failure. | |
* | |
* This function is called when the VFS needs to iterate over the files in a directory. | |
* It reads the directory's file blocks and fills the directory context with the file names. | |
*/ | |
static int simplefs_iterate(struct file *filep, struct dir_context *ctx) | |
{ | |
struct inode *inode = filep->f_path.dentry->d_inode; | |
struct super_block *sb = inode->i_sb; | |
struct buffer_head *bh; | |
struct file_block *blk = inode->i_private; | |
int i; | |
if (unlikely(!S_ISDIR(inode->i_mode))) { | |
printk(KERN_ERR "inode %lu not a directory", inode->i_ino); | |
return -ENOTDIR; | |
} | |
if (ctx->pos) return 0; | |
bh = (struct buffer_head *)sb_bread(sb, 0); | |
if (!bh) { | |
printk(KERN_ERR "Failed to read block 0\n"); | |
return -EIO; | |
} | |
struct file_block *block = (struct file_block *)bh->b_data; | |
for (i = 1; i < SIMPLEFS_MAX_FILES; i++) { | |
if (block[i].busy && block[i].parent_idx == blk->idx) { | |
if (!dir_emit(ctx, block[i].filename, strlen(block[i].filename), get_next_ino(), DT_UNKNOWN)) { | |
brelse(bh); | |
return -ENOMEM; | |
} | |
ctx->pos += sizeof(struct file_block); | |
} | |
} | |
brelse(bh); | |
return 0; | |
} | |
const struct file_operations simplefs_dir_operations = { | |
.owner = THIS_MODULE, | |
.iterate_shared = simplefs_iterate, | |
}; | |
static struct inode_operations simplefs_inode_ops = { | |
.lookup = simplefs_lookup, | |
}; | |
struct dentry *simplefs_lookup(struct inode *parent_inode, | |
struct dentry *child_dentry, unsigned int flags) | |
{ | |
/* The lookup function is used for dentry association. | |
* As of now, we don't deal with dentries in simplefs. | |
* So we will keep this simple for now and revisit later */ | |
return NULL; | |
} | |
struct inode *simplefs_get_inode(struct super_block *sb, | |
const struct inode *dir, umode_t mode, | |
dev_t dev); | |
int simplefs_fill_super(struct super_block *sb, void *data, int silent); | |
/** | |
* @sb: The super block of the filesystem (inodes, blocks, etc). | |
* @dir: The parent directory's inode. | |
* @mode: The mode of the inode to be created (S_IFDIR, S_IFREG, etc). | |
* @dev: The device id of the inode to be created. | |
* @return: The inode created. | |
* | |
* This function creates an inode for the filesystem, initializes it | |
* and returns it. For now, it only creates inodes for root directories. | |
*/ | |
struct inode *simplefs_get_inode(struct super_block *sb, | |
const struct inode *dir, umode_t mode, | |
dev_t dev) | |
{ | |
struct inode *inode = new_inode(sb); | |
if (inode) { | |
inode->i_ino = get_next_ino(); | |
inode_init_owner(&nop_mnt_idmap, inode, dir, mode); | |
inode->__i_atime = inode->__i_mtime = inode->__i_ctime = inode_set_ctime_current(inode); | |
switch (mode & S_IFMT) { | |
case S_IFDIR: | |
/* i_nlink will be initialized to 1 in the inode_init_always function | |
* (that gets called inside the new_inode function), | |
* We change it to 2 for directories, for covering the "." entry */ | |
inc_nlink(inode); | |
break; | |
case S_IFREG: | |
case S_IFLNK: | |
default: | |
printk(KERN_ERR | |
"simplefs can create meaningful inode for only root directory at the moment\n"); | |
return NULL; | |
break; | |
} | |
} | |
return inode; | |
} | |
/** | |
* @sb: The superblock which is passed from the VFS to the filesystem. | |
* @data: The mount arguments data that might be passed to the filesystem while mounting. | |
* @silent: A flag to indicate whether the filesystem should print logs or not. | |
* @return: 0 on success, and error code on failure. | |
* | |
* This function fills the super block of the filesystem with necessary information. | |
*/ | |
int simplefs_fill_super(struct super_block *sb, void *data, int silent) | |
{ | |
struct inode *root_inode; | |
struct buffer_head *bh; | |
bh = (struct buffer_head *)sb_bread(sb, 0); | |
/* A magic number that uniquely identifies our filesystem type */ | |
sb->s_magic = 0x20250130; | |
root_inode = simplefs_get_inode(sb, NULL, S_IFDIR, 0); | |
root_inode->i_op = &simplefs_inode_ops; | |
root_inode->i_fop = &simplefs_dir_operations; | |
root_inode->i_private = (struct file_block *)bh->b_data; | |
sb->s_root = d_make_root(root_inode); | |
if (!sb->s_root) | |
return -ENOMEM; | |
return 0; | |
} | |
/** | |
* @fs_type: The filesystem type structure that is registered with the kernel. | |
* @flags: Mount flags (e.g. MS_RDONLY for read-only mounts). | |
* @dev_name: The name of the device to be mounted (/dev/sda1, /dev/sdb1, etc). | |
* @data: Extra data that might be passed to the filesystem while mounting. | |
* @return: The root dentry of the filesystem that is mounted. | |
* | |
* This function is called when the VFS is asked to mount this filesystem | |
* and returns the root dentry of the filesystem. | |
*/ | |
static struct dentry *simplefs_mount(struct file_system_type *fs_type, | |
int flags, const char *dev_name, | |
void *data) | |
{ | |
struct dentry *ret; | |
ret = mount_bdev(fs_type, flags, dev_name, data, simplefs_fill_super); | |
if (unlikely(IS_ERR(ret))) | |
printk(KERN_ERR "Error mounting simplefs"); | |
else | |
printk(KERN_INFO "simplefs is successfully mounted on [%s]\n", | |
dev_name); | |
return ret; | |
} | |
static void simplefs_kill_superblock(struct super_block *s) | |
{ | |
printk(KERN_INFO | |
"simplefs superblock is destroyed. Unmount successful.\n"); | |
/* This is just a dummy function as of now. As our filesystem gets matured, | |
* we will do more meaningful operations here */ | |
return; | |
} | |
struct file_system_type simplefs_fs_type = { | |
.owner = THIS_MODULE, | |
.name = "simplefs", | |
.mount = simplefs_mount, | |
.kill_sb = simplefs_kill_superblock, | |
}; | |
static int simplefs_init(void) | |
{ | |
int ret; | |
ret = register_filesystem(&simplefs_fs_type); | |
if (likely(ret == 0)) | |
printk(KERN_INFO "Successfully registered simplefs\n"); | |
else | |
printk(KERN_ERR "Failed to register simplefs. Error:[%d]", ret); | |
return ret; | |
} | |
static void simplefs_exit(void) | |
{ | |
int ret; | |
ret = unregister_filesystem(&simplefs_fs_type); | |
if (likely(ret == 0)) | |
printk(KERN_INFO "Successfully unregistered simplefs\n"); | |
else | |
printk(KERN_ERR "Failed to unregister simplefs. Error:[%d]", | |
ret); | |
} | |
module_init(simplefs_init); | |
module_exit(simplefs_exit); | |
MODULE_LICENSE("GPL"); | |
MODULE_AUTHOR("Gality"); |