本篇文章是从零实现文件系统的第四篇文章,主要内容是为文件系统增添列出目录的功能。由于列出文件需要用到磁盘上存储的数据,所以也会涉及一点磁盘读取相关的内容。

⚙️ 以下是本篇文章所使用的环境:

  • Ubuntu Server 24.04
  • Linux kernel 6.8.0-51-generic
  • VSCode + GCC

⚓️对应代码版本: V0.3

在上一篇文章中,我们设计了 simplefs 的数据存储格式并实现了对应的格式化工具,但我们在 fill_super 中还没有实际读取磁盘,也就是说,我们其实还没有真正用到磁盘上存储的数据。现在,是时候让文件系统列出磁盘上的文件了。

# 准备

由于我们的文件系统此时还无法读写文件,所以我们直接用 mkfs.simplefs 在格式化文件系统时向磁盘添加几个文件用于后面测试。

struct file_block blk;
	blk.busy = 1;
	blk.mode = S_IFREG;
	blk.idx = 1;
	blk.parent_idx = 0;
	blk.data[0] = 'a';
	strcpy(blk.filename, "file1"); // 普通文件 file1,位于根目录下
	ret = write(fd, (char *)&blk, sizeof(struct file_block));
	blk.busy = 1;
	blk.mode = S_IFREG;
	blk.idx = 2;
	blk.parent_idx = 0;
	blk.data[0] = 'b';
	strcpy(blk.filename, "file2"); // 普通文件 file2,位于根目录下
	ret = write(fd, (char *)&blk, sizeof(struct file_block));
	blk.busy = 1;
	blk.mode = S_IFDIR;
	blk.idx = 3;
	blk.parent_idx = 0;
	strcpy(blk.filename, "dir1"); // 目录 dir1,位于根目录下
	ret = write(fd, (char *)&blk, sizeof(struct file_block));
	blk.busy = 1;
	blk.mode = S_IFREG;
	blk.idx = 4;
	blk.parent_idx = 3;
	blk.data[0] = 'c';
	strcpy(blk.filename, "file3"); // 普通文件 file3,位于 dir1 目录下
  ret = write(fd, (char *)&blk, sizeof(struct file_block));

# 自定义 inode 数据

由于我们在创建文件系统时写入了第一个文件,也就是根目录(用于替代 superblock),所以我们在 fill_super 时要将该数据读取出来,用于填充 根inode

int simplefs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *root_inode;
	struct buffer_head *bh;
	bh = (struct buffer_head *)sb_bread(sb, 0);
	/* A magic number that uniquely identifies our filesystem type */
	sb->s_magic = 0x20250130;
	root_inode = simplefs_get_inode(sb, NULL, S_IFDIR, 0);
	root_inode->i_op = &simplefs_inode_ops;
	root_inode->i_fop = &simplefs_dir_operations;
	root_inode->i_private = (struct file_block *)bh->b_data;
	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}
  • sb_bread() 是 Linux 内核中的一个文件系统接口函数,用于从块设备读取一个块(block)到内存中。它是基于超级块(super_block)的 bread() 函数,原型为

    struct buffer_head *sb_bread(struct super_block *sb, sector_t block);
    • struct super_block *sb :指向文件系统的超级块,提供块设备的上下文。

    • sector_t block :要读取的块号(相对于文件系统的起始位置)。

    • 成功:返回指向 buffer_head 结构的指针,包含读取的数据块。

    • 失败:返回 NULL ,表示读取失败(如设备错误)。

  • buffer_head 是内核中的一个结构,用于管理缓冲区缓存(buffer cache)。它提供了对块设备的高效访问,这里给出一些关键字段

    struct buffer_head {
        char *b_data;                    // pointer to the in-memory data of the block
        struct block_device *b_bdev;     // the block device
        sector_t b_blocknr;              // logical block number
        size_t b_size;                   // size of the data block
        unsigned long b_state;           // state flags (e.g. modified, locked)
        struct buffer_head *b_this_page; // link to the other buffers in the same page
        struct page *b_page;             // the memory page
    };
  • simplefs_inode_ops 指定了 inode 支持的方法,为了实现列出目录的操作,至少需要实现 .lookup 方法,否则会无法进入( cd )目录,会报错: cd: mnt/: Not a directory

  • simplefs_dir_operations 指定了 file 支持的方法,为了实现列出目录的操作,至少需要实现 .iterate_shared 方法。

  • root_inode->i_private = (struct file_block *)bh->b_data; :i_private 字段保存了指向 file_block 数据的指针,即用 i_private 来存储我们自己定义的一些数据。

# 列出目录功能

在 VFS 中,目录文件也有 struct file 结构。在用户态,调用 open("/some/directory") 时,它会返回一个文件描述符( fd ),这表示进程成功打开了目录。这时,VFS 会创建 struct file 结构来跟踪这个打开的目录,遍历目录内容也需要依赖 file 结构。

当我们执行 ls 命令时,VFS 会先去调用 file_operations 中的 .iterate_shared 方法,该方法替代了原本的 readdir 方法,用于在较新的 Linux 内核中遍历文件目录。该方法的函数原型为 int (*iterate_shared) (struct file *, struct dir_context *); ,该方法会将 ls 后的目录对应的 file 指针作为第一个参数传递进函数,而第二个参数 struct dir_context 类型的声明如下:

struct dir_context {
  filldir_t actor;  // callback used to fill in a directory entry
  loff_t pos;       // current position (offset), i.e. the progress of the directory traversal
};

其中, filldir_t 是一个函数指针类型:

typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, unsigned);

该函数用于将目录项信息传递给用户空间(如 ls 命令)。而 pos 是目录遍历的当前位置,即当前 iterate_shared() 读取到的 dentry 偏移量。每次处理完一个 dentry, pos 都需要更新,确保 ls、getdents() 等调用可以继续正确读取下一个目录项。

简单梳理下实现列出目录功能的思路:

  1. 根据 file 指针找到对应的 inode 并获取 file_block 数据
  2. 根据 inode 找到对应的 superblock,用于读取磁盘数据
  3. 遍历磁盘数据,找到所有 busy = 1 且 parent_idx 与当前 file_block 的 idx 相等的文件
  4. 向用户态传递目录项信息

代码如下:

/*
 * List the entries of a directory.
 * ctx->pos holds the index of the next file_block slot to examine, so a
 * listing that stopped because the caller's buffer was full resumes at the
 * right slot on the next call.
 * Returns 0 on success, -ENOTDIR if the inode is not a directory, -EIO if
 * block 0 cannot be read.
 */
static int simplefs_iterate(struct file *filep, struct dir_context *ctx)
{
	struct inode *inode = filep->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct file_block *blk = inode->i_private;
	struct file_block *block;
	struct buffer_head *bh;
	int i;

	if (unlikely(!S_ISDIR(inode->i_mode))) {
		printk(KERN_ERR "inode %lu not a directory\n", inode->i_ino);
		return -ENOTDIR;
	}

	/* Every slot has already been examined by an earlier call. */
	if (ctx->pos >= SIMPLEFS_MAX_FILES)
		return 0;

	bh = sb_bread(sb, 0);
	if (!bh) {
		printk(KERN_ERR "Failed to read block 0\n");
		return -EIO;
	}
	block = (struct file_block *)bh->b_data;

	/* Slot 0 is the root directory itself, so the scan starts at slot 1. */
	for (i = 1; i < SIMPLEFS_MAX_FILES; i++) {
		/* Skip slots that a previous call has already processed. */
		if (i < ctx->pos)
			continue;

		if (block[i].busy && block[i].parent_idx == blk->idx) {
			/*
			 * dir_emit() returning false means the caller's buffer
			 * is full. That is not an error: stop here and leave
			 * pos on this slot so the next call retries it.
			 */
			if (!dir_emit(ctx, block[i].filename,
				      strlen(block[i].filename),
				      get_next_ino(), DT_UNKNOWN))
				break;
		}
		/* Advance pos to the next slot. */
		ctx->pos = i + 1;
	}

	brelse(bh);
	return 0;
}

除了遍历所有 file_block 的逻辑外,有三点需要额外说明的:

  1. 遍历应从第 2 个 file_block 开始,因为第一个是根目录,所以 for 循环的初始条件为 i = 1

  2. 在使用完 sb_bread 后应记得使用 brelse 来释放获取到的 buffer_head,防止内存泄漏。

  3. dir_emit 是对 filldir_t 的封装,作用是向用户空间传递目录项,是 iterate_shared() 处理目录遍历时的标准方式,其原型如下:

    bool dir_emit(struct dir_context *ctx, const char *name, int namelen, u64 ino, unsigned type);
    • 参数
      • ctx → dir_context 结构体,存储当前遍历状态
      • name → 目录项名称
      • namelen → 目录项名称长度
      • ino → inode 号(必须是合法的 ino ,不能是 0 或无效值)
      • type → 目录项类型,如 DT_REG、DT_DIR
    • 返回值
      • true → 成功添加目录项
      • false → 失败(通常是用户空间缓冲区满)

实现了 simplefs_iterate 后,别忘了把它注册到 file_operations 中(lookup 则注册到 inode_operations 中):

/* File operations for simplefs directories: only directory listing is provided. */
const struct file_operations simplefs_dir_operations = {
	.iterate_shared	= simplefs_iterate,
	.owner		= THIS_MODULE,
};
static struct inode_operations simplefs_inode_ops = {
	.lookup = simplefs_lookup,
};

# 测试

❯ ./mkfs.simplefs simplefs.img
Super block written successfully
sudo insmod simplefs.ko
sudo mount -o loop -t simplefs simplefs.img /mnt
cd /mnt && ls
ls: cannot access 'file1': No such file or directory
ls: cannot access 'file2': No such file or directory
ls: cannot access 'dir1': No such file or directory
dir1  file1  file2

这里出现 cannot access 'file1': No such file or directory 是正常的,是因为 ls 在调用 iterate_shared 后还会再去调用 lookup 方法获取指定文件的信息,而我们的 lookup 目前只返回了 NULL ,所以会出现如上报错,但还是成功列出了所有文件。

报错:mount: /home/hzy/mnt: /dev/loopX already mounted or mount point busy.

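执行 sudo rm /dev/loopX 即可,执行 losetup -D 无法删掉虚拟回环设备😠。出现该报错,很可能是因为上一次挂载所占用的回环设备在卸载后没有被释放:通过 mount_bdev() 挂载的文件系统,需要在 kill_sb 回调中调用 kill_block_super() 才会释放块设备。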

下一章我们将继续丰满 iterate_shared 的功能并实现 lookup 方法。

# 代码

整体代码如下

/*
 * A simple Filesystem for Linux Kernel 6.8.0.
 *
 * Initial Author:  Gality <gality369@gmail.com>
 * License: GNU General Public License v3 - https://www.gnu.org/licenses/gpl-3.0.html
 * Date: 2025-01-30
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include "simple.h"
/* Forward declaration: simplefs_inode_ops refers to simplefs_lookup
 * before the function is defined further down in this file. */
struct dentry *simplefs_lookup(struct inode *parent_inode,
			       struct dentry *child_dentry, unsigned int flags);
/**
 * @filep: The file structure that represents the directory being iterated.
 * @ctx: The directory context used for directory iteration. ctx->pos holds
 *       the index of the next file_block slot to examine, so a listing that
 *       stopped because the caller's buffer was full can resume.
 * @return: 0 on success (including when the caller's buffer fills up),
 *          -ENOTDIR if the inode is not a directory, -EIO if block 0
 *          cannot be read.
 *
 * This function is called when the VFS needs to iterate over the files in a
 * directory. It reads block 0, walks the file_block slots and emits the name
 * of every busy slot whose parent_idx equals the idx of this directory.
 */
static int simplefs_iterate(struct file *filep, struct dir_context *ctx)
{
	struct inode *inode = filep->f_path.dentry->d_inode;
	struct super_block *sb = inode->i_sb;
	struct file_block *blk = inode->i_private;
	struct file_block *block;
	struct buffer_head *bh;
	int i;

	if (unlikely(!S_ISDIR(inode->i_mode))) {
		printk(KERN_ERR "inode %lu not a directory\n", inode->i_ino);
		return -ENOTDIR;
	}

	/* Every slot has already been examined by an earlier call. */
	if (ctx->pos >= SIMPLEFS_MAX_FILES)
		return 0;

	bh = sb_bread(sb, 0);
	if (!bh) {
		printk(KERN_ERR "Failed to read block 0\n");
		return -EIO;
	}
	block = (struct file_block *)bh->b_data;

	/* Slot 0 is the root directory itself, so the scan starts at slot 1. */
	for (i = 1; i < SIMPLEFS_MAX_FILES; i++) {
		/* Skip slots that a previous call has already processed. */
		if (i < ctx->pos)
			continue;

		if (block[i].busy && block[i].parent_idx == blk->idx) {
			/*
			 * dir_emit() returning false means the caller's buffer
			 * is full. That is not an error: stop here and leave
			 * pos on this slot so the next call retries it.
			 */
			if (!dir_emit(ctx, block[i].filename,
				      strlen(block[i].filename),
				      get_next_ino(), DT_UNKNOWN))
				break;
		}
		/* Advance pos to the next slot. */
		ctx->pos = i + 1;
	}

	brelse(bh);
	return 0;
}
/* File operations for simplefs directories: only directory listing is provided. */
const struct file_operations simplefs_dir_operations = {
	.iterate_shared	= simplefs_iterate,
	.owner		= THIS_MODULE,
};
static struct inode_operations simplefs_inode_ops = {
	.lookup = simplefs_lookup,
};
/*
 * Placeholder lookup: simplefs does not associate dentries with inodes yet,
 * so every lookup simply reports "nothing found" by returning NULL.
 * None of the parameters are used for now.
 */
struct dentry *simplefs_lookup(struct inode *parent_inode,
			       struct dentry *child_dentry, unsigned int flags)
{
	struct dentry *result = NULL;

	return result;
}
/* Forward declarations of the inode and super block helpers defined below. */
struct inode *simplefs_get_inode(struct super_block *sb,
				 const struct inode *dir, umode_t mode,
				 dev_t dev);
int simplefs_fill_super(struct super_block *sb, void *data, int silent);
/**
 * @sb: The super block of the filesystem (inodes, blocks, etc).
 * @dir: The parent directory's inode (may be NULL, as for the root inode).
 * @mode: The mode of the inode to be created (S_IFDIR, S_IFREG, etc).
 * @dev: The device id of the inode to be created (currently unused).
 * @return: The inode created, or NULL if allocation fails or the requested
 *          mode is not supported.
 *
 * This function creates an inode for the filesystem, initializes it
 * and returns it. For now, it only creates inodes for directories; any
 * other mode is rejected and the freshly allocated inode is released again.
 */
struct inode *simplefs_get_inode(struct super_block *sb,
				 const struct inode *dir, umode_t mode,
				 dev_t dev)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	inode->i_ino = get_next_ino();
	inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
	/* Set atime, mtime and ctime to the current time via the VFS helper
	 * instead of writing the private __i_* fields directly. */
	simple_inode_init_ts(inode);

	switch (mode & S_IFMT) {
	case S_IFDIR:
		/* i_nlink will be initialized to 1 in the inode_init_always function
		 * (that gets called inside the new_inode function),
		 * We change it to 2 for directories, for covering the "." entry */
		inc_nlink(inode);
		break;
	case S_IFREG:
	case S_IFLNK:
	default:
		printk(KERN_ERR
		       "simplefs can create meaningful inode for only root directory at the moment\n");
		/* Drop the reference taken by new_inode() so the inode is
		 * not leaked. */
		iput(inode);
		return NULL;
	}

	return inode;
}
/**
 * @sb: The superblock which is passed from the VFS to the filesystem.
 * @data: The mount arguments data that might be passed to the filesystem while mounting.
 * @silent: A flag to indicate whether the filesystem should print logs or not.
 * @return: 0 on success, and error code on failure.
 * 
 * This function fills the super block of the filesystem with necessary information.
 */
int simplefs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *root_inode;
	struct buffer_head *bh;
	bh = (struct buffer_head *)sb_bread(sb, 0);
	/* A magic number that uniquely identifies our filesystem type */
	sb->s_magic = 0x20250130;
	root_inode = simplefs_get_inode(sb, NULL, S_IFDIR, 0);
	root_inode->i_op = &simplefs_inode_ops;
	root_inode->i_fop = &simplefs_dir_operations;
	root_inode->i_private = (struct file_block *)bh->b_data;
	sb->s_root = d_make_root(root_inode);
	if (!sb->s_root)
		return -ENOMEM;
	return 0;
}
/**
 * @fs_type: The filesystem type structure that is registered with the kernel.
 * @flags: Mount flags (e.g. MS_RDONLY for read-only mounts).
 * @dev_name: The name of the device to be mounted (/dev/sda1, /dev/sdb1, etc).
 * @data: Extra data that might be passed to the filesystem while mounting.
 * @return: The root dentry of the filesystem that is mounted.
 * 
 * This function is called when the VFS is asked to mount this filesystem 
 * and returns the root dentry of the filesystem.
 */
static struct dentry *simplefs_mount(struct file_system_type *fs_type,
				     int flags, const char *dev_name,
				     void *data)
{
	struct dentry *ret;
	ret = mount_bdev(fs_type, flags, dev_name, data, simplefs_fill_super);
	if (unlikely(IS_ERR(ret)))
		printk(KERN_ERR "Error mounting simplefs");
	else
		printk(KERN_INFO "simplefs is successfully mounted on [%s]\n",
		       dev_name);
	return ret;
}
/**
 * @s: The super block that is being torn down.
 *
 * Called by the VFS when the filesystem is unmounted. simplefs is mounted
 * through mount_bdev(), so the teardown has to go through
 * kill_block_super(): it shuts the super block down and releases the
 * underlying block device. Without it the device (e.g. the loop device)
 * stays claimed after umount.
 */
static void simplefs_kill_superblock(struct super_block *s)
{
	kill_block_super(s);
	printk(KERN_INFO
	       "simplefs superblock is destroyed. Unmount successful.\n");
}
struct file_system_type simplefs_fs_type = {
	.owner = THIS_MODULE,
	.name = "simplefs",
	.mount = simplefs_mount,
	.kill_sb = simplefs_kill_superblock,
};
static int simplefs_init(void)
{
	int ret;
	ret = register_filesystem(&simplefs_fs_type);
	if (likely(ret == 0))
		printk(KERN_INFO "Successfully registered simplefs\n");
	else
		printk(KERN_ERR "Failed to register simplefs. Error:[%d]", ret);
	return ret;
}
static void simplefs_exit(void)
{
	int ret;
	ret = unregister_filesystem(&simplefs_fs_type);
	if (likely(ret == 0))
		printk(KERN_INFO "Successfully unregistered simplefs\n");
	else
		printk(KERN_ERR "Failed to unregister simplefs. Error:[%d]",
		       ret);
}
/* Hook the init and exit functions up as the module's entry and exit points. */
module_init(simplefs_init);
module_exit(simplefs_exit);
/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gality");

更新于 阅读次数

请我喝[茶]~( ̄▽ ̄)~*

Gality 微信支付

微信支付

Gality 支付宝

支付宝