012 Linux一次文件写过程write()

上一篇介绍了文件打开open()操作,在open()操作中,已经获取到了文件描述符fd,fd本质就是file对象的下标,通过fd可以直接得到缓存中的file对象,通过file对象就可以进行文件的写操作write()。

写文件过程
写文件过程

一、示例代码

以下代码功能就是在当前目录下,打开一个文件fout.txt,然后在文件中写入一个学生的信息,调用的写入函数原型如下:

/* POSIX prototype from <unistd.h>: returns the number of bytes written, or -1 with errno set. */
ssize_t write(int fd, const void *buf, size_t count);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

/* One student record: a name of up to 32 characters plus NUL, an age and a score. */
struct student {
    char name[33];
    int age;
    double score;
};

/* The sample record that main() formats and writes to the output file. */
struct student stu = {
    .name = "King Garry",
    .age = 24,
    .score = 93.5,
};

/*
 * Demo: create (or open) "fout.txt" in the current working directory and
 * write one formatted student record ("name-age-score") to it with write().
 *
 * Returns EXIT_SUCCESS when the whole record was written and the file was
 * closed cleanly; returns EXIT_FAILURE as soon as getcwd(), path building,
 * open(), formatting, write() or close() fails.
 */
int main(void)
{
    char fname[] = "/fout.txt";
    char cwd[129];
    /* Large enough for any cwd that fits in cwd[] plus the file name. */
    char path[sizeof(cwd) + sizeof(fname)];
    char stus[64] = { 0 };
    ssize_t written;
    int len;
    int fd;

    /* cwd holds no valid string when getcwd() fails, so it must not be used. */
    if (getcwd(cwd, sizeof(cwd)) == NULL) {
        perror("Err info: ");
        return EXIT_FAILURE;
    }
    printf("Curr dir: %s\n", cwd);

    /* Bounded concatenation; appending to cwd in place could overflow it. */
    len = snprintf(path, sizeof(path), "%s%s", cwd, fname);
    if (len < 0 || (size_t)len >= sizeof(path)) {
        fprintf(stderr, "Path too long: %s%s\n", cwd, fname);
        return EXIT_FAILURE;
    }

    fd = open(path, O_CREAT | O_WRONLY, 0755);
    if (fd == -1) {
        /* Report errno first, before another stdio call can overwrite it. */
        perror("Err info: ");
        printf("Open file [%s] fail!\n", path);
        return EXIT_FAILURE;
    }

    /* Bounded formatting; a long name or a huge score must not overrun stus. */
    len = snprintf(stus, sizeof(stus), "%s-%d-%.2f", stu.name, stu.age, stu.score);
    if (len < 0 || (size_t)len >= sizeof(stus)) {
        fprintf(stderr, "Record does not fit in %zu bytes\n", sizeof(stus));
        close(fd);
        return EXIT_FAILURE;
    }

    /* write() may fail or write fewer bytes than requested; treat both as errors. */
    written = write(fd, stus, (size_t)len);
    if (written != (ssize_t)len) {
        if (written == -1) {
            perror("Err info: ");
        } else {
            fprintf(stderr, "Short write: %zd of %d bytes\n", written, len);
        }
        close(fd);
        return EXIT_FAILURE;
    }

    if (close(fd) == -1) {
        perror("Err info: ");
        return EXIT_FAILURE;
    }

    printf("Finish\n");
    return EXIT_SUCCESS;
}

二、系统调用入口

write()函数属于系统调用,入口在read_write.c中,基本代码逻辑如下:

  • 根据整型fd文件描述符获取struct fd对象
  • 调用vfs_write()完成文件写入
  • 返回写入结果码
// read_write.c

/*
 * Entry point of the write system call, declared through SYSCALL_DEFINE3
 * with the three arguments (fd, buf, count). It forwards them unchanged to
 * ksys_write() and returns whatever ksys_write() returns.
 */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	return ksys_write(fd, buf, count);
}

/*
 * Write count bytes from the user buffer buf to the file referred to by fd.
 *
 * Returns the result of vfs_write(), or -EBADF when fdget_pos() yields no
 * file for fd.
 */
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	loff_t offset;
	loff_t *ppos;
	ssize_t ret;

	if (!f.file)
		return -EBADF;

	/*
	 * When the file has a position, hand vfs_write() a local copy of it so
	 * that f_pos is only stored back after a non-negative result.
	 */
	ppos = file_ppos(f.file);
	if (ppos) {
		offset = *ppos;
		ppos = &offset;
	}

	ret = vfs_write(f.file, buf, count, ppos);
	if (ppos && ret >= 0)
		f.file->f_pos = offset;

	fdput_pos(f);
	return ret;
}

三、int fd转成struct fd

将int fd转化为struct fd,其实就是查询fd下标对应的file*对象,然后再将file*对象封装成struct fd对象,具体步骤如下:

  • 使用current宏,获取当前进程打开的文件对象列表files_struct,也就是current->files(current在X86和ARM架构实现方式不一致)
  • 然后在fdt表中,查找fd下标对应的file对象并返回
  • 上层将file对象转化成struct fd
// file.c

/*
 * Look up the struct file behind fd for the current task and return it
 * encoded as an unsigned long; 0 means no usable file was found.
 *
 * When the task's files_struct has a count of exactly 1, the file is read
 * directly from the table with files_lookup_fd_raw() and returned as the bare
 * pointer value (0 if it is missing or its f_mode has any bit of mask set).
 * Otherwise the file is obtained through __fget() and the result has
 * FDPUT_FPUT or-ed in.
 */
static unsigned long __fget_light(unsigned int fd, fmode_t mask)
{
	/* All files opened by the current process. */
	struct files_struct *files = current->files;
	struct file *file;

	if (atomic_read(&files->count) != 1) {
		file = __fget(fd, mask, 1);
		return file ? (FDPUT_FPUT | (unsigned long)file) : 0;
	}

	file = files_lookup_fd_raw(files, fd);
	if (!file || unlikely(file->f_mode & mask))
		return 0;

	return (unsigned long)file;
}

/*
 * Return the struct file stored at index fd of the descriptor table of files,
 * or NULL when fd is not below the table's max_fds.
 *
 * The caller must ensure that fd table isn't shared or hold rcu or file lock
 */
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
{
	struct fdtable *fdt = rcu_dereference_raw(files->fdt);
	unsigned int idx;

	if (fd >= fdt->max_fds)
		return NULL;

	/* Use fd as the array index to fetch the file object. */
	idx = array_index_nospec(fd, fdt->max_fds);
	return rcu_dereference_raw(fdt->fd[idx]);
}

四、执行写入操作

前面第三步已经获取了文件对象file*,在写文件时,直接调用file操作表中的操作,最终完成内容的写入,主要步骤如下:

  • 校验f_mode
  • 校验pos和count
  • 调用file_start_write()函数,此函数最终会down超级块的读写信号量:sb->s_writers.rw_sem
  • 如果f_op->write存在,则调用此函数进行写入,否则,如果file->f_op->write_iter存在,则调用new_sync_write()函数进行写入
  • 调用fsnotify_modify()通知父目录文件变化
  • 调用add_wchar()将写入的数据量,更新到进程的struct task_io_accounting的wchar字段
  • 调用inc_syscw()将写入系统调用次数,更新到进程的struct task_io_accounting的syscw字段
  • 调用file_end_write()函数,此函数最终会up超级块的读写信号量:sb->s_writers.rw_sem,与file_start_write()函数对应
/*
 * Write up to count bytes from the user buffer buf to file, passing pos
 * through to the file's write operation.
 *
 * Returns the result of the file's write / write_iter path, or:
 *   -EBADF   if f_mode lacks FMODE_WRITE,
 *   -EINVAL  if f_mode lacks FMODE_CAN_WRITE, or the file has neither a
 *            write nor a write_iter operation,
 *   -EFAULT  if access_ok() rejects (buf, count),
 *   any non-zero value returned by rw_verify_area().
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	/* Mode and user-buffer checks; nothing has been started yet, so just return. */
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	if (unlikely(!access_ok(buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret)
		return ret;
	/* Cap a single call at MAX_RW_COUNT bytes. */
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;
	/* Everything between file_start_write() and file_end_write() is bracketed by that pair. */
	file_start_write(file);
	/* Prefer the plain write operation; fall back to write_iter via new_sync_write(). */
	if (file->f_op->write)
		ret = file->f_op->write(file, buf, count, pos);
	else if (file->f_op->write_iter)
		ret = new_sync_write(file, buf, count, pos);
	else
		ret = -EINVAL;
	/* Only when bytes were actually written: notify and account ret bytes to current. */
	if (ret > 0) {
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	/* Called on every path that reaches this point, whatever ret is. */
	inc_syscw(current);
	file_end_write(file);
	return ret;
}

/*
 * Perform a write through the file's write_iter operation.
 *
 * Wraps (buf, len) in a single-segment iovec, sets up a kiocb whose position
 * starts at *ppos (or 0 when ppos is NULL), and calls call_write_iter().
 * When the result is positive and ppos is non-NULL, the position left in
 * the kiocb is stored back into *ppos. Returns the call_write_iter() result.
 */
static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec vec = {
		.iov_base = (void __user *)buf,
		.iov_len = len,
	};
	struct iov_iter from;
	struct kiocb req;
	ssize_t written;

	init_sync_kiocb(&req, filp);
	if (ppos)
		req.ki_pos = *ppos;
	else
		req.ki_pos = 0;
	iov_iter_init(&from, WRITE, &vec, 1, len);

	written = call_write_iter(filp, &req, &from);
	/* -EIOCBQUEUED is treated as a bug on this path. */
	BUG_ON(written == -EIOCBQUEUED);
	if (ppos && written > 0)
		*ppos = req.ki_pos;

	return written;
}

/*
 * Invoke the write_iter operation from file's f_op table with the given
 * kiocb and iov_iter, and return its result. No check is made here that
 * write_iter is non-NULL.
 */
static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
				      struct iov_iter *iter)
{
	return file->f_op->write_iter(kio, iter);
}

注1:new_sync_write()迭代写

这个函数就是实现迭代IO,对于一段大的缓冲区,可以使用迭代写入到磁盘,先看下两个主要数据结构:

1)struct iovec & iov_iter

iovec:给定了要写入的缓冲区和长度

iov_iter:用于描述一次IO写入进度,*iov指向缓冲区,iov_offset表示当前段内已处理的偏移量,count表示剩余待写入的字节数,iov_offset和count这两个字段会随着写入不断变化

// iovec & iov_iter

/* One buffer segment: a user-space base address and its length. */
struct iovec
{
	void __user *iov_base;	/* BSD uses caddr_t (1003.1g requires void *) */
	__kernel_size_t iov_len; /* Must be size_t (1003.1g) */
};

/*
 * Iterator over the data of one I/O request.
 *
 * The first anonymous union holds a single pointer, so only one backing
 * kind (iovec, kvec, bio_vec, xarray or pipe) is referenced at a time.
 * NOTE(review): presumably iter_type records which member is valid - confirm.
 */
struct iov_iter {
	u8 iter_type;
	bool nofault;
	bool data_source;
	/* NOTE(review): presumably the offset into the current segment - confirm. */
	size_t iov_offset;
	/* NOTE(review): presumably the number of bytes left to transfer - confirm. */
	size_t count;
	union {
		const struct iovec *iov;
		const struct kvec *kvec;
		const struct bio_vec *bvec;
		struct xarray *xarray;
		struct pipe_inode_info *pipe;
	};
	/* Per-kind bookkeeping; the members share storage. */
	union {
		unsigned long nr_segs;
		struct {
			unsigned int head;
			unsigned int start_head;
		};
		loff_t xarray_start;
	};
};

2)struct kiocb

从文件侧来看IO写入情况,ki_filp是file指针,ki_pos是读写的偏移量

/*
 * Control block describing one I/O request against a file.
 * Field order and the randomized_struct_fields_* markers are part of the
 * layout and must be left as they are.
 */
struct kiocb {
	/* The file this request operates on. */
	struct file		*ki_filp;

	/* The 'ki_filp' pointer is shared in a union for aio */
	randomized_struct_fields_start

	/* File position of the request. */
	loff_t			ki_pos;
	/* NOTE(review): presumably the completion callback for async requests - confirm. */
	void (*ki_complete)(struct kiocb *iocb, long ret);
	void			*private;
	int			ki_flags;
	u16			ki_hint;
	u16			ki_ioprio; /* See linux/ioprio.h */
	struct wait_page_queue	*ki_waitq; /* for async buffered IO */
	randomized_struct_fields_end
};

注2:call_write_iter()

这个函数最终调用file指针的f_op操作表中的write_iter()完成迭代写。

file->f_op->write_iter(kio, iter)

ext2示例:

/*
 * write_iter implementation shown for ext2.
 *
 * When built with CONFIG_FS_DAX and IS_DAX() is true for the inode behind
 * the file's mapping (ki_filp->f_mapping->host), the write is handled by
 * ext2_dax_write_iter(); in every other case it goes to
 * generic_file_write_iter(). Returns the result of whichever was called.
 */
static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
#ifdef CONFIG_FS_DAX
	if (IS_DAX(iocb->ki_filp->f_mapping->host))
		return ext2_dax_write_iter(iocb, from);
#endif
	return generic_file_write_iter(iocb, from);
}

上一篇:Linux一次文件打开过程open

参考资料:官方文档

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注