04 ext2文件系统格式化

要使用ext2文件系统,要先在设备上创建文件系统,也就是对设备进行格式化。常见的格式化工具有e2fsprog的mkefs,还有busybox的mkfs.ext2。接下来就以busybox的工具为例,介绍具体的格式化流程。

ext2格式化
ext2格式化

一、打开设备

打开块设备,设备路径在argv[0]中,用的是系统调用open()函数,紧接着调用xmove_fd()函数。xmove_fd()函数就是调用dup2(),实现文件描述符复制,新的文件描述符fd,也指向相同块设备,后面写入数据时,都使用新的描述符,这样操作更方便。

接着调用stat(),获取块设备信息,确认是否块设备。

// mkfs_ext2.c 

// open the device, check the device is a block device
xmove_fd(xopen(argv[0], O_WRONLY), fd);
xfstat(fd, &st, argv[0]);
if (!S_ISBLK(st.st_mode) && !(option_mask32 & OPT_F))
    bb_error_msg_and_die("%s: not a block device", argv[0]);

// "Renumber" opened fd
void FAST_FUNC xmove_fd(int from, int to)
{
	if (from == to)
		return;
	xdup2(from, to);
	close(from);
}

二、初始化块相关信息

以下代码是初始化块信息,标红色部分是重点代码,因为字段较多,这里只举例说明,不做代码介绍:

  • kilobytes:块设备大小(KB)
  • blocksize:单块大小(默认1024字节,也就是1KB)
  • inodesize:单个inode大小(普通inode是128字节,sizeof(struct ext2_inode))
  • nblocks:块设备总块数
  • bytes_per_inode:单个inode占用的字节数,用于控制inode空间占比。如果块设备小于3MB,inode按照8192字节计算;如果块设备小于512MB,inode按照4096字节计算,除此之外使用16384字节,也就是16KB
  • first_block:第一块是否引导区,是的话first_block=1,否则为0
  • blocks_per_group:每个块组组包含总块数(默认是8192个块)
  • ngroups:容纳的总块组数(不含引导块)
  • gdsize:块组描述符大小,sizeof(struct ext2_group_desc),24个字节
  • group_desc_blocks:所有的块组描述符,占用的总块数
  • ninodes:容纳的inode表总数
  • inodes_per_group:每组容纳的inode总数
  • inode_table_blocks:单个inode表占用的块数
  • overhead:单个块组数据块之前元数据块数
  • remainder:按块组计算,剩余不够一个块组的块数量
// mkfs_ext2.c mkfs_ext2_main()
// get size in kbytes
kilobytes = get_volume_size_in_bytes(fd, argv[1], 1024, /*extend:*/ !(option_mask32 & OPT_n)) / 1024;

bytes_per_inode = 16384;
if (kilobytes < 512*1024)
	bytes_per_inode = 4096;
if (kilobytes < 3*1024)
	bytes_per_inode = 8192;
if (option_mask32 & OPT_i)
	bytes_per_inode = bpi;

// Determine block size and inode size
// block size is a multiple of 1024
// inode size is a multiple of 128
blocksize = 1024;
inodesize = sizeof(struct ext2_inode); // 128
if (kilobytes >= 512*1024) { // mke2fs 1.41.9 compat
	blocksize = 4096;
	inodesize = 256;
}
.....
if (option_mask32 & OPT_b)
	blocksize = bs;
if (blocksize < EXT2_MIN_BLOCK_SIZE
 || blocksize > EXT2_MAX_BLOCK_SIZE
 || (blocksize & (blocksize - 1)) // not power of 2
) {
	bb_error_msg_and_die("blocksize %u is bad", blocksize);
}
.....
if ((int32_t)bytes_per_inode < blocksize)
	bb_error_msg_and_die("-%c is bad", 'i');
// number of bits in one block, i.e. 8*blocksize
#define blocks_per_group (8 * blocksize)
first_block = (EXT2_MIN_BLOCK_SIZE == blocksize);
blocksize_log2 = int_log2(blocksize);

// Determine number of blocks
kilobytes >>= (blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
nblocks = kilobytes;
if (nblocks != kilobytes)
	bb_simple_error_msg_and_die("block count doesn't fit in 32 bits");
#define kilobytes kilobytes_unused_after_this
// Experimentally, standard mke2fs won't work on images smaller than 60k
if (nblocks < 60)
	bb_simple_error_msg_and_die("need >= 60 blocks");

......

// N.B. killing e2fsprogs feature! Unused blocks don't account in calculations
nblocks_full = nblocks;

// If last block group is too small, nblocks may be decreased in order
// to discard it, and control returns here to recalculate some
// parameters.
// Note: blocksize and bytes_per_inode are never recalculated.
retry:
// N.B. a block group can have no more than blocks_per_group blocks
ngroups = div_roundup(nblocks - first_block, blocks_per_group);

group_desc_blocks = div_roundup(ngroups, blocksize / sizeof(*gd));

......
{
	// N.B. e2fsprogs does as follows!
	uint32_t overhead, remainder;
	// ninodes is the max number of inodes in this filesystem
	uint32_t ninodes = ((uint64_t) nblocks_full * blocksize) / bytes_per_inode;
	if (ninodes < EXT2_GOOD_OLD_FIRST_INO+1)
		ninodes = EXT2_GOOD_OLD_FIRST_INO+1;
	inodes_per_group = div_roundup(ninodes, ngroups);
	// minimum number because the first EXT2_GOOD_OLD_FIRST_INO-1 are reserved
	if (inodes_per_group < 16)
		inodes_per_group = 16;
	// a block group can't have more inodes than blocks
	if (inodes_per_group > blocks_per_group)
		inodes_per_group = blocks_per_group;
	// adjust inodes per group so they completely fill the inode table blocks in the descriptor
	inodes_per_group = (div_roundup(inodes_per_group * inodesize, blocksize) * blocksize) / inodesize;
	// make sure the number of inodes per group is a multiple of 8
	inodes_per_group &= ~7;
	inode_table_blocks = div_roundup(inodes_per_group * inodesize, blocksize);

	......

	// the last group needs more attention: isn't it too small for possible overhead?
	overhead = (has_super(ngroups - 1) ? (1/*sb*/ + group_desc_blocks) : 0) + 1/*bbmp*/ + 1/*ibmp*/ + inode_table_blocks;
	remainder = (nblocks - first_block) % blocks_per_group;

        ......
}

示例1:20MB块设备,上述变量值情况

  • 块数:20480
  • 块组数:3
  • 块组描述符占用块数:1
  • 每组inode数:1707
  • 每组inode表占用 块数:214
kilobytes = 20 * 1024KB = 20480KB
blocksize = 1024
inodesize = 128
nblocks   = kilobytes * 1024 / blocksize= 20480

bytes_per_inode  = 4096
first_block      = 1
blocks_per_group = 8 * 1024 = 8192
ngroups          = roundup((nblocks - first_block) / blocks_per_group) = 3 // 2.5向上取整

gdsize             = sizeof(struct ext2_group_desc) = 24
group_desc_blocks  = roundup(ngroups / (blocksize / gdsize)) = 1

ninodes            = nblocks * blocksize / bytes_per_inode = 20480 * 1024 / 4096 = 5120
inodes_per_group   = roundup(ninodes / ngroups) = roundup(5120 / 3) = 1707
inode_table_blocks = roundup(inodes_per_group * inodesize / blocksize) = roundup(1707 * 128 / 1024) = 214

overhead  = sb_blocks(1) + group_desc_blocks(1) + bbmp(1) + ibmp(1) + inode_table_blocks(214) = 218
remainder = (nblocks - first_block) % blocks_per_group = 4105

示例2:61KB块设备(不能低于60KB)

  • 块数:61
  • 块组数:1
  • 块组描述符占用块数:1
  • 每组inode数:16
  • 每组inode表占用 块数:2
kilobytes = 61KB
blocksize = 1024
inodesize = 128
nblocks   = kilobytes * 1024 / blocksize= 61

bytes_per_inode  = 8192
first_block      = 1
blocks_per_group = 8 * 1024 = 8192
ngroups          = roundup((nblocks - first_block) / blocks_per_group) = 1 // 向上取整

gdsize             = sizeof(struct ext2_group_desc) = 24
group_desc_blocks  = roundup(ngroups / (blocksize / gdsize)) = 1

ninodes            = nblocks * blocksize / bytes_per_inode = 61 * 1024 / 8192 = 7
inodes_per_group   = roundup(ninodes / ngroups) = roundup(7 / 1) = 7 // 不足16个,向上取16
inodes_per_group   = 16
inode_table_blocks = roundup(inodes_per_group * inodesize / blocksize) = roundup(16 * 128 / 1024) = 2

overhead  = sb_blocks(1) + group_desc_blocks(1) + bbmp(1) + ibmp(1) + inode_table_blocks(2) = 6
remainder = (nblocks - first_block) % blocks_per_group = 60

三、设备格式化

1.构造超级块

分配1KB的块,然后填充整个超级块。

// fill the superblock
sb = xzalloc(1024);
STORE_LE(sb->s_rev_level, EXT2_DYNAMIC_REV); // revision 1 filesystem
STORE_LE(sb->s_magic, EXT2_SUPER_MAGIC);
STORE_LE(sb->s_inode_size, inodesize);
// set "Required extra isize" and "Desired extra isize" fields to 28
if (inodesize != sizeof(*inode)) {
	STORE_LE(sb->s_min_extra_isize, 0x001c);
	STORE_LE(sb->s_want_extra_isize, 0x001c);
}
STORE_LE(sb->s_first_ino, EXT2_GOOD_OLD_FIRST_INO);
STORE_LE(sb->s_log_block_size, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
STORE_LE(sb->s_log_frag_size, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
// first 1024 bytes of the device are for boot record. If block size is 1024 bytes, then
// the first block is 1, otherwise 0
STORE_LE(sb->s_first_data_block, first_block);
// block and inode bitmaps occupy no more than one block, so maximum number of blocks is
STORE_LE(sb->s_blocks_per_group, blocks_per_group);
STORE_LE(sb->s_frags_per_group, blocks_per_group);
// blocks
STORE_LE(sb->s_blocks_count, nblocks);
// reserve blocks for superuser
STORE_LE(sb->s_r_blocks_count, nreserved);
.....

2.循环写入block bitmap和inode bitmap

大循环写入block bitmap和inode bitmap,overhead是计算块偏移,allocate()用于设置标志位,PUT()函数是把缓冲区写入到块设备。

// calculate filesystem skeleton structures
gd = xzalloc(group_desc_blocks * blocksize);
buf = xmalloc(blocksize);
sb->s_free_blocks_count = 0;
for (i = 0, pos = first_block, n = nblocks - first_block;
	i < ngroups;
	i++, pos += blocks_per_group, n -= blocks_per_group
) {
	uint32_t overhead = pos + (has_super(i) ? (1/*sb*/ + group_desc_blocks) : 0);
	uint32_t free_blocks;
	// fill group descriptors
	STORE_LE(gd[i].bg_block_bitmap, overhead + 0);
	STORE_LE(gd[i].bg_inode_bitmap, overhead + 1);
	STORE_LE(gd[i].bg_inode_table, overhead + 2);
	overhead = overhead - pos + 1/*bbmp*/ + 1/*ibmp*/ + inode_table_blocks;
	gd[i].bg_free_inodes_count = inodes_per_group;
	......

	// cache free block count of the group
	free_blocks = (n < blocks_per_group ? n : blocks_per_group) - overhead;

	// mark preallocated blocks as allocated
//bb_error_msg("ALLOC: [%u][%u][%u]", blocksize, overhead, blocks_per_group - (free_blocks + overhead));
	allocate(buf, blocksize,
		// reserve "overhead" blocks
		overhead,
		// mark unused trailing blocks
		blocks_per_group - (free_blocks + overhead)
	);
	// dump block bitmap
	PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
	STORE_LE(gd[i].bg_free_blocks_count, free_blocks);

	// mark preallocated inodes as allocated
	allocate(buf, blocksize,
		// mark reserved inodes
		inodes_per_group - gd[i].bg_free_inodes_count,
		// mark unused trailing inodes
		blocks_per_group - inodes_per_group
	);
	// dump inode bitmap
	//PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
	//but it's right after block bitmap, so we can just:
	xwrite(fd, buf, blocksize);
	STORE_LE(gd[i].bg_free_inodes_count, gd[i].bg_free_inodes_count);

	// count overall free blocks
	sb->s_free_blocks_count += free_blocks;
}

3.写入超级块和块组描述符

给第0块,以及3,5,7次幂位置的块,按顺序写入超级块和块组文件描述符。

// dump filesystem skeleton structures

for (i = 0, pos = first_block; i < ngroups; i++, pos += blocks_per_group) {
	// dump superblock and group descriptors and their backups
	if (has_super(i)) {
		// N.B. 1024 byte blocks are special
		PUT(((uint64_t)pos * blocksize) + ((0 == i && 1024 != blocksize) ? 1024 : 0),
				sb, 1024);
		PUT(((uint64_t)pos * blocksize) + blocksize,
				gd, group_desc_blocks * blocksize);
	}
}

4.写入inode table

// zero inode tables
for (i = 0; i < ngroups; ++i)
	for (n = 0; n < inode_table_blocks; ++n)
		PUT((uint64_t)(FETCH_LE32(gd[i].bg_inode_table) + n) * blocksize,
			buf, blocksize);

5.写入根目录inode

根目录的inode编号为2,写入到第一个inode table第二个inode位置。

#define EXT2_ROOT_INO		 2	/* Root inode */

// prepare directory inode
inode = (struct ext2_inode *)buf;
STORE_LE(inode->i_mode, S_IFDIR | S_IRWXU | S_IRGRP | S_IROTH | S_IXGRP | S_IXOTH);
STORE_LE(inode->i_mtime, timestamp);
STORE_LE(inode->i_atime, timestamp);
STORE_LE(inode->i_ctime, timestamp);
STORE_LE(inode->i_size, blocksize);
// inode->i_blocks stores the number of 512 byte data blocks
// (512, because it goes directly to struct stat without scaling)
STORE_LE(inode->i_blocks, blocksize / 512);

// dump root dir inode
STORE_LE(inode->i_links_count, 3); // "/.", "/..", "/lost+found/.." point to this inode
STORE_LE(inode->i_block[0], FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks);
PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_ROOT_INO-1) * inodesize,
			buf, inodesize);

6.写入lost+found目录的inode和数据块

// dump lost+found dir inode
STORE_LE(inode->i_links_count, 2); // both "/lost+found" and "/lost+found/." point to this inode
STORE_LE(inode->i_size, lost_and_found_blocks * blocksize);
STORE_LE(inode->i_blocks, (lost_and_found_blocks * blocksize) / 512);
n = FETCH_LE32(inode->i_block[0]) + 1;
for (i = 0; i < lost_and_found_blocks; ++i)
	STORE_LE(inode->i_block[i], i + n); // use next block
//bb_error_msg("LAST BLOCK USED[%u]", i + n);
PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_GOOD_OLD_FIRST_INO-1) * inodesize,
			buf, inodesize);

// dump directories
memset(buf, 0, blocksize);
dir = (struct ext2_dir *)buf;

// dump 2nd+ blocks of "/lost+found"
STORE_LE(dir->rec_len1, blocksize); // e2fsck 1.41.4 compat (1.41.9 does not need this)
for (i = 1; i < lost_and_found_blocks; ++i)
	PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 1+i) * blocksize,
			buf, blocksize);

// dump 1st block of "/lost+found"
STORE_LE(dir->inode1, EXT2_GOOD_OLD_FIRST_INO);
STORE_LE(dir->rec_len1, 12);
STORE_LE(dir->name_len1, 1);
STORE_LE(dir->file_type1, EXT2_FT_DIR);
dir->name1[0] = '.';
STORE_LE(dir->inode2, EXT2_ROOT_INO);
STORE_LE(dir->rec_len2, blocksize - 12);
STORE_LE(dir->name_len2, 2);
STORE_LE(dir->file_type2, EXT2_FT_DIR);
dir->name2[0] = '.'; dir->name2[1] = '.';
PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 1) * blocksize, buf, blocksize);

7.写入根目录数据块

// dump root dir block
STORE_LE(dir->inode1, EXT2_ROOT_INO);
STORE_LE(dir->rec_len2, 12);
STORE_LE(dir->inode3, EXT2_GOOD_OLD_FIRST_INO);
STORE_LE(dir->rec_len3, blocksize - 12 - 12);
STORE_LE(dir->name_len3, 10);
STORE_LE(dir->file_type3, EXT2_FT_DIR);
strcpy(dir->name3, "lost+found");
PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 0) * blocksize, buf, blocksize);

上一篇:ext2文件系统物理结构剖析

参考资料:busybox代码仓

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注