要使用ext2文件系统,要先在设备上创建文件系统,也就是对设备进行格式化。常见的格式化工具有e2fsprog的mkefs,还有busybox的mkfs.ext2。接下来就以busybox的工具为例,介绍具体的格式化流程。
一、打开设备
打开块设备,设备路径在argv[0]中,用的是系统调用open()函数,紧接着调用xmove_fd()函数。xmove_fd()函数就是调用dup2(),实现文件描述符复制,新的文件描述符fd,也指向相同块设备,后面写入数据时,都使用新的描述符,这样操作更方便。
接着调用stat(),获取块设备信息,确认是否块设备。
// mkfs_ext2.c
// open the device, check the device is a block device
xmove_fd(xopen(argv[0], O_WRONLY), fd);
xfstat(fd, &st, argv[0]);
if (!S_ISBLK(st.st_mode) && !(option_mask32 & OPT_F))
bb_error_msg_and_die("%s: not a block device", argv[0]);
// "Renumber" opened fd
void FAST_FUNC xmove_fd(int from, int to)
{
if (from == to)
return;
xdup2(from, to);
close(from);
}
二、初始化块相关信息
以下代码是初始化块信息,标红色部分是重点代码,因为字段较多,这里只举例说明,不做代码介绍:
- kilobytes:块设备大小(KB)
- blocksize:单块大小(默认1024字节,也就是1KB)
- inodesize:单个inode大小(普通inode是128字节,sizeof(struct ext2_inode))
- nblocks:块设备总块数
- bytes_per_inode:单个inode占用的字节数,用于控制inode空间占比。如果块设备小于3MB,inode按照8192字节计算;如果块设备小于512MB,inode按照4096字节计算,除此之外使用16384字节,也就是16KB
- first_block:第一块是否引导区,是的话first_block=1,否则为0
- blocks_per_group:每个块组组包含总块数(默认是8192个块)
- ngroups:容纳的总块组数(不含引导块)
- gdsize:块组描述符大小,sizeof(struct ext2_group_desc),24个字节
- group_desc_blocks:所有的块组描述符,占用的总块数
- ninodes:容纳的inode表总数
- inodes_per_group:每组容纳的inode总数
- inode_table_blocks:单个inode表占用的块数
- overhead:单个块组数据块之前元数据块数
- remainder:按块组计算,剩余不够一个块组的块数量
// mkfs_ext2.c mkfs_ext2_main()
// get size in kbytes
kilobytes = get_volume_size_in_bytes(fd, argv[1], 1024, /*extend:*/ !(option_mask32 & OPT_n)) / 1024;
bytes_per_inode = 16384;
if (kilobytes < 512*1024)
bytes_per_inode = 4096;
if (kilobytes < 3*1024)
bytes_per_inode = 8192;
if (option_mask32 & OPT_i)
bytes_per_inode = bpi;
// Determine block size and inode size
// block size is a multiple of 1024
// inode size is a multiple of 128
blocksize = 1024;
inodesize = sizeof(struct ext2_inode); // 128
if (kilobytes >= 512*1024) { // mke2fs 1.41.9 compat
blocksize = 4096;
inodesize = 256;
}
.....
if (option_mask32 & OPT_b)
blocksize = bs;
if (blocksize < EXT2_MIN_BLOCK_SIZE
|| blocksize > EXT2_MAX_BLOCK_SIZE
|| (blocksize & (blocksize - 1)) // not power of 2
) {
bb_error_msg_and_die("blocksize %u is bad", blocksize);
}
.....
if ((int32_t)bytes_per_inode < blocksize)
bb_error_msg_and_die("-%c is bad", 'i');
// number of bits in one block, i.e. 8*blocksize
#define blocks_per_group (8 * blocksize)
first_block = (EXT2_MIN_BLOCK_SIZE == blocksize);
blocksize_log2 = int_log2(blocksize);
// Determine number of blocks
kilobytes >>= (blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
nblocks = kilobytes;
if (nblocks != kilobytes)
bb_simple_error_msg_and_die("block count doesn't fit in 32 bits");
#define kilobytes kilobytes_unused_after_this
// Experimentally, standard mke2fs won't work on images smaller than 60k
if (nblocks < 60)
bb_simple_error_msg_and_die("need >= 60 blocks");
......
// N.B. killing e2fsprogs feature! Unused blocks don't account in calculations
nblocks_full = nblocks;
// If last block group is too small, nblocks may be decreased in order
// to discard it, and control returns here to recalculate some
// parameters.
// Note: blocksize and bytes_per_inode are never recalculated.
retry:
// N.B. a block group can have no more than blocks_per_group blocks
ngroups = div_roundup(nblocks - first_block, blocks_per_group);
group_desc_blocks = div_roundup(ngroups, blocksize / sizeof(*gd));
......
{
// N.B. e2fsprogs does as follows!
uint32_t overhead, remainder;
// ninodes is the max number of inodes in this filesystem
uint32_t ninodes = ((uint64_t) nblocks_full * blocksize) / bytes_per_inode;
if (ninodes < EXT2_GOOD_OLD_FIRST_INO+1)
ninodes = EXT2_GOOD_OLD_FIRST_INO+1;
inodes_per_group = div_roundup(ninodes, ngroups);
// minimum number because the first EXT2_GOOD_OLD_FIRST_INO-1 are reserved
if (inodes_per_group < 16)
inodes_per_group = 16;
// a block group can't have more inodes than blocks
if (inodes_per_group > blocks_per_group)
inodes_per_group = blocks_per_group;
// adjust inodes per group so they completely fill the inode table blocks in the descriptor
inodes_per_group = (div_roundup(inodes_per_group * inodesize, blocksize) * blocksize) / inodesize;
// make sure the number of inodes per group is a multiple of 8
inodes_per_group &= ~7;
inode_table_blocks = div_roundup(inodes_per_group * inodesize, blocksize);
......
// the last group needs more attention: isn't it too small for possible overhead?
overhead = (has_super(ngroups - 1) ? (1/*sb*/ + group_desc_blocks) : 0) + 1/*bbmp*/ + 1/*ibmp*/ + inode_table_blocks;
remainder = (nblocks - first_block) % blocks_per_group;
......
}
示例1:20MB块设备,上述变量值情况
- 块数:20480
- 块组数:3
- 块组描述符占用块数:1
- 每组inode数:1707
- 每组inode表占用 块数:214
kilobytes = 20 * 1024KB = 20480KB
blocksize = 1024
inodesize = 128
nblocks = kilobytes * 1024 / blocksize= 20480
bytes_per_inode = 4096
first_block = 1
blocks_per_group = 8 * 1024 = 8192
ngroups = roundup((nblocks - first_block) / blocks_per_group) = 3 // 2.5向上取整
gdsize = sizeof(struct ext2_group_desc) = 24
group_desc_blocks = roundup(ngroups / (blocksize / gdsize)) = 1
ninodes = nblocks * blocksize / bytes_per_inode = 20480 * 1024 / 4096 = 5120
inodes_per_group = roundup(ninodes / ngroups) = roundup(5120 / 3) = 1707
inode_table_blocks = roundup(inodes_per_group * inodesize / blocksize) = roundup(1707 * 128 / 1024) = 214
overhead = sb_blocks(1) + group_desc_blocks(1) + bbmp(1) + ibmp(1) + inode_table_blocks(214) = 218
remainder = (nblocks - first_block) % blocks_per_group = 4105
示例2:61KB块设备(不能低于60KB)
- 块数:61
- 块组数:1
- 块组描述符占用块数:1
- 每组inode数:16
- 每组inode表占用 块数:2
kilobytes = 61KB
blocksize = 1024
inodesize = 128
nblocks = kilobytes * 1024 / blocksize= 61
bytes_per_inode = 8192
first_block = 1
blocks_per_group = 8 * 1024 = 8192
ngroups = roundup((nblocks - first_block) / blocks_per_group) = 1 // 向上取整
gdsize = sizeof(struct ext2_group_desc) = 24
group_desc_blocks = roundup(ngroups / (blocksize / gdsize)) = 1
ninodes = nblocks * blocksize / bytes_per_inode = 61 * 1024 / 8192 = 7
inodes_per_group = roundup(ninodes / ngroups) = roundup(7 / 1) = 7 // 不足16个,向上取16
inodes_per_group = 16
inode_table_blocks = roundup(inodes_per_group * inodesize / blocksize) = roundup(16 * 128 / 1024) = 2
overhead = sb_blocks(1) + group_desc_blocks(1) + bbmp(1) + ibmp(1) + inode_table_blocks(2) = 6
remainder = (nblocks - first_block) % blocks_per_group = 60
三、设备格式化
1.构造超级块
分配1KB的块,然后填充整个超级块。
// fill the superblock
sb = xzalloc(1024);
STORE_LE(sb->s_rev_level, EXT2_DYNAMIC_REV); // revision 1 filesystem
STORE_LE(sb->s_magic, EXT2_SUPER_MAGIC);
STORE_LE(sb->s_inode_size, inodesize);
// set "Required extra isize" and "Desired extra isize" fields to 28
if (inodesize != sizeof(*inode)) {
STORE_LE(sb->s_min_extra_isize, 0x001c);
STORE_LE(sb->s_want_extra_isize, 0x001c);
}
STORE_LE(sb->s_first_ino, EXT2_GOOD_OLD_FIRST_INO);
STORE_LE(sb->s_log_block_size, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
STORE_LE(sb->s_log_frag_size, blocksize_log2 - EXT2_MIN_BLOCK_LOG_SIZE);
// first 1024 bytes of the device are for boot record. If block size is 1024 bytes, then
// the first block is 1, otherwise 0
STORE_LE(sb->s_first_data_block, first_block);
// block and inode bitmaps occupy no more than one block, so maximum number of blocks is
STORE_LE(sb->s_blocks_per_group, blocks_per_group);
STORE_LE(sb->s_frags_per_group, blocks_per_group);
// blocks
STORE_LE(sb->s_blocks_count, nblocks);
// reserve blocks for superuser
STORE_LE(sb->s_r_blocks_count, nreserved);
.....
2.循环写入block bitmap和inode bitmap
大循环写入block bitmap和inode bitmap,overhead是计算块偏移,allocate()用于设置标志位,PUT()函数是把缓冲区写入到块设备。
// calculate filesystem skeleton structures
gd = xzalloc(group_desc_blocks * blocksize);
buf = xmalloc(blocksize);
sb->s_free_blocks_count = 0;
for (i = 0, pos = first_block, n = nblocks - first_block;
i < ngroups;
i++, pos += blocks_per_group, n -= blocks_per_group
) {
uint32_t overhead = pos + (has_super(i) ? (1/*sb*/ + group_desc_blocks) : 0);
uint32_t free_blocks;
// fill group descriptors
STORE_LE(gd[i].bg_block_bitmap, overhead + 0);
STORE_LE(gd[i].bg_inode_bitmap, overhead + 1);
STORE_LE(gd[i].bg_inode_table, overhead + 2);
overhead = overhead - pos + 1/*bbmp*/ + 1/*ibmp*/ + inode_table_blocks;
gd[i].bg_free_inodes_count = inodes_per_group;
......
// cache free block count of the group
free_blocks = (n < blocks_per_group ? n : blocks_per_group) - overhead;
// mark preallocated blocks as allocated
//bb_error_msg("ALLOC: [%u][%u][%u]", blocksize, overhead, blocks_per_group - (free_blocks + overhead));
allocate(buf, blocksize,
// reserve "overhead" blocks
overhead,
// mark unused trailing blocks
blocks_per_group - (free_blocks + overhead)
);
// dump block bitmap
PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
STORE_LE(gd[i].bg_free_blocks_count, free_blocks);
// mark preallocated inodes as allocated
allocate(buf, blocksize,
// mark reserved inodes
inodes_per_group - gd[i].bg_free_inodes_count,
// mark unused trailing inodes
blocks_per_group - inodes_per_group
);
// dump inode bitmap
//PUT((uint64_t)(FETCH_LE32(gd[i].bg_block_bitmap)) * blocksize, buf, blocksize);
//but it's right after block bitmap, so we can just:
xwrite(fd, buf, blocksize);
STORE_LE(gd[i].bg_free_inodes_count, gd[i].bg_free_inodes_count);
// count overall free blocks
sb->s_free_blocks_count += free_blocks;
}
3.写入超级块和块组描述符
给第0块,以及3,5,7次幂位置的块,按顺序写入超级块和块组文件描述符。
// dump filesystem skeleton structures
for (i = 0, pos = first_block; i < ngroups; i++, pos += blocks_per_group) {
// dump superblock and group descriptors and their backups
if (has_super(i)) {
// N.B. 1024 byte blocks are special
PUT(((uint64_t)pos * blocksize) + ((0 == i && 1024 != blocksize) ? 1024 : 0),
sb, 1024);
PUT(((uint64_t)pos * blocksize) + blocksize,
gd, group_desc_blocks * blocksize);
}
}
4.写入inode table
// zero inode tables
for (i = 0; i < ngroups; ++i)
for (n = 0; n < inode_table_blocks; ++n)
PUT((uint64_t)(FETCH_LE32(gd[i].bg_inode_table) + n) * blocksize,
buf, blocksize);
5.写入根目录inode
根目录的inode编号为2,写入到第一个inode table第二个inode位置。
#define EXT2_ROOT_INO 2 /* Root inode */
// prepare directory inode
inode = (struct ext2_inode *)buf;
STORE_LE(inode->i_mode, S_IFDIR | S_IRWXU | S_IRGRP | S_IROTH | S_IXGRP | S_IXOTH);
STORE_LE(inode->i_mtime, timestamp);
STORE_LE(inode->i_atime, timestamp);
STORE_LE(inode->i_ctime, timestamp);
STORE_LE(inode->i_size, blocksize);
// inode->i_blocks stores the number of 512 byte data blocks
// (512, because it goes directly to struct stat without scaling)
STORE_LE(inode->i_blocks, blocksize / 512);
// dump root dir inode
STORE_LE(inode->i_links_count, 3); // "/.", "/..", "/lost+found/.." point to this inode
STORE_LE(inode->i_block[0], FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks);
PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_ROOT_INO-1) * inodesize,
buf, inodesize);
6.写入lost+found目录的inode和数据块
// dump lost+found dir inode
STORE_LE(inode->i_links_count, 2); // both "/lost+found" and "/lost+found/." point to this inode
STORE_LE(inode->i_size, lost_and_found_blocks * blocksize);
STORE_LE(inode->i_blocks, (lost_and_found_blocks * blocksize) / 512);
n = FETCH_LE32(inode->i_block[0]) + 1;
for (i = 0; i < lost_and_found_blocks; ++i)
STORE_LE(inode->i_block[i], i + n); // use next block
//bb_error_msg("LAST BLOCK USED[%u]", i + n);
PUT(((uint64_t)FETCH_LE32(gd[0].bg_inode_table) * blocksize) + (EXT2_GOOD_OLD_FIRST_INO-1) * inodesize,
buf, inodesize);
// dump directories
memset(buf, 0, blocksize);
dir = (struct ext2_dir *)buf;
// dump 2nd+ blocks of "/lost+found"
STORE_LE(dir->rec_len1, blocksize); // e2fsck 1.41.4 compat (1.41.9 does not need this)
for (i = 1; i < lost_and_found_blocks; ++i)
PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 1+i) * blocksize,
buf, blocksize);
// dump 1st block of "/lost+found"
STORE_LE(dir->inode1, EXT2_GOOD_OLD_FIRST_INO);
STORE_LE(dir->rec_len1, 12);
STORE_LE(dir->name_len1, 1);
STORE_LE(dir->file_type1, EXT2_FT_DIR);
dir->name1[0] = '.';
STORE_LE(dir->inode2, EXT2_ROOT_INO);
STORE_LE(dir->rec_len2, blocksize - 12);
STORE_LE(dir->name_len2, 2);
STORE_LE(dir->file_type2, EXT2_FT_DIR);
dir->name2[0] = '.'; dir->name2[1] = '.';
PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 1) * blocksize, buf, blocksize);
7.写入根目录数据块
// dump root dir block
STORE_LE(dir->inode1, EXT2_ROOT_INO);
STORE_LE(dir->rec_len2, 12);
STORE_LE(dir->inode3, EXT2_GOOD_OLD_FIRST_INO);
STORE_LE(dir->rec_len3, blocksize - 12 - 12);
STORE_LE(dir->name_len3, 10);
STORE_LE(dir->file_type3, EXT2_FT_DIR);
strcpy(dir->name3, "lost+found");
PUT((uint64_t)(FETCH_LE32(gd[0].bg_inode_table) + inode_table_blocks + 0) * blocksize, buf, blocksize);
上一篇:ext2文件系统物理结构剖析
参考资料:busybox代码仓