cha14.系统编程概念

14.1

编写一程序,试对在单目录下创建和删除大量1字节文件所需的时间进行度量。该程序应以xNNNNNN命名格式来创建文件,其中 NNNNNN为随机的6位数字。文件的创建顺序与生成文件名相同,为随机方式,删除文件则按数字升序操作(删除与创建的顺序不同)。文件的数量(FN)和文件所在目录应由命令行指定。针对不同的NF值(比如,在1000和20000之间取值)和不同的文件系统(比如 ext2、ext3和 XFS)来测量时间。随着NF的递增,每个文件系统下耗时的变化模式如何?不同文件系统之间,情况又是如何呢?如果按数字升序来创建文件(x000001、x000001、x0000002等),然后以相同顺序加以删除,结果会改变吗?如果会,原因何在?此外,上述结果会随文件系统类型的不同而改变吗?

c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#include <stdio.h>
#include <limits.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <time.h>
#include <stdlib.h>
#include <sys/times.h>
#include <sys/time.h>

#define CHECK(flag, msg, ...) do { \
if(!(flag)) {\
fprintf(stderr, "FATAL: "); \
fprintf(stderr, msg, ##__VA_ARGS__); \
fprintf(stderr, " ERROR: %s\n", strerror(errno)); \
exit(2); \
} \
} while(0)


bool str2int(const char *num, int *ret) {
errno = 0;
char *end;
*ret = strtol(num, &end, 10);
return !(end == num || *end != '\0' || errno != 0);
}

int *seqArr(int len) {
int *nums = malloc(len * sizeof(int));
for(int i = 0; i < len; i++) {
nums[i] = i;
}
return nums;
}

int* randArr(int len) {
int *visited = malloc(len * sizeof(int));
int *nums = malloc(len * sizeof(int));
memset(visited, 0, len * sizeof(int));
for(int i = 0; i < len; i++) {
int uniq;
while(visited[(uniq = rand() % len)]);
visited[uniq] = 1;
nums[i] = uniq;
}
free(visited);
return nums;
}

char *path = NULL;
void creatFiles(int *arr, int fn) {
char *filename = (char *) malloc((9 + strlen(path))*sizeof(char));
for(int i = 0; i < fn; i++) {
sprintf(filename, "%s/x%06d", path, arr[i]);
int fd = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
CHECK(fd != -1, "fail to open file %s, fd = %d", filename, fd);
CHECK(write(fd, " ", 1) == 1, "fail to write");
// fsync(fd); // Synchronized I/O file integrity completion,能否保证文件创建?
close(fd);
}
free(filename);

}

struct RmFilesArgs {
int *arr;
int fn;
};

void rmFiles(void *args) {
int *arr = ((struct RmFilesArgs*)args)->arr;
int fn = ((struct RmFilesArgs*)args)->fn;
char filename[8] = {0};
for(int i = 0; i < fn; i++) {
sprintf(filename, "x%06d", arr[i]);
unlink(filename);
}
}

long clockTic = 0;

// clock_t long int
// time_t long int
#define FIX_MINUS(x, max) ((x) < 0 ? ((x)) : (x))

void timeIt(void (*test)(void *args), void *args, double *system, double *user, double *process, double *real) {
clock_t processStart, processEnd;
struct timeval realStart, realEnd;
struct tms start, end;
CHECK((processStart = clock()) != (clock_t)-1, "fail to get clock, ERROR: %s", strerror(errno));
CHECK(gettimeofday(&realStart, NULL) != -1, "fail to get timeofday, ERROR: %s", strerror(errno));
CHECK(times(&start) != (clock_t)-1, "fail to get times, ERROR: %s", strerror(errno));
test(args);
CHECK(times(&end) != (clock_t)-1, "fail to get times, ERROR: %s", strerror(errno));
CHECK(gettimeofday(&realEnd, NULL) != -1, "fail to get timeofday, ERROR: %s", strerror(errno));
CHECK((processEnd = clock()) != (clock_t)-1, "fail to get clock, ERROR: %s", strerror(errno));
*process = (double)(FIX_MINUS((processEnd - processStart), LONG_MAX)) / CLOCKS_PER_SEC;
*real = (double)(FIX_MINUS((realEnd.tv_usec - realStart.tv_usec), LONG_MAX)) / 1000;
*user = (double)(FIX_MINUS((end.tms_utime - start.tms_utime), LONG_MAX)) / clockTic;
*system = (double)(FIX_MINUS((end.tms_stime - start.tms_stime), LONG_MAX)) / clockTic;
#ifdef DEBUG
char *format = "system = %lfs, user = %lfs, process = %lfs, real = %lfms\n";
printf(format, *system, *user, *process, *real);
#endif
}

int NOP(const char * command) {
return 0;
}

int main(int argc, char **argv) {
srand(time(NULL));
int fn = 0;
char *format = "system = %.4lfms, user = %.4lfms, process = %.4lfms, real = %.4lfms\n";

int (*bash)(const char *) = system;
#ifndef COMMAND
bash = NOP;
#endif
double system, user, process, real;
for(int i = 1; i < argc; i++) {
if(strcmp(argv[i], "-fn") == 0) {
CHECK(i + 1 < argc, "no enough args\n");
const char *num = argv[++i];
CHECK(str2int(num, &fn), "%s is not a integer!\n", num);
} else if(strcmp(argv[i], "-path") == 0) {
CHECK(i + 1 < argc, "no enough args\n");
path = argv[++i];
}
}
clockTic = sysconf(_SC_CLK_TCK);
CHECK(clockTic != -1, "fail to get sysconf: _SC_CLK_TCK, ERROR:%s", strerror(errno));

int *randIntArr = randArr(fn);
int *seqIntArr = seqArr(fn);

creatFiles(randIntArr, fn);bash("ls -lh");
timeIt(rmFiles, &(struct RmFilesArgs){
.arr=seqIntArr,
.fn=fn
}, &system, &user, &process, &real);bash("ls -lh");
printf(format, system * 1000, user * 1000, process * 1000, real);

creatFiles(seqIntArr, fn);bash("ls -lh");
timeIt(rmFiles, &(struct RmFilesArgs){
.arr=seqIntArr,
.fn=fn
}, &system, &user, &process, &real);bash("ls -lh");
printf(format, system * 1000, user * 1000, process * 1000, real);

free(seqIntArr);
free(randIntArr);
return 0;
}

没有刻意复杂化,被测函数执行相同的函数保证测试的相对准确性

结果

1
2
3
4
gcc -O3 practice14.1.c -o practice14.1 
./practice14.1 -fn 1000 -path .
system = 20.0000ms, user = 0.0000ms, process = 18.7450ms, real = 18.7320ms
system = 20.0000ms, user = 0.0000ms, process = 37.0100ms, real = 38.6430ms

O3优化掉CHECK多余的while(0)循环,计时更精确
大部分时候第二次大于第一次
reeal偶尔为负数,很奇怪

解释

磁盘分区的结构为:引导块 超级块 i节点表 数据块

假设i节点表使用数组管理,删除文件时需要删除i-node。如果按照与创建顺序相同的顺序删除文件,那么数组在这个过程中需要移动 $ \sum_{i=0}^{n-1}i $ 次。

如果随机删除,则移动次数一定小于$ \sum_{i=0}^{n-1}i $ 次。

作者

Meow Meow Liu

发布于

2023-04-28

更新于

2024-04-23

许可协议

评论