I was recently tasked with developing a system that would need to write data in the gigabytes-per-second range directly to disk. To do so, I needed to spec out and build a machine that could persist that kind of bandwidth, as well as find the best-performing filesystem and write method available in C. Once the specs were done and the machine arrived, it was time to determine which filesystem and write method under Linux would give the best results for writing. I initially tested with bonnie++, but soon became aware that the results I was receiving were not a true test of the RAID configuration and filesystem, but were instead block writes and reads. For my purposes I needed to benchmark the write speed of several different write methods, including asynchronous IO, standard IO and generic (open, write, close) IO. To do so, I created a simple writetest program:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
#include <aio.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <malloc.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
 
/* Program version, printed as major.minor.build. */
int nVersionMajor = 1;
int nVersionMinor = 0;
int nVersionBuild = 1;

/* Number of bytes in one megabyte (2^20). */
int64_t mb = 1024 * 1024;

/* Number of microseconds in one second; divides timings for display. */
double nSec = 1000000.0;

/* Run configuration: target directory, bytes per file, number of passes. */
char szOutputDir[1024];
long nSize = 0;
int nIteration;

/* Non-zero when the matching write method has been selected. */
short nUseAio = 0;
short nUseStd = 0;
short nUseGen = 0;

/* Accumulated elapsed time, in microseconds, per write method. */
unsigned long long nStdioAvg = 0;
unsigned long long nAioAvg = 0;
unsigned long long nGenAvg = 0;

/* Payload buffer handed to the write functions. */
char *pBytes = NULL;
 
void stdioWrite(char * filename){
FILE * pFile;
struct timeval tvStart;
struct timeval tvEnd;
int64_t nEnd;
int64_t nStart;
 
gettimeofday(&amp;tvStart);
nStart = tvStart.tv_sec * 1000000 + tvStart.tv_usec;
 
pFile = fopen(filename,"w");
if(!pFile){
fprintf(stderr, "[Error] could not open %s for writing.n", filename);
exit(1);
}//end if
 
fwrite(pBytes, strlen(pBytes), 1, pFile);
 
fclose(pFile);
 
gettimeofday(&amp;tvEnd);
nEnd = tvEnd.tv_sec * 1000000 + tvEnd.tv_usec;
 
nStdioAvg += (nEnd-nStart);
printf("stdio->fopen,fwrite,fclose t%f sec.n", (nEnd-nStart)/nSec);
}//end stdioWrite()
 
void aioWrite(char * filename){
struct aiocb aio;
int outFile;
struct timeval tvStart;
struct timeval tvEnd;
int64_t nEnd;
int64_t nStart;
 
gettimeofday(&amp;tvStart);
nStart = tvStart.tv_sec * 1000000 + tvStart.tv_usec;
 
outFile = open(filename, O_WRONLY|O_CREAT, 0666);
 
if(outFile == -1){
fprintf(stderr, "[Error] Could not open %s for writing.n", filename);
exit(1);
}//end if
 
aio.aio_offset = 0;
aio.aio_fildes = outFile;
aio.aio_reqprio = 0;
aio.aio_buf = pBytes;
aio.aio_nbytes = strlen(pBytes);
aio.aio_sigevent.sigev_notify = SIGEV_NONE;
 
aio_write(&amp;aio);
 
while(aio_error(&amp;aio) == EINPROGRESS);
close(outFile);
 
gettimeofday(&amp;tvEnd);
nEnd = tvEnd.tv_sec * 1000000 + tvEnd.tv_usec;
 
nAioAvg += (nEnd-nStart);
printf("aio->open,aio_write,close t%f sec.n", (nEnd-nStart)/nSec);
}//end aioWrite()
 
void genWrite(char * filename){
int outFile;
struct timeval tvStart;
struct timeval tvEnd;
int64_t nEnd;
int64_t nStart;
 
gettimeofday(&amp;tvStart);
nStart = tvStart.tv_sec * 1000000 + tvStart.tv_usec;
 
outFile = open(filename, O_WRONLY|O_CREAT, 0666);
 
if(outFile == -1){
fprintf(stderr, "[Error] Could not open %s for writing.n", filename);
exit(1);
}//end if
 
write(outFile, pBytes, strlen(pBytes));
 
close(outFile);
 
gettimeofday(&amp;tvEnd);
nEnd = tvEnd.tv_sec * 1000000 + tvEnd.tv_usec;
 
nGenAvg += (nEnd-nStart);
printf("gen->open,write,close     t%f sec.n", (nEnd-nStart)/nSec);
}//end genWrite()
 
/*
 * Prints the version banner followed by the command-line help text to stdout.
 */
void displayUsage(){
    printf("writetest - GodLikeMouse file write testing Version %d.%d.%d\n", nVersionMajor, nVersionMinor, nVersionBuild);
    fputs(
        "\tCopyright 2008 GodLikeMouse (www.GodLikeMouse.com)\n"
        "\n"
        "Usage:\n"
        "\twritetest [options]\n"
        "\n"
        "Options:\n"
        "\t--output-dir [directory]\n"
        "\t\tThe directory to write to for testing (default .test).\n"
        "\t--i [iterations]\n"
        "\t\tThe amount of times to write (default 1).\n"
        "\t--mb [megabytes to write]\n"
        "\t\tThe size of the files to write in megabytes.\n"
        "\t--b [bytes to write]\n"
        "\t\tThe size of the files to write in bytes (default 1024).\n"
        "\t--stdio\n"
        "\t\tUse fopen,fwrite,fclose.\n"
        "\t--aio\n"
        "\t\tUse open,aio_write,close.\n"
        "\t--genio\n"
        "\t\tUse open,write,close.\n"
        "\t--help\n"
        "\t\tDisplay this help message.\n"
        "\t--version\n"
        "\t\tDisplay the version information.\n"
        "\n",
        stdout);
}//end displayUsage()
 
/* Returns the value following the option at argv[*pIndex] and advances the
 * index past it; exits with an error when the option is the last argument. */
static const char * optionValue(int argc, char ** argv, int * pIndex){
    if(*pIndex + 1 >= argc){
        fprintf(stderr, "[Error] option %s requires a value.\n", argv[*pIndex]);
        exit(1);
    }//end if

    return argv[++(*pIndex)];
}//end optionValue()

/* Converts `text` to a long in [nMin, nMax]; exits with an error when the
 * text is not a whole decimal number or falls outside that range. */
static long optionNumber(const char * option, const char * text, long nMin, long nMax){
    char *pEnd;
    long nValue;

    errno = 0;
    nValue = strtol(text, &pEnd, 10);

    if(pEnd == text || *pEnd != '\0' || errno == ERANGE || nValue < nMin || nValue > nMax){
        fprintf(stderr, "[Error] invalid value '%s' for %s (expected %ld to %ld).\n", text, option, nMin, nMax);
        exit(1);
    }//end if

    return nValue;
}//end optionNumber()

/*
 * Applies the command-line options to the global run configuration.
 *
 * Recognised options (matched exactly):
 *   --output-dir DIR   sets szOutputDir
 *   --mb N             sets nSize to N megabytes
 *   --b N              sets nSize to N bytes
 *   --i N              sets nIteration (at least 1)
 *   --aio / --stdio / --genio   select the write methods to run
 *   --help             prints the usage text and exits with status 0
 *   --version          prints the version banner and exits with status 0
 *
 * Unrecognised arguments are ignored. A missing or malformed option value is
 * reported on stderr and ends the program with exit(1).
 *
 * argc/argv are the values received by main(); argv[0] is skipped.
 */
void parseArgs(int argc, char ** argv){
    int i;

    for(i=1; i<argc; i++){
        const char *option = argv[i];

        if(strcmp(option, "--output-dir") == 0){
            const char *dir = optionValue(argc, argv, &i);
            int nLen = snprintf(szOutputDir, sizeof szOutputDir, "%s", dir);

            // Refuse a path that does not fit rather than silently truncating it.
            if(nLen < 0 || (size_t)nLen >= sizeof szOutputDir){
                fprintf(stderr, "[Error] output directory is too long.\n");
                exit(1);
            }//end if
            continue;
        }//end if

        if(strcmp(option, "--mb") == 0){
            // Cap the count so that multiplying by mb cannot overflow a long.
            long nMaxMb = (long)(LONG_MAX / mb);

            nSize = (long)(mb * optionNumber(option, optionValue(argc, argv, &i), 0, nMaxMb));
            continue;
        }//end if

        if(strcmp(option, "--b") == 0){
            nSize = optionNumber(option, optionValue(argc, argv, &i), 0, LONG_MAX);
            continue;
        }//end if

        if(strcmp(option, "--help") == 0){
            displayUsage();
            exit(0);
        }//end if

        if(strcmp(option, "--i") == 0){
            nIteration = (int)optionNumber(option, optionValue(argc, argv, &i), 1, INT_MAX);
            continue;
        }//end if

        if(strcmp(option, "--aio") == 0){
            nUseAio = 1;
            continue;
        }//end if

        if(strcmp(option, "--stdio") == 0){
            nUseStd = 1;
            continue;
        }//end if

        if(strcmp(option, "--genio") == 0){
            nUseGen = 1;
            continue;
        }//end if

        if(strcmp(option, "--version") == 0){
            printf("writetest - GodLikeMouse file write testing Version %d.%d.%d\n", nVersionMajor, nVersionMinor, nVersionBuild);
            exit(0);
        }//end if
    }//end for
}//end parseArgs()
 
/* Prints a horizontal rule, on its own line, between groups of results. */
void printSeparator(){
    printf("---------------------------------------------\n");
}//end printSeparator()
 
void printAverages(){
double nTemp;
 
if(nUseStd){
nTemp = ((double)nStdioAvg/nIteration)/nSec;
printf("stdio average write time: t%f sec.n", nTemp);
printf("stdio average throughput: t%.0f bytes/sec ", nSize/nTemp);
printf("%.0f MB/secn", (nSize/nTemp)/mb);
}//end if
 
if(nUseAio){
nTemp = ((double)nAioAvg/nIteration)/nSec;
printf("aio average write time: t%f sec.n", nTemp);
printf("aio average throughput: t%.0f bytes/sec ", nSize/nTemp);
printf("%.0f MB/secn", (nSize/nTemp)/mb);
}//end if
 
if(nUseGen){
nTemp = ((double)nGenAvg/nIteration)/nSec;
printf("gen average write time: t%f sec.n", nTemp);
printf("gen average throughput: t%.0f bytes/sec ", nSize/nTemp);
printf("%.0f MB/secn", (nSize/nTemp)/mb);
}//end if
}//end printAverages()
 
int main(int argc, char ** argv){
int i;
char szFile[2048];
 
nIteration = 1;
nSize = 1024;
strcpy(szOutputDir, ".test");
nUseStd = 0;
nUseAio = 0;
nUseGen = 0;
 
parseArgs(argc, argv);
 
printf("n");
printf("Beginning cycle writen");
printf("Writing %ld bytes, %ld MBn", nSize, (nSize/mb));
 
printSeparator();
 
for(i=0; i<nIteration; i++){
if(nUseStd){
pBytes = (char *)malloc(nSize);
memset(pBytes, 'X', nSize);
sprintf(szFile, "%s/%s.%d", szOutputDir, "stdio", i);
stdioWrite(szFile);
free(pBytes);
}//end if
 
if(nUseAio){
pBytes = (char *)malloc(nSize);
memset(pBytes, 'X', nSize);
sprintf(szFile, "%s/%s.%d", szOutputDir, "aio", i);
aioWrite(szFile);
free(pBytes);
}//end if
 
if(nUseGen){
pBytes = (char *)malloc(nSize);
memset(pBytes, 'X', nSize);
sprintf(szFile, "%s/%s.%d", szOutputDir, "gen", i);
genWrite(szFile);
free(pBytes);
}//end if
 
printSeparator();
}//end for
 
printf("n");
printAverages();
printf("n");
printf("n");
 
printf("Beginning sequential writen");
printf("Writing %ld bytes, %ld MBn", nSize, (nSize/mb));
 
printSeparator();
 
nStdioAvg = 0;
nAioAvg = 0;
nGenAvg = 0;
 
if(nUseStd){
for(i=0; i<nIteration; i++){
pBytes = (char *)malloc(nSize);
memset(pBytes, 'X', nSize);
sprintf(szFile, "%s/%s.%d", szOutputDir, "stdio", i);
stdioWrite(szFile);
free(pBytes);
}//end for
printSeparator();
}//end if
 
if(nUseAio){
for(i=0; i<nIteration; i++){
pBytes = (char *)malloc(nSize);
memset(pBytes, 'X', nSize);
sprintf(szFile, "%s/%s.%d", szOutputDir, "aio", i);
aioWrite(szFile);
free(pBytes);
}//end for
printSeparator();
}//end if
 
if(nUseGen){
for(i=0; i<nIteration; i++){
pBytes = (char *)malloc(nSize);
memset(pBytes, 'X', nSize);
sprintf(szFile, "%s/%s.%d", szOutputDir, "gen", i);
genWrite(szFile);
free(pBytes);
}//end for
printSeparator();
}//end if
 
printf("n");
printAverages();
printf("n");
 
}//end main()

This simple program takes a set of parameters that determine which write methods to use, how much data to write, how many times to write it and where to write it. Using this I began testing all the available free filesystems I could find for Linux. The fastest write speeds were given by XFS with the following make and mount options.

The first round metrics I received using this method to test are as follows:

Writing 500MB of data directly to disk using stdio (fopen, fwrite, fclose), aio (open, aio_write, close) and genio (open, write, close) for 3 iterations yielded 456 when calling open, write, close consecutively, which is great: it meant I was on the right track. After a few more tweaks and using genio (open, write, close) I started seeing:

Perfect, now I'm getting where I need to go, and I know that XFS with the options I specified, along with the generic open, write, close approach, will give me the best write times. Feel free to take the above writetest program and use it to tune your filesystem and to make sure that you're using the fastest possible write method for your chosen filesystem. Be sure to compile with -laio. I thought these findings were worth mentioning.

**************** Update ********************

After changing the OS from a 32-bit to 64-bit system and with a few additional modifications to the kernel, new and even more impressive speeds have been reached.