DFLY: syscall frequency
What does the syscall traffic look like for a “make buildkernel”?
% ktrace -di -f /home/dion/projects/syscall-hist/trace1.ktlog -t c make buildkernel
% ktrace -di -f /home/dion/projects/syscall-hist/trace2-cw.ktlog -t cw make buildkernel
Then, I used this “thing” to chew the log files and spit out a nicer form. This code is pretty loosely based on the source to kdump.
#include
#include
#include
#include
#include
#include
static void process_ktrace_log(FILE *logf);
uint64_t syscalls[512] = { 0 };
int main(int argc, char *argv[]) {
FILE *logf;
logf = fopen(argv[1], "r");
process_ktrace_log(logf);
fclose(logf);
return 0;
}
static void process_ktrace_log(FILE *logf) {
struct ktr_header ktrhdr;
struct timeval tv_curr, tv_first, *tvp_first = NULL;
int ktrlen;
char *m = NULL;
int m_size = 0;
FILE *logsc = fopen("sc.log", "w");
FILE *logcsw = fopen("csw.log", "w");
while (fread(&ktrhdr, sizeof(struct ktr_header), 1, logf) == 1) {
if ((ktrlen = ktrhdr.ktr_len) m_size) {
m = (void *)realloc(m, ktrlen+1);
if (m == NULL)
errx(1, "%s", strerror(ENOMEM));
m_size = ktrlen;
}
if (ktrlen && fread(m, ktrlen, 1, logf) == 0)
errx(1, "data too short");
if (!tvp_first) {
tv_first = ktrhdr.ktr_time;
tvp_first = &tv_first;
}
timersub(&ktrhdr.ktr_time, tvp_first, &tv_curr);
switch (ktrhdr.ktr_type) {
case KTR_SYSCALL:
{
struct ktr_syscall *ktrsc = (struct ktr_syscall *)m;
fprintf(logsc, "%16lld %d\n",
(uint64_t) tv_curr.tv_sec * 1000000L +
tv_curr.tv_usec,
ktrsc->ktr_code);
syscalls[ktrsc->ktr_code]++;
}
break;
case KTR_CSW:
{
struct ktr_csw *ktrcsw = (struct ktr_csw *)m;
}
break;
default:
break;
}
}
fclose(logsc);
fclose(logcsw);
{
FILE *schistlog = fopen("schist.log", "w");
int i;
for(i = 0; i < 512; i++) {
if (syscalls[i]) fprintf(schistlog, "%d %lld\n", i, syscalls[i]);
}
fclose(schistlog);
}
}
Then I used this gnuplot input to generate a quick plot:
set terminal png set output 'schist-1m.png' set xrange [0:60000000] plot "sc.log" using 1:2 with dots
Finally, I used this little python script (I’m guessing some awk/sed genius could do it on the command line, but I can’t):
#!/usr/pkg/bin/python2.4
import sys
import re
sc = open("/usr/include/sys/syscall.h")
scdict = {}
for line in sc.readlines():
mo = re.match("^#define\s+SYS_(\S+)\s+(\d+)\s*$", line)
if mo is not None:
scdict[mo.group(2)] = mo.group(1)
sc.close()
dpair_re = re.compile("^\s*(\d+) (\d+)$")
for line in sys.stdin.readlines():
mo = dpair_re.match(line)
if mo is not None:
print '%15s %3s %s' % (mo.group(2), mo.group(1), scdict[mo.group(1)])
Then I sorted it on the command line:
% ./schist.py schist-chew.log
And the top 15 are: (full results)
1047837 197 mmap
828677 5 open
412613 3 read
355321 6 close
343725 476 fstat
287935 73 munmap
278365 475 stat
114136 4 write
108059 342 sigaction
88929 199 lseek
64747 477 lstat
42090 92 fcntl
40985 472 set_tls_area
29434 253 issetugid
27946 59 execve
[EDIT] Added the graph and final results.
Please include links to the ouput you generated, thanks!
Jonathan
April 29, 2008 at 10:35 pm
Added the graph and some of the results (with link to the full results).
More data may pop up here.
dionthegod
April 30, 2008 at 4:53 am