After nearly 5 years, zlib finally gets updated. In the release notes, we see this: “Wholesale replacement of gz* functions with faster versions”. How much faster? Let’s see.
In the following table, gzread/gzgetc/gzgets are direct zlib function calls. ks_getc/ks_getuntil are from my generic buffered wrapper.
| Metrics | 1.2.3 (CPU sec) | 1.2.5 (CPU sec) |
| gzread(), 4KB buffer | 2.69 | 2.87 |
| gzgetc() | 73.25 | 7.34 |
| gzgets(), max 4KB | 73.29 | 4.32 |
| ks_getc() | 4.06 | 4.38 |
| ks_getuntil() | 3.57 | 3.76 |
It seems that gzread() in 1.2.5 has become slightly slower, which also slows down my ks_getc/ks_getuntil. But gzgetc/gzgets show a 10-fold speedup. This greatly simplifies programming, as we no longer need to implement a buffer ourselves (although my wrapper is still slightly more efficient). Great work!
Benchmarking source code:
#include <zlib.h>
#include <stdio.h>
#include <time.h>
#include <stdint.h>
#include <stdlib.h>
#include "kseq.h"
/* Size in bytes of every read buffer used by this benchmark, including the kstream buffer. */
#define BUF_SIZE 4096
/* Instantiate the kseq.h stream type and ks_* functions over gzFile, reading through gzread(). */
KSTREAM_INIT(gzFile, gzread, BUF_SIZE)
/* Open a gzip'ed file for reading; print a diagnostic and exit(1) on failure. */
static gzFile bench_open(const char *path)
{
	gzFile fp = gzopen(path, "r");
	if (fp == NULL) {
		fprintf(stderr, "Failed to open '%s'\n", path);
		exit(1);
	}
	return fp;
}

/* Allocate `size` zeroed bytes; print a diagnostic and exit(1) on failure. */
static void *bench_alloc(size_t size)
{
	void *p = calloc(1, size);
	if (p == NULL) {
		fprintf(stderr, "Out of memory\n");
		exit(1);
	}
	return p;
}

/* Print the CPU time consumed since `start` under the given label. */
static void bench_report(const char *label, clock_t start)
{
	fprintf(stderr, "[%s] %.2f sec\n", label, (float)(clock() - start) / CLOCKS_PER_SEC);
}

/*
 * Benchmark five ways of reading the gzip'ed file named by argv[1]:
 * gzread(), ks_getc(), ks_getuntil(), gzgetc() and gzgets().
 * Each pass reopens the file, consumes it to the end and reports the CPU
 * time on stderr. Returns 0 on success, 1 on a usage error; exits with
 * status 1 if the file cannot be opened or memory cannot be allocated.
 */
int main(int argc, char *argv[])
{
	gzFile fp;
	clock_t t;

	if (argc < 2) {
		fprintf(stderr, "Usage: kseq_bench <in.gz>\n");
		return 1;
	}

	{ /* raw gzread() into a BUF_SIZE buffer */
		uint8_t *buf = bench_alloc(BUF_SIZE);
		fp = bench_open(argv[1]);
		t = clock();
		while (gzread(fp, buf, BUF_SIZE) > 0) {}
		bench_report("gzread", t);
		gzclose(fp);
		free(buf);
	}

	{ /* byte-at-a-time through the kstream wrapper */
		kstream_t *ks;
		fp = bench_open(argv[1]);
		ks = ks_init(fp);
		if (ks == NULL) {
			fprintf(stderr, "Out of memory\n");
			gzclose(fp);
			return 1;
		}
		t = clock();
		while (ks_getc(ks) >= 0) {}
		bench_report("ks_getc", t);
		ks_destroy(ks);
		gzclose(fp);
	}

	{ /* line-at-a-time through the kstream wrapper */
		kstream_t *ks;
		kstring_t *s;
		int dret;
		s = bench_alloc(sizeof(kstring_t));
		fp = bench_open(argv[1]);
		ks = ks_init(fp);
		if (ks == NULL) {
			fprintf(stderr, "Out of memory\n");
			gzclose(fp);
			free(s);
			return 1;
		}
		t = clock();
		while (ks_getuntil(ks, '\n', s, &dret) >= 0) {}
		bench_report("ks_getuntil", t);
		ks_destroy(ks);
		gzclose(fp);
		free(s->s);
		free(s);
	}

	{ /* byte-at-a-time through zlib itself */
		fp = bench_open(argv[1]);
		t = clock();
		while (gzgetc(fp) >= 0) {}
		bench_report("gzgetc", t);
		gzclose(fp);
	}

	{ /* line-at-a-time through zlib itself */
		char *buf = bench_alloc(BUF_SIZE);
		fp = bench_open(argv[1]);
		t = clock();
		/* gzgets() returns a pointer, so test it against NULL rather than 0 with '>' */
		while (gzgets(fp, buf, BUF_SIZE) != NULL) {}
		bench_report("gzgets", t);
		gzclose(fp);
		free(buf);
	}

	return 0;
}
and the source code of kseq.h:
#ifndef AC_KSEQ_H
#define AC_KSEQ_H
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* Special delimiter codes understood by ks_getuntil(); a delimiter value
 * greater than KS_SEP_MAX is matched against the stream as a literal byte. */
#define KS_SEP_SPACE 0 // any isspace() byte: ' ', \t, \n, \v, \f, \r
#define KS_SEP_TAB 1 // any isspace() byte except ' '
#define KS_SEP_MAX 1 // largest special delimiter code
/*
 * Declares kstream_t, a buffered reader over a handle of type stream_t.
 *   buf     bytes obtained by the most recent read
 *   begin   index of the next unconsumed byte in buf
 *   end     number of bytes the most recent read placed in buf
 *   is_eof  non-zero once a read returned fewer bytes than requested
 *   f       the underlying handle passed to the read function
 */
#define __KS_TYPE(stream_t) \
	typedef struct __kstream_t { \
		unsigned char *buf; \
		int begin; \
		int end; \
		int is_eof; \
		stream_t f; \
	} kstream_t;
/* Non-zero when the stream is flagged as ended and every buffered byte has been consumed. */
#define ks_eof(ks) ((ks)->is_eof ? (ks)->begin >= (ks)->end : 0)
/* Reset the buffer window and the end-of-stream flag; the expression evaluates to 0. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
/*
 * Defines the constructor and destructor of kstream_t.
 *
 * ks_init(f):     allocates a zero-initialised stream bound to handle f with a
 *                 read buffer of __bufsize bytes. Returns NULL if either
 *                 allocation fails (nothing is leaked in that case).
 * ks_destroy(ks): frees the buffer and the stream; a NULL ks is ignored. The
 *                 handle f itself is not closed.
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == NULL) return NULL; \
		ks->buf = (unsigned char*)malloc(__bufsize); \
		if (ks->buf == NULL) { \
			free(ks); \
			return NULL; \
		} \
		ks->f = f; \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
/*
 * Defines ks_getc(ks): return the next byte of the stream as a non-negative
 * int, or -1 when the stream is exhausted or the read function reports an
 * error (a negative return value).
 *
 * The buffer is refilled with __read(ks->f, ks->buf, __bufsize) whenever it
 * has been fully consumed. A read that yields fewer than __bufsize bytes
 * marks the stream as ended, so no further read is attempted afterwards.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->begin >= ks->end) { \
			if (ks->is_eof) return -1; \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end <= 0) { \
				/* 0 is end of input, negative is a read error: never hand out a stale byte */ \
				ks->end = 0; \
				ks->is_eof = 1; \
				return -1; \
			} \
			if (ks->end < (__bufsize)) ks->is_eof = 1; \
		} \
		return (int)ks->buf[ks->begin++]; \
	}
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/* Growable byte string: s points to m allocated bytes, of which the first l hold data. */
typedef struct __kstring_t {
	size_t l;
	size_t m;
	char *s;
} kstring_t;
#endif
#ifndef kroundup32
/*
 * Round x up, in place, to the next power of two (x is left unchanged when it
 * already is one). x must be a modifiable integer lvalue; the bit smearing
 * covers values of up to 32 bits.
 */
#define kroundup32(x) ( \
	--(x), \
	(x) |= (x) >> 1, \
	(x) |= (x) >> 2, \
	(x) |= (x) >> 4, \
	(x) |= (x) >> 8, \
	(x) |= (x) >> 16, \
	++(x))
#endif
/*
 * Defines ks_getuntil(ks, delimiter, str, dret): read bytes from the stream
 * into str until a delimiter is met or the input ends.
 *
 *   delimiter  KS_SEP_SPACE (any isspace() byte), KS_SEP_TAB (any isspace()
 *              byte except ' '), or any other value, which is compared to each
 *              byte literally (a negative value never matches, so the rest of
 *              the stream is read).
 *   str        receives the token, always NUL-terminated; its buffer is grown
 *              as needed and reused across calls. The delimiter is consumed
 *              but not stored.
 *   dret       if non-NULL, receives the delimiter byte that ended the token,
 *              or 0 when the token was ended by the end of input.
 *
 * Returns the token length (possibly 0), or -1 when the stream was already
 * exhausted on entry or memory could not be allocated. In the allocation
 * failure case the contents of str are unspecified, but str->s is still owned
 * by the caller.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ \
		if (dret) *dret = 0; \
		str->l = 0; \
		if (ks->begin >= ks->end && ks->is_eof) return -1; \
		for (;;) { \
			int i; \
			size_t n; \
			if (ks->begin >= ks->end) { \
				if (ks->is_eof) break; \
				ks->begin = 0; \
				ks->end = __read(ks->f, ks->buf, __bufsize); \
				if (ks->end <= 0) { \
					/* 0 is end of input, negative is a read error */ \
					ks->end = 0; \
					ks->is_eof = 1; \
					break; \
				} \
				if (ks->end < (__bufsize)) ks->is_eof = 1; \
			} \
			if (delimiter == KS_SEP_SPACE) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i])) break; \
			} else if (delimiter == KS_SEP_TAB) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
			} else { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} \
			n = (size_t)(i - ks->begin); \
			if (str->m - str->l < n + 1) { \
				/* grow through a temporary so the old buffer survives a failed realloc */ \
				size_t new_m = str->l + n + 1; \
				char *tmp; \
				kroundup32(new_m); \
				tmp = (char*)realloc(str->s, new_m); \
				if (tmp == NULL) return -1; \
				str->s = tmp; \
				str->m = new_m; \
			} \
			memcpy(str->s + str->l, ks->buf + ks->begin, n); \
			str->l += n; \
			ks->begin = i + 1; \
			if (i < ks->end) { \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (str->s == NULL) { \
			/* allocate only when there is no buffer yet; replacing an existing one would leak it */ \
			str->s = (char*)calloc(1, 1); \
			if (str->s == NULL) return -1; \
			str->m = 1; \
		} \
		str->s[str->l] = '\0'; \
		return (int)str->l; \
	}
/*
 * Instantiate the whole stream API in one go: the kstream_t type for handles
 * of type stream_t, ks_init()/ks_destroy(), and ks_getc()/ks_getuntil()
 * reading through read_fn with a buffer of buf_size bytes.
 */
#define KSTREAM_INIT(stream_t, read_fn, buf_size) \
	__KS_TYPE(stream_t) \
	__KS_BASIC(stream_t, buf_size) \
	__KS_GETC(read_fn, buf_size) \
	__KS_GETUNTIL(read_fn, buf_size)
#endif
Updating from 1.2.3 to 1.2.5 adds 2 extra seconds (7+2) to the time needed to complete the following:
$ time pnmtopng 74MB.pnm > /dev/null
Is this caused by a slower I/O interface (i.e. the gz* functions) or by slower compression (i.e. deflate)? I tend to believe it is the latter. Anyway, it is good to know. Thanks.