TRANSPARENT MAILDIR COMPRESSION FOR QMAIL Frank DENIS "Jedi/Sector One" Most email traffic contains plain text or formatted text (HTML, Word, RTF). This kind of data can usually be compressed with very good ratios. So why aren't messages stored as compressed files? Benefits: - Saves *a lot* of space on hard drives for medium-sized messages, - Saves some space even with tiny messages on a ReiserFS partition using tails, - May even improve performance if your CPU is much faster than your hard drives. Caveats: - May increase your load average and memory requirements. This is not significant on mail servers with only occasional traffic. - May increase POP3 retrieval time for very long messages over a very fast connection. Before saying "okay, it will slow the traffic down a lot", please give it a try. Don't speculate, just profile. HOW DOES IT WORK? Incoming mail is spooled in the queue by "qmail-queue" and fetched by "qmail-send". If the destination is a local user, "qmail-lspawn" finds out special rules, runs "qmail-getpw" and finally "qmail-local" to deliver the message in a mailbox file or in a Maildir spool. With this patch, qmail-local will compress incoming messages on-the-fly and save them as gzip files. There are three ways to read these compressed messages: - Use a mail user agent that recognizes these compressed files. None AFAIK... - Uncompress all files before reading them, or automate the task (a simple shell alias should do the trick, like "gunzip ~/Maildir/new/*.gz;mutt -f ~/Maildir"). - Use POP3. Yes, the patch also changes qmail-pop3d in order to add on-the-fly decompression. So on a mail server with only POP3 access, adding compression is fully transparent. Users download uncompressed messages; nobody will care how they were stored on the server's hard disk. It works only with Maildir spools, and I have no plans to add mailbox file support, because this can be slow, painful to parse and unreliable. 
INSTALLATION Uncompress qmail (successfully tested with version 1.03) and go into the newly created directory: tar xzvf qmail-1.03.tar.gz cd qmail-1.03 Now, apply the patch: patch -p1 < ../qmail-compression.patch Change "../qmail-compression.patch" according to the path of that file. Yes, the file you are reading is the patch itself. Compile, install and launch qmail according to the traditional instructions. Nothing changes in the way you invoke it. The POP3 server can handle Maildir spools with both compressed and uncompressed messages, so you can safely apply this patch without losing any previous mail. You will need the "zlib" library. Almost every Linux distro has it, but you may need to adjust conf-cc and conf-ld if the compiler doesn't find it. BUGS The "STAT" POP3 command doesn't return the real size of messages, but their compressed size. Doing the right thing would require adding a header to each file, breaking the "gunzip *;mua" thing. Another way would be to uncompress each file in order to get its real size. This can slow down the system (well, "RETR" does it anyway, but why should we do it twice?). I don't think this missing feature will hurt any mail user agent, but please report any problem with it. I will add real size support if this is really annoying. DO WE NEED A LOT OF MEMORY TO UNCOMPRESS LARGE MESSAGES? Nope. Zlib rules. diff -u -r qmail-1.03.old/conf-cc qmail-1.03/conf-cc --- qmail-1.03.old/conf-cc Mon Jun 15 12:53:16 1998 +++ qmail-1.03/conf-cc Thu Jun 15 21:24:58 2000 @@ -1,3 +1,3 @@ -cc -O2 +cc -O2 -I/usr/include -I/usr/X11R6/include -DCOMPRESS=1 This will be used to compile .c files. diff -u -r qmail-1.03.old/conf-ld qmail-1.03/conf-ld --- qmail-1.03.old/conf-ld Mon Jun 15 12:53:16 1998 +++ qmail-1.03/conf-ld Thu Jun 15 20:24:57 2000 @@ -1,3 +1,3 @@ -cc -s +cc -s -L/usr/lib -L/usr/X11R6/lib -lz This will be used to link .o files into an executable. 
diff -u -r qmail-1.03.old/qmail-local.c qmail-1.03/qmail-local.c --- qmail-1.03.old/qmail-local.c Mon Jun 15 12:53:16 1998 +++ qmail-1.03/qmail-local.c Thu Jun 15 21:00:21 2000 @@ -1,5 +1,8 @@ #include #include +#ifdef COMPRESS +# include +#endif #include "readwrite.h" #include "sig.h" #include "env.h" @@ -99,7 +102,11 @@ s += fmt_str(s,"tmp/"); s += fmt_ulong(s,time); *s++ = '.'; s += fmt_ulong(s,pid); *s++ = '.'; - s += fmt_strn(s,host,sizeof(host)); *s++ = 0; + s += fmt_strn(s,host,sizeof(host)); +#ifdef COMPRESS + s += fmt_str(s,".gz"); +#endif + *s++ = 0; if (stat(fntmptph,&st) == -1) if (errno == error_noent) break; /* really should never get to this point */ if (loop == 2) _exit(1); @@ -108,12 +115,14 @@ str_copy(fnnewtph,fntmptph); byte_copy(fnnewtph,3,"new"); - alarm(86400); + alarm(86400); fd = open_excl(fntmptph); if (fd == -1) _exit(1); - substdio_fdbuf(&ss,read,0,buf,sizeof(buf)); substdio_fdbuf(&ssout,write,fd,outbuf,sizeof(outbuf)); +#ifdef COMPRESS + if (substdio_setcompress(&ssout, "wb") < 0) goto fail; +#endif if (substdio_put(&ssout,rpline.s,rpline.len) == -1) goto fail; if (substdio_put(&ssout,dtline.s,dtline.len) == -1) goto fail; @@ -125,7 +134,11 @@ if (substdio_flush(&ssout) == -1) goto fail; if (fsync(fd) == -1) goto fail; +#ifdef COMPRESS + if (gzclose(ssout.c) < 0) goto fail; +#else if (close(fd) == -1) goto fail; /* NFS dorks */ +#endif if (link(fntmptph,fnnewtph) == -1) goto fail; /* if it was error_exist, almost certainly successful; i hate NFS */ diff -u -r qmail-1.03.old/qmail-pop3d.c qmail-1.03/qmail-pop3d.c --- qmail-1.03.old/qmail-pop3d.c Mon Jun 15 12:53:16 1998 +++ qmail-1.03/qmail-pop3d.c Thu Jun 15 21:06:30 2000 @@ -269,8 +269,15 @@ if (fd == -1) { err_nosuch(); return; } okay(); substdio_fdbuf(&ssmsg,read,fd,ssmsgbuf,sizeof(ssmsgbuf)); +#ifdef COMPRESS + if (substdio_setcompress(&ssmsg, "rb") < 0) return; +#endif blast(&ssmsg,limit); +#ifdef COMPRESS + gzclose(ssmsg.c); +#else close(fd); +#endif } struct commands 
pop3commands[] = { diff -u -r qmail-1.03.old/substdi.c qmail-1.03/substdi.c --- qmail-1.03.old/substdi.c Mon Jun 15 12:53:16 1998 +++ qmail-1.03/substdi.c Thu Jun 15 21:14:17 2000 @@ -2,7 +2,13 @@ #include "byte.h" #include "error.h" +#ifdef COMPRESS +# include +static int oneread(c,op,fd,buf,len) +register gzFile c; +#else static int oneread(op,fd,buf,len) +#endif register int (*op)(); register int fd; register char *buf; @@ -11,7 +17,15 @@ register int r; for (;;) { - r = op(fd,buf,len); +#ifdef COMPRESS + if (c != 0) { + r = gzread(c,buf,(unsigned) len); + } else { +#endif + r = op(fd,buf,len); +#ifdef COMPRESS + } +#endif if (r == -1) if (errno == error_intr) continue; return r; } @@ -41,7 +55,11 @@ if (s->p) return s->p; q = s->n; +#ifdef COMPRESS + r = oneread(s->c,s->op,s->fd,s->x,q); +#else r = oneread(s->op,s->fd,s->x,q); +#endif if (r <= 0) return r; s->p = r; q -= r; @@ -57,8 +75,13 @@ { register int r; - if (s->p > 0) return getthis(s,buf,len); - r = s->n; if (r <= len) return oneread(s->op,s->fd,buf,r); + if (s->p > 0) return getthis(s,buf,len); + r = s->n; +#ifdef COMPRESS + if (r <= len) return oneread(s->c,s->op,s->fd,buf,r); +#else + if (r <= len) return oneread(s->op,s->fd,buf,r); +#endif r = substdio_feed(s); if (r <= 0) return r; return getthis(s,buf,len); } @@ -71,7 +94,11 @@ register int r; if (s->p > 0) return getthis(s,buf,len); +#ifdef COMPRESS + if (s->n <= len) return oneread(s->c,s->op,s->fd,buf,len); +#else if (s->n <= len) return oneread(s->op,s->fd,buf,len); +#endif r = substdio_feed(s); if (r <= 0) return r; return getthis(s,buf,len); } diff -u -r qmail-1.03.old/substdio.c qmail-1.03/substdio.c --- qmail-1.03.old/substdio.c Mon Jun 15 12:53:16 1998 +++ qmail-1.03/substdio.c Thu Jun 15 20:24:33 2000 @@ -1,4 +1,7 @@ #include "substdio.h" +#ifdef COMPRESS +# include +#endif void substdio_fdbuf(s,op,fd,buf,len) register substdio *s; @@ -12,4 +15,27 @@ s->op = op; s->p = 0; s->n = len; +#ifdef COMPRESS + s->c = 0; +#endif } + +#ifdef 
COMPRESS +int substdio_setcompress(s,mode) +register substdio *s; +register char *mode; +{ + if (s == 0) { + return -1; + } + if ((s->c = gzdopen(s->fd, mode)) == 0) { + return -2; + } +# ifdef Z_DEFLATED + gzsetparams(s->c, Z_DEFLATED, DEFLATE_LEVEL); +# endif + + return 0; + +} +#endif diff -u -r qmail-1.03.old/substdio.h qmail-1.03/substdio.h --- qmail-1.03.old/substdio.h Mon Jun 15 12:53:16 1998 +++ qmail-1.03/substdio.h Thu Jun 15 20:38:15 2000 @@ -1,15 +1,26 @@ #ifndef SUBSTDIO_H #define SUBSTDIO_H +#ifdef COMPRESS +# include +#endif + typedef struct substdio { char *x; int p; int n; int fd; int (*op)(); +#ifdef COMPRESS + gzFile c; +#endif } substdio; -#define SUBSTDIO_FDBUF(op,fd,buf,len) { (buf), 0, (len), (fd), (op) } +#ifdef COMPRESS +# define SUBSTDIO_FDBUF(op,fd,buf,len) { (buf), 0, (len), (fd), (op), 0 } +#else +# define SUBSTDIO_FDBUF(op,fd,buf,len) { (buf), 0, (len), (fd), (op) } +#endif extern void substdio_fdbuf(); @@ -32,6 +43,11 @@ #define SUBSTDIO_INSIZE 8192 #define SUBSTDIO_OUTSIZE 8192 + +#ifdef COMPRESS +# define DEFLATE_LEVEL 9 +extern int substdio_setcompress(); +#endif #define substdio_PEEK(s) ( (s)->x + (s)->n ) #define substdio_SEEK(s,len) ( ( (s)->p -= (len) ) , ( (s)->n += (len) ) ) diff -u -r qmail-1.03.old/substdo.c qmail-1.03/substdo.c --- qmail-1.03.old/substdo.c Mon Jun 15 12:53:16 1998 +++ qmail-1.03/substdo.c Thu Jun 15 21:10:46 2000 @@ -3,7 +3,13 @@ #include "byte.h" #include "error.h" +#ifdef COMPRESS +# include +static int allwrite(c,op,fd,buf,len) +register gzFile c; +#else static int allwrite(op,fd,buf,len) +#endif register int (*op)(); register int fd; register char *buf; @@ -12,12 +18,24 @@ register int w; while (len) { - w = op(fd,buf,len); - if (w == -1) { - if (errno == error_intr) continue; - return -1; /* note that some data may have been written */ +#ifdef COMPRESS + if (c != 0) { + w = gzwrite(c,buf,(unsigned) len); + if (w <= 0) { + if (errno == error_intr) continue; + return -1; /* note that some data may have 
been written */ + } + } else { +#endif + w = op(fd,buf,len); + if (w == -1) { + if (errno == error_intr) continue; + return -1; /* note that some data may have been written */ + } + if (w == 0) ; /* luser's fault */ +#ifdef COMPRESS } - if (w == 0) ; /* luser's fault */ +#endif buf += w; len -= w; } @@ -32,7 +50,11 @@ p = s->p; if (!p) return 0; s->p = 0; - return allwrite(s->op,s->fd,s->x,p); +#ifdef COMPRESS + return allwrite(s->c,s->op,s->fd,s->x,p); +#else + return allwrite(s->op,s->fd,s->x,p); +#endif } int substdio_bput(s,buf,len) @@ -56,7 +78,7 @@ register substdio *s; register char *buf; register int len; -{ +{ register int n; n = s->n; @@ -66,7 +88,11 @@ if (n < SUBSTDIO_OUTSIZE) n = SUBSTDIO_OUTSIZE; while (len > s->n) { if (n > len) n = len; +#ifdef COMPRESS + if (allwrite(s->c,s->op,s->fd,buf,n) == -1) return -1; +#else if (allwrite(s->op,s->fd,buf,n) == -1) return -1; +#endif buf += n; len -= n; } @@ -83,7 +109,11 @@ register int len; { if (substdio_flush(s) == -1) return -1; +#ifdef COMPRESS + return allwrite(s->c,s->op,s->fd,buf,len); +#else return allwrite(s->op,s->fd,buf,len); +#endif } int substdio_bputs(s,buf)