From bob@nas.com Thu May 9 18:00:16 2002 Date: Thu, 25 Apr 2002 11:01:11 -0700 From: Bob Finch To: info-cyrus@lists.andrew.cmu.edu Subject: "spam" sieve extension I've gotten a handful of requests for the "spam" sieve extension I mentioned yesterday. The diffs aren't too big, so I'll post them to the list. The diffs add a test to sieve that passes the message to SpamAssassin. If SpamAssassin scores the message as spam, the test succeeds; otherwise the test fails. Here's a simple sieve script using the spam extension: require [ "spam", "fileinto" ]; if spam { fileinto "spamfolder"; } The diffs are based on cyrus-imapd-2.1.3 -- I haven't tried them with other versions. You'll also need to install SpamAssassin (see http://www.spamassassin.org/) and have spamd running. I'm currently running SpamAssassin 2.11. I've been running it for about a month on several mailboxes that typically get about 500 messages a day. The extension adds three new imapd.conf parameters: spam_max_size: 256000 The spam test will always return false for messages larger than spam_max_size. Messages larger than spam_max_size will not be passed to spamd. spam_spamd_host: 127.0.0.1 The IP address or hostname of the spamd server. spam_spamd_port: 783 The port number of the spamd server. When sieve evaluates the spam test, it calls a callback in imap/lmtpd.c that opens a connection to spamd. It then sends a CHECK command and writes the message to spamd. It parses the response from spamd and returns the result of the test to sieve. Things on my to-do list: * Add a configure option and ifdefs to conditionally include the spam extension * Provide a way for users to get more information about how SpamAssassin scores messages. This will require help from spamd, since it currently just returns the score and threshold. 
* Documentation -- Bob diff -cr cyrus-imapd-2.1.3-orig/imap/lmtpd.c cyrus-imapd-2.1.3/imap/lmtpd.c *** cyrus-imapd-2.1.3-orig/imap/lmtpd.c Thu Mar 7 09:55:28 2002 --- cyrus-imapd-2.1.3/imap/lmtpd.c Thu Apr 18 21:14:10 2002 *************** *** 111,116 **** --- 111,124 ---- char *authuser; /* user who submitted message */ struct auth_state *authstate; + + /* spam stuff */ + + /* This is in script_data, but the spam callback can't get to it */ + /* so we put a copy here */ + char *username; /* Username of mailbox */ + int spam_result_valid; /* != 0 iff spam result is valid */ + int spam_result; /* != iff message is spam */ } mydata_t; /* data per script */ *************** *** 910,915 **** --- 918,1087 ---- static char *markflags[] = { "\\flagged" }; static sieve_imapflags_t mark = { markflags, 1 }; + /* spam support */ + + static int + getline (int s, char *buf, int len) + { + char *bp = buf; + int ret = 1; + char ch; + + while ((ret = read (s, &ch, 1)) == 1 && ch != '\n') { + if (len > 0) { + *bp++ = ch; + len--; + } + } + if (len > 0) + *bp = '\0'; + return (buf != bp); + } + + + static int + full_write (int s, char *buf, int len) + { + int total; + int ret; + + for (total = 0; total < len; total += ret) { + ret = write (s, buf + total, len - total); + if (ret < 0) + return 0; + } + return total == len; + } + + + static int + read_response (int s, int *result) + { + char is_spam[6]; + char buf[1024]; + int major; + int minor; + int response; + int score; + int threshold; + + if (! getline (s, buf, sizeof (buf))) { + syslog (LOG_ERR, "read_response: response getline failed"); + return SIEVE_FAIL; + } + if (sscanf (buf, "SPAMD/%d.%d %d %*s", &major, &minor, &response) != 3) { + syslog (LOG_ERR, "read_response: response sscanf failed, buf: %s", + buf); + return SIEVE_FAIL; + } + if (major < 1 || (major == 1 && minor < 1)) { + syslog (LOG_ERR, "read_response: bad spamd version: %d.%d", + major, minor); + return SIEVE_FAIL; + } + if (! 
getline (s, buf, sizeof (buf))) { + syslog (LOG_ERR, "read_response: header getline failed"); + return SIEVE_FAIL; + } + if (sscanf (buf, "Spam: %5s ; %d / %d", is_spam, &score, &threshold) != 3) { + syslog (LOG_ERR, "read_response: header sscanf failed, buf: %s", + buf); + return SIEVE_FAIL; + } + + *result = ! strcmp(is_spam, "True"); + return SIEVE_OK; + } + + + int spam (void *mc, int *is_spam) + { + mydata_t *d = (mydata_t *) mc; + message_data_t *m = d->m; + int s; + struct sockaddr_in addr; + struct hostent *host; + char header[128]; + int max_size = config_getint ("spam_max_size", 250 * 1024); + const char *hostname = config_getstring ("spam_spamd_host", "127.0.0.1"); + int port = config_getint ("spam_spamd_port", 783); + char *msg_buf; + int ret; + + /* Assume message isn't spam if it is larger than max_size */ + if (m->size > max_size) { + syslog (LOG_INFO, "spam: skipping message bigger than %d", max_size); + return SIEVE_FAIL; + } + + memset (&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + + if ((host = gethostbyname (hostname)) == NULL) { + syslog (LOG_ERR, "spam: gethostbyname failed"); + return SIEVE_FAIL; + } + memcpy (&addr.sin_addr, host->h_addr, sizeof (addr.sin_addr)); + + if((s = socket (PF_INET, SOCK_STREAM, 0)) < 0) { + syslog (LOG_ERR, "spam: socket failed"); + return SIEVE_FAIL; + } + + if (connect (s, (const struct sockaddr *) &addr, sizeof (addr)) < 0) { + syslog (LOG_ERR, "spam: connect failed"); + close (s); + return SIEVE_FAIL; + } + + if ((msg_buf = malloc (m->size)) == NULL) { + syslog (LOG_ERR, "spam: malloc(%d) failed", m->size); + close (s); + return SIEVE_FAIL; + } + rewind (m->f); + if (fread (msg_buf, 1, m->size, m->f) != m->size || ferror (m->f)) { + syslog (LOG_ERR, "spam: read message failed"); + free (msg_buf); + close (s); + return SIEVE_FAIL; + } + + if (d->username) { + snprintf (header, sizeof (header), + "CHECK SPAMC/1.2\r\nUser: %s\r\nContent-length: %d\r\n\r\n", + d->username, 
m->size); + } + else { + snprintf (header, sizeof (header), + "CHECK SPAMC/1.2\r\nContent-length: %d\r\n\r\n", m->size); + } + if (! full_write (s, header, strlen (header))) { + syslog (LOG_ERR, "spam: write header failed"); + free (msg_buf); + close (s); + return SIEVE_FAIL; + } + if (! full_write (s, msg_buf, m->size)) { + syslog (LOG_ERR, "spam: write message failed"); + free (msg_buf); + close (s); + return SIEVE_FAIL; + } + + shutdown (s, SHUT_WR); + ret = read_response (s, is_spam); + shutdown (s, SHUT_RD); + + free (msg_buf); + close (s); + + syslog(LOG_DEBUG, "spam result: %d\n", ret); + return ret; + } + + int sieve_parse_error_handler(int lineno, const char *msg, void *ic, void *sc) { script_data_t *sd = (script_data_t *) sc; *************** *** 999,1004 **** --- 1171,1182 ---- fatal("sieve_register_vacation()", EC_SOFTWARE); } + res = sieve_register_spam(sieve_interp, &spam); + if (res != SIEVE_OK) { + syslog(LOG_ERR, "sieve_register_spam() returns %d\n", res); + fatal("sieve_register_spam()", EC_SOFTWARE); + } + res = sieve_register_parse_error(sieve_interp, &sieve_parse_error_handler); if (res != SIEVE_OK) { syslog(LOG_ERR, "sieve_register_parse_error() returns %d\n", res); *************** *** 1148,1154 **** mydata.notifyheader = generate_notify(msgdata); mydata.authuser = authuser; mydata.authstate = authstate; ! /* loop through each recipient, attempting delivery for each */ for (n = 0; n < nrcpts; n++) { char *rcpt = xstrdup(msg_getrcpt(msgdata, n)); --- 1326,1335 ---- mydata.notifyheader = generate_notify(msgdata); mydata.authuser = authuser; mydata.authstate = authstate; ! mydata.username = NULL; ! mydata.spam_result = 0; ! mydata.spam_result_valid = 0; ! 
/* loop through each recipient, attempting delivery for each */ for (n = 0; n < nrcpts; n++) { char *rcpt = xstrdup(msg_getrcpt(msgdata, n)); *************** *** 1187,1192 **** --- 1368,1376 ---- sdata->username = rcpt; sdata->mailboxname = plus; sdata->authstate = auth_newstate(rcpt, (char *)0); + + /* Make a copy of mailbox username for spam stuff */ + mydata.username = sdata->username; /* slap the mailboxname back on so we hash the envelope & id when we figure out whether or not to keep the message */ diff -cr cyrus-imapd-2.1.3-orig/sieve/interp.c cyrus-imapd-2.1.3/sieve/interp.c *** cyrus-imapd-2.1.3-orig/sieve/interp.c Tue Oct 2 14:08:13 2001 --- cyrus-imapd-2.1.3/sieve/interp.c Sun Mar 24 11:39:14 2002 *************** *** 154,159 **** --- 154,166 ---- return SIEVE_OK; } + int sieve_register_spam(sieve_interp_t *interp, sieve_spam *f) + { + interp->spam = f; + + return SIEVE_OK; + } + /* add the callbacks for messages. again, undefined if used after sieve_script_parse */ int sieve_register_size(sieve_interp_t *interp, sieve_get_size *f) diff -cr cyrus-imapd-2.1.3-orig/sieve/interp.h cyrus-imapd-2.1.3/sieve/interp.h *** cyrus-imapd-2.1.3-orig/sieve/interp.h Mon Feb 21 23:56:40 2000 --- cyrus-imapd-2.1.3/sieve/interp.h Sun Mar 24 11:40:53 2002 *************** *** 35,40 **** --- 35,41 ---- sieve_callback *redirect, *discard, *reject, *fileinto, *keep; sieve_callback *notify; sieve_vacation_t *vacation; + sieve_spam *spam; sieve_get_size *getsize; sieve_get_header *getheader; diff -cr cyrus-imapd-2.1.3-orig/sieve/script.c cyrus-imapd-2.1.3/sieve/script.c *** cyrus-imapd-2.1.3-orig/sieve/script.c Wed Feb 27 13:05:13 2002 --- cyrus-imapd-2.1.3/sieve/script.c Thu Apr 18 21:02:51 2002 *************** *** 102,107 **** --- 102,114 ---- return 1; } else if (!strcmp("comparator-i;ascii-casemap", req)) { return 1; + } else if (!strcmp("spam",req)) { + if (s->interp.spam) { + s->support.spam = 1; + return 1; + } else { + return 0; + } } return 0; } *************** *** 
361,366 **** --- 368,381 ---- res = (sz < t->u.sz.n); } break; + } + case SPAM: + { + int is_spam; + + if (i->spam == NULL || i->spam (m, &is_spam) != SIEVE_OK) + break; + res = is_spam; } } diff -cr cyrus-imapd-2.1.3-orig/sieve/script.h cyrus-imapd-2.1.3/sieve/script.h *** cyrus-imapd-2.1.3-orig/sieve/script.h Wed Feb 9 16:39:14 2000 --- cyrus-imapd-2.1.3/sieve/script.h Sun Mar 24 11:39:13 2002 *************** *** 45,50 **** --- 45,51 ---- int notify : 1; int regex : 1; int subaddress: 1; + int spam : 1; } support; void *script_context; diff -cr cyrus-imapd-2.1.3-orig/sieve/sieve-lex.l cyrus-imapd-2.1.3/sieve/sieve-lex.l *** cyrus-imapd-2.1.3-orig/sieve/sieve-lex.l Tue Feb 19 10:09:46 2002 --- cyrus-imapd-2.1.3/sieve/sieve-lex.l Sat Mar 23 18:43:22 2002 *************** *** 90,95 **** --- 90,96 ---- header return HEADER; not return NOT; size return SIZE; + spam return SPAM; reject return REJCT; fileinto return FILEINTO; redirect return REDIRECT; diff -cr cyrus-imapd-2.1.3-orig/sieve/sieve.y cyrus-imapd-2.1.3/sieve/sieve.y *** cyrus-imapd-2.1.3-orig/sieve/sieve.y Tue Mar 5 08:15:01 2002 --- cyrus-imapd-2.1.3/sieve/sieve.y Sun Mar 24 11:36:18 2002 *************** *** 141,146 **** --- 141,147 ---- %token SETFLAG ADDFLAG REMOVEFLAG MARK UNMARK %token NOTIFY DENOTIFY %token ANYOF ALLOF EXISTS SFALSE STRUE HEADER NOT SIZE ADDRESS ENVELOPE + %token SPAM %token COMPARATOR IS CONTAINS MATCHES REGEX OVER UNDER %token ALL LOCALPART DOMAIN USER DETAIL %token DAYS ADDRESSES SUBJECT MIME *************** *** 398,403 **** --- 399,409 ---- | NOT test { $$ = new_test(NOT); $$->u.t = $2; } | SIZE sizetag NUMBER { $$ = new_test(SIZE); $$->u.sz.t = $2; $$->u.sz.n = $3; } + | SPAM { if (!parse_script->support.spam) { + yyerror("spam not required"); + YYERROR; + } + $$ = new_test(SPAM); } | error { $$ = NULL; } ; diff -cr cyrus-imapd-2.1.3-orig/sieve/sieve_interface.h cyrus-imapd-2.1.3/sieve/sieve_interface.h *** cyrus-imapd-2.1.3-orig/sieve/sieve_interface.h Tue Feb 19 10:09:46 2002 --- 
cyrus-imapd-2.1.3/sieve/sieve_interface.h Sun Mar 24 19:46:44 2002 *************** *** 50,55 **** --- 50,57 ---- typedef int sieve_get_envelope(void *message_context, const char *field, const char ***contents); + typedef int sieve_spam(void *message_context, int *is_spam); + typedef struct sieve_vacation { int min_response; /* 0 -> defaults to 3 */ *************** *** 121,126 **** --- 123,129 ---- int sieve_register_vacation(sieve_interp_t *interp, sieve_vacation_t *v); int sieve_register_imapflags(sieve_interp_t *interp, sieve_imapflags_t *mark); int sieve_register_notify(sieve_interp_t *interp, sieve_callback *f); + int sieve_register_spam(sieve_interp_t *interp, sieve_spam *f); /* add the callbacks for messages. again, undefined if used after sieve_script_parse */ diff -cr cyrus-imapd-2.1.3-orig/timsieved/scripttest.c cyrus-imapd-2.1.3/timsieved/scripttest.c *** cyrus-imapd-2.1.3-orig/timsieved/scripttest.c Sun Dec 17 20:53:43 2000 --- cyrus-imapd-2.1.3/timsieved/scripttest.c Sun Mar 24 13:12:06 2002 *************** *** 171,176 **** --- 171,182 ---- return TIMSIEVE_FAIL; } + res = sieve_register_spam(i, (sieve_spam *) &foo); + if (res != SIEVE_OK) { + syslog (LOG_ERR, "sieve_register_spam() returns %d\n", res); + return TIMSIEVE_FAIL; + } + res = sieve_register_parse_error(i, &mysieve_error); if (res != SIEVE_OK) { syslog(LOG_ERR, "sieve_register_parse_error() returns %d\n", res);