/* process_query : CGI Program for generic form handling at Lehigh.

   This CGI accepts form input from most HTML forms. Certain restrictions
   apply: the method must be POST, INPUT/FILE fields are not supported, and
   certain fields (FORMNAME, RECIPIENT, and SENDER--the names are
   case-sensitive) are required. Another special field (CO-RECIPIENT) is
   optional. RECIPIENT, CO-RECIPIENT, and SENDER are electronic mail
   addresses. RECIPIENT, CO-RECIPIENT, and FORMNAME are usually specified as
   "hidden" fields, whose values are determined by the author of the form.
   SENDER is intended to be supplied by the person who enters the form data,
   and so is usually a field of type INPUT/TEXT.

   The (decoded) contents of the form are sent to the address specified as
   RECIPIENT as an e-mail message. The message appears as if it were from the
   SENDER. The FORMNAME appears in the subject of the message. A copy of the
   message is also sent to the CO-RECIPIENT, if one is specified.

   The program includes support for author-defined required fields. Any field
   (for example, a field named "x") can be immediately preceded by a field
   named "x-REQUIRED" (caps mandatory) which indicates that the following
   field is "required" (i.e., must not be blank). If a required field is left
   blank, the form is rejected. The "x-REQUIRED" field is normally specified
   as a "hidden" field. Any value given to this "x-REQUIRED" field is ignored.
   If no "x-REQUIRED" fields are encountered, no fields are required, other
   than FORMNAME, RECIPIENT, and SENDER ("x-REQUIRED" specification is neither
   required nor permitted for these fields, because they are already
   mandatory). If an "x-REQUIRED" field is encountered, but the next field is
   not "x", the "x-REQUIRED" field is treated as an ordinary field; it has no
   effect on the following field, and its value is displayed normally. This is
   to permit fields whose names contain the characters "-REQUIRED", without
   intending any special meaning for the form handler.
(For this reason, fields which may or may not result in a name-value pair, such as checkbox or radio-button fields, should not be specified as author-defined required fields.) The form handler will reject the form when any blank required field is encountered. The error message will show the NAMEs of the blank required fields. Note that the form itself does not show the NAMEs of the field, only the text of the form, so choose NAMEs and label text which are close enough to each other that the user can figure out what's missing. SDR: 24 Apr 98 */ #include #include #include #include #include #include /* String Constants and Macros */ #define MAILCMD "/usr/sbin/sendmail -t -oi" #define ADDR_CHARS "-_.@ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" #define HTML_DOCTYPE "text/html" #define SHTML_DOCTYPE "text/x-server-parsed-html" #define HTTP_RESPONSE_HEADER(doctype) "Content-type: " doctype "\n\n" #define HTML32_PROLOGUE "\n" #define HTML20_PROLOGUE "\n" #define ERROR_STARTDOC "Lehigh University Form: ERROR\n" #define STARTDOC "%s: SUCCESS\n" #define ENDDOC "\n" #define BODY "\n" #define ERROR_HEADING "

ERROR

\n\n" #define ERROR_ID(errnum) "

CGI process_query - Error " errnum ".

\n" #define HEADING "

%s

\n\n" #define LINESIZE 80 #define MAX_NAMELEN 60 #define MAX_BADFIELD 10 #define MAX_CONTENT 20000000L typedef struct nv { char *name; char *value; } nvpair; nvpair *url_decode_data(char **buffer, int *n_fields, int len); char *html_sanitize(char *input); char *text_sanitize(char *string); char *name_sanitize(char *string); char *addr_sanitize(char *string); int toint(char c); main(int argc, char *argv[]) { FILE *mailpipe; /* Pipe to mail agent */ nvpair *formdata = NULL; /* Array of fields (name/value pairs) */ char *formbuf = NULL; /* Buffer for name/value strings */ register int field = 0; /* Index of current field */ long test_content = 0L; /* (For content-length conversion) */ int content_len = 0; /* Total length of encoded form data */ int num_fields = 0; /* Number of fields in input data */ int longest = 0; /* Length of longest field name */ int anon_field = 0; /* Are there any nameless fields? */ int trunc_name = 0; /* Were any field names truncated? */ int reqd_missing = 0; /* Were any required fields left blank? */ int name_set = 0; /* Has formname been set? */ int rcpt_set = 0; /* Has recipient been set? */ int corcpt_set = 0; /* Has corecipient been set? */ int sndr_set = 0; /* Has sender been set? */ int remote_host_len = 0; /* Length of Remote Host Name */ int ref_url_len = 0; /* Length of Referrer URL */ char *remote_host = NULL; /* Name (or IP) of remote host */ char *ref_url = NULL; /* Referrer URL (URL of form) */ char formname[LINESIZE+1]; /* Name of form (assigned by author) */ char recipient[LINESIZE+1]; /* E-mail address of recipient of data */ char corecipient[LINESIZE+1]; /* E-mail address of corecipient of data */ char sender[LINESIZE+1]; /* E-mail address of supplier of data */ char tempname[LINESIZE+1]; /* Work buffer for field name */ int which_miss[MAX_BADFIELD+1]; /* Which required fields were blank? 
*/ char *safe_data = NULL; /* Pointer to HTML-sanitized data string */ register int i; /* General index variable */ /* Perform initializations */ for (i = 0; i <= LINESIZE; i++) { formname[i] = '\0'; recipient[i] = '\0'; corecipient[i] = '\0'; sender[i] = '\0'; } tempname[LINESIZE] = '\0'; /* Get the info for the remote */ remote_host_len = 0; remote_host = getenv("REMOTE_HOST"); if (remote_host) remote_host_len = strlen(remote_host); else { remote_host = getenv("REMOTE_ADDR"); if (remote_host) remote_host_len = strlen(remote_host); else { remote_host = "(unknown)"; remote_host_len = strlen(remote_host); } } /* Get the URL of the form page (if available) */ ref_url_len = 0; ref_url = getenv("HTTP_REFERER"); if (ref_url) ref_url_len = strlen(ref_url); else ref_url = ""; /* Make sure the METHOD is POST */ if (strcmp(getenv("REQUEST_METHOD"), "POST")) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

This CGI program must be referenced with a METHOD of POST. \n"); printf ("("mailto:" forms or other forms using the GET \n"); printf ("method are not compatible with this program).

\n\n"); if (ref_url_len > 0) { printf ("

This is probably an error in the construction of \n"); printf ("the previous page \n", ref_url); printf ("Please notify the author of that page, if possible.

\n\n"); printf (ERROR_ID("2")); } else { printf ("

This program is meant to be used in conjunction with \n"); printf ("an HTML form, and should not be invoked by a URL typed \n"); printf ("directly into the browser or via a bookmark.

\n\n"); printf (ERROR_ID("1")); } printf (ENDDOC); exit(1); } /* Make sure that the data are url-encoded */ if (strcmp(getenv("CONTENT_TYPE"), "application/x-www-form-urlencoded")) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

This CGI program must be used with an HTML form. The \n"); printf ("CONTENT_TYPE of the submitted data must be \n"); printf (""application/x-www-form-urlencoded". Other data \n"); printf ("encodings, such as from a form with an INPUT field of \n"); printf ("TYPE="file" (direct file upload), are not supported.

\n\n"); if (ref_url_len > 0) { printf ("

This is probably an error in the construction of \n"); printf ("the previous page. \n", ref_url); printf ("Please notify the author of that page, if possible.

\n\n"); printf (ERROR_ID("4")); } else printf (ERROR_ID("3")); printf (ENDDOC); exit(1); } /* Get content length */ errno = 0; test_content = strtol(getenv("CONTENT_LENGTH"), NULL, 10); if ((test_content >= MAX_CONTENT) || (test_content >= (long) INT_MAX)) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

The form contains too much data to process. \n"); printf ("This submission is aborted. You may wish to resubmit \n"); printf ("after reducing the volume of data somewhat.

\n\n"); printf (ERROR_ID("5")); printf (ENDDOC); exit(1); } else content_len = (int) test_content; if (content_len <= 0) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

The form contains no data.

\n\n"); if (ref_url_len > 0) { printf ("

This is probably an error in the construction of \n"); printf ("the previous page. \n", ref_url); printf ("Please notify the author of that page, if possible.

\n\n"); printf (ERROR_ID("7")); } else printf (ERROR_ID("6")); printf (ENDDOC); exit(1); } /* Decode url-encoded form data */ formdata = url_decode_data(&formbuf, &num_fields, content_len); if (!formdata) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

Unable to finish processing form data. This may be a \n"); printf ("transient problem (connection closed before all form data \n"); printf ("received, out-of-memory, or other similar error), or it \n"); printf ("may indicate a problem with your browser. Please go back \n"); printf ("to the previous page (if possible) and resubmit the form.

\n\n"); if (ref_url_len > 0) { printf ("

If you have already received this error before, this \n"); printf ("may indicate an error in the construction of \n"); printf ("the previous page. \n", ref_url); printf ("In that case, please notify the author of that page, \n"); printf ("if possible.

\n\n"); printf (ERROR_ID("9")); } else printf (ERROR_ID("8")); printf (ENDDOC); exit(1); } /* Process fields (first pass): Get special fields and determine longest field name. Set flags for missing fields and other errors. After extracting data from special fields, set their name and value pointers to NULL, so that they do not get reported as ordinary fields. */ longest = 0; for (field = 0; field < num_fields; field++) { /* Sanitize input and validate field name. Take note of any nameless fields; truncate field names that are excessively long. */ formdata[field].name = name_sanitize(formdata[field].name); formdata[field].value = text_sanitize(formdata[field].value); if (strlen(formdata[field].name) == 0) anon_field++; if (strlen(formdata[field].name) > MAX_NAMELEN) { formdata[field].name[MAX_NAMELEN] = '\0'; trunc_name++; } /* Get formname, recipient(s), and sender */ if (!strcmp(formdata[field].name, "FORMNAME")) { name_set++; strncpy(formname, formdata[field].value, LINESIZE); formdata[field].name = NULL; formdata[field].value = NULL; continue; } if (!strcmp(formdata[field].name, "RECIPIENT")) { rcpt_set++; strncpy(recipient, formdata[field].value, LINESIZE); formdata[field].name = NULL; formdata[field].value = NULL; continue; } if (!strcmp(formdata[field].name, "CO-RECIPIENT")) { corcpt_set++; strncpy(corecipient, formdata[field].value, LINESIZE); formdata[field].name = NULL; formdata[field].value = NULL; continue; } if (!strcmp(formdata[field].name, "SENDER")) { sndr_set++; strncpy(sender, formdata[field].value, LINESIZE); formdata[field].name = NULL; formdata[field].value = NULL; continue; } /* Check to see if this is a user-defined "required" field. */ if ((field > 0) && (formdata[field - 1].name)) { /* Add "-REQUIRED" to name of current field, and compare to name of previous field. (Note: Field names have already been sanitized and truncated if necessary.) 
*/ strncpy(tempname, formdata[field].name, LINESIZE); strcat(tempname, "-REQUIRED"); if (!strcmp(formdata[field - 1].name, tempname)) { /* Current field is required, previous field is just a flag. Check to see if current field is blank. */ if ((strlen(formdata[field].value) == 0) || (strspn(formdata[field].value, " \t\n\r") == strlen(formdata[field].value))) { /* Error--blank required field */ if (reqd_missing < MAX_BADFIELD) which_miss[reqd_missing] = field; reqd_missing++; } /* Mark out previous ("x-REQUIRED") field. */ formdata[field - 1].name = NULL; formdata[field - 1].value = NULL; continue; } } /* Unless the field has been marked out, see if it has the longest name. */ if ((formdata[field].name) && (strlen(formdata[field].name) > longest)) longest = strlen(formdata[field].name); } /* Before continuing, further verify/sanitize the three critical fields: FORMNAME, RECIPIENT, and SENDER. Errors in these fields take priority. */ /* FORMNAME: If there is exactly one FORMNAME, make sure it isn't blank. If it is, reset "name_set" (react as if FORMNAME hadn't been specified at all). If there are multiple FORMNAMEs (i.e., name_set > 1), don't bother about them being blank or not; complain about the multiplicity instead. */ if ((name_set == 1) && ((strlen(formname) == 0) || (strspn(formname, " \t\n\r") == strlen(formname)))) name_set = 0; else /* Strip all control characters from FORMNAME (it must be a single line). */ { for (i = 0; formname[i]; i++) if (formname[i] < 32) formname[i] = ' '; } /* Parse RECIPIENT, CO-RECIPIENT and SENDER as email addresses. 
*/ if ((rcpt_set == 1) && ((strlen(recipient) == 0) || (strspn(recipient, " \t\n\r") == strlen(recipient)))) rcpt_set = 0; else if ((rcpt_set == 1) && (addr_sanitize(recipient) == NULL)) rcpt_set = -1; if ((corcpt_set == 1) && ((strlen(corecipient) == 0) || (strspn(corecipient, " \t\n\r") == strlen(corecipient)))) corcpt_set = 0; else if ((corcpt_set == 1) && (addr_sanitize(corecipient) == NULL)) corcpt_set = -1; if ((sndr_set == 1) && ((strlen(sender) == 0) || (strspn(sender, " \t\n\r") == strlen(sender)))) sndr_set = 0; else if ((sndr_set == 1) && (addr_sanitize(sender) == NULL)) sndr_set = -1; /* Otherwise, if errors were found during field processing, report them and abort the submission of the form. */ if ((anon_field > 0) || (name_set != 1) || (rcpt_set != 1) || (corcpt_set < 0) || (corcpt_set > 1) || (sndr_set != 1) || (reqd_missing > 0)) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

Errors were detected during processing of form data. \n"); printf ("Usually, this is due to one or more fields on the form \n"); printf ("which have been incorrectly or incompletely filled out. \n"); printf ("You may wish to go back and resubmit the form after checking \n"); printf ("it thoroughly for mistakes or missed items.

\n\n"); printf ("

However, some or all of the problems may be due to \n"); printf ("errors in the design or construction of \n"); if (ref_url_len > 0) printf ("the previous page. \n", ref_url); else printf ("the previous page. \n"); printf ("Please notify the author of that page, if possible.

\n\n"); printf ("

The specific errors were as follows:

\n\n"); if ((name_set <= 0) || (rcpt_set == 0)) { printf ("

The FORMNAME or RECIPIENT field(s) are missing or blank. \n"); printf ("These fields should be set by the author of the form.

\n\n"); } if ((rcpt_set < 0) || (corcpt_set < 0)) { printf ("

The RECIPIENT or CO-RECIPIENT field(s) do not appear \n"); printf ("to contain valid Internet electronic mail addresses. \n"); printf ("These fields are normally set by the author of the form.

\n\n"); } if (sndr_set == 0) { printf ("

The SENDER field is missing or blank. This field \n"); printf ("should contain the electronic mail address of the person \n"); printf ("submitting data via the form. This permits the person \n"); printf ("responsible for the form to respond to this user, \n"); printf ("should the need arise. This field is required, and \n"); printf ("form submissions will not be accepted without it.

\n\n"); } if (sndr_set < 0) { printf ("

The SENDER field does not appear to contain a valid \n"); printf ("Internet electronic mail address. This field should contain \n"); printf ("the electronic mail address of the person submitting data \n"); printf ("via the form. This permits the person responsible for the \n"); printf ("form to respond to this user, should the need arise. This \n"); printf ("field is required, and form submissions will not be accepted \n"); printf ("without it.

\n\n"); printf ("

The address must be in the form "localpart@domain" \n"); printf ("(without the quotes). Multiple addresses, groups, and \n"); printf (""route addresses" (addresses enclosed \n"); printf ("in "<" and ">" brackets) are not \n"); printf ("supported, nor are comments (e.g., real names in parentheses) \n"); printf ("allowed. So-called "explicit source routing," \n"); printf ("using the UUCP bang ("!") or the "%-hack" \n"); printf ("characters to specify mail relaying, is also not supported. \n"); printf ("Most special characters are forbidden (the exceptions are \n"); printf ("period or dot, hyphen or minus, and underscore), and quoted or \n"); printf (""escaped" characters are disallowed. This means \n"); printf ("that some address forms, which might be legitimate in another \n"); printf ("context, will not work here. Please check your address, and \n"); printf ("simplify or correct it (if possible), to conform with the \n"); printf ("limitations just described before resubmitting the form.

\n\n"); } if ((name_set > 1) || (rcpt_set > 1) || (corcpt_set > 1) || (sndr_set > 1)) { printf ("

One or more of the fields FORMNAME, RECIPIENT, CO-RECIPIENT, \n"); printf ("and/or SENDER has been specified more than once. This CGI program \n"); printf ("does not support multiple senders or recipients, nor forms \n"); printf ("with multiple names. (This is a flaw in the form itself.)

\n\n"); } if (anon_field > 0) { printf ("

One or more fields has no name, or has a name which \n"); printf ("consists entirely of whitespace characters. This is not \n"); printf ("permitted. (This is a flaw in the form itself.)

\n\n"); } if (reqd_missing > 0) { printf ("

The author of the form has designated one or more of \n"); printf ("the fields on this form as "required," and \n"); if (reqd_missing > 1) { printf ("%d such fields have been left blank. \n", reqd_missing); if (reqd_missing > MAX_BADFIELD) printf ("The first %d of these are:

\n
    \n", MAX_BADFIELD); else printf ("They are:

    \n
      \n"); } else printf ("1 of these has been left blank. It is:

      \n
        \n"); i=0; while ((i < reqd_missing) && (i < MAX_BADFIELD)) { field = which_miss[i++]; printf ("
      • %s
      • \n", formdata[field].name); } printf ("
      \n"); printf ("

      Please go back to the form and resubmit after completing \n"); printf ("all required fields.

      \n\n"); } printf (ERROR_ID("10")); printf (ENDDOC); exit(1); } /* Open the pipe to the mail agent */ if (!(mailpipe = popen(MAILCMD, "w"))) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

      Unable to finish submitting form data. (Unable to \n"); printf ("communicate with mail agent.) This should be a transient \n"); printf ("problem. It probably does not indicate any problem with \n"); printf ("the form, with your browser, or with the data you submitted. \n"); printf ("It most likely is a system problem at Lehigh. Please allow \n"); printf ("some time for us to find and fix the problem, and resubmit \n"); printf ("the form later.

      \n\n"); printf ("

      If this problem persists, please send electronic mail \n"); printf ("to the webmaster at \n"); printf ("www@lehigh.edu. \n"); printf ("Please include the date and time you first encountered the \n"); printf ("problem, and the URL of the form page.

      \n\n"); if (ref_url_len > 0) { printf ("

      Form page: %s

      \n\n", ref_url); } printf (ERROR_ID("11")); printf (ENDDOC); exit(1); } /* Generate mail headers. */ fprintf(mailpipe, "To: %s \n", recipient); if (corcpt_set == 1) fprintf(mailpipe, "CC: %s \n", corecipient); fprintf(mailpipe, "Sender: wwwuser@lehigh.edu \n"); fprintf(mailpipe, "From: %s \n", sender); fprintf(mailpipe, "Reply-To: %s \n", sender); fprintf(mailpipe, "Subject: %s \n", formname); fprintf(mailpipe, "Comments: Lehigh University Web Form Submission " "from %s \n", remote_host); fprintf(mailpipe, "Errors-To: www@lehigh.edu \n\n"); /* Generate message with form data. */ if (trunc_name > 0) fprintf(mailpipe, "(Note: Some field names have been truncated.) \n\n"); fprintf(mailpipe, "*** START OF DATA *** \n\n"); for (field = 0; field < num_fields; field++) { /* Display the field unless it has been "marked-out". */ if (formdata[field].name) { strncpy(tempname, formdata[field].name, LINESIZE); for (i = strlen(tempname); i < longest; i++) tempname[i] = ' '; tempname[i] = '\0'; fprintf(mailpipe, " %s: %s \n", tempname, formdata[field].value); } } fprintf(mailpipe, "\n*** END OF DATA *** \n\n"); /* Close the pipe. */ fflush(mailpipe); if (ferror(mailpipe)) { printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (ERROR_STARTDOC); printf (BODY); printf (ERROR_HEADING); printf ("

      Error encountered while sending data to mail agent. \n"); printf ("The recipient may receive an incomplete message, or no \n"); printf ("message at all. You may wish to resubmit, but this may \n"); printf ("result in a duplicate submission. You should probably \n"); printf ("attempt to contact the author of the form by another \n"); printf ("means, if possible.

      \n\n"); printf (ERROR_ID("12")); printf (ENDDOC); exit(1); } fclose(mailpipe); /* Generate the form response page indicating success. */ printf (HTTP_RESPONSE_HEADER(HTML_DOCTYPE)); printf (HTML32_PROLOGUE); printf (STARTDOC, formname); printf (BODY); printf (HEADING, formname); printf ("

      Result: SUCCESS

      \n"); printf ("

      Thank you. Your submission has been sent to the \n" " person responsible for this form. The data you entered \n" " are being sent as follows:

      \n\n"); printf (" \n\n"); for (field = 0; field < num_fields; field++) { /* Display the field unless it has been "marked-out". */ if (formdata[field].name) { /* Display the field name (Note: the name_sanitize process is more restrictive than html_sanitize; the name is already HTML-safe). */ printf (" \n", formdata[field].name); /* Sanitize and display the field value. */ safe_data = html_sanitize(formdata[field].value); if (safe_data) { printf (" \n\n", safe_data); free(safe_data); } else printf (" \n\n"); } } printf ("
      "); printf ("%s:
      ");
                  printf ("%s
      \n\n"); printf (ENDDOC); free(formdata); free(formbuf); exit(0); } /* url_decode_data: Read and decode url-encoded web form data. */ nvpair *url_decode_data(char **buffer, int *n_fields, int len) { nvpair *field_array; /* Array of fields */ register int in, out; /* Placeholders for input scan */ int field; /* Current field index */ int array_size; /* Current size of field array */ int code; /* Used in conversion of encoded values */ unsigned char next; /* Current input character */ /* This function expects a pointer to the buffer: it will allocate space as needed, but the caller can use the pointer to free the space later. The function will also set the number of fields before returning the array of name-value pairs. In the event of an unrecoverable error, the function returns NULL. Space for the buffer is allocated based upon the expected number of input characters (which should be an upper bound), plus a 1K margin for error, just in case. Rather than pre-scan to determine the number of fields, the array of fields (each field consisting of a structure containing pointers to name and value character strings) is preallocated for an initial estimate of 10K fields. This should be more than adequate for any reasonable form, but the function will automatically enlarge this array if needed (in 1K increments, for efficiency). It will be resized downward to the actual size required just before being returned. The function reads from standard input. The expected number of characters to read is "len"; the function is not permitted to rely on getting an end-of-file indication, but if it gets one, must respect it. The input stream *should* be url-encoded data, which consists of fields separated by '&' characters, each of which has a name part and a value part (in that order), separated by an '=' character. 
Spaces will be encoded as '+' characters, and special characters (including control characters) should be encoded by a three-character sequence beginning with '%' and followed by two characters corresponding to the hexadecimal representation of the encoded character. The function *must* behave reasonably and predictably, even if the expectations just described are not met (in the worst case, return NULL). */ /* Allocate input buffer (return immediately upon failure) */ if (!((*buffer) = (char *) malloc(sizeof(char) * (len + 1024)))) return NULL; /* Allocate field array (return immediately upon failure) */ array_size = 10240; if (!(field_array = (nvpair *) malloc(sizeof(nvpair) * (array_size)))) return NULL; /* Pre-initialize first field */ field = 0; field_array[0].name = (*buffer); field_array[0].value = ""; /* Scan through input, one character at a time. At end of scan, "in" will be number of characters read, "out" will be number of characters written, and "field" will be the index of the last field (a number one less than the number of fields). 
*/ for (in=0, out=0; in= array_size) { /* Increase field array as needed */ if (array_size > (INT_MAX - 2050)) return NULL; array_size += 1024; field_array = (nvpair *) realloc(field_array, sizeof(nvpair) * (array_size)); if (!field_array) return NULL; } field_array[field].name = (*buffer) + (++out); field_array[field].value = ""; break; case '=': /* Start of value */ field_array[field].value = (*buffer) + (++out); break; case '+': /* Encoded space */ (*buffer)[out++] = ' '; break; case '%': /* Hex-encoded character */ next=(char)fgetc(stdin); if (feof(stdin)) break; if (!isxdigit(next)) { (*buffer)[out++] = '%'; (*buffer)[out++] = next; break; } code = toint(next); next=(char)fgetc(stdin); if (feof(stdin) || (!isxdigit(next))) { (*buffer)[out++] = (char)code; break; } code = (code << 4) | toint(next); (*buffer)[out++] = (char)code; break; default: /* Regular character */ (*buffer)[out++] = next; } } (*buffer)[out++] = '\0'; (*n_fields) = ++field; /* Resize the array */ field_array = (nvpair *) realloc(field_array, sizeof(nvpair) * (*n_fields)); return field_array; } /* html_sanitize: Prepare a character string for display within HTML. */ char *html_sanitize(char *input) { char *output = NULL; /* Sanitized string */ register int in, out; /* Placeholders for input scan */ int len; /* Length of input string */ unsigned int next; /* Current character */ unsigned int digit; /* Temporary storage for digit conversion */ /* This function takes a character string and removes from it any characters that might have a special interpretation within an HTML source file (MIME type "text/html"), replacing them with innocuous equivalents. 
Specifically, the characters less-than ('<'), greater-than ('>'), ampersand ('&'), and double-quote ('"') are replaced with their entity-reference equivalents "<", ">", "&", and """; control characters other than simple whitespace (characters 0x00 through 0x1F and 0x7F, except for newline--0x0A, carriage return--0x0D, and tab--0x08) are removed; and characters above 0x80 are replaced with character-reference equivalents (0x80 is replaced with "€", for example). Note that this can result in an increase in the overall size of the string. The conversion is done on a copy of the string (the original remains unchanged). The returned string can be freed after use. */ len = strlen(input); /* Allocate output buffer (return immediately upon failure) */ if (!(output = (char *) malloc(sizeof(char) * ((6 * len) + 16)))) return NULL; for (in=0, out=0; in': /* Insert entity reference for character */ output[out++] = '&'; output[out++] = 'g'; output[out++] = 't'; output[out++] = ';'; break; case '&': /* Insert entity reference for character */ output[out++] = '&'; output[out++] = 'a'; output[out++] = 'm'; output[out++] = 'p'; output[out++] = ';'; break; case '"': /* Insert entity reference for character */ output[out++] = '&'; output[out++] = 'q'; output[out++] = 'u'; output[out++] = 'o'; output[out++] = 't'; output[out++] = ';'; break; default: if ((next == 127) || ((next < 32) && (next != 8) && (next != 10) && (next != 13))) /* Omit non-whitespace control characters */ break; else if ((next >= 128) && (next <= 255)) /* Insert character reference for character */ { output[out++] = '&'; output[out++] = '#'; if (next >= 200) { next -= 200; output[out++] = '2'; } else { next -= 100; output[out++] = '1'; } digit = next%10; next = next/10; output[out++] = (char) (next + (unsigned int) '0'); output[out++] = (char) (digit + (unsigned int) '0'); output[out++] = ';'; } else /* Insert character */ output[out++] = next; } } /* Terminate string with null */ output[out] = '\0'; /* Resize the 
string */ output = (char *) realloc(output, sizeof(char) * (strlen(output) + 1)); return output; }

/* text_sanitize: Strip disallowed control characters from a string. */

char *text_sanitize(char *string)
{
    size_t in;              /* Read position in the original text */
    size_t out;             /* Write position for the characters kept */
    unsigned char next;     /* Current character, as an unsigned byte */

    /* This function takes a null-terminated character string and removes
       from it every control character that is not simple whitespace:
       bytes 0x01 through 0x1F and 0x7F are dropped, except for tab (0x09),
       newline (0x0A), and carriage return (0x0D), which are kept.  Every
       8-bit byte (0x80 through 0xFF) is replaced with an underscore.

       The conversion is done in-place on the original string, which can
       only get shorter; the bytes vacated at the end are set to null.
       The return is either a pointer to this string or NULL (when the
       argument is NULL).

       After being converted by this routine, text should be 7-bit safe;
       suitable for inclusion in an e-mail message. */

    if (!string)
        return NULL;

    in = 0;
    out = 0;

    /* Bytes are read as unsigned char so that the result does not depend
       on whether plain char is signed on this platform. */
    while ((next = (unsigned char) string[in]) != '\0') {
        in++;
        if (next > 127)
            string[out++] = '_';
        else if ((next >= 32 && next != 127) ||
                 next == '\t' || next == '\n' || next == '\r')
            string[out++] = (char) next;
        /* Otherwise it is a disallowed control character: drop it. */
    }

    /* "in" now indexes the original terminator.  Null out whatever lies
       between the end of the kept text and that terminator. */
    while (out < in)
        string[out++] = '\0';

    return string;
}

/* name_sanitize: Convert a character string into a valid field name. */

char *name_sanitize(char *string)
{
    size_t end;             /* Length once trailing junk has been removed */
    size_t in;              /* Read position */
    size_t out;             /* Write position */
    unsigned char next;     /* Current character, as an unsigned byte */

    /* This function takes a null-terminated character string and
       conditions it for use as a field name:

         - leading and trailing whitespace and control characters
           (anything <= 0x20, and 0x7F) are removed;
         - each internal run of whitespace (space, tab, newline, carriage
           return) becomes a single underscore;
         - other internal control characters are dropped;
         - letters, digits, hyphen, period, and underscore are kept;
         - every other character, including all 8-bit bytes, becomes an
           underscore.

       The character tests assume an ASCII-compatible execution character
       set.  The conversion is done in-place on the original string, which
       can only get shorter; vacated bytes are set to null.  The return is
       either a pointer to this string or NULL (when the argument is
       NULL). */

    if (!string)
        return NULL;

    /* Strip trailing whitespace (including control characters).  Working
       right-to-left, null out characters until the first one that is
       neither whitespace nor a control character.  The loop must stop
       there: carrying on would truncate the name at its first internal
       (or leading) blank. */

    end = strlen(string);
    while (end > 0) {
        next = (unsigned char) string[end - 1];
        if (next > 32 && next != 127)
            break;
        string[--end] = '\0';
    }

    /* Skip leading whitespace (including control characters).  Because
       the character at end - 1 (if any) is a keeper, this stops before
       "end". */

    in = 0;
    while (in < end) {
        next = (unsigned char) string[in];
        if (next > 32 && next != 127)
            break;
        in++;
    }

    /* Copy the remainder leftwards, translating as described above.
       Every pass consumes at least one input byte and emits at most one
       output byte, so the write position never overtakes the read
       position. */

    out = 0;
    while (in < end) {
        next = (unsigned char) string[in++];

        if ((next >= '0' && next <= '9') ||
            (next >= 'A' && next <= 'Z') ||
            (next >= 'a' && next <= 'z') ||
            next == '-' || next == '.' || next == '_') {
            string[out++] = (char) next;
        } else if (next == ' ' || next == '\t' ||
                   next == '\n' || next == '\r') {
            /* One underscore stands for the whole run.  The scan stops
               ON the first non-whitespace character, so the next pass
               of the outer loop still sees it. */
            string[out++] = '_';
            while (in < end &&
                   (string[in] == ' ' || string[in] == '\t' ||
                    string[in] == '\n' || string[in] == '\r'))
                in++;
        } else if (next < 32 || next == 127) {
            /* Remaining control characters are dropped. */
        } else {
            string[out++] = '_';
        }
    }

    /* Terminate the string and clear the vacated bytes.  string[end] is
       already null. */

    while (out < end)
        string[out++] = '\0';

    return string;
}

/* addr_sanitize: Verify that a string is in the form of an e-mail address. */

char *addr_sanitize(char *string)
{
    /* States of the parser.  AS_FAIL and AS_SUCCEED are terminal; the
       other six index the rows of the transition table. */
    enum {
        AS_START, AS_LOCAL, AS_LOCAL_SEP, AS_AT, AS_DOMAIN, AS_DOMAIN_SEP,
        AS_FAIL, AS_SUCCEED
    };

    /* Classes of input character; these index the table's columns. */
    enum { AC_END, AC_ALNUM, AC_SEP, AC_AT, AC_OTHER };

    static const int transition[6][5] = {
        /*                 AC_END      AC_ALNUM   AC_SEP         AC_AT    AC_OTHER */
        /* START      */ { AS_FAIL,    AS_LOCAL,  AS_FAIL,       AS_FAIL, AS_FAIL },
        /* LOCAL      */ { AS_FAIL,    AS_LOCAL,  AS_LOCAL_SEP,  AS_AT,   AS_FAIL },
        /* LOCAL_SEP  */ { AS_FAIL,    AS_LOCAL,  AS_FAIL,       AS_FAIL, AS_FAIL },
        /* AT         */ { AS_FAIL,    AS_DOMAIN, AS_FAIL,       AS_FAIL, AS_FAIL },
        /* DOMAIN     */ { AS_SUCCEED, AS_DOMAIN, AS_DOMAIN_SEP, AS_FAIL, AS_FAIL },
        /* DOMAIN_SEP */ { AS_FAIL,    AS_DOMAIN, AS_FAIL,       AS_FAIL, AS_FAIL }
    };

    size_t in;              /* Read position */
    size_t out;             /* Write position (stripping pass only) */
    unsigned char next;     /* Current character, as an unsigned byte */
    int state;              /* Current parser state */
    int kind;               /* Class of the current character */

    /* This function takes a null-terminated character string and attempts
       to condition it for use as an electronic mail address.  It is
       (intentionally) quite strict: source routing ("!" and "%" relaying),
       multiple addresses, groups, bracketed route addresses, comments,
       and quoted or escaped characters are all unsupported.  The address
       must be a simple one, in the form:

               localpart@domain

       First, every byte outside the range 0x21 through 0x7E is stripped
       in-place; that removes whitespace and control characters and also
       any 8-bit bytes.  What is left is then parsed by a finite state
       machine.  Both "localpart" and "domain" must consist of letters and
       digits, optionally joined by single separators (hyphen, underscore,
       or period); a separator may not be first, last, or adjacent to
       another separator or to the "@".

       End-of-string is itself an input class, and the first column of
       the table holds only terminal states, so the parse always stops at
       or before the terminator.

       On success the (stripped) string is returned.  On failure the
       string is overwritten with nulls and NULL is returned.  NULL is
       also returned when the argument is NULL. */

    if (!string)
        return NULL;

    /* Strip whitespace, controls, and 8-bit bytes */

    in = 0;
    out = 0;
    while ((next = (unsigned char) string[in]) != '\0') {
        if (next > 32 && next < 127)
            string[out++] = (char) next;
        in++;
    }
    while (out < in)
        string[out++] = '\0';

    /* Attempt to parse the string */

    state = AS_START;
    in = 0;
    while (state < AS_FAIL) {
        next = (unsigned char) string[in++];

        if (next == '\0')
            kind = AC_END;
        else if ((next >= '0' && next <= '9') ||
                 (next >= 'A' && next <= 'Z') ||
                 (next >= 'a' && next <= 'z'))
            kind = AC_ALNUM;
        else if (next == '-' || next == '.' || next == '_')
            kind = AC_SEP;
        else if (next == '@')
            kind = AC_AT;
        else
            kind = AC_OTHER;

        state = transition[state][kind];
    }

    /* Check for success or failure, and return */

    if (state == AS_FAIL) {
        for (in = 0; string[in] != '\0'; in++)
            string[in] = '\0';
        return NULL;
    }

    return string;
}

/* toint: Convert one hexadecimal digit character to its numeric value. */

int toint(char c)
{
    /* Accepts '0'-'9', 'A'-'F', and 'a'-'f' and returns 0 through 15.
       Any other character yields 0, which the caller cannot tell apart
       from the digit '0'. */

    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'A' && c <= 'F') {
        return 10 + (c - 'A');
    }
    if (c >= 'a' && c <= 'f') {
        return 10 + (c - 'a');
    }
    return 0;
}