jens@0: /* mnemonic.c
jens@0: 
jens@0:  Copyright (c) 2000  Oren Tirosh <oren@hishome.net>
jens@0: 
jens@0:  Permission is hereby granted, free of charge, to any person obtaining a copy
jens@0:  of this software and associated documentation files (the "Software"), to deal
jens@0:  in the Software without restriction, including without limitation the rights
jens@0:  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
jens@0:  copies of the Software, and to permit persons to whom the Software is
jens@0:  furnished to do so, subject to the following conditions:
jens@0: 
jens@0:  The above copyright notice and this permission notice shall be included in
jens@0:  all copies or substantial portions of the Software.
jens@0: 
jens@0:  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
jens@0:  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
jens@0:  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
jens@0:  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
jens@0:  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
jens@0:  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
jens@0:  THE SOFTWARE.
jens@0: 
jens@0: */
jens@0: 
jens@0: #include "mnemonic.h"
jens@0: #include <string.h>
jens@0: 
jens@0: 
/*
 * mn_words_required
 * 
 * Description:
 *  Calculate the number of words required to encode data using mnemonic
 *  encoding. Every full group of 4 bytes takes 3 words; a trailing
 *  partial group of 1, 2 or 3 bytes takes 1, 2 or 3 words respectively.
 *
 * Parameters:
 *  size - Size in bytes of data to be encoded
 * 
 * Return value:
 *  number of words required for the encoding; 0 when size <= 0
 */

int
mn_words_required (int size)
{
  if (size <= 0)
    return 0;

  /* Equivalent to ((size + 1) * 3) / 4 for size > 0, but split into
     quotient and remainder so the intermediate product cannot overflow
     int for large sizes. */
  return (size / 4) * 3 + ((size % 4 + 1) * 3) / 4;
}
jens@0: 
jens@0: 
jens@0: /*
jens@0:  * mn_encode_word_index
jens@0:  *
jens@0:  * Description:
jens@0:  *  Perform one step of encoding binary data into words. Returns word index.
jens@0:  *
jens@0:  * Parameters:
jens@0:  *   src - Pointer to data buffer to encode
jens@0:  *   srcsize - Size in bytes of data to encode 
jens@0:  *   n - Sequence number of word to encode
jens@0:  *       0 <= n < mn_words_required(srcsize)
jens@0:  *
jens@0:  * Return value:
jens@0:  *   0 - no more words to encode / n is out of range
jens@0:  *   1..MN_WORDS - word index. May be used as index to the mn_words[] array
jens@0:  */
jens@0: 
jens@0: mn_index mn_encode_word_index (void *src, int srcsize, int n)
jens@0: {
jens@0:   mn_word32 x = 0;		/* Temporary for MN_BASE arithmetic */
jens@0:   int offset;			/* Offset into src */
jens@0:   int remaining;		/* Octets remaining to end of src */
jens@0:   int extra = 0;		/* Index 7 extra words for 24 bit data */
jens@0:   int i;
jens@0: 
jens@0:   if (n < 0 || n >= mn_words_required (srcsize))
jens@0:     return 0;			/* word out of range */
jens@0:   offset = (n / 3) * 4;		/* byte offset into src */
jens@0:   remaining = srcsize - offset;
jens@0:   if (remaining <= 0)
jens@0:     return 0;
jens@0:   if (remaining >= 4)
jens@0:     remaining = 4;
jens@0:   for (i = 0; i < remaining; i++)
jens@0:     x |= ((mn_byte *) src)[offset + i] << (i * 8);	/* endianness-agnostic */
jens@0: 
jens@0:   switch (n % 3)
jens@0:     {
jens@0:     case 2:			/* Third word of group */
jens@0:       if (remaining == 3)	/*  special case for 24 bits */
jens@0: 	extra = MN_BASE;	/*  use one of the 7 3-letter words */
jens@0:       x /= (MN_BASE * MN_BASE);
jens@0:       break;
jens@0:     case 1:			/* Second word of group */
jens@0:       x /= MN_BASE;
jens@0:     }
jens@0:   return x % MN_BASE + extra + 1;
jens@0: }
jens@0: 
jens@0: 
jens@0: /*
jens@0:  * mn_encode_word
jens@0:  *
jens@0:  * Description:
jens@0:  *  Perform one step of encoding binary data into words. Returns pointer 
jens@0:  *  to word.
jens@0:  *
jens@0:  * Parameters:
jens@0:  *   src - Pointer to data buffer to encode
jens@0:  *   srcsize - Size of data to encode in bytes
jens@0:  *   n - Sequence number of word to encode. 
jens@0:  *       0 <= n < mn_words_required(srcsize)
jens@0:  *
jens@0:  * Return value:
jens@0:  *   NULL - no more words to encode / n is out of range
jens@0:  *   valid pointer - pointer to null-terminated lowercase word. length<=7
jens@0:  */
jens@0: 
jens@0: const char *
jens@0: mn_encode_word (void *src, int srcsize, int n)
jens@0: {
jens@0:   return mn_words[mn_encode_word_index (src, srcsize, n)];
jens@0: }
jens@0: 
jens@0: 
/*
 * isletter
 * Utility function - returns 1 if character c is an ASCII letter
 * ('A'..'Z' or 'a'..'z') and 0 otherwise. Locale independent.
 */

static int
isletter (char c)
{
  if (c >= 'a' && c <= 'z')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  return 0;
}
jens@0: 
jens@0: /*
jens@0:  * mn_next_word_index
jens@0:  *
jens@0:  * Description:
jens@0:  *  Perform one step of decoding a null-terminated buffer into word indices.
jens@0:  *  A word is defined as a sequence of letter character separated by one
jens@0:  *  or more non-letter separator characters.
jens@0:  *
jens@0:  * Parameters:
jens@0:  *  ptr - Pointer to a pointer to the next character in the buffer.
jens@0:  *  *ptr is modified by the function; see Return Value below.
jens@0:  *
jens@0:  * Return value:
jens@0:  *  0  - If *ptr==0 (points to the null at the end of the buffer) no more 
jens@0:  *       words were found in the buffer. Otherwise *ptr points to beginning 
jens@0:  *       of an unrecognized word.
jens@0:  *  >0 - index of word found, suitable for decoding with mn_decode_word_index
jens@0:  *       or comparison to values returned from mn_encode_index. *ptr points 
jens@0:  *       to first character of next word or to terminating null.
jens@0:  */
jens@0: 
jens@0: mn_index
jens@0: mn_next_word_index (char **ptr)
jens@0: {
jens@0:   char *wordstart;
jens@0:   char wordbuf[MN_WORD_BUFLEN];
jens@0:   int i = 0;
jens@0:   char c;
jens@0:   mn_index idx;
jens@0: 
jens@0:   while (**ptr && !isletter (**ptr))	/* skip separator chars */
jens@0:     (*ptr)++;
jens@0:   wordstart = *ptr;		/* save for error reporting */
jens@0:   while (**ptr && isletter (**ptr) && i < MN_WORD_BUFLEN - 1)
jens@0:     {
jens@0:       c = *(*ptr)++;
jens@0:       if (c >= 'A' && c <= 'Z')
jens@0: 	c += 'a' - 'A';		/* convert to lowercase */
jens@0:       wordbuf[i++] = c;
jens@0:     }
jens@0:   wordbuf[i] = '\0';
jens@0:   while (**ptr && isletter (**ptr))	/* skip tail of long words */
jens@0:     (*ptr)++;
jens@0:   while (**ptr && !isletter (**ptr))	/* skip separators */
jens@0:     (*ptr)++;
jens@0: 
jens@0:   if (wordbuf[0] == '\0')
jens@0:     return 0;			/* EOF, no word found */
jens@0: 
jens@0:   for (idx = 1; idx <= MN_WORDS; idx++)
jens@0:     {
jens@0:       if (!strcmp (wordbuf, mn_words[idx]))
jens@0: 	return idx;
jens@0:       /* FIXME: some fancy code should go here
jens@0:          to accept misspellings and soundalikes.
jens@0:          (replacing the linear search would also be nice) */
jens@0:     }
jens@0:   *ptr = wordstart;
jens@0:   return 0;			/* not found */
jens@0: }
jens@0: 
jens@0: 
jens@0: /*
jens@0:  * mn_decode_word_index
jens@0:  *
jens@0:  * Description:
jens@0:  *  Perform one step of decoding a sequence of words into binary data.
jens@0:  *
jens@0:  * Parameters:
jens@0:  *  index    - Index of word, e.g. return value of mn_next_word_index. Use
jens@0:  *             the value MN_EOF(=0) to signal the end of input.
jens@0:  *  dest     - Points to buffer to receive decoded binary result.
jens@0:  *  destsize - Size of buffer 
jens@0:  *  offset   - Pointer to an integer offset into the destination buffer for 
jens@0:  *             next data byte. Initialize *offset to 0 before first call to 
jens@0:  *             function. Modified by function and may be used as an 
jens@0:  * 	       indication for the amount of data actually decoded.
jens@0:  *
jens@0:  * Return value:
jens@0:  *  The return value indicates the status of the decoding function. It is
jens@0:  *  ok to ignore this value on all calls to the function except the last
jens@0:  *  one (with index=MN_EOF). Any errors encountered will be reported on. 
jens@0:  *  the last call. The error code is also returned in *offset (negative 
jens@0:  *  values indicate error).
jens@0:  *
jens@0:  * MN_OK (==0)	
jens@0:  *	for index!=MN_EOF a return value of MN_OK means that 
jens@0:  *	decoding has been successful so far.
jens@0:  *	for index==MN_EOF a return value of MN_OK means that decoding
jens@0:  *	of the entire buffer has been successful and the decoder is in
jens@0:  *	a valid state for the end of the message. A total of *offset
jens@0:  *	valid decoded bytes is in the buffer.
jens@0:  *  MN_EREM      
jens@0:  *	returned on MN_EOF when an unaccounted arithmetic remainder is
jens@0:  *	in the decoder. Most likely indicates a truncated word sequence.
jens@0:  *  MN_EOVERRUN	
jens@0:  *	Not enough room in buffer for decoded data.
jens@0:  *  MN_EOVERRUN24 
jens@0:  *	Returned when decoding of data is attempted after decoding one
jens@0:  *	of the 7 words reserved for 24 bit remainders at the end of the
jens@0:  *	message. Probably indicates a garbled messages.
jens@0:  *  MN_EINDEX	
jens@0:  *	Bad input index. Naturally this should not happen when using 
jens@0:  *	the result of mn_next_word_index.
jens@0:  *  MN_EINDEX24
jens@0:  *	Returned when one of the 7 words reserved for 24 bit remainders
jens@0:  *	is received at an offset inappropriate for a 24 bit remainder.
jens@0:  *  MN_EENCODING
jens@0:  *	Indicates an overflow in MN_BASE arithmetic. Approximately 0.09%
jens@0:  *	of the 3 word combinations are unused and will generate this error.
jens@0:  */
jens@0: 
jens@0: int
jens@0: mn_decode_word_index (mn_index index, void *dest, int destsize, int *offset)
jens@0: {
jens@0:   mn_word32 x;			/* Temporary for MN_BASE arithmetic */
jens@0:   int groupofs;
jens@0:   int i;
jens@0: 
jens@0:   if (*offset < 0)		/* Error from previous call? report it */
jens@0:     return *offset;
jens@0: 
jens@0:   if (index < 0 || index > MN_WORDS)	/* Word index out of range */
jens@0:     {
jens@0:       *offset = MN_EINDEX;
jens@0:       return *offset;
jens@0:     }
jens@0: 
jens@0:   if (*offset > destsize)	/* out of range? */
jens@0:     {
jens@0:       *offset = MN_EOVERRUN;
jens@0:       return *offset;
jens@0:     }
jens@0: 
jens@0:   if (index > MN_BASE && *offset % 4 != 2)
jens@0:     {				/* Unexpected 24 bit remainder word */
jens@0:       *offset = MN_EINDEX24;
jens@0:       return *offset;
jens@0:     }
jens@0: 
jens@0:   groupofs = *offset & ~3;	/* Offset of 4 byte group containing offet */
jens@0:   x = 0;
jens@0:   for (i = 0; i < 4; i++)
jens@0:     if (groupofs + i < destsize)	/* Ignore any bytes outside buffer */
jens@0:       x |= ((mn_byte *) dest)[groupofs + i] << (i * 8);	/* assemble number */
jens@0: 
jens@0:   if (index == MN_EOF)		/* Got EOF signal */
jens@0:     {
jens@0:       switch (*offset % 4)
jens@0: 	{
jens@0: 	case 3:		/* group was three words and the last */
jens@0: 	  return MN_OK;		/*  word was a 24 bit remainder */
jens@0: 	case 2:		/* last group has two words */
jens@0: 	  if (x <= 0xFFFF)	/*  should encode 16 bit data */
jens@0: 	    return MN_OK;
jens@0: 	  else
jens@0: 	    {
jens@0: 	      *offset = MN_EREM;
jens@0: 	      return *offset;
jens@0: 	    }
jens@0: 	case 1:		/* last group has just one word */
jens@0: 	  if (x <= 0xFF)	/*  should encode 8 bits */
jens@0: 	    return MN_OK;
jens@0: 	  else
jens@0: 	    {
jens@0: 	      *offset = MN_EREM;
jens@0: 	      return *offset;
jens@0: 	    }
jens@0: 
jens@0: 	case 0:		/* last group was full 3 words */
jens@0: 	  return MN_OK;
jens@0: 	}
jens@0:     }
jens@0:   if (*offset == destsize)	/* At EOF but didn't get MN_EOF */
jens@0:     {
jens@0:       *offset = MN_EOVERRUN;
jens@0:       return *offset;
jens@0:     }
jens@0: 
jens@0:   index--;			/* 1 based to 0 based index */
jens@0: 
jens@0:   switch (*offset % 4)
jens@0:     {
jens@0:     case 3:			/* Got data past 24 bit remainder */
jens@0:       *offset = MN_EOVERRUN24;
jens@0:       return *offset;
jens@0:     case 2:
jens@0:       if (index >= MN_BASE)
jens@0: 	{			/* 24 bit remainder */
jens@0: 	  x += (index - MN_BASE) * MN_BASE * MN_BASE;
jens@0: 	  (*offset)++;		/* *offset%4 == 3 for next time */
jens@0: 	}
jens@0:       else
jens@0: 	{			/* catch invalid encodings */
jens@0: 	  if (index >= 1625 || (index == 1624 && x > 1312671))
jens@0: 	    {
jens@0: 	      *offset = MN_EENCODING;
jens@0: 	      return *offset;
jens@0: 	    }
jens@0: 	  x += index * MN_BASE * MN_BASE;
jens@0: 	  (*offset) += 2;	/* *offset%4 == 0 for next time */
jens@0: 	}
jens@0:       break;
jens@0:     case 1:
jens@0:       x += index * MN_BASE;
jens@0:       (*offset)++;
jens@0:       break;
jens@0:     case 0:
jens@0:       x = index;
jens@0:       (*offset)++;
jens@0:       break;
jens@0:     }
jens@0: 
jens@0:   for (i = 0; i < 4; i++)
jens@0:     if (groupofs + i < destsize)	/* Don't step outside the buffer */
jens@0:       {
jens@0: 	((mn_byte *) dest)[groupofs + i] = (mn_byte) x % 256;
jens@0: 	x /= 256;
jens@0:       }
jens@0:   return MN_OK;
jens@0: }
jens@0: 
jens@0: /*
jens@0:  * mn_encode
jens@0:  *
jens@0:  * Description:
jens@0:  *  Encode a binary data buffer into a null-terminated sequence of words.
jens@0:  *  The word separators are taken from the format string. 
jens@0:  *
jens@0:  * Parameters:
jens@0:  *  src      - Pointer to the beginning of the binary data buffer.
jens@0:  *  srcsize  - Size in bytes of binary data buffer
jens@0:  *  dest     - Pointer to the beginning of a character buffer 
jens@0:  *  destsize - Size in characters of character buffer
jens@0:  *  format   - Null-terminated string describing the output format.
jens@0:  *             In the format string any letter or sequence of letters
jens@0:  *             acts as a placeholder for the encoded words. The word 
jens@0:  *             placeholders are separated by one or more non-letter
jens@0:  *             characters. When the encoder reaches the end of the 
jens@0:  *             format string it starts reading it again.
jens@0:  *             For sample formats see MN_F* constants in mnemonic.h
jens@0:  *	       If format is empty or NULL the format MN_FDEFAULT
jens@0:  *	       is used.
jens@0:  *
jens@0:  * Return value:
jens@0:  *  MN_OK(=0)
jens@0:  *	Encoding was successful.
jens@0:  *  MN_EOVERRUN
jens@0:  *	Output size exceeds size of destination buffer
jens@0:  *  MN_EFORMAT
jens@0:  *	Invalid format string. This function enforces formats which
jens@0:  *	will result in a string which can be successfully decoded by
jens@0:  *	the mn_decode function.
jens@0:  */
jens@0: 
jens@0: int
jens@0: mn_encode (void *src, int srcsize, char *dest, int destsize, char *format)
jens@0: {
jens@0:   int n;
jens@0:   char *fmt;
jens@0:   char *destend = dest + destsize;
jens@0:   const char *word;
jens@0: 
jens@0:   if (format == 0 || format[0] == '\0')
jens@0:     format = MN_FDEFAULT;
jens@0:   fmt = format;
jens@0:   for (n = 0; n < mn_words_required (srcsize); n++)
jens@0:     {
jens@0:       while (dest < destend && *fmt != '\0' && !isletter (*fmt))
jens@0: 	*dest++ = *fmt++;
jens@0:       if (dest >= destend)
jens@0: 	return MN_EOVERRUN;
jens@0:       if (*fmt == '\0')
jens@0: 	{
jens@0: 	  if (isletter (fmt[-1]) && isletter (format[0]))
jens@0: 	    return MN_EFORMAT;
jens@0: 	  fmt = format;
jens@0: 	  while (dest < destend && *fmt != '\0' && !isletter (*fmt))
jens@0: 	    *dest++ = *fmt++;
jens@0: 	  if (!isletter (*fmt))
jens@0: 	    return MN_EFORMAT;
jens@0: 	}
jens@0:       word = mn_encode_word (src, srcsize, n);
jens@0:       if (word == 0)
jens@0: 	return MN_EOVERRUN;	/* shouldn't happen, actually */
jens@0: 
jens@0:       while (isletter (*fmt))
jens@0: 	fmt++;
jens@0:       while (dest < destend && *word != '\0')
jens@0: 	*dest++ = *word++;
jens@0:     }
jens@0:   if (dest < destend)
jens@0:     *dest++ = '\0';
jens@0:   else
jens@0:     return MN_EOVERRUN;
jens@0:   return MN_OK;
jens@0: }
jens@0: 
jens@0: /*
jens@0:  * mn_decode
jens@0:  *
jens@0:  * Description:
jens@0:  *  Decode a text representation in null-terminated character buffer src to 
jens@0:  *  binary buffer dest.
jens@0:  *
jens@0:  * Parameters:
jens@0:  *  src      - Pointer to null-terminated character buffer 
jens@0:  *  dest     - Pointer to beginning of destination buffer
jens@0:  *  destsize - Size in bytes of destination buffer
jens@0:  *
jens@0:  * Return value:
jens@0:  *  This function may return all the value returned by mn_decode_word_index
jens@0:  *  plus the following result code:
jens@0:  *
jens@0:  * MN_EWORD  - Unrecognized word.
jens@0:  */
jens@0: 
jens@0: int
jens@0: mn_decode (char *src, void *dest, int destsize)
jens@0: {
jens@0:   mn_index index;
jens@0:   int offset = 0;
jens@0: 
jens@0:   while ((index = mn_next_word_index (&src)) != 0)
jens@0:     {
jens@0:       if (index == 0 && *src != 0)
jens@0: 	return MN_EWORD;
jens@0:       (void) mn_decode_word_index (index, dest, destsize, &offset);
jens@0:     }
jens@0:   (void) mn_decode_word_index (MN_EOF, dest, destsize, &offset);
jens@0:   return offset;
jens@0: }