mnemonicode-0.73/mnemonic.c
author Jens Alfke <jens@mooseyard.com>
Sat Mar 08 21:04:41 2008 -0800 (2008-03-08)
changeset 0 d84d25d6cdbb
permissions -rw-r--r--
Initial checkin.
     1 /* mnemonic.c
     2 
     3  Copyright (c) 2000  Oren Tirosh <oren@hishome.net>
     4 
     5  Permission is hereby granted, free of charge, to any person obtaining a copy
     6  of this software and associated documentation files (the "Software"), to deal
     7  in the Software without restriction, including without limitation the rights
     8  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9  copies of the Software, and to permit persons to whom the Software is
    10  furnished to do so, subject to the following conditions:
    11 
    12  The above copyright notice and this permission notice shall be included in
    13  all copies or substantial portions of the Software.
    14 
    15  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    18  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21  THE SOFTWARE.
    22 
    23 */
    24 
    25 #include "mnemonic.h"
    26 #include <string.h>
    27 
    28 
    29 /*
    30  * mn_words_required
    31  * 
    32  * Description:
    33  *  Calculate the number of words required to encode data using mnemonic
    34  *  encoding.
    35  *
    36  * Parameters:
    37  *  size - Size in bytes of data to be encoded
    38  * 
    39  * Return value:
    40  *  number of words required for the encoding
    41  */
    42 
    43 int
    44 mn_words_required (int size)
    45 {
    46   return ((size + 1) * 3) / 4;
    47 }
    48 
    49 
    50 /*
    51  * mn_encode_word_index
    52  *
    53  * Description:
    54  *  Perform one step of encoding binary data into words. Returns word index.
    55  *
    56  * Parameters:
    57  *   src - Pointer to data buffer to encode
    58  *   srcsize - Size in bytes of data to encode 
    59  *   n - Sequence number of word to encode
    60  *       0 <= n < mn_words_required(srcsize)
    61  *
    62  * Return value:
    63  *   0 - no more words to encode / n is out of range
    64  *   1..MN_WORDS - word index. May be used as index to the mn_words[] array
    65  */
    66 
    67 mn_index mn_encode_word_index (void *src, int srcsize, int n)
    68 {
    69   mn_word32 x = 0;		/* Temporary for MN_BASE arithmetic */
    70   int offset;			/* Offset into src */
    71   int remaining;		/* Octets remaining to end of src */
    72   int extra = 0;		/* Index 7 extra words for 24 bit data */
    73   int i;
    74 
    75   if (n < 0 || n >= mn_words_required (srcsize))
    76     return 0;			/* word out of range */
    77   offset = (n / 3) * 4;		/* byte offset into src */
    78   remaining = srcsize - offset;
    79   if (remaining <= 0)
    80     return 0;
    81   if (remaining >= 4)
    82     remaining = 4;
    83   for (i = 0; i < remaining; i++)
    84     x |= ((mn_byte *) src)[offset + i] << (i * 8);	/* endianness-agnostic */
    85 
    86   switch (n % 3)
    87     {
    88     case 2:			/* Third word of group */
    89       if (remaining == 3)	/*  special case for 24 bits */
    90 	extra = MN_BASE;	/*  use one of the 7 3-letter words */
    91       x /= (MN_BASE * MN_BASE);
    92       break;
    93     case 1:			/* Second word of group */
    94       x /= MN_BASE;
    95     }
    96   return x % MN_BASE + extra + 1;
    97 }
    98 
    99 
   100 /*
   101  * mn_encode_word
   102  *
   103  * Description:
   104  *  Perform one step of encoding binary data into words. Returns pointer 
   105  *  to word.
   106  *
   107  * Parameters:
   108  *   src - Pointer to data buffer to encode
   109  *   srcsize - Size of data to encode in bytes
   110  *   n - Sequence number of word to encode. 
   111  *       0 <= n < mn_words_required(srcsize)
   112  *
   113  * Return value:
   114  *   NULL - no more words to encode / n is out of range
   115  *   valid pointer - pointer to null-terminated lowercase word. length<=7
   116  */
   117 
   118 const char *
   119 mn_encode_word (void *src, int srcsize, int n)
   120 {
   121   return mn_words[mn_encode_word_index (src, srcsize, n)];
   122 }
   123 
   124 
   125 /*
   126  * isletter
   127  * Utility function - returns nonzero if character c is an ASCII letter.
   128  */
   129 
   130 static int
   131 isletter (char c)
   132 {
   133   return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
   134 }
   135 
   136 /*
   137  * mn_next_word_index
   138  *
   139  * Description:
   140  *  Perform one step of decoding a null-terminated buffer into word indices.
   141  *  A word is defined as a sequence of letter character separated by one
   142  *  or more non-letter separator characters.
   143  *
   144  * Parameters:
   145  *  ptr - Pointer to a pointer to the next character in the buffer.
   146  *  *ptr is modified by the function; see Return Value below.
   147  *
   148  * Return value:
   149  *  0  - If *ptr==0 (points to the null at the end of the buffer) no more 
   150  *       words were found in the buffer. Otherwise *ptr points to beginning 
   151  *       of an unrecognized word.
   152  *  >0 - index of word found, suitable for decoding with mn_decode_word_index
   153  *       or comparison to values returned from mn_encode_index. *ptr points 
   154  *       to first character of next word or to terminating null.
   155  */
   156 
   157 mn_index
   158 mn_next_word_index (char **ptr)
   159 {
   160   char *wordstart;
   161   char wordbuf[MN_WORD_BUFLEN];
   162   int i = 0;
   163   char c;
   164   mn_index idx;
   165 
   166   while (**ptr && !isletter (**ptr))	/* skip separator chars */
   167     (*ptr)++;
   168   wordstart = *ptr;		/* save for error reporting */
   169   while (**ptr && isletter (**ptr) && i < MN_WORD_BUFLEN - 1)
   170     {
   171       c = *(*ptr)++;
   172       if (c >= 'A' && c <= 'Z')
   173 	c += 'a' - 'A';		/* convert to lowercase */
   174       wordbuf[i++] = c;
   175     }
   176   wordbuf[i] = '\0';
   177   while (**ptr && isletter (**ptr))	/* skip tail of long words */
   178     (*ptr)++;
   179   while (**ptr && !isletter (**ptr))	/* skip separators */
   180     (*ptr)++;
   181 
   182   if (wordbuf[0] == '\0')
   183     return 0;			/* EOF, no word found */
   184 
   185   for (idx = 1; idx <= MN_WORDS; idx++)
   186     {
   187       if (!strcmp (wordbuf, mn_words[idx]))
   188 	return idx;
   189       /* FIXME: some fancy code should go here
   190          to accept misspellings and soundalikes.
   191          (replacing the linear search would also be nice) */
   192     }
   193   *ptr = wordstart;
   194   return 0;			/* not found */
   195 }
   196 
   197 
   198 /*
   199  * mn_decode_word_index
   200  *
   201  * Description:
   202  *  Perform one step of decoding a sequence of words into binary data.
   203  *
   204  * Parameters:
   205  *  index    - Index of word, e.g. return value of mn_next_word_index. Use
   206  *             the value MN_EOF(=0) to signal the end of input.
   207  *  dest     - Points to buffer to receive decoded binary result.
   208  *  destsize - Size of buffer 
   209  *  offset   - Pointer to an integer offset into the destination buffer for 
   210  *             next data byte. Initialize *offset to 0 before first call to 
   211  *             function. Modified by function and may be used as an 
   212  * 	       indication for the amount of data actually decoded.
   213  *
   214  * Return value:
   215  *  The return value indicates the status of the decoding function. It is
   216  *  ok to ignore this value on all calls to the function except the last
   217  *  one (with index=MN_EOF). Any errors encountered will be reported on. 
   218  *  the last call. The error code is also returned in *offset (negative 
   219  *  values indicate error).
   220  *
   221  * MN_OK (==0)	
   222  *	for index!=MN_EOF a return value of MN_OK means that 
   223  *	decoding has been successful so far.
   224  *	for index==MN_EOF a return value of MN_OK means that decoding
   225  *	of the entire buffer has been successful and the decoder is in
   226  *	a valid state for the end of the message. A total of *offset
   227  *	valid decoded bytes is in the buffer.
   228  *  MN_EREM      
   229  *	returned on MN_EOF when an unaccounted arithmetic remainder is
   230  *	in the decoder. Most likely indicates a truncated word sequence.
   231  *  MN_EOVERRUN	
   232  *	Not enough room in buffer for decoded data.
   233  *  MN_EOVERRUN24 
   234  *	Returned when decoding of data is attempted after decoding one
   235  *	of the 7 words reserved for 24 bit remainders at the end of the
   236  *	message. Probably indicates a garbled messages.
   237  *  MN_EINDEX	
   238  *	Bad input index. Naturally this should not happen when using 
   239  *	the result of mn_next_word_index.
   240  *  MN_EINDEX24
   241  *	Returned when one of the 7 words reserved for 24 bit remainders
   242  *	is received at an offset inappropriate for a 24 bit remainder.
   243  *  MN_EENCODING
   244  *	Indicates an overflow in MN_BASE arithmetic. Approximately 0.09%
   245  *	of the 3 word combinations are unused and will generate this error.
   246  */
   247 
   248 int
   249 mn_decode_word_index (mn_index index, void *dest, int destsize, int *offset)
   250 {
   251   mn_word32 x;			/* Temporary for MN_BASE arithmetic */
   252   int groupofs;
   253   int i;
   254 
   255   if (*offset < 0)		/* Error from previous call? report it */
   256     return *offset;
   257 
   258   if (index < 0 || index > MN_WORDS)	/* Word index out of range */
   259     {
   260       *offset = MN_EINDEX;
   261       return *offset;
   262     }
   263 
   264   if (*offset > destsize)	/* out of range? */
   265     {
   266       *offset = MN_EOVERRUN;
   267       return *offset;
   268     }
   269 
   270   if (index > MN_BASE && *offset % 4 != 2)
   271     {				/* Unexpected 24 bit remainder word */
   272       *offset = MN_EINDEX24;
   273       return *offset;
   274     }
   275 
   276   groupofs = *offset & ~3;	/* Offset of 4 byte group containing offet */
   277   x = 0;
   278   for (i = 0; i < 4; i++)
   279     if (groupofs + i < destsize)	/* Ignore any bytes outside buffer */
   280       x |= ((mn_byte *) dest)[groupofs + i] << (i * 8);	/* assemble number */
   281 
   282   if (index == MN_EOF)		/* Got EOF signal */
   283     {
   284       switch (*offset % 4)
   285 	{
   286 	case 3:		/* group was three words and the last */
   287 	  return MN_OK;		/*  word was a 24 bit remainder */
   288 	case 2:		/* last group has two words */
   289 	  if (x <= 0xFFFF)	/*  should encode 16 bit data */
   290 	    return MN_OK;
   291 	  else
   292 	    {
   293 	      *offset = MN_EREM;
   294 	      return *offset;
   295 	    }
   296 	case 1:		/* last group has just one word */
   297 	  if (x <= 0xFF)	/*  should encode 8 bits */
   298 	    return MN_OK;
   299 	  else
   300 	    {
   301 	      *offset = MN_EREM;
   302 	      return *offset;
   303 	    }
   304 
   305 	case 0:		/* last group was full 3 words */
   306 	  return MN_OK;
   307 	}
   308     }
   309   if (*offset == destsize)	/* At EOF but didn't get MN_EOF */
   310     {
   311       *offset = MN_EOVERRUN;
   312       return *offset;
   313     }
   314 
   315   index--;			/* 1 based to 0 based index */
   316 
   317   switch (*offset % 4)
   318     {
   319     case 3:			/* Got data past 24 bit remainder */
   320       *offset = MN_EOVERRUN24;
   321       return *offset;
   322     case 2:
   323       if (index >= MN_BASE)
   324 	{			/* 24 bit remainder */
   325 	  x += (index - MN_BASE) * MN_BASE * MN_BASE;
   326 	  (*offset)++;		/* *offset%4 == 3 for next time */
   327 	}
   328       else
   329 	{			/* catch invalid encodings */
   330 	  if (index >= 1625 || (index == 1624 && x > 1312671))
   331 	    {
   332 	      *offset = MN_EENCODING;
   333 	      return *offset;
   334 	    }
   335 	  x += index * MN_BASE * MN_BASE;
   336 	  (*offset) += 2;	/* *offset%4 == 0 for next time */
   337 	}
   338       break;
   339     case 1:
   340       x += index * MN_BASE;
   341       (*offset)++;
   342       break;
   343     case 0:
   344       x = index;
   345       (*offset)++;
   346       break;
   347     }
   348 
   349   for (i = 0; i < 4; i++)
   350     if (groupofs + i < destsize)	/* Don't step outside the buffer */
   351       {
   352 	((mn_byte *) dest)[groupofs + i] = (mn_byte) x % 256;
   353 	x /= 256;
   354       }
   355   return MN_OK;
   356 }
   357 
   358 /*
   359  * mn_encode
   360  *
   361  * Description:
   362  *  Encode a binary data buffer into a null-terminated sequence of words.
   363  *  The word separators are taken from the format string. 
   364  *
   365  * Parameters:
   366  *  src      - Pointer to the beginning of the binary data buffer.
   367  *  srcsize  - Size in bytes of binary data buffer
   368  *  dest     - Pointer to the beginning of a character buffer 
   369  *  destsize - Size in characters of character buffer
   370  *  format   - Null-terminated string describing the output format.
   371  *             In the format string any letter or sequence of letters
   372  *             acts as a placeholder for the encoded words. The word 
   373  *             placeholders are separated by one or more non-letter
   374  *             characters. When the encoder reaches the end of the 
   375  *             format string it starts reading it again.
   376  *             For sample formats see MN_F* constants in mnemonic.h
   377  *	       If format is empty or NULL the format MN_FDEFAULT
   378  *	       is used.
   379  *
   380  * Return value:
   381  *  MN_OK(=0)
   382  *	Encoding was successful.
   383  *  MN_EOVERRUN
   384  *	Output size exceeds size of destination buffer
   385  *  MN_EFORMAT
   386  *	Invalid format string. This function enforces formats which
   387  *	will result in a string which can be successfully decoded by
   388  *	the mn_decode function.
   389  */
   390 
   391 int
   392 mn_encode (void *src, int srcsize, char *dest, int destsize, char *format)
   393 {
   394   int n;
   395   char *fmt;
   396   char *destend = dest + destsize;
   397   const char *word;
   398 
   399   if (format == 0 || format[0] == '\0')
   400     format = MN_FDEFAULT;
   401   fmt = format;
   402   for (n = 0; n < mn_words_required (srcsize); n++)
   403     {
   404       while (dest < destend && *fmt != '\0' && !isletter (*fmt))
   405 	*dest++ = *fmt++;
   406       if (dest >= destend)
   407 	return MN_EOVERRUN;
   408       if (*fmt == '\0')
   409 	{
   410 	  if (isletter (fmt[-1]) && isletter (format[0]))
   411 	    return MN_EFORMAT;
   412 	  fmt = format;
   413 	  while (dest < destend && *fmt != '\0' && !isletter (*fmt))
   414 	    *dest++ = *fmt++;
   415 	  if (!isletter (*fmt))
   416 	    return MN_EFORMAT;
   417 	}
   418       word = mn_encode_word (src, srcsize, n);
   419       if (word == 0)
   420 	return MN_EOVERRUN;	/* shouldn't happen, actually */
   421 
   422       while (isletter (*fmt))
   423 	fmt++;
   424       while (dest < destend && *word != '\0')
   425 	*dest++ = *word++;
   426     }
   427   if (dest < destend)
   428     *dest++ = '\0';
   429   else
   430     return MN_EOVERRUN;
   431   return MN_OK;
   432 }
   433 
   434 /*
   435  * mn_decode
   436  *
   437  * Description:
   438  *  Decode a text representation in null-terminated character buffer src to 
   439  *  binary buffer dest.
   440  *
   441  * Parameters:
   442  *  src      - Pointer to null-terminated character buffer 
   443  *  dest     - Pointer to beginning of destination buffer
   444  *  destsize - Size in bytes of destination buffer
   445  *
   446  * Return value:
   447  *  This function may return all the value returned by mn_decode_word_index
   448  *  plus the following result code:
   449  *
   450  * MN_EWORD  - Unrecognized word.
   451  */
   452 
   453 int
   454 mn_decode (char *src, void *dest, int destsize)
   455 {
   456   mn_index index;
   457   int offset = 0;
   458 
   459   while ((index = mn_next_word_index (&src)) != 0)
   460     {
   461       if (index == 0 && *src != 0)
   462 	return MN_EWORD;
   463       (void) mn_decode_word_index (index, dest, destsize, &offset);
   464     }
   465   (void) mn_decode_word_index (MN_EOF, dest, destsize, &offset);
   466   return offset;
   467 }