mnemonicode-0.73/mnemonic.c
changeset 19 5ade3e09a827
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/mnemonicode-0.73/mnemonic.c	Tue Mar 10 22:34:39 2009 -0700
     1.3 @@ -0,0 +1,467 @@
     1.4 +/* mnemonic.c
     1.5 +
     1.6 + Copyright (c) 2000  Oren Tirosh <oren@hishome.net>
     1.7 +
     1.8 + Permission is hereby granted, free of charge, to any person obtaining a copy
     1.9 + of this software and associated documentation files (the "Software"), to deal
    1.10 + in the Software without restriction, including without limitation the rights
    1.11 + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    1.12 + copies of the Software, and to permit persons to whom the Software is
    1.13 + furnished to do so, subject to the following conditions:
    1.14 +
    1.15 + The above copyright notice and this permission notice shall be included in
    1.16 + all copies or substantial portions of the Software.
    1.17 +
    1.18 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    1.19 + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    1.20 + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    1.21 + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    1.22 + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    1.23 + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    1.24 + THE SOFTWARE.
    1.25 +
    1.26 +*/
    1.27 +
    1.28 +#include "mnemonic.h"
    1.29 +#include <string.h>
    1.30 +
    1.31 +
    /*
     * mn_words_required
     *
     * Description:
     *  Calculate the number of words required to encode data using mnemonic
     *  encoding.  Each complete group of 4 bytes needs 3 words; a trailing
     *  partial group of 1, 2 or 3 bytes needs 1, 2 or 3 words respectively.
     *
     * Parameters:
     *  size - Size in bytes of data to be encoded
     *
     * Return value:
     *  number of words required for the encoding; 0 when size <= 0
     */

    int
    mn_words_required (int size)
    {
      if (size <= 0)
        return 0;

      /* Equal to ((size + 1) * 3) / 4 for every non-negative size, but
         evaluated per group so that no intermediate product can overflow
         int when size is large. */
      return (size / 4) * 3 + size % 4;
    }
    1.51 +
    1.52 +
    1.53 +/*
    1.54 + * mn_encode_word_index
    1.55 + *
    1.56 + * Description:
    1.57 + *  Perform one step of encoding binary data into words. Returns word index.
    1.58 + *
    1.59 + * Parameters:
    1.60 + *   src - Pointer to data buffer to encode
    1.61 + *   srcsize - Size in bytes of data to encode 
    1.62 + *   n - Sequence number of word to encode
    1.63 + *       0 <= n < mn_words_required(srcsize)
    1.64 + *
    1.65 + * Return value:
    1.66 + *   0 - no more words to encode / n is out of range
    1.67 + *   1..MN_WORDS - word index. May be used as index to the mn_words[] array
    1.68 + */
    1.69 +
    1.70 +mn_index mn_encode_word_index (void *src, int srcsize, int n)
    1.71 +{
    1.72 +  mn_word32 x = 0;		/* Temporary for MN_BASE arithmetic */
    1.73 +  int offset;			/* Offset into src */
    1.74 +  int remaining;		/* Octets remaining to end of src */
    1.75 +  int extra = 0;		/* Index 7 extra words for 24 bit data */
    1.76 +  int i;
    1.77 +
    1.78 +  if (n < 0 || n >= mn_words_required (srcsize))
    1.79 +    return 0;			/* word out of range */
    1.80 +  offset = (n / 3) * 4;		/* byte offset into src */
    1.81 +  remaining = srcsize - offset;
    1.82 +  if (remaining <= 0)
    1.83 +    return 0;
    1.84 +  if (remaining >= 4)
    1.85 +    remaining = 4;
    1.86 +  for (i = 0; i < remaining; i++)
    1.87 +    x |= ((mn_byte *) src)[offset + i] << (i * 8);	/* endianness-agnostic */
    1.88 +
    1.89 +  switch (n % 3)
    1.90 +    {
    1.91 +    case 2:			/* Third word of group */
    1.92 +      if (remaining == 3)	/*  special case for 24 bits */
    1.93 +	extra = MN_BASE;	/*  use one of the 7 3-letter words */
    1.94 +      x /= (MN_BASE * MN_BASE);
    1.95 +      break;
    1.96 +    case 1:			/* Second word of group */
    1.97 +      x /= MN_BASE;
    1.98 +    }
    1.99 +  return x % MN_BASE + extra + 1;
   1.100 +}
   1.101 +
   1.102 +
   1.103 +/*
   1.104 + * mn_encode_word
   1.105 + *
   1.106 + * Description:
   1.107 + *  Perform one step of encoding binary data into words. Returns pointer 
   1.108 + *  to word.
   1.109 + *
   1.110 + * Parameters:
   1.111 + *   src - Pointer to data buffer to encode
   1.112 + *   srcsize - Size of data to encode in bytes
   1.113 + *   n - Sequence number of word to encode. 
   1.114 + *       0 <= n < mn_words_required(srcsize)
   1.115 + *
   1.116 + * Return value:
   1.117 + *   NULL - no more words to encode / n is out of range
   1.118 + *   valid pointer - pointer to null-terminated lowercase word. length<=7
   1.119 + */
   1.120 +
   1.121 +const char *
   1.122 +mn_encode_word (void *src, int srcsize, int n)
   1.123 +{
   1.124 +  return mn_words[mn_encode_word_index (src, srcsize, n)];
   1.125 +}
   1.126 +
   1.127 +
   /*
    * isletter
    * Utility function - returns 1 if character c lies in 'a'..'z' or
    * 'A'..'Z' (an ASCII letter), 0 otherwise.
    */

   static int
   isletter (char c)
   {
     if (c >= 'a' && c <= 'z')
       return 1;
     if (c >= 'A' && c <= 'Z')
       return 1;
     return 0;
   }
   1.138 +
   1.139 +/*
   1.140 + * mn_next_word_index
   1.141 + *
   1.142 + * Description:
   1.143 + *  Perform one step of decoding a null-terminated buffer into word indices.
   1.144 + *  A word is defined as a sequence of letter character separated by one
   1.145 + *  or more non-letter separator characters.
   1.146 + *
   1.147 + * Parameters:
   1.148 + *  ptr - Pointer to a pointer to the next character in the buffer.
   1.149 + *  *ptr is modified by the function; see Return Value below.
   1.150 + *
   1.151 + * Return value:
   1.152 + *  0  - If *ptr==0 (points to the null at the end of the buffer) no more 
   1.153 + *       words were found in the buffer. Otherwise *ptr points to beginning 
   1.154 + *       of an unrecognized word.
   1.155 + *  >0 - index of word found, suitable for decoding with mn_decode_word_index
   1.156 + *       or comparison to values returned from mn_encode_index. *ptr points 
   1.157 + *       to first character of next word or to terminating null.
   1.158 + */
   1.159 +
   1.160 +mn_index
   1.161 +mn_next_word_index (char **ptr)
   1.162 +{
   1.163 +  char *wordstart;
   1.164 +  char wordbuf[MN_WORD_BUFLEN];
   1.165 +  int i = 0;
   1.166 +  char c;
   1.167 +  mn_index idx;
   1.168 +
   1.169 +  while (**ptr && !isletter (**ptr))	/* skip separator chars */
   1.170 +    (*ptr)++;
   1.171 +  wordstart = *ptr;		/* save for error reporting */
   1.172 +  while (**ptr && isletter (**ptr) && i < MN_WORD_BUFLEN - 1)
   1.173 +    {
   1.174 +      c = *(*ptr)++;
   1.175 +      if (c >= 'A' && c <= 'Z')
   1.176 +	c += 'a' - 'A';		/* convert to lowercase */
   1.177 +      wordbuf[i++] = c;
   1.178 +    }
   1.179 +  wordbuf[i] = '\0';
   1.180 +  while (**ptr && isletter (**ptr))	/* skip tail of long words */
   1.181 +    (*ptr)++;
   1.182 +  while (**ptr && !isletter (**ptr))	/* skip separators */
   1.183 +    (*ptr)++;
   1.184 +
   1.185 +  if (wordbuf[0] == '\0')
   1.186 +    return 0;			/* EOF, no word found */
   1.187 +
   1.188 +  for (idx = 1; idx <= MN_WORDS; idx++)
   1.189 +    {
   1.190 +      if (!strcmp (wordbuf, mn_words[idx]))
   1.191 +	return idx;
   1.192 +      /* FIXME: some fancy code should go here
   1.193 +         to accept misspellings and soundalikes.
   1.194 +         (replacing the linear search would also be nice) */
   1.195 +    }
   1.196 +  *ptr = wordstart;
   1.197 +  return 0;			/* not found */
   1.198 +}
   1.199 +
   1.200 +
   1.201 +/*
   1.202 + * mn_decode_word_index
   1.203 + *
   1.204 + * Description:
   1.205 + *  Perform one step of decoding a sequence of words into binary data.
   1.206 + *
   1.207 + * Parameters:
   1.208 + *  index    - Index of word, e.g. return value of mn_next_word_index. Use
   1.209 + *             the value MN_EOF(=0) to signal the end of input.
   1.210 + *  dest     - Points to buffer to receive decoded binary result.
   1.211 + *  destsize - Size of buffer 
   1.212 + *  offset   - Pointer to an integer offset into the destination buffer for 
   1.213 + *             next data byte. Initialize *offset to 0 before first call to 
   1.214 + *             function. Modified by function and may be used as an 
   1.215 + * 	       indication for the amount of data actually decoded.
   1.216 + *
   1.217 + * Return value:
   1.218 + *  The return value indicates the status of the decoding function. It is
   1.219 + *  ok to ignore this value on all calls to the function except the last
   1.220 + *  one (with index=MN_EOF). Any errors encountered will be reported on. 
   1.221 + *  the last call. The error code is also returned in *offset (negative 
   1.222 + *  values indicate error).
   1.223 + *
   1.224 + * MN_OK (==0)	
   1.225 + *	for index!=MN_EOF a return value of MN_OK means that 
   1.226 + *	decoding has been successful so far.
   1.227 + *	for index==MN_EOF a return value of MN_OK means that decoding
   1.228 + *	of the entire buffer has been successful and the decoder is in
   1.229 + *	a valid state for the end of the message. A total of *offset
   1.230 + *	valid decoded bytes is in the buffer.
   1.231 + *  MN_EREM      
   1.232 + *	returned on MN_EOF when an unaccounted arithmetic remainder is
   1.233 + *	in the decoder. Most likely indicates a truncated word sequence.
   1.234 + *  MN_EOVERRUN	
   1.235 + *	Not enough room in buffer for decoded data.
   1.236 + *  MN_EOVERRUN24 
   1.237 + *	Returned when decoding of data is attempted after decoding one
   1.238 + *	of the 7 words reserved for 24 bit remainders at the end of the
   1.239 + *	message. Probably indicates a garbled messages.
   1.240 + *  MN_EINDEX	
   1.241 + *	Bad input index. Naturally this should not happen when using 
   1.242 + *	the result of mn_next_word_index.
   1.243 + *  MN_EINDEX24
   1.244 + *	Returned when one of the 7 words reserved for 24 bit remainders
   1.245 + *	is received at an offset inappropriate for a 24 bit remainder.
   1.246 + *  MN_EENCODING
   1.247 + *	Indicates an overflow in MN_BASE arithmetic. Approximately 0.09%
   1.248 + *	of the 3 word combinations are unused and will generate this error.
   1.249 + */
   1.250 +
   1.251 +int
   1.252 +mn_decode_word_index (mn_index index, void *dest, int destsize, int *offset)
   1.253 +{
   1.254 +  mn_word32 x;			/* Temporary for MN_BASE arithmetic */
   1.255 +  int groupofs;
   1.256 +  int i;
   1.257 +
   1.258 +  if (*offset < 0)		/* Error from previous call? report it */
   1.259 +    return *offset;
   1.260 +
   1.261 +  if (index < 0 || index > MN_WORDS)	/* Word index out of range */
   1.262 +    {
   1.263 +      *offset = MN_EINDEX;
   1.264 +      return *offset;
   1.265 +    }
   1.266 +
   1.267 +  if (*offset > destsize)	/* out of range? */
   1.268 +    {
   1.269 +      *offset = MN_EOVERRUN;
   1.270 +      return *offset;
   1.271 +    }
   1.272 +
   1.273 +  if (index > MN_BASE && *offset % 4 != 2)
   1.274 +    {				/* Unexpected 24 bit remainder word */
   1.275 +      *offset = MN_EINDEX24;
   1.276 +      return *offset;
   1.277 +    }
   1.278 +
   1.279 +  groupofs = *offset & ~3;	/* Offset of 4 byte group containing offet */
   1.280 +  x = 0;
   1.281 +  for (i = 0; i < 4; i++)
   1.282 +    if (groupofs + i < destsize)	/* Ignore any bytes outside buffer */
   1.283 +      x |= ((mn_byte *) dest)[groupofs + i] << (i * 8);	/* assemble number */
   1.284 +
   1.285 +  if (index == MN_EOF)		/* Got EOF signal */
   1.286 +    {
   1.287 +      switch (*offset % 4)
   1.288 +	{
   1.289 +	case 3:		/* group was three words and the last */
   1.290 +	  return MN_OK;		/*  word was a 24 bit remainder */
   1.291 +	case 2:		/* last group has two words */
   1.292 +	  if (x <= 0xFFFF)	/*  should encode 16 bit data */
   1.293 +	    return MN_OK;
   1.294 +	  else
   1.295 +	    {
   1.296 +	      *offset = MN_EREM;
   1.297 +	      return *offset;
   1.298 +	    }
   1.299 +	case 1:		/* last group has just one word */
   1.300 +	  if (x <= 0xFF)	/*  should encode 8 bits */
   1.301 +	    return MN_OK;
   1.302 +	  else
   1.303 +	    {
   1.304 +	      *offset = MN_EREM;
   1.305 +	      return *offset;
   1.306 +	    }
   1.307 +
   1.308 +	case 0:		/* last group was full 3 words */
   1.309 +	  return MN_OK;
   1.310 +	}
   1.311 +    }
   1.312 +  if (*offset == destsize)	/* At EOF but didn't get MN_EOF */
   1.313 +    {
   1.314 +      *offset = MN_EOVERRUN;
   1.315 +      return *offset;
   1.316 +    }
   1.317 +
   1.318 +  index--;			/* 1 based to 0 based index */
   1.319 +
   1.320 +  switch (*offset % 4)
   1.321 +    {
   1.322 +    case 3:			/* Got data past 24 bit remainder */
   1.323 +      *offset = MN_EOVERRUN24;
   1.324 +      return *offset;
   1.325 +    case 2:
   1.326 +      if (index >= MN_BASE)
   1.327 +	{			/* 24 bit remainder */
   1.328 +	  x += (index - MN_BASE) * MN_BASE * MN_BASE;
   1.329 +	  (*offset)++;		/* *offset%4 == 3 for next time */
   1.330 +	}
   1.331 +      else
   1.332 +	{			/* catch invalid encodings */
   1.333 +	  if (index >= 1625 || (index == 1624 && x > 1312671))
   1.334 +	    {
   1.335 +	      *offset = MN_EENCODING;
   1.336 +	      return *offset;
   1.337 +	    }
   1.338 +	  x += index * MN_BASE * MN_BASE;
   1.339 +	  (*offset) += 2;	/* *offset%4 == 0 for next time */
   1.340 +	}
   1.341 +      break;
   1.342 +    case 1:
   1.343 +      x += index * MN_BASE;
   1.344 +      (*offset)++;
   1.345 +      break;
   1.346 +    case 0:
   1.347 +      x = index;
   1.348 +      (*offset)++;
   1.349 +      break;
   1.350 +    }
   1.351 +
   1.352 +  for (i = 0; i < 4; i++)
   1.353 +    if (groupofs + i < destsize)	/* Don't step outside the buffer */
   1.354 +      {
   1.355 +	((mn_byte *) dest)[groupofs + i] = (mn_byte) x % 256;
   1.356 +	x /= 256;
   1.357 +      }
   1.358 +  return MN_OK;
   1.359 +}
   1.360 +
   1.361 +/*
   1.362 + * mn_encode
   1.363 + *
   1.364 + * Description:
   1.365 + *  Encode a binary data buffer into a null-terminated sequence of words.
   1.366 + *  The word separators are taken from the format string. 
   1.367 + *
   1.368 + * Parameters:
   1.369 + *  src      - Pointer to the beginning of the binary data buffer.
   1.370 + *  srcsize  - Size in bytes of binary data buffer
   1.371 + *  dest     - Pointer to the beginning of a character buffer 
   1.372 + *  destsize - Size in characters of character buffer
   1.373 + *  format   - Null-terminated string describing the output format.
   1.374 + *             In the format string any letter or sequence of letters
   1.375 + *             acts as a placeholder for the encoded words. The word 
   1.376 + *             placeholders are separated by one or more non-letter
   1.377 + *             characters. When the encoder reaches the end of the 
   1.378 + *             format string it starts reading it again.
   1.379 + *             For sample formats see MN_F* constants in mnemonic.h
   1.380 + *	       If format is empty or NULL the format MN_FDEFAULT
   1.381 + *	       is used.
   1.382 + *
   1.383 + * Return value:
   1.384 + *  MN_OK(=0)
   1.385 + *	Encoding was successful.
   1.386 + *  MN_EOVERRUN
   1.387 + *	Output size exceeds size of destination buffer
   1.388 + *  MN_EFORMAT
   1.389 + *	Invalid format string. This function enforces formats which
   1.390 + *	will result in a string which can be successfully decoded by
   1.391 + *	the mn_decode function.
   1.392 + */
   1.393 +
   1.394 +int
   1.395 +mn_encode (void *src, int srcsize, char *dest, int destsize, char *format)
   1.396 +{
   1.397 +  int n;
   1.398 +  char *fmt;
   1.399 +  char *destend = dest + destsize;
   1.400 +  const char *word;
   1.401 +
   1.402 +  if (format == 0 || format[0] == '\0')
   1.403 +    format = MN_FDEFAULT;
   1.404 +  fmt = format;
   1.405 +  for (n = 0; n < mn_words_required (srcsize); n++)
   1.406 +    {
   1.407 +      while (dest < destend && *fmt != '\0' && !isletter (*fmt))
   1.408 +	*dest++ = *fmt++;
   1.409 +      if (dest >= destend)
   1.410 +	return MN_EOVERRUN;
   1.411 +      if (*fmt == '\0')
   1.412 +	{
   1.413 +	  if (isletter (fmt[-1]) && isletter (format[0]))
   1.414 +	    return MN_EFORMAT;
   1.415 +	  fmt = format;
   1.416 +	  while (dest < destend && *fmt != '\0' && !isletter (*fmt))
   1.417 +	    *dest++ = *fmt++;
   1.418 +	  if (!isletter (*fmt))
   1.419 +	    return MN_EFORMAT;
   1.420 +	}
   1.421 +      word = mn_encode_word (src, srcsize, n);
   1.422 +      if (word == 0)
   1.423 +	return MN_EOVERRUN;	/* shouldn't happen, actually */
   1.424 +
   1.425 +      while (isletter (*fmt))
   1.426 +	fmt++;
   1.427 +      while (dest < destend && *word != '\0')
   1.428 +	*dest++ = *word++;
   1.429 +    }
   1.430 +  if (dest < destend)
   1.431 +    *dest++ = '\0';
   1.432 +  else
   1.433 +    return MN_EOVERRUN;
   1.434 +  return MN_OK;
   1.435 +}
   1.436 +
   1.437 +/*
   1.438 + * mn_decode
   1.439 + *
   1.440 + * Description:
   1.441 + *  Decode a text representation in null-terminated character buffer src to 
   1.442 + *  binary buffer dest.
   1.443 + *
   1.444 + * Parameters:
   1.445 + *  src      - Pointer to null-terminated character buffer 
   1.446 + *  dest     - Pointer to beginning of destination buffer
   1.447 + *  destsize - Size in bytes of destination buffer
   1.448 + *
   1.449 + * Return value:
   1.450 + *  This function may return all the value returned by mn_decode_word_index
   1.451 + *  plus the following result code:
   1.452 + *
   1.453 + * MN_EWORD  - Unrecognized word.
   1.454 + */
   1.455 +
   1.456 +int
   1.457 +mn_decode (char *src, void *dest, int destsize)
   1.458 +{
   1.459 +  mn_index index;
   1.460 +  int offset = 0;
   1.461 +
   1.462 +  while ((index = mn_next_word_index (&src)) != 0)
   1.463 +    {
   1.464 +      if (index == 0 && *src != 0)
   1.465 +	return MN_EWORD;
   1.466 +      (void) mn_decode_word_index (index, dest, destsize, &offset);
   1.467 +    }
   1.468 +  (void) mn_decode_word_index (MN_EOF, dest, destsize, &offset);
   1.469 +  return offset;
   1.470 +}