mnemonicode-0.73/mnemonic.c
author Jens Alfke <jens@mooseyard.com>
Wed May 20 08:34:04 2009 -0700 (2009-05-20)
changeset 32 222393534845
permissions -rw-r--r--
Retain/release MYDirectoryWatcher's _standardizedPath, for non-GC compatibility.
jens@0
     1
/* mnemonic.c
jens@0
     2
jens@0
     3
 Copyright (c) 2000  Oren Tirosh <oren@hishome.net>
jens@0
     4
jens@0
     5
 Permission is hereby granted, free of charge, to any person obtaining a copy
jens@0
     6
 of this software and associated documentation files (the "Software"), to deal
jens@0
     7
 in the Software without restriction, including without limitation the rights
jens@0
     8
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
jens@0
     9
 copies of the Software, and to permit persons to whom the Software is
jens@0
    10
 furnished to do so, subject to the following conditions:
jens@0
    11
jens@0
    12
 The above copyright notice and this permission notice shall be included in
jens@0
    13
 all copies or substantial portions of the Software.
jens@0
    14
jens@0
    15
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
jens@0
    16
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
jens@0
    17
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
jens@0
    18
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
jens@0
    19
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
jens@0
    20
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
jens@0
    21
 THE SOFTWARE.
jens@0
    22
jens@0
    23
*/
jens@0
    24
jens@0
    25
#include "mnemonic.h"
jens@0
    26
#include <string.h>
jens@0
    27
jens@0
    28
jens@0
    29
/*
 * mn_words_required
 *
 * Description:
 *  Compute how many mnemonic words are needed to encode a buffer.
 *  For non-negative sizes every full group of 4 bytes takes 3 words and
 *  a trailing group of 1, 2 or 3 bytes takes 1, 2 or 3 words.
 *
 * Parameters:
 *  size - Size in bytes of data to be encoded
 *
 * Return value:
 *  number of words required for the encoding
 */

int
mn_words_required (int size)
{
  int scaled = (size + 1) * 3;

  return scaled / 4;
}
jens@0
    48
jens@0
    49
jens@0
    50
/*
jens@0
    51
 * mn_encode_word_index
jens@0
    52
 *
jens@0
    53
 * Description:
jens@0
    54
 *  Perform one step of encoding binary data into words. Returns word index.
jens@0
    55
 *
jens@0
    56
 * Parameters:
jens@0
    57
 *   src - Pointer to data buffer to encode
jens@0
    58
 *   srcsize - Size in bytes of data to encode 
jens@0
    59
 *   n - Sequence number of word to encode
jens@0
    60
 *       0 <= n < mn_words_required(srcsize)
jens@0
    61
 *
jens@0
    62
 * Return value:
jens@0
    63
 *   0 - no more words to encode / n is out of range
jens@0
    64
 *   1..MN_WORDS - word index. May be used as index to the mn_words[] array
jens@0
    65
 */
jens@0
    66
jens@0
    67
mn_index mn_encode_word_index (void *src, int srcsize, int n)
jens@0
    68
{
jens@0
    69
  mn_word32 x = 0;		/* Temporary for MN_BASE arithmetic */
jens@0
    70
  int offset;			/* Offset into src */
jens@0
    71
  int remaining;		/* Octets remaining to end of src */
jens@0
    72
  int extra = 0;		/* Index 7 extra words for 24 bit data */
jens@0
    73
  int i;
jens@0
    74
jens@0
    75
  if (n < 0 || n >= mn_words_required (srcsize))
jens@0
    76
    return 0;			/* word out of range */
jens@0
    77
  offset = (n / 3) * 4;		/* byte offset into src */
jens@0
    78
  remaining = srcsize - offset;
jens@0
    79
  if (remaining <= 0)
jens@0
    80
    return 0;
jens@0
    81
  if (remaining >= 4)
jens@0
    82
    remaining = 4;
jens@0
    83
  for (i = 0; i < remaining; i++)
jens@0
    84
    x |= ((mn_byte *) src)[offset + i] << (i * 8);	/* endianness-agnostic */
jens@0
    85
jens@0
    86
  switch (n % 3)
jens@0
    87
    {
jens@0
    88
    case 2:			/* Third word of group */
jens@0
    89
      if (remaining == 3)	/*  special case for 24 bits */
jens@0
    90
	extra = MN_BASE;	/*  use one of the 7 3-letter words */
jens@0
    91
      x /= (MN_BASE * MN_BASE);
jens@0
    92
      break;
jens@0
    93
    case 1:			/* Second word of group */
jens@0
    94
      x /= MN_BASE;
jens@0
    95
    }
jens@0
    96
  return x % MN_BASE + extra + 1;
jens@0
    97
}
jens@0
    98
jens@0
    99
jens@0
   100
/*
jens@0
   101
 * mn_encode_word
jens@0
   102
 *
jens@0
   103
 * Description:
jens@0
   104
 *  Perform one step of encoding binary data into words. Returns pointer 
jens@0
   105
 *  to word.
jens@0
   106
 *
jens@0
   107
 * Parameters:
jens@0
   108
 *   src - Pointer to data buffer to encode
jens@0
   109
 *   srcsize - Size of data to encode in bytes
jens@0
   110
 *   n - Sequence number of word to encode. 
jens@0
   111
 *       0 <= n < mn_words_required(srcsize)
jens@0
   112
 *
jens@0
   113
 * Return value:
jens@0
   114
 *   NULL - no more words to encode / n is out of range
jens@0
   115
 *   valid pointer - pointer to null-terminated lowercase word. length<=7
jens@0
   116
 */
jens@0
   117
jens@0
   118
const char *
jens@0
   119
mn_encode_word (void *src, int srcsize, int n)
jens@0
   120
{
jens@0
   121
  return mn_words[mn_encode_word_index (src, srcsize, n)];
jens@0
   122
}
jens@0
   123
jens@0
   124
jens@0
   125
/*
 * isletter
 * Utility function - returns 1 if character c is an ASCII letter
 * ('A'..'Z' or 'a'..'z') and 0 otherwise.
 */

static int
isletter (char c)
{
  if (c >= 'a' && c <= 'z')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  return 0;
}
jens@0
   135
jens@0
   136
/*
jens@0
   137
 * mn_next_word_index
jens@0
   138
 *
jens@0
   139
 * Description:
jens@0
   140
 *  Perform one step of decoding a null-terminated buffer into word indices.
jens@0
   141
 *  A word is defined as a sequence of letter character separated by one
jens@0
   142
 *  or more non-letter separator characters.
jens@0
   143
 *
jens@0
   144
 * Parameters:
jens@0
   145
 *  ptr - Pointer to a pointer to the next character in the buffer.
jens@0
   146
 *  *ptr is modified by the function; see Return Value below.
jens@0
   147
 *
jens@0
   148
 * Return value:
jens@0
   149
 *  0  - If *ptr==0 (points to the null at the end of the buffer) no more 
jens@0
   150
 *       words were found in the buffer. Otherwise *ptr points to beginning 
jens@0
   151
 *       of an unrecognized word.
jens@0
   152
 *  >0 - index of word found, suitable for decoding with mn_decode_word_index
jens@0
   153
 *       or comparison to values returned from mn_encode_index. *ptr points 
jens@0
   154
 *       to first character of next word or to terminating null.
jens@0
   155
 */
jens@0
   156
jens@0
   157
mn_index
jens@0
   158
mn_next_word_index (char **ptr)
jens@0
   159
{
jens@0
   160
  char *wordstart;
jens@0
   161
  char wordbuf[MN_WORD_BUFLEN];
jens@0
   162
  int i = 0;
jens@0
   163
  char c;
jens@0
   164
  mn_index idx;
jens@0
   165
jens@0
   166
  while (**ptr && !isletter (**ptr))	/* skip separator chars */
jens@0
   167
    (*ptr)++;
jens@0
   168
  wordstart = *ptr;		/* save for error reporting */
jens@0
   169
  while (**ptr && isletter (**ptr) && i < MN_WORD_BUFLEN - 1)
jens@0
   170
    {
jens@0
   171
      c = *(*ptr)++;
jens@0
   172
      if (c >= 'A' && c <= 'Z')
jens@0
   173
	c += 'a' - 'A';		/* convert to lowercase */
jens@0
   174
      wordbuf[i++] = c;
jens@0
   175
    }
jens@0
   176
  wordbuf[i] = '\0';
jens@0
   177
  while (**ptr && isletter (**ptr))	/* skip tail of long words */
jens@0
   178
    (*ptr)++;
jens@0
   179
  while (**ptr && !isletter (**ptr))	/* skip separators */
jens@0
   180
    (*ptr)++;
jens@0
   181
jens@0
   182
  if (wordbuf[0] == '\0')
jens@0
   183
    return 0;			/* EOF, no word found */
jens@0
   184
jens@0
   185
  for (idx = 1; idx <= MN_WORDS; idx++)
jens@0
   186
    {
jens@0
   187
      if (!strcmp (wordbuf, mn_words[idx]))
jens@0
   188
	return idx;
jens@0
   189
      /* FIXME: some fancy code should go here
jens@0
   190
         to accept misspellings and soundalikes.
jens@0
   191
         (replacing the linear search would also be nice) */
jens@0
   192
    }
jens@0
   193
  *ptr = wordstart;
jens@0
   194
  return 0;			/* not found */
jens@0
   195
}
jens@0
   196
jens@0
   197
jens@0
   198
/*
jens@0
   199
 * mn_decode_word_index
jens@0
   200
 *
jens@0
   201
 * Description:
jens@0
   202
 *  Perform one step of decoding a sequence of words into binary data.
jens@0
   203
 *
jens@0
   204
 * Parameters:
jens@0
   205
 *  index    - Index of word, e.g. return value of mn_next_word_index. Use
jens@0
   206
 *             the value MN_EOF(=0) to signal the end of input.
jens@0
   207
 *  dest     - Points to buffer to receive decoded binary result.
jens@0
   208
 *  destsize - Size of buffer 
jens@0
   209
 *  offset   - Pointer to an integer offset into the destination buffer for 
jens@0
   210
 *             next data byte. Initialize *offset to 0 before first call to 
jens@0
   211
 *             function. Modified by function and may be used as an 
jens@0
   212
 * 	       indication for the amount of data actually decoded.
jens@0
   213
 *
jens@0
   214
 * Return value:
jens@0
   215
 *  The return value indicates the status of the decoding function. It is
jens@0
   216
 *  ok to ignore this value on all calls to the function except the last
jens@0
   217
 *  one (with index=MN_EOF). Any errors encountered will be reported on. 
jens@0
   218
 *  the last call. The error code is also returned in *offset (negative 
jens@0
   219
 *  values indicate error).
jens@0
   220
 *
jens@0
   221
 * MN_OK (==0)	
jens@0
   222
 *	for index!=MN_EOF a return value of MN_OK means that 
jens@0
   223
 *	decoding has been successful so far.
jens@0
   224
 *	for index==MN_EOF a return value of MN_OK means that decoding
jens@0
   225
 *	of the entire buffer has been successful and the decoder is in
jens@0
   226
 *	a valid state for the end of the message. A total of *offset
jens@0
   227
 *	valid decoded bytes is in the buffer.
jens@0
   228
 *  MN_EREM      
jens@0
   229
 *	returned on MN_EOF when an unaccounted arithmetic remainder is
jens@0
   230
 *	in the decoder. Most likely indicates a truncated word sequence.
jens@0
   231
 *  MN_EOVERRUN	
jens@0
   232
 *	Not enough room in buffer for decoded data.
jens@0
   233
 *  MN_EOVERRUN24 
jens@0
   234
 *	Returned when decoding of data is attempted after decoding one
jens@0
   235
 *	of the 7 words reserved for 24 bit remainders at the end of the
jens@0
   236
 *	message. Probably indicates a garbled messages.
jens@0
   237
 *  MN_EINDEX	
jens@0
   238
 *	Bad input index. Naturally this should not happen when using 
jens@0
   239
 *	the result of mn_next_word_index.
jens@0
   240
 *  MN_EINDEX24
jens@0
   241
 *	Returned when one of the 7 words reserved for 24 bit remainders
jens@0
   242
 *	is received at an offset inappropriate for a 24 bit remainder.
jens@0
   243
 *  MN_EENCODING
jens@0
   244
 *	Indicates an overflow in MN_BASE arithmetic. Approximately 0.09%
jens@0
   245
 *	of the 3 word combinations are unused and will generate this error.
jens@0
   246
 */
jens@0
   247
jens@0
   248
int
jens@0
   249
mn_decode_word_index (mn_index index, void *dest, int destsize, int *offset)
jens@0
   250
{
jens@0
   251
  mn_word32 x;			/* Temporary for MN_BASE arithmetic */
jens@0
   252
  int groupofs;
jens@0
   253
  int i;
jens@0
   254
jens@0
   255
  if (*offset < 0)		/* Error from previous call? report it */
jens@0
   256
    return *offset;
jens@0
   257
jens@0
   258
  if (index < 0 || index > MN_WORDS)	/* Word index out of range */
jens@0
   259
    {
jens@0
   260
      *offset = MN_EINDEX;
jens@0
   261
      return *offset;
jens@0
   262
    }
jens@0
   263
jens@0
   264
  if (*offset > destsize)	/* out of range? */
jens@0
   265
    {
jens@0
   266
      *offset = MN_EOVERRUN;
jens@0
   267
      return *offset;
jens@0
   268
    }
jens@0
   269
jens@0
   270
  if (index > MN_BASE && *offset % 4 != 2)
jens@0
   271
    {				/* Unexpected 24 bit remainder word */
jens@0
   272
      *offset = MN_EINDEX24;
jens@0
   273
      return *offset;
jens@0
   274
    }
jens@0
   275
jens@0
   276
  groupofs = *offset & ~3;	/* Offset of 4 byte group containing offet */
jens@0
   277
  x = 0;
jens@0
   278
  for (i = 0; i < 4; i++)
jens@0
   279
    if (groupofs + i < destsize)	/* Ignore any bytes outside buffer */
jens@0
   280
      x |= ((mn_byte *) dest)[groupofs + i] << (i * 8);	/* assemble number */
jens@0
   281
jens@0
   282
  if (index == MN_EOF)		/* Got EOF signal */
jens@0
   283
    {
jens@0
   284
      switch (*offset % 4)
jens@0
   285
	{
jens@0
   286
	case 3:		/* group was three words and the last */
jens@0
   287
	  return MN_OK;		/*  word was a 24 bit remainder */
jens@0
   288
	case 2:		/* last group has two words */
jens@0
   289
	  if (x <= 0xFFFF)	/*  should encode 16 bit data */
jens@0
   290
	    return MN_OK;
jens@0
   291
	  else
jens@0
   292
	    {
jens@0
   293
	      *offset = MN_EREM;
jens@0
   294
	      return *offset;
jens@0
   295
	    }
jens@0
   296
	case 1:		/* last group has just one word */
jens@0
   297
	  if (x <= 0xFF)	/*  should encode 8 bits */
jens@0
   298
	    return MN_OK;
jens@0
   299
	  else
jens@0
   300
	    {
jens@0
   301
	      *offset = MN_EREM;
jens@0
   302
	      return *offset;
jens@0
   303
	    }
jens@0
   304
jens@0
   305
	case 0:		/* last group was full 3 words */
jens@0
   306
	  return MN_OK;
jens@0
   307
	}
jens@0
   308
    }
jens@0
   309
  if (*offset == destsize)	/* At EOF but didn't get MN_EOF */
jens@0
   310
    {
jens@0
   311
      *offset = MN_EOVERRUN;
jens@0
   312
      return *offset;
jens@0
   313
    }
jens@0
   314
jens@0
   315
  index--;			/* 1 based to 0 based index */
jens@0
   316
jens@0
   317
  switch (*offset % 4)
jens@0
   318
    {
jens@0
   319
    case 3:			/* Got data past 24 bit remainder */
jens@0
   320
      *offset = MN_EOVERRUN24;
jens@0
   321
      return *offset;
jens@0
   322
    case 2:
jens@0
   323
      if (index >= MN_BASE)
jens@0
   324
	{			/* 24 bit remainder */
jens@0
   325
	  x += (index - MN_BASE) * MN_BASE * MN_BASE;
jens@0
   326
	  (*offset)++;		/* *offset%4 == 3 for next time */
jens@0
   327
	}
jens@0
   328
      else
jens@0
   329
	{			/* catch invalid encodings */
jens@0
   330
	  if (index >= 1625 || (index == 1624 && x > 1312671))
jens@0
   331
	    {
jens@0
   332
	      *offset = MN_EENCODING;
jens@0
   333
	      return *offset;
jens@0
   334
	    }
jens@0
   335
	  x += index * MN_BASE * MN_BASE;
jens@0
   336
	  (*offset) += 2;	/* *offset%4 == 0 for next time */
jens@0
   337
	}
jens@0
   338
      break;
jens@0
   339
    case 1:
jens@0
   340
      x += index * MN_BASE;
jens@0
   341
      (*offset)++;
jens@0
   342
      break;
jens@0
   343
    case 0:
jens@0
   344
      x = index;
jens@0
   345
      (*offset)++;
jens@0
   346
      break;
jens@0
   347
    }
jens@0
   348
jens@0
   349
  for (i = 0; i < 4; i++)
jens@0
   350
    if (groupofs + i < destsize)	/* Don't step outside the buffer */
jens@0
   351
      {
jens@0
   352
	((mn_byte *) dest)[groupofs + i] = (mn_byte) x % 256;
jens@0
   353
	x /= 256;
jens@0
   354
      }
jens@0
   355
  return MN_OK;
jens@0
   356
}
jens@0
   357
jens@0
   358
/*
jens@0
   359
 * mn_encode
jens@0
   360
 *
jens@0
   361
 * Description:
jens@0
   362
 *  Encode a binary data buffer into a null-terminated sequence of words.
jens@0
   363
 *  The word separators are taken from the format string. 
jens@0
   364
 *
jens@0
   365
 * Parameters:
jens@0
   366
 *  src      - Pointer to the beginning of the binary data buffer.
jens@0
   367
 *  srcsize  - Size in bytes of binary data buffer
jens@0
   368
 *  dest     - Pointer to the beginning of a character buffer 
jens@0
   369
 *  destsize - Size in characters of character buffer
jens@0
   370
 *  format   - Null-terminated string describing the output format.
jens@0
   371
 *             In the format string any letter or sequence of letters
jens@0
   372
 *             acts as a placeholder for the encoded words. The word 
jens@0
   373
 *             placeholders are separated by one or more non-letter
jens@0
   374
 *             characters. When the encoder reaches the end of the 
jens@0
   375
 *             format string it starts reading it again.
jens@0
   376
 *             For sample formats see MN_F* constants in mnemonic.h
jens@0
   377
 *	       If format is empty or NULL the format MN_FDEFAULT
jens@0
   378
 *	       is used.
jens@0
   379
 *
jens@0
   380
 * Return value:
jens@0
   381
 *  MN_OK(=0)
jens@0
   382
 *	Encoding was successful.
jens@0
   383
 *  MN_EOVERRUN
jens@0
   384
 *	Output size exceeds size of destination buffer
jens@0
   385
 *  MN_EFORMAT
jens@0
   386
 *	Invalid format string. This function enforces formats which
jens@0
   387
 *	will result in a string which can be successfully decoded by
jens@0
   388
 *	the mn_decode function.
jens@0
   389
 */
jens@0
   390
jens@0
   391
int
jens@0
   392
mn_encode (void *src, int srcsize, char *dest, int destsize, char *format)
jens@0
   393
{
jens@0
   394
  int n;
jens@0
   395
  char *fmt;
jens@0
   396
  char *destend = dest + destsize;
jens@0
   397
  const char *word;
jens@0
   398
jens@0
   399
  if (format == 0 || format[0] == '\0')
jens@0
   400
    format = MN_FDEFAULT;
jens@0
   401
  fmt = format;
jens@0
   402
  for (n = 0; n < mn_words_required (srcsize); n++)
jens@0
   403
    {
jens@0
   404
      while (dest < destend && *fmt != '\0' && !isletter (*fmt))
jens@0
   405
	*dest++ = *fmt++;
jens@0
   406
      if (dest >= destend)
jens@0
   407
	return MN_EOVERRUN;
jens@0
   408
      if (*fmt == '\0')
jens@0
   409
	{
jens@0
   410
	  if (isletter (fmt[-1]) && isletter (format[0]))
jens@0
   411
	    return MN_EFORMAT;
jens@0
   412
	  fmt = format;
jens@0
   413
	  while (dest < destend && *fmt != '\0' && !isletter (*fmt))
jens@0
   414
	    *dest++ = *fmt++;
jens@0
   415
	  if (!isletter (*fmt))
jens@0
   416
	    return MN_EFORMAT;
jens@0
   417
	}
jens@0
   418
      word = mn_encode_word (src, srcsize, n);
jens@0
   419
      if (word == 0)
jens@0
   420
	return MN_EOVERRUN;	/* shouldn't happen, actually */
jens@0
   421
jens@0
   422
      while (isletter (*fmt))
jens@0
   423
	fmt++;
jens@0
   424
      while (dest < destend && *word != '\0')
jens@0
   425
	*dest++ = *word++;
jens@0
   426
    }
jens@0
   427
  if (dest < destend)
jens@0
   428
    *dest++ = '\0';
jens@0
   429
  else
jens@0
   430
    return MN_EOVERRUN;
jens@0
   431
  return MN_OK;
jens@0
   432
}
jens@0
   433
jens@0
   434
/*
jens@0
   435
 * mn_decode
jens@0
   436
 *
jens@0
   437
 * Description:
jens@0
   438
 *  Decode a text representation in null-terminated character buffer src to 
jens@0
   439
 *  binary buffer dest.
jens@0
   440
 *
jens@0
   441
 * Parameters:
jens@0
   442
 *  src      - Pointer to null-terminated character buffer 
jens@0
   443
 *  dest     - Pointer to beginning of destination buffer
jens@0
   444
 *  destsize - Size in bytes of destination buffer
jens@0
   445
 *
jens@0
   446
 * Return value:
jens@0
   447
 *  This function may return all the value returned by mn_decode_word_index
jens@0
   448
 *  plus the following result code:
jens@0
   449
 *
jens@0
   450
 * MN_EWORD  - Unrecognized word.
jens@0
   451
 */
jens@0
   452
jens@0
   453
int
jens@0
   454
mn_decode (char *src, void *dest, int destsize)
jens@0
   455
{
jens@0
   456
  mn_index index;
jens@0
   457
  int offset = 0;
jens@0
   458
jens@0
   459
  while ((index = mn_next_word_index (&src)) != 0)
jens@0
   460
    {
jens@0
   461
      if (index == 0 && *src != 0)
jens@0
   462
	return MN_EWORD;
jens@0
   463
      (void) mn_decode_word_index (index, dest, destsize, &offset);
jens@0
   464
    }
jens@0
   465
  (void) mn_decode_word_index (MN_EOF, dest, destsize, &offset);
jens@0
   466
  return offset;
jens@0
   467
}