MYUtilities: mnemonicode-0.73/mnemonic.c@5cab3034d3a1 (annotated)

jens@0	1	/* mnemonic.c
jens@0	2
jens@0	3	Copyright (c) 2000 Oren Tirosh <oren@hishome.net>
jens@0	4
jens@0	5	Permission is hereby granted, free of charge, to any person obtaining a copy
jens@0	6	of this software and associated documentation files (the "Software"), to deal
jens@0	7	in the Software without restriction, including without limitation the rights
jens@0	8	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
jens@0	9	copies of the Software, and to permit persons to whom the Software is
jens@0	10	furnished to do so, subject to the following conditions:
jens@0	11
jens@0	12	The above copyright notice and this permission notice shall be included in
jens@0	13	all copies or substantial portions of the Software.
jens@0	14
jens@0	15	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
jens@0	16	IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
jens@0	17	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
jens@0	18	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
jens@0	19	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
jens@0	20	OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
jens@0	21	THE SOFTWARE.
jens@0	22
jens@0	23	*/
jens@0	24
jens@0	25	#include "mnemonic.h"
jens@0	26	#include <string.h>
jens@0	27
jens@0	28
/*
 * mn_words_required
 *
 * Description:
 *  Calculate the number of words required to encode data using mnemonic
 *  encoding.
 *
 * Parameters:
 *  size - Size in bytes of data to be encoded
 *
 * Return value:
 *  number of words required for the encoding; 0 when size <= 0
 */

int
mn_words_required (int size)
{
  if (size <= 0)
    return 0;                   /* nothing to encode */

  /* Same value as ((size + 1) * 3) / 4, but computed per 4-byte group so
     the intermediate product cannot overflow int: each full group of 4
     bytes contributes 3 words, and (size % 4 + 1) * 3 / 4 yields 0..3
     words for the trailing 0..3 bytes. */
  return (size / 4) * 3 + ((size % 4 + 1) * 3) / 4;
}
jens@0	48
jens@0	49
jens@0	50	/*
jens@0	51	* mn_encode_word_index
jens@0	52	*
jens@0	53	* Description:
jens@0	54	* Perform one step of encoding binary data into words. Returns word index.
jens@0	55	*
jens@0	56	* Parameters:
jens@0	57	* src - Pointer to data buffer to encode
jens@0	58	* srcsize - Size in bytes of data to encode
jens@0	59	* n - Sequence number of word to encode
jens@0	60	* 0 <= n < mn_words_required(srcsize)
jens@0	61	*
jens@0	62	* Return value:
jens@0	63	* 0 - no more words to encode / n is out of range
jens@0	64	* 1..MN_WORDS - word index. May be used as index to the mn_words[] array
jens@0	65	*/
jens@0	66
jens@0	67	mn_index mn_encode_word_index (void *src, int srcsize, int n)
jens@0	68	{
jens@0	69	mn_word32 x = 0; /* Temporary for MN_BASE arithmetic */
jens@0	70	int offset; /* Offset into src */
jens@0	71	int remaining; /* Octets remaining to end of src */
jens@0	72	int extra = 0; /* Index 7 extra words for 24 bit data */
jens@0	73	int i;
jens@0	74
jens@0	75	if (n < 0 \|\| n >= mn_words_required (srcsize))
jens@0	76	return 0; /* word out of range */
jens@0	77	offset = (n / 3) * 4; /* byte offset into src */
jens@0	78	remaining = srcsize - offset;
jens@0	79	if (remaining <= 0)
jens@0	80	return 0;
jens@0	81	if (remaining >= 4)
jens@0	82	remaining = 4;
jens@0	83	for (i = 0; i < remaining; i++)
jens@0	84	x \|= ((mn_byte ) src)[offset + i] << (i 8); /* endianness-agnostic */
jens@0	85
jens@0	86	switch (n % 3)
jens@0	87	{
jens@0	88	case 2: /* Third word of group */
jens@0	89	if (remaining == 3) /* special case for 24 bits */
jens@0	90	extra = MN_BASE; /* use one of the 7 3-letter words */
jens@0	91	x /= (MN_BASE * MN_BASE);
jens@0	92	break;
jens@0	93	case 1: /* Second word of group */
jens@0	94	x /= MN_BASE;
jens@0	95	}
jens@0	96	return x % MN_BASE + extra + 1;
jens@0	97	}
jens@0	98
jens@0	99
jens@0	100	/*
jens@0	101	* mn_encode_word
jens@0	102	*
jens@0	103	* Description:
jens@0	104	* Perform one step of encoding binary data into words. Returns pointer
jens@0	105	* to word.
jens@0	106	*
jens@0	107	* Parameters:
jens@0	108	* src - Pointer to data buffer to encode
jens@0	109	* srcsize - Size of data to encode in bytes
jens@0	110	* n - Sequence number of word to encode.
jens@0	111	* 0 <= n < mn_words_required(srcsize)
jens@0	112	*
jens@0	113	* Return value:
jens@0	114	* NULL - no more words to encode / n is out of range
jens@0	115	* valid pointer - pointer to null-terminated lowercase word. length<=7
jens@0	116	*/
jens@0	117
jens@0	118	const char *
jens@0	119	mn_encode_word (void *src, int srcsize, int n)
jens@0	120	{
jens@0	121	return mn_words[mn_encode_word_index (src, srcsize, n)];
jens@0	122	}
jens@0	123
jens@0	124
/*
 * isletter
 *  Utility function - returns nonzero if character c is an ASCII letter
 *  ('A'..'Z' or 'a'..'z'), zero otherwise.  The character is compared
 *  against the two ranges directly.
 */

static int
isletter (char c)
{
  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
jens@0	135
jens@0	136	/*
jens@0	137	* mn_next_word_index
jens@0	138	*
jens@0	139	* Description:
jens@0	140	* Perform one step of decoding a null-terminated buffer into word indices.
jens@0	141	* A word is defined as a sequence of letter character separated by one
jens@0	142	* or more non-letter separator characters.
jens@0	143	*
jens@0	144	* Parameters:
jens@0	145	* ptr - Pointer to a pointer to the next character in the buffer.
jens@0	146	* *ptr is modified by the function; see Return Value below.
jens@0	147	*
jens@0	148	* Return value:
jens@0	149	* 0 - If *ptr==0 (points to the null at the end of the buffer) no more
jens@0	150	* words were found in the buffer. Otherwise *ptr points to beginning
jens@0	151	* of an unrecognized word.
jens@0	152	* >0 - index of word found, suitable for decoding with mn_decode_word_index
jens@0	153	* or comparison to values returned from mn_encode_index. *ptr points
jens@0	154	* to first character of next word or to terminating null.
jens@0	155	*/
jens@0	156
jens@0	157	mn_index
jens@0	158	mn_next_word_index (char **ptr)
jens@0	159	{
jens@0	160	char *wordstart;
jens@0	161	char wordbuf[MN_WORD_BUFLEN];
jens@0	162	int i = 0;
jens@0	163	char c;
jens@0	164	mn_index idx;
jens@0	165
jens@0	166	while (ptr && !isletter (ptr)) /* skip separator chars */
jens@0	167	(*ptr)++;
jens@0	168	wordstart = ptr; / save for error reporting */
jens@0	169	while (ptr && isletter (ptr) && i < MN_WORD_BUFLEN - 1)
jens@0	170	{
jens@0	171	c = (ptr)++;
jens@0	172	if (c >= 'A' && c <= 'Z')
jens@0	173	c += 'a' - 'A'; /* convert to lowercase */
jens@0	174	wordbuf[i++] = c;
jens@0	175	}
jens@0	176	wordbuf[i] = '\0';
jens@0	177	while (ptr && isletter (ptr)) /* skip tail of long words */
jens@0	178	(*ptr)++;
jens@0	179	while (ptr && !isletter (ptr)) /* skip separators */
jens@0	180	(*ptr)++;
jens@0	181
jens@0	182	if (wordbuf[0] == '\0')
jens@0	183	return 0; /* EOF, no word found */
jens@0	184
jens@0	185	for (idx = 1; idx <= MN_WORDS; idx++)
jens@0	186	{
jens@0	187	if (!strcmp (wordbuf, mn_words[idx]))
jens@0	188	return idx;
jens@0	189	/* FIXME: some fancy code should go here
jens@0	190	to accept misspellings and soundalikes.
jens@0	191	(replacing the linear search would also be nice) */
jens@0	192	}
jens@0	193	*ptr = wordstart;
jens@0	194	return 0; /* not found */
jens@0	195	}
jens@0	196
jens@0	197
jens@0	198	/*
jens@0	199	* mn_decode_word_index
jens@0	200	*
jens@0	201	* Description:
jens@0	202	* Perform one step of decoding a sequence of words into binary data.
jens@0	203	*
jens@0	204	* Parameters:
jens@0	205	* index - Index of word, e.g. return value of mn_next_word_index. Use
jens@0	206	* the value MN_EOF(=0) to signal the end of input.
jens@0	207	* dest - Points to buffer to receive decoded binary result.
jens@0	208	* destsize - Size of buffer
jens@0	209	* offset - Pointer to an integer offset into the destination buffer for
jens@0	210	* next data byte. Initialize *offset to 0 before first call to
jens@0	211	* function. Modified by function and may be used as an
jens@0	212	* indication for the amount of data actually decoded.
jens@0	213	*
jens@0	214	* Return value:
jens@0	215	* The return value indicates the status of the decoding function. It is
jens@0	216	* ok to ignore this value on all calls to the function except the last
jens@0	217	* one (with index=MN_EOF). Any errors encountered will be reported on.
jens@0	218	* the last call. The error code is also returned in *offset (negative
jens@0	219	* values indicate error).
jens@0	220	*
jens@0	221	* MN_OK (==0)
jens@0	222	* for index!=MN_EOF a return value of MN_OK means that
jens@0	223	* decoding has been successful so far.
jens@0	224	* for index==MN_EOF a return value of MN_OK means that decoding
jens@0	225	* of the entire buffer has been successful and the decoder is in
jens@0	226	* a valid state for the end of the message. A total of *offset
jens@0	227	* valid decoded bytes is in the buffer.
jens@0	228	* MN_EREM
jens@0	229	* returned on MN_EOF when an unaccounted arithmetic remainder is
jens@0	230	* in the decoder. Most likely indicates a truncated word sequence.
jens@0	231	* MN_EOVERRUN
jens@0	232	* Not enough room in buffer for decoded data.
jens@0	233	* MN_EOVERRUN24
jens@0	234	* Returned when decoding of data is attempted after decoding one
jens@0	235	* of the 7 words reserved for 24 bit remainders at the end of the
jens@0	236	* message. Probably indicates a garbled messages.
jens@0	237	* MN_EINDEX
jens@0	238	* Bad input index. Naturally this should not happen when using
jens@0	239	* the result of mn_next_word_index.
jens@0	240	* MN_EINDEX24
jens@0	241	* Returned when one of the 7 words reserved for 24 bit remainders
jens@0	242	* is received at an offset inappropriate for a 24 bit remainder.
jens@0	243	* MN_EENCODING
jens@0	244	* Indicates an overflow in MN_BASE arithmetic. Approximately 0.09%
jens@0	245	* of the 3 word combinations are unused and will generate this error.
jens@0	246	*/
jens@0	247
jens@0	248	int
jens@0	249	mn_decode_word_index (mn_index index, void dest, int destsize, int offset)
jens@0	250	{
jens@0	251	mn_word32 x; /* Temporary for MN_BASE arithmetic */
jens@0	252	int groupofs;
jens@0	253	int i;
jens@0	254
jens@0	255	if (offset < 0) / Error from previous call? report it */
jens@0	256	return *offset;
jens@0	257
jens@0	258	if (index < 0 \|\| index > MN_WORDS) /* Word index out of range */
jens@0	259	{
jens@0	260	*offset = MN_EINDEX;
jens@0	261	return *offset;
jens@0	262	}
jens@0	263
jens@0	264	if (offset > destsize) / out of range? */
jens@0	265	{
jens@0	266	*offset = MN_EOVERRUN;
jens@0	267	return *offset;
jens@0	268	}
jens@0	269
jens@0	270	if (index > MN_BASE && *offset % 4 != 2)
jens@0	271	{ /* Unexpected 24 bit remainder word */
jens@0	272	*offset = MN_EINDEX24;
jens@0	273	return *offset;
jens@0	274	}
jens@0	275
jens@0	276	groupofs = offset & ~3; / Offset of 4 byte group containing offet */
jens@0	277	x = 0;
jens@0	278	for (i = 0; i < 4; i++)
jens@0	279	if (groupofs + i < destsize) /* Ignore any bytes outside buffer */
jens@0	280	x \|= ((mn_byte ) dest)[groupofs + i] << (i 8); /* assemble number */
jens@0	281
jens@0	282	if (index == MN_EOF) /* Got EOF signal */
jens@0	283	{
jens@0	284	switch (*offset % 4)
jens@0	285	{
jens@0	286	case 3: /* group was three words and the last */
jens@0	287	return MN_OK; /* word was a 24 bit remainder */
jens@0	288	case 2: /* last group has two words */
jens@0	289	if (x <= 0xFFFF) /* should encode 16 bit data */
jens@0	290	return MN_OK;
jens@0	291	else
jens@0	292	{
jens@0	293	*offset = MN_EREM;
jens@0	294	return *offset;
jens@0	295	}
jens@0	296	case 1: /* last group has just one word */
jens@0	297	if (x <= 0xFF) /* should encode 8 bits */
jens@0	298	return MN_OK;
jens@0	299	else
jens@0	300	{
jens@0	301	*offset = MN_EREM;
jens@0	302	return *offset;
jens@0	303	}
jens@0	304
jens@0	305	case 0: /* last group was full 3 words */
jens@0	306	return MN_OK;
jens@0	307	}
jens@0	308	}
jens@0	309	if (offset == destsize) / At EOF but didn't get MN_EOF */
jens@0	310	{
jens@0	311	*offset = MN_EOVERRUN;
jens@0	312	return *offset;
jens@0	313	}
jens@0	314
jens@0	315	index--; /* 1 based to 0 based index */
jens@0	316
jens@0	317	switch (*offset % 4)
jens@0	318	{
jens@0	319	case 3: /* Got data past 24 bit remainder */
jens@0	320	*offset = MN_EOVERRUN24;
jens@0	321	return *offset;
jens@0	322	case 2:
jens@0	323	if (index >= MN_BASE)
jens@0	324	{ /* 24 bit remainder */
jens@0	325	x += (index - MN_BASE) * MN_BASE * MN_BASE;
jens@0	326	(offset)++; / offset%4 == 3 for next time /
jens@0	327	}
jens@0	328	else
jens@0	329	{ /* catch invalid encodings */
jens@0	330	if (index >= 1625 \|\| (index == 1624 && x > 1312671))
jens@0	331	{
jens@0	332	*offset = MN_EENCODING;
jens@0	333	return *offset;
jens@0	334	}
jens@0	335	x += index * MN_BASE * MN_BASE;
jens@0	336	(offset) += 2; / offset%4 == 0 for next time /
jens@0	337	}
jens@0	338	break;
jens@0	339	case 1:
jens@0	340	x += index * MN_BASE;
jens@0	341	(*offset)++;
jens@0	342	break;
jens@0	343	case 0:
jens@0	344	x = index;
jens@0	345	(*offset)++;
jens@0	346	break;
jens@0	347	}
jens@0	348
jens@0	349	for (i = 0; i < 4; i++)
jens@0	350	if (groupofs + i < destsize) /* Don't step outside the buffer */
jens@0	351	{
jens@0	352	((mn_byte *) dest)[groupofs + i] = (mn_byte) x % 256;
jens@0	353	x /= 256;
jens@0	354	}
jens@0	355	return MN_OK;
jens@0	356	}
jens@0	357
jens@0	358	/*
jens@0	359	* mn_encode
jens@0	360	*
jens@0	361	* Description:
jens@0	362	* Encode a binary data buffer into a null-terminated sequence of words.
jens@0	363	* The word separators are taken from the format string.
jens@0	364	*
jens@0	365	* Parameters:
jens@0	366	* src - Pointer to the beginning of the binary data buffer.
jens@0	367	* srcsize - Size in bytes of binary data buffer
jens@0	368	* dest - Pointer to the beginning of a character buffer
jens@0	369	* destsize - Size in characters of character buffer
jens@0	370	* format - Null-terminated string describing the output format.
jens@0	371	* In the format string any letter or sequence of letters
jens@0	372	* acts as a placeholder for the encoded words. The word
jens@0	373	* placeholders are separated by one or more non-letter
jens@0	374	* characters. When the encoder reaches the end of the
jens@0	375	* format string it starts reading it again.
jens@0	376	* For sample formats see MN_F* constants in mnemonic.h
jens@0	377	* If format is empty or NULL the format MN_FDEFAULT
jens@0	378	* is used.
jens@0	379	*
jens@0	380	* Return value:
jens@0	381	* MN_OK(=0)
jens@0	382	* Encoding was successful.
jens@0	383	* MN_EOVERRUN
jens@0	384	* Output size exceeds size of destination buffer
jens@0	385	* MN_EFORMAT
jens@0	386	* Invalid format string. This function enforces formats which
jens@0	387	* will result in a string which can be successfully decoded by
jens@0	388	* the mn_decode function.
jens@0	389	*/
jens@0	390
jens@0	391	int
jens@0	392	mn_encode (void src, int srcsize, char dest, int destsize, char *format)
jens@0	393	{
jens@0	394	int n;
jens@0	395	char *fmt;
jens@0	396	char *destend = dest + destsize;
jens@0	397	const char *word;
jens@0	398
jens@0	399	if (format == 0 \|\| format[0] == '\0')
jens@0	400	format = MN_FDEFAULT;
jens@0	401	fmt = format;
jens@0	402	for (n = 0; n < mn_words_required (srcsize); n++)
jens@0	403	{
jens@0	404	while (dest < destend && fmt != '\0' && !isletter (fmt))
jens@0	405	dest++ = fmt++;
jens@0	406	if (dest >= destend)
jens@0	407	return MN_EOVERRUN;
jens@0	408	if (*fmt == '\0')
jens@0	409	{
jens@0	410	if (isletter (fmt[-1]) && isletter (format[0]))
jens@0	411	return MN_EFORMAT;
jens@0	412	fmt = format;
jens@0	413	while (dest < destend && fmt != '\0' && !isletter (fmt))
jens@0	414	dest++ = fmt++;
jens@0	415	if (!isletter (*fmt))
jens@0	416	return MN_EFORMAT;
jens@0	417	}
jens@0	418	word = mn_encode_word (src, srcsize, n);
jens@0	419	if (word == 0)
jens@0	420	return MN_EOVERRUN; /* shouldn't happen, actually */
jens@0	421
jens@0	422	while (isletter (*fmt))
jens@0	423	fmt++;
jens@0	424	while (dest < destend && *word != '\0')
jens@0	425	dest++ = word++;
jens@0	426	}
jens@0	427	if (dest < destend)
jens@0	428	*dest++ = '\0';
jens@0	429	else
jens@0	430	return MN_EOVERRUN;
jens@0	431	return MN_OK;
jens@0	432	}
jens@0	433
jens@0	434	/*
jens@0	435	* mn_decode
jens@0	436	*
jens@0	437	* Description:
jens@0	438	* Decode a text representation in null-terminated character buffer src to
jens@0	439	* binary buffer dest.
jens@0	440	*
jens@0	441	* Parameters:
jens@0	442	* src - Pointer to null-terminated character buffer
jens@0	443	* dest - Pointer to beginning of destination buffer
jens@0	444	* destsize - Size in bytes of destination buffer
jens@0	445	*
jens@0	446	* Return value:
jens@0	447	* This function may return all the value returned by mn_decode_word_index
jens@0	448	* plus the following result code:
jens@0	449	*
jens@0	450	* MN_EWORD - Unrecognized word.
jens@0	451	*/
jens@0	452
jens@0	453	int
jens@0	454	mn_decode (char src, void dest, int destsize)
jens@0	455	{
jens@0	456	mn_index index;
jens@0	457	int offset = 0;
jens@0	458
jens@0	459	while ((index = mn_next_word_index (&src)) != 0)
jens@0	460	{
jens@0	461	if (index == 0 && *src != 0)
jens@0	462	return MN_EWORD;
jens@0	463	(void) mn_decode_word_index (index, dest, destsize, &offset);
jens@0	464	}
jens@0	465	(void) mn_decode_word_index (MN_EOF, dest, destsize, &offset);
jens@0	466	return offset;
jens@0	467	}

author	Jens Alfke <jens@mooseyard.com>
	Wed Sep 02 08:41:25 2009 -0700 (2009-09-02)
changeset 35	5cab3034d3a1
permissions	-rw-r--r--