<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>Attractive Chaos &#187; attractivechaos</title>
	<atom:link href="http://attractivechaos.wordpress.com/author/attractivechaos/feed/" rel="self" type="application/rss+xml" />
	<link>http://attractivechaos.wordpress.com</link>
	<description>Just another WordPress.com weblog</description>
	<lastBuildDate>Tue, 29 Sep 2009 22:22:13 +0000</lastBuildDate>
	<generator>http://wordpress.com/</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<cloud domain='attractivechaos.wordpress.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://www.gravatar.com/blavatar/3aaf4ad34bfdf87dcbb70d9e3cbd326d?s=96&#038;d=http://s.wordpress.com/i/buttonw-com.png</url>
		<title>Attractive Chaos &#187; attractivechaos</title>
		<link>http://attractivechaos.wordpress.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://attractivechaos.wordpress.com/osd.xml" title="Attractive Chaos" />
		<item>
		<title>khash.h</title>
		<link>http://attractivechaos.wordpress.com/2009/09/29/khash-h/</link>
		<comments>http://attractivechaos.wordpress.com/2009/09/29/khash-h/#comments</comments>
		<pubDate>Tue, 29 Sep 2009 11:56:57 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=676</guid>
		<description><![CDATA[I put my khash.h library here. It is now compatible with VC++ 8.0. I believe it is quite stable now.

/* The MIT License

   Copyright (c) 2008, 2009 by attractor &#60;attractor@live.co.uk&#62;

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=676&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>I put my khash.h library here. It is now compatible with VC++ 8.0. I believe it is quite stable now.</p>
<pre class="brush: cpp;">
/* The MIT License

   Copyright (c) 2008, 2009 by attractor &lt;attractor@live.co.uk&gt;

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   &quot;Software&quot;), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED &quot;AS IS&quot;, WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/*
  An example:

#include &quot;khash.h&quot;
KHASH_MAP_INIT_INT(32, char)
int main() {
	int ret, is_missing;
	khiter_t k;
	khash_t(32) *h = kh_init(32);
	k = kh_put(32, h, 5, &amp;ret);
	if (!ret) kh_del(32, h, k);
	kh_value(h, k) = 10;
	k = kh_get(32, h, 10);
	is_missing = (k == kh_end(h));
	k = kh_get(32, h, 5);
	kh_del(32, h, k);
	for (k = kh_begin(h); k != kh_end(h); ++k)
		if (kh_exist(h, k)) kh_value(h, k) = 1;
	kh_destroy(32, h);
	return 0;
}
*/

/*
  2009-09-26 (0.2.4):

    * Improve portability

  2008-09-19 (0.2.3):

	* Corrected the example
	* Improved interfaces

  2008-09-11 (0.2.2):

	* Improved speed a little in kh_put()

  2008-09-10 (0.2.1):

	* Added kh_clear()
	* Fixed a compiling error

  2008-09-02 (0.2.0):

	* Changed to token concatenation which increases flexibility.

  2008-08-31 (0.1.2):

	* Fixed a bug in kh_get(), which has not been tested previously.

  2008-08-31 (0.1.1):

	* Added destructor
*/

#ifndef __AC_KHASH_H
#define __AC_KHASH_H

/*!
  @header

  Generic hash table library.

  @copyright Heng Li
 */

#define AC_VERSION_KHASH_H &quot;0.2.4&quot;

#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#include &lt;limits.h&gt;

/* compiler-specific configuration */

#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#endif

#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif

#ifdef _MSC_VER
#define inline __inline
#endif

typedef khint32_t khint_t;
typedef khint_t khiter_t;

#define __ac_HASH_PRIME_SIZE 32
static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
{
  0ul,          3ul,          11ul,         23ul,         53ul,
  97ul,         193ul,        389ul,        769ul,        1543ul,
  3079ul,       6151ul,       12289ul,      24593ul,      49157ul,
  98317ul,      196613ul,     393241ul,     786433ul,     1572869ul,
  3145739ul,    6291469ul,    12582917ul,   25165843ul,   50331653ul,
  100663319ul,  201326611ul,  402653189ul,  805306457ul,  1610612741ul,
  3221225473ul, 4294967291ul
};

/* Bucket status flags. Every bucket owns two adjacent bits of a flag word, so
 * one 32-bit word describes 16 buckets: the word index is i/16 and the bit
 * offset inside the word is 2*(i%16). Within a pair, the low bit (value 1)
 * marks a deleted bucket and the high bit (value 2) marks an empty bucket; a
 * bucket holding live data has both bits clear.
 *
 * The macro arguments are not parenthesized in the expansions, so pass simple
 * expressions only. */
#define __ac_isempty(flag, i) ((flag[i&gt;&gt;4]&gt;&gt;((i&amp;0xfU)&lt;&lt;1))&amp;2)
#define __ac_isdel(flag, i) ((flag[i&gt;&gt;4]&gt;&gt;((i&amp;0xfU)&lt;&lt;1))&amp;1)
#define __ac_iseither(flag, i) ((flag[i&gt;&gt;4]&gt;&gt;((i&amp;0xfU)&lt;&lt;1))&amp;3)
#define __ac_set_isdel_false(flag, i) (flag[i&gt;&gt;4]&amp;=~(1ul&lt;&lt;((i&amp;0xfU)&lt;&lt;1)))
#define __ac_set_isempty_false(flag, i) (flag[i&gt;&gt;4]&amp;=~(2ul&lt;&lt;((i&amp;0xfU)&lt;&lt;1)))
#define __ac_set_isboth_false(flag, i) (flag[i&gt;&gt;4]&amp;=~(3ul&lt;&lt;((i&amp;0xfU)&lt;&lt;1)))
#define __ac_set_isdel_true(flag, i) (flag[i&gt;&gt;4]|=1ul&lt;&lt;((i&amp;0xfU)&lt;&lt;1))

static const double __ac_HASH_UPPER = 0.77;

/* Instantiate a hash table type and its functions for one key/value type pair.
 *
 *   name          suffix pasted into every generated identifier (kh_NAME_t,
 *                 kh_init_NAME, kh_put_NAME, ...)
 *   khkey_t       key type
 *   khval_t       value type; it is still named when kh_is_map is 0, but the
 *                 vals array is then never allocated
 *   kh_is_map     nonzero to store a value next to every key, 0 for a set
 *   __hash_func   function or macro mapping a key to its hash
 *   __hash_equal  function or macro yielding nonzero when two keys match
 *
 * Collisions are resolved by open addressing with double hashing: probing
 * starts at hash % n_buckets and advances by 1 + hash % (n_buckets - 1),
 * wrapping around the table. Bucket state (empty, deleted or live) is kept in
 * the flags array, two bits per bucket.
 *
 * Generated functions:
 *   kh_init_NAME     calloc() a zeroed table; buckets are allocated later by
 *                    kh_resize_NAME, which kh_put_NAME calls when needed
 *   kh_destroy_NAME  free the three arrays and the table; NULL is accepted
 *   kh_clear_NAME    mark every bucket empty (flag bytes 0xaa) and reset the
 *                    counters without releasing memory
 *   kh_get_NAME      return the bucket index holding key, or n_buckets when
 *                    the key is absent (0 for a table without buckets)
 *   kh_resize_NAME   rehash, in place, into the smallest entry of
 *                    __ac_prime_list greater than the requested size; does
 *                    nothing when that size could not hold the current
 *                    contents at the __ac_HASH_UPPER load factor
 *   kh_put_NAME      find or create the bucket for key and return its index;
 *                    *ret is 0 if the key was already present, 1 if an empty
 *                    bucket was taken, 2 if a deleted bucket was reused. When
 *                    n_occupied reaches upper_bound it first rehashes at the
 *                    same size if live entries fill less than half of the
 *                    buckets, and grows to the next prime otherwise.
 *   kh_del_NAME      mark bucket x deleted and decrement size; ignored when
 *                    x is n_buckets or the bucket holds no live entry
 *
 * NOTE(review): the results of malloc()/realloc() in kh_resize_NAME are used
 * without a NULL check, and a request that is not below the last entry of
 * __ac_prime_list makes it read one element past that array.
 */
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	typedef struct {													\
		khint_t n_buckets, size, n_occupied, upper_bound;				\
		khint32_t *flags;												\
		khkey_t *keys;													\
		khval_t *vals;													\
	} kh_##name##_t;													\
	static inline kh_##name##_t *kh_init_##name() {						\
		return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));		\
	}																	\
	static inline void kh_destroy_##name(kh_##name##_t *h)				\
	{																	\
		if (h) {														\
			free(h-&gt;keys); free(h-&gt;flags);								\
			free(h-&gt;vals);												\
			free(h);													\
		}																\
	}																	\
	static inline void kh_clear_##name(kh_##name##_t *h)				\
	{																	\
		if (h &amp;&amp; h-&gt;flags) {											\
			memset(h-&gt;flags, 0xaa, ((h-&gt;n_buckets&gt;&gt;4) + 1) * sizeof(khint32_t)); \
			h-&gt;size = h-&gt;n_occupied = 0;								\
		}																\
	}																	\
	static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
	{																	\
		if (h-&gt;n_buckets) {												\
			khint_t inc, k, i, last;									\
			k = __hash_func(key); i = k % h-&gt;n_buckets;					\
			inc = 1 + k % (h-&gt;n_buckets - 1); last = i;					\
			while (!__ac_isempty(h-&gt;flags, i) &amp;&amp; (__ac_isdel(h-&gt;flags, i) || !__hash_equal(h-&gt;keys[i], key))) { \
				if (i + inc &gt;= h-&gt;n_buckets) i = i + inc - h-&gt;n_buckets; \
				else i += inc;											\
				if (i == last) return h-&gt;n_buckets;						\
			}															\
			return __ac_iseither(h-&gt;flags, i)? h-&gt;n_buckets : i;		\
		} else return 0;												\
	}																	\
	static inline void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
	{																	\
		khint32_t *new_flags = 0;										\
		khint_t j = 1;													\
		{																\
			khint_t t = __ac_HASH_PRIME_SIZE - 1;						\
			while (__ac_prime_list[t] &gt; new_n_buckets) --t;				\
			new_n_buckets = __ac_prime_list[t+1];						\
			if (h-&gt;size &gt;= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;	\
			else {														\
				new_flags = (khint32_t*)malloc(((new_n_buckets&gt;&gt;4) + 1) * sizeof(khint32_t));	\
				memset(new_flags, 0xaa, ((new_n_buckets&gt;&gt;4) + 1) * sizeof(khint32_t)); \
				if (h-&gt;n_buckets &lt; new_n_buckets) {						\
					h-&gt;keys = (khkey_t*)realloc(h-&gt;keys, new_n_buckets * sizeof(khkey_t)); \
					if (kh_is_map)										\
						h-&gt;vals = (khval_t*)realloc(h-&gt;vals, new_n_buckets * sizeof(khval_t)); \
				}														\
			}															\
		}																\
		if (j) {														\
			for (j = 0; j != h-&gt;n_buckets; ++j) {						\
				if (__ac_iseither(h-&gt;flags, j) == 0) {					\
					khkey_t key = h-&gt;keys[j];							\
					khval_t val;										\
					if (kh_is_map) val = h-&gt;vals[j];					\
					__ac_set_isdel_true(h-&gt;flags, j);					\
					while (1) {											\
						khint_t inc, k, i;								\
						k = __hash_func(key);							\
						i = k % new_n_buckets;							\
						inc = 1 + k % (new_n_buckets - 1);				\
						while (!__ac_isempty(new_flags, i)) {			\
							if (i + inc &gt;= new_n_buckets) i = i + inc - new_n_buckets; \
							else i += inc;								\
						}												\
						__ac_set_isempty_false(new_flags, i);			\
						if (i &lt; h-&gt;n_buckets &amp;&amp; __ac_iseither(h-&gt;flags, i) == 0) { \
							{ khkey_t tmp = h-&gt;keys[i]; h-&gt;keys[i] = key; key = tmp; } \
							if (kh_is_map) { khval_t tmp = h-&gt;vals[i]; h-&gt;vals[i] = val; val = tmp; } \
							__ac_set_isdel_true(h-&gt;flags, i);			\
						} else {										\
							h-&gt;keys[i] = key;							\
							if (kh_is_map) h-&gt;vals[i] = val;			\
							break;										\
						}												\
					}													\
				}														\
			}															\
			if (h-&gt;n_buckets &gt; new_n_buckets) {							\
				h-&gt;keys = (khkey_t*)realloc(h-&gt;keys, new_n_buckets * sizeof(khkey_t)); \
				if (kh_is_map)											\
					h-&gt;vals = (khval_t*)realloc(h-&gt;vals, new_n_buckets * sizeof(khval_t)); \
			}															\
			free(h-&gt;flags);												\
			h-&gt;flags = new_flags;										\
			h-&gt;n_buckets = new_n_buckets;								\
			h-&gt;n_occupied = h-&gt;size;									\
			h-&gt;upper_bound = (khint_t)(h-&gt;n_buckets * __ac_HASH_UPPER + 0.5); \
		}																\
	}																	\
	static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
	{																	\
		khint_t x;														\
		if (h-&gt;n_occupied &gt;= h-&gt;upper_bound) {							\
			if (h-&gt;n_buckets &gt; (h-&gt;size&lt;&lt;1)) kh_resize_##name(h, h-&gt;n_buckets - 1); \
			else kh_resize_##name(h, h-&gt;n_buckets + 1);					\
		}																\
		{																\
			khint_t inc, k, i, site, last;								\
			x = site = h-&gt;n_buckets; k = __hash_func(key); i = k % h-&gt;n_buckets; \
			if (__ac_isempty(h-&gt;flags, i)) x = i;						\
			else {														\
				inc = 1 + k % (h-&gt;n_buckets - 1); last = i;				\
				while (!__ac_isempty(h-&gt;flags, i) &amp;&amp; (__ac_isdel(h-&gt;flags, i) || !__hash_equal(h-&gt;keys[i], key))) { \
					if (__ac_isdel(h-&gt;flags, i)) site = i;				\
					if (i + inc &gt;= h-&gt;n_buckets) i = i + inc - h-&gt;n_buckets; \
					else i += inc;										\
					if (i == last) { x = site; break; }					\
				}														\
				if (x == h-&gt;n_buckets) {								\
					if (__ac_isempty(h-&gt;flags, i) &amp;&amp; site != h-&gt;n_buckets) x = site; \
					else x = i;											\
				}														\
			}															\
		}																\
		if (__ac_isempty(h-&gt;flags, x)) {								\
			h-&gt;keys[x] = key;											\
			__ac_set_isboth_false(h-&gt;flags, x);							\
			++h-&gt;size; ++h-&gt;n_occupied;									\
			*ret = 1;													\
		} else if (__ac_isdel(h-&gt;flags, x)) {							\
			h-&gt;keys[x] = key;											\
			__ac_set_isboth_false(h-&gt;flags, x);							\
			++h-&gt;size;													\
			*ret = 2;													\
		} else *ret = 0;												\
		return x;														\
	}																	\
	static inline void kh_del_##name(kh_##name##_t *h, khint_t x)		\
	{																	\
		if (x != h-&gt;n_buckets &amp;&amp; !__ac_iseither(h-&gt;flags, x)) {			\
			__ac_set_isdel_true(h-&gt;flags, x);							\
			--h-&gt;size;													\
		}																\
	}

/* --- BEGIN OF HASH FUNCTIONS --- */

/*! @function
  @abstract     Integer hash function
  @param  key   The integer [khint32_t]
  @return       The hash value [khint_t]
 */
#define kh_int_hash_func(key) (khint32_t)(key)
/*! @function
  @abstract     Integer comparison function
 */
#define kh_int_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     64-bit integer hash function
  @param  key   The integer [khint64_t]
  @return       The hash value [khint_t]
 */
#define kh_int64_hash_func(key) (khint32_t)((key)&gt;&gt;33^(key)^(key)&lt;&lt;11)
/*! @function
  @abstract     64-bit integer comparison function
 */
#define kh_int64_hash_equal(a, b) ((a) == (b))
/*! @function
  @abstract     String hash: seeded with the first character, then
                hash = hash * 31 + c for every following character
  @param  s     Pointer to a null-terminated string; it is dereferenced
                unconditionally, so it must not be NULL
  @return       The hash value; 0 for the empty string [khint_t]
 */
static inline khint_t __ac_X31_hash_string(const char *s)
{
	khint_t hash = *s;
	const char *p;
	/* An empty string hashes to 0 without entering the loop. */
	if (hash == 0) return hash;
	for (p = s + 1; *p != '\0'; ++p)
		hash = hash * 31 + *p;
	return hash;
}
/*! @function
  @abstract     Another interface to const char* hash function
  @param  key   Pointer to a null terminated string [const char*]
  @return       The hash value [khint_t]
 */
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
/*! @function
  @abstract     Const char* comparison function
 */
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)

/* --- END OF HASH FUNCTIONS --- */

/* Other necessary macros... */

/*!
  @abstract Type of the hash table.
  @param  name  Name of the hash table [symbol]
 */
#define khash_t(name) kh_##name##_t

/*! @function
  @abstract     Initiate a hash table.
  @param  name  Name of the hash table [symbol]
  @return       Pointer to the hash table [khash_t(name)*]
 */
#define kh_init(name) kh_init_##name()

/*! @function
  @abstract     Destroy a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_destroy(name, h) kh_destroy_##name(h)

/*! @function
  @abstract     Reset a hash table without deallocating memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize a hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     New size [khint_t]
 */
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key to the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: 0 if the key is present in the hash table;
                1 if the bucket is empty (never used); 2 if the element in
				the bucket has been deleted [int*]
  @return       Iterator to the inserted element [khint_t]
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Retrieve a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element is absent [khint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove a key from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
 */
#define kh_del(name, h, k) kh_del_##name(h, k)

/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if containing data; 0 otherwise [int]
 */
#define kh_exist(h, x) (!__ac_iseither((h)-&gt;flags, (x)))

/*! @function
  @abstract     Get key given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)-&gt;keys[x])

/*! @function
  @abstract     Get value given an iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   For hash sets, calling this results in segfault.
 */
#define kh_val(h, x) ((h)-&gt;vals[x])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)-&gt;vals[x])

/*! @function
  @abstract     Get the start iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator [khint_t]
 */
#define kh_begin(h) (khint_t)(0)

/*! @function
  @abstract     Get the end iterator
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator [khint_t]
 */
#define kh_end(h) ((h)-&gt;n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
 */
#define kh_size(h) ((h)-&gt;size)

/*! @function
  @abstract     Get the number of buckets in the hash table
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
 */
#define kh_n_buckets(h) ((h)-&gt;n_buckets)

/* More convenient interfaces */

/*! @function
  @abstract     Instantiate a hash set containing integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name)										\
	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t)								\
	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT64(name)										\
	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT64(name, khval_t)								\
	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

typedef const char *kh_cstr_t;
/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name)										\
	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*! @function
  @abstract     Instantiate a hash map containing const char* keys
  @param  name  Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_STR(name, khval_t)								\
	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)

#endif /* __AC_KHASH_H */
</pre>
<p>Here is a small example:</p>
<pre class="brush: cpp;">
#include &lt;stdio.h&gt;
#include &lt;assert.h&gt;
#include &lt;time.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;

#include &quot;khash.h&quot;
KHASH_SET_INIT_STR(str)
KHASH_SET_INIT_INT(int)

static int data_size = 5000000;
static unsigned *int_data;
static char **str_data;

/* Build the benchmark input: fill int_data with data_size pseudo-random
   integers and str_data with heap-allocated copies of their hexadecimal
   spellings. Every integer is derived from one of roughly data_size/4
   possible values, so repeated keys are expected. The results of calloc()
   and strdup() are not checked. */
void ht_init_data()
{
	int i;
	char buf[256];
	khint32_t x = 11; /* generator state, fixed seed */
	printf(&quot;--- generating data... &quot;);
	int_data = (unsigned*)calloc(data_size, sizeof(unsigned));
	str_data = (char**)calloc(data_size, sizeof(char*));
	for (i = 0; i &lt; data_size; ++i) {
		/* scale the state into the range 0..data_size/4, then scatter it by a constant multiplier */
		int_data[i] = (unsigned)(data_size * ((double)x / UINT_MAX) / 4) * 271828183u;
		sprintf(buf, &quot;%x&quot;, int_data[i]);
		str_data[i] = strdup(buf);
		x = 1664525L * x + 1013904223L; /* advance the linear congruential generator */
	}
	printf(&quot;done!\n&quot;);
}
/* Release the strings and the two arrays allocated by ht_init_data(). */
void ht_destroy_data()
{
	int idx = 0;
	while (idx &lt; data_size) {
		free(str_data[idx]);
		++idx;
	}
	free(str_data);
	free(int_data);
}
void ht_khash_int()
{
	int i, ret;
	unsigned *data = int_data;
	khash_t(int) *h;
	unsigned k;

	h = kh_init(int);
	for (i = 0; i &lt; data_size; ++i) {
		k = kh_put(int, h, data[i], &amp;ret);
		if (!ret) kh_del(int, h, k);
	}
	printf(&quot;[ht_khash_int] size: %u\n&quot;, kh_size(h));
	kh_destroy(int, h);
}
/* String-key benchmark: every element of str_data toggles its membership in a
   hash set (inserted when absent, removed when already present), then the
   final number of elements is printed. The set stores the pointers themselves,
   not copies, so str_data must stay allocated while the set is in use. */
void ht_khash_str()
{
	int i, ret;
	char **data = str_data;
	khash_t(str) *h;
	unsigned k;

	h = kh_init(str);
	for (i = 0; i &lt; data_size; ++i) {
		k = kh_put(str, h, data[i], &amp;ret);
		/* ret is 0 when the key was already stored */
		if (!ret) kh_del(str, h, k);
	}
	/* Label the result with this function's own name so that it can be told
	   apart from the integer benchmark's output line. */
	printf(&quot;[ht_khash_str] size: %u\n&quot;, kh_size(h));
	kh_destroy(str, h);
}
/* Call f once and print the processor time, in seconds, that clock() reports
   for the call. */
void ht_timing(void (*f)(void))
{
	clock_t begin, elapsed;

	begin = clock();
	f();
	elapsed = clock() - begin;
	printf(&quot;[ht_timing] %.3lf sec\n&quot;, (double)elapsed / CLOCKS_PER_SEC);
}
int main(int argc, char *argv[])
{
	if (argc &gt; 1) data_size = atoi(argv[1]);
	ht_init_data();
	ht_timing(ht_khash_int);
	ht_timing(ht_khash_str);
	ht_destroy_data();
	return 0;
}
</pre>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/676/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/676/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/676/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/676/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/676/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/676/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/676/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/676/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/676/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/676/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=676&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2009/09/29/khash-h/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>OOP in C? Don&#8217;t go too far.</title>
		<link>http://attractivechaos.wordpress.com/2009/09/20/oop-in-c-dont-go-too-far/</link>
		<comments>http://attractivechaos.wordpress.com/2009/09/20/oop-in-c-dont-go-too-far/#comments</comments>
		<pubDate>Sun, 20 Sep 2009 22:04:16 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[C]]></category>
		<category><![CDATA[development]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=674</guid>
		<description><![CDATA[I was reading some interesting articles about realizing object-oriented programming in ANSI C. It seems that most people commenting on these articles think this is a good idea in general. I want to say something different, though. In my view, it is fine to realize some basic OOP bits such as encapsulation and constructor, but [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=674&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>I was reading some interesting articles about realizing object-oriented programming in ANSI C. It seems that most people commenting on these articles think this is a good idea in general. I want to say something different, though. In my view, it is fine to realize some basic OOP bits such as encapsulation and constructor, but we should not go too far.</p>
<p>In fact, most well-formed C projects contain some basic OOP bits. To avoid using too many global variables or a long list of arguments to a C function, we usually put related variables in a struct and pass a pointer to the struct between functions. Frequently we define functions to (de)allocate memory for the struct. Occasionally we even put the definition of the struct in .c files rather than .h to completely hide the details of the struct. This is basic encapsulation and (de)construction. We frequently use &#8220;static&#8221; functions inside a source file. These are private functions. We should stop here, though.</p>
<p>Most of these OOP-in-C articles further mimic methods, inheritance, messaging and more OOP bits. However, all these things come at the cost of speed and/or space efficiency. For example, although we may use pointers to functions to mimic methods, pointers take memory and prevent the compiler from inlining simple functions. If we really want to follow the C++ methodology and make everything an object, the overhead of these bits is huge.</p>
<p>The most frequent motivation for using OOP in C is that the programmer needs portability while (s)he only knows OOP or thinks OOP is better. I do not want to argue whether OOP is better than procedural programming, but I really think it is a big mistake to try to mimic all the OOP bits in C in an unnecessarily complicated way, given all the performance overhead. If you have to use C in your project, learn and be good at procedural programming, which has proved to be at least as good as OOP in a lot of practical applications.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/674/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/674/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/674/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/674/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/674/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/674/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/674/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/674/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/674/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/674/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=674&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2009/09/20/oop-in-c-dont-go-too-far/feed/</wfw:commentRss>
		<slash:comments>5</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>Read files on FTP/HTTP</title>
		<link>http://attractivechaos.wordpress.com/2009/08/02/read-files-on-ftphttp/</link>
		<comments>http://attractivechaos.wordpress.com/2009/08/02/read-files-on-ftphttp/#comments</comments>
		<pubDate>Sun, 02 Aug 2009 10:59:49 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=648</guid>
		<description><![CDATA[In a project I want to directly open part of a remote file sitting on FTP or HTTP. I do not want to download the whole file because that file is frequently over 10GB in size. What I want to do is to have function calls with a similar interface to fopen/fclose/ftell/fseek/fread (I do not [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=648&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>In a project I want to directly open part of a remote file sitting on FTP or HTTP. I do not want to download the whole file because that file is frequently over 10GB in size. What I want to do is to have function calls with a similar interface to fopen/fclose/ftell/fseek/fread (I do not need fwrite for the moment). I can then open a remote file as if it is local. I did quite a bit of Google searching for a suitable library, but most of the related libraries are designed for file transfer. In the end, I decided to write my own library for this task. And here it is.</p>
<p>This library consists of one C header file and one C source file. It was originally developed for Linux/Mac and then ported to Windows, supporting the MinGW compiler. On Linux, the implemented features work properly. On Windows, however, random access to FTP files sometimes does not.</p>
<p>This library provides knet_open(), knet_close(), knet_tell(), knet_seek() and knet_read() function calls. You can manipulate a file on HTTP with, for example:</p>
<pre class="brush: cpp;">
char buf[4096];
knetFile *fp = knet_open(&quot;http://host/file&quot;, &quot;r&quot;);
knet_seek(fp, 1000, SEEK_SET);
knet_read(fp, buf, 4096);
knet_close(fp);
</pre>
<p>Opening an FTP file is similar. This library also works transparently on local files, that is, whenever the file name does not start with &#8220;http://&#8221; or &#8220;ftp://&#8221;.</p>
<p>This is my first attempt at network programming and surely a lot of things can be improved. Please leave me a message if you have any suggestions. Thanks in advance.</p>
<p>Here is the C header file:</p>
<pre class="brush: cpp;">

#ifndef KNETFILE_H
#define KNETFILE_H

#include &lt;stdint.h&gt;
#include &lt;fcntl.h&gt;

#ifndef _WIN32
#define netread(fd, ptr, len) read(fd, ptr, len)
#define netwrite(fd, ptr, len) write(fd, ptr, len)
#define netclose(fd) close(fd)
#else
#include &lt;winsock.h&gt;
#define netread(fd, ptr, len) recv(fd, ptr, len, 0)
#define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
#define netclose(fd) closesocket(fd)
#endif

// FIXME: currently I/O is unbuffered

#define KNF_TYPE_LOCAL 1
#define KNF_TYPE_FTP   2
#define KNF_TYPE_HTTP  3

// Handle returned by knet_open()/knet_dopen(). One structure serves all three
// KNF_TYPE_* kinds of file; the FTP-only and HTTP-only groups are marked below.
typedef struct knetFile_s {
	int type, fd; // type: presumably one of the KNF_TYPE_* constants (TODO confirm); fd: descriptor exposed by knet_fileno()
	int64_t offset; // position exposed by knet_tell()
	char *host, *port; // presumably the server name and port of a remote file (TODO confirm)

	// the following are for FTP only
	// is_ready: when 0, knet_read() updates fd before reading; knet_seek() resets it to 0
	int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
	char *response, *retr;
	int64_t seek_offset; // for lazy seek

	// the following are for HTTP only
	char *path, *http_host;
} knetFile;

#define knet_tell(fp) ((fp)-&gt;offset)
#define knet_fileno(fp) ((fp)-&gt;fd)

#ifdef __cplusplus
extern &quot;C&quot; {
#endif

#ifdef _WIN32
	int knet_win32_init();
	void knet_win32_destroy();
#endif

	knetFile *knet_open(const char *fn, const char *mode);

	/*
	   This only works with local files.
	 */
	knetFile *knet_dopen(int fd, const char *mode);

	/*
	  If -&gt;is_ready==0, this routine updates -&gt;fd; otherwise, it simply
	  reads from -&gt;fd.
	 */
	off_t knet_read(knetFile *fp, void *buf, off_t len);

	/*
	  This routine only sets -&gt;offset and -&gt;is_ready=0. It does not
	  communicate with the FTP server.
	 */
	int knet_seek(knetFile *fp, off_t off, int whence);
	int knet_close(knetFile *fp);

#ifdef __cplusplus
}
#endif

#endif
</pre>
<p>Here is the main C source code:</p>
<pre class="brush: cpp;">
/* Probably I will not do socket programming in the next few years and
   therefore I decide to heavily annotate this file, for Linux and
   Windows as well. */

#include &lt;time.h&gt;
#include &lt;stdio.h&gt;
#include &lt;ctype.h&gt;
#include &lt;stdlib.h&gt;
#include &lt;string.h&gt;
#include &lt;unistd.h&gt;
#include &lt;sys/types.h&gt;

#ifdef _WIN32
#include &lt;winsock.h&gt;
#else
#include &lt;netdb.h&gt;
#include &lt;arpa/inet.h&gt;
#include &lt;sys/socket.h&gt;
#endif

#include &quot;knetfile.h&quot;

/* In winsock.h, the type of a socket is SOCKET, which is: &quot;typedef
 * u_int SOCKET&quot;. An invalid SOCKET is: &quot;(SOCKET)(~0)&quot;, or signed
 * integer -1. In knetfile.c, I use &quot;int&quot; for socket type
 * throughout. This should be improved to avoid confusion.
 *
 * In Linux/Mac, recv() and read() do almost the same thing. You can see
 * in the header file that netread() is simply an alias of read(). In
 * Windows, however, they are different and using recv() is mandatory.
 */

/* This function tests if the file handler is ready for reading (or
 * writing if is_read==0). */
static int socket_wait(int fd, int is_read)
{
	fd_set fds, *fdr = 0, *fdw = 0;
	struct timeval tv;
	int ret;
	tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
	FD_ZERO(&amp;fds);
	FD_SET(fd, &amp;fds);
	if (is_read) fdr = &amp;fds;
	else fdw = &amp;fds;
	ret = select(fd+1, fdr, fdw, 0, &amp;tv);
	if (ret == -1) perror(&quot;select&quot;);
	return ret;
}

#ifndef _WIN32
/* Open a TCP connection to host:port and return the connected socket,
 * or -1 on failure (a diagnostic is printed to stderr).
 *
 * This function does not work with Windows due to the lack of
 * getaddrinfo() in winsock. It is adapted from an example in &quot;Beej's
 * Guide to Network Programming&quot; (http://beej.us/guide/bgnet/).
 *
 * Only the first address returned by getaddrinfo() is tried. */
static int socket_connect(const char *host, const char *port)
{
	int on = 1, fd = -1, rc;
	struct linger lng = { 0, 0 };
	struct addrinfo hints, *res = 0;
	const char *failed = 0; // name of the call that failed, if any

	memset(&amp;hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	/* In Unix/Mac, getaddrinfo() is the most convenient way to get
	 * server information. It reports errors through its return value,
	 * not errno, and leaves res unset on failure, so it must neither be
	 * followed by perror() nor by freeaddrinfo(). */
	if ((rc = getaddrinfo(host, port, &amp;hints, &amp;res)) != 0) {
		fprintf(stderr, &quot;getaddrinfo: %s\n&quot;, gai_strerror(rc));
		return -1;
	}
	/* The two setsockopt() calls are used by ftplib
	 * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they are
	 * necessary. */
	if ((fd = socket(res-&gt;ai_family, res-&gt;ai_socktype, res-&gt;ai_protocol)) == -1) failed = &quot;socket&quot;;
	else if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &amp;on, sizeof(on)) == -1) failed = &quot;setsockopt&quot;;
	else if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &amp;lng, sizeof(lng)) == -1) failed = &quot;setsockopt&quot;;
	else if (connect(fd, res-&gt;ai_addr, res-&gt;ai_addrlen) != 0) failed = &quot;connect&quot;;
	if (failed) {
		perror(failed); // report before close() can disturb errno
		if (fd != -1) close(fd); // do not leak the half-configured socket
		fd = -1;
	}
	freeaddrinfo(res);
	return fd;
}
#else
/* On Windows, winsock has to be initialised before any socket call is
 * made. knet_win32_init() requests winsock version 2.2 and returns the
 * result of WSAStartup() unchanged. */
int knet_win32_init()
{
	WSADATA wsaData;
	return WSAStartup(MAKEWORD(2, 2), &amp;wsaData);
}
/* Counterpart of knet_win32_init(): releases winsock via WSACleanup(). */
void knet_win32_destroy()
{
	WSACleanup();
}
/* A slightly modfied version of the following function also works on
 * Mac (and presummably Linux). However, this function is not stable on
 * my Mac. It sometimes works fine but sometimes does not. Therefore for
 * non-Windows OS, I do not use this one. */
static SOCKET socket_connect(const char *host, const char *port)
{
#define __err_connect(func) do { perror(func); return -1; } while (0)

	int on = 1;
	SOCKET fd;
	struct linger lng = { 0, 0 };
	struct sockaddr_in server;
	struct hostent *hp = 0;
	// open socket
	if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect(&quot;socket&quot;);
	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&amp;on, sizeof(on)) == -1) __err_connect(&quot;setsockopt&quot;);
	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&amp;lng, sizeof(lng)) == -1) __err_connect(&quot;setsockopt&quot;);
	// get host info
	if (isalpha(host[0])) hp = gethostbyname(host);
	else {
		struct in_addr addr;
		addr.s_addr = inet_addr(host);
		hp = gethostbyaddr((char*)&amp;addr, 4, AF_INET);
	}
	if (hp == 0) __err_connect(&quot;gethost&quot;);
	// connect
	server.sin_addr.s_addr = *((unsigned long*)hp-&gt;h_addr);
	server.sin_family= AF_INET;
	server.sin_port = htons(atoi(port));
	if (connect(fd, (struct sockaddr*)&amp;server, sizeof(server)) != 0) __err_connect(&quot;connect&quot;);
	// freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
	return fd;
}
#endif

static off_t my_netread(int fd, void *buf, off_t len)
{
	off_t rest = len, curr, l = 0;
	/* recv() and read() may not read the required length of data with
	 * one call. They have to be called repeatedly. */
	while (rest) {
		if (socket_wait(fd, 1) &lt;= 0) break; // socket is not ready for reading
		curr = netread(fd, buf + l, rest);
		/* According to the glibc manual, section 13.2, a zero returned
		 * value indicates end-of-file (EOF), which should mean that
		 * read() will not return zero if EOF has not been met but data
		 * are not immediately available. */
		if (curr == 0) break;
		l += curr; rest -= curr;
	}
	return l;
}

/*************************
 * FTP specific routines *
 *************************/

/* Read one complete reply from the FTP control connection into
 * ftp-&gt;response (growing the buffer as needed) and return its numeric
 * reply code. Lines of a multi-line reply are skipped until the final
 * line, i.e. one starting with three digits not followed by '-'.
 * Returns 0 when the control socket does not become readable in time
 * and -1 when no usable reply was read or memory ran out. */
static int kftp_get_response(knetFile *ftp)
{
	unsigned char c;
	int n = 0;
	if (socket_wait(ftp-&gt;ctrl_fd, 1) &lt;= 0) return 0;
	/* netread() returns -1 on error, so test for &quot;&gt; 0&quot; rather than
	 * for non-zero; otherwise an error would loop forever. */
	while (netread(ftp-&gt;ctrl_fd, &amp;c, 1) &gt; 0) { // FIXME: this is *VERY BAD* for unbuffered I/O
		//fputc(c, stderr);
		if (n &gt;= ftp-&gt;max_response) {
			int new_max = ftp-&gt;max_response? ftp-&gt;max_response&lt;&lt;1 : 256;
			char *grown = realloc(ftp-&gt;response, new_max);
			if (grown == 0) return -1; // the old buffer is still owned by ftp
			ftp-&gt;response = grown;
			ftp-&gt;max_response = new_max;
		}
		ftp-&gt;response[n++] = c;
		if (c == '\n') {
			if (n &gt;= 4 &amp;&amp; isdigit((unsigned char)ftp-&gt;response[0])
				&amp;&amp; isdigit((unsigned char)ftp-&gt;response[1])
				&amp;&amp; isdigit((unsigned char)ftp-&gt;response[2])
				&amp;&amp; ftp-&gt;response[3] != '-') break;
			n = 0; // not the final line: start over with the next one
		}
	}
	if (n &lt; 2) return -1;
	ftp-&gt;response[n-2] = 0; // chop the trailing line terminator
	return (int)strtol(ftp-&gt;response, 0, 10);
}

/* Send the command string cmd over the FTP control connection. When
 * is_get is non-zero the reply is read and its code returned (see
 * kftp_get_response()); otherwise 0 is returned. Returns -1 when the
 * socket is not writable in time or the command could not be written
 * completely. */
static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
{
	size_t len = strlen(cmd);
	if (socket_wait(ftp-&gt;ctrl_fd, 0) &lt;= 0) return -1; // socket is not ready for writing
	if ((long)netwrite(ftp-&gt;ctrl_fd, cmd, len) != (long)len) return -1;
	return is_get? kftp_get_response(ftp) : 0;
}

/* Send PASV and store the data endpoint announced by the server in
 * ftp-&gt;pasv_ip and ftp-&gt;pasv_port. Returns 0 on success. On failure
 * returns -1 and leaves ftp-&gt;pasv_port at 0, so that a stale endpoint
 * from an earlier call cannot be reused by kftp_pasv_connect(). */
static int kftp_pasv_prep(knetFile *ftp)
{
	char *p;
	int v[6], i;
	ftp-&gt;pasv_port = 0;
	/* 227 is the &quot;Entering Passive Mode&quot; reply; a positive reply code
	 * also guarantees that -&gt;response is allocated and terminated. */
	if (kftp_send_cmd(ftp, &quot;PASV\r\n&quot;, 1) != 227 || ftp-&gt;response == 0) return -1;
	p = strchr(ftp-&gt;response, '(');
	if (p == 0) return -1;
	// the reply carries the endpoint as (h1,h2,h3,h4,p1,p2)
	if (sscanf(p + 1, &quot;%d,%d,%d,%d,%d,%d&quot;, &amp;v[0], &amp;v[1], &amp;v[2], &amp;v[3], &amp;v[4], &amp;v[5]) != 6) return -1;
	for (i = 0; i &lt; 6; ++i)
		if (v[i] &lt; 0 || v[i] &gt; 255) return -1; // every field is one byte
	memcpy(ftp-&gt;pasv_ip, v, 4 * sizeof(int));
	ftp-&gt;pasv_port = (v[4]&lt;&lt;8) + v[5];
	return 0;
}

/* Connect the data socket ftp-&gt;fd to the endpoint recorded by
 * kftp_pasv_prep(). Returns 0 on success and -1 on failure; on a failed
 * connection ftp-&gt;fd is -1. */
static int kftp_pasv_connect(knetFile *ftp)
{
	char host[80], port[16];
	if (ftp-&gt;pasv_port == 0) {
		fprintf(stderr, &quot;[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n&quot;);
		return -1;
	}
	/* the values originate from the server, so use bounded formatting */
	snprintf(host, sizeof(host), &quot;%d.%d.%d.%d&quot;, ftp-&gt;pasv_ip[0], ftp-&gt;pasv_ip[1], ftp-&gt;pasv_ip[2], ftp-&gt;pasv_ip[3]);
	snprintf(port, sizeof(port), &quot;%d&quot;, ftp-&gt;pasv_port);
	ftp-&gt;fd = socket_connect(host, port);
	if (ftp-&gt;fd == -1) return -1;
	return 0;
}

/* Open the FTP control connection to ftp-&gt;host:ftp-&gt;port, read the
 * server greeting, log in anonymously and switch to binary transfer
 * mode. Returns -1 when the connection cannot be opened and 0
 * otherwise; the replies to the login commands are not inspected. */
int kftp_connect(knetFile *ftp)
{
	static const char *const login_cmds[] = {
		&quot;USER anonymous\r\n&quot;,
		&quot;PASS kftp@\r\n&quot;,
		&quot;TYPE I\r\n&quot;
	};
	size_t i;

	ftp-&gt;ctrl_fd = socket_connect(ftp-&gt;host, ftp-&gt;port);
	if (ftp-&gt;ctrl_fd == -1)
		return -1;
	kftp_get_response(ftp); // consume the greeting
	for (i = 0; i &lt; sizeof(login_cmds) / sizeof(login_cmds[0]); ++i)
		kftp_send_cmd(ftp, login_cmds[i], 1);
	return 0;
}

/* Drop the control and data connections, if open, and log in again via
 * kftp_connect(). Returns the result of kftp_connect(). Both descriptors
 * are reset to -1 once closed so that later code cannot close them a
 * second time. */
int kftp_reconnect(knetFile *ftp)
{
	if (ftp-&gt;ctrl_fd != -1) {
		netclose(ftp-&gt;ctrl_fd);
		ftp-&gt;ctrl_fd = -1;
	}
	if (ftp-&gt;fd != -1) {
		netclose(ftp-&gt;fd);
		ftp-&gt;fd = -1;
	}
	return kftp_connect(ftp);
}

// initialize -&gt;type, -&gt;host and -&gt;retr
knetFile *kftp_parse_url(const char *fn, const char *mode)
{
	knetFile *fp;
	char *p;
	int l;
	if (strstr(fn, &quot;ftp://&quot;) != fn) return 0;
	for (p = (char*)fn + 6; *p &amp;&amp; *p != '/'; ++p);
	if (*p != '/') return 0;
	l = p - fn - 6;
	fp = calloc(1, sizeof(knetFile));
	fp-&gt;type = KNF_TYPE_FTP;
	fp-&gt;fd = -1;
	/* the Linux/Mac version of socket_connect() also recognizes a port
	 * like &quot;ftp&quot;, but the Windows version does not. */
	fp-&gt;port = strdup(&quot;21&quot;);
	fp-&gt;host = calloc(l + 1, 1);
	if (strchr(mode, 'c')) fp-&gt;no_reconnect = 1;
	strncpy(fp-&gt;host, fn + 6, l);
	fp-&gt;retr = calloc(strlen(p) + 8, 1);
	sprintf(fp-&gt;retr, &quot;RETR %s\r\n&quot;, p);
	fp-&gt;seek_offset = -1;
	return fp;
}
/* Open a new FTP data connection positioned at fp-&gt;offset: close the
 * previous data socket, negotiate passive mode, send REST when the
 * offset is non-zero, send RETR and connect the data socket. Returns 0
 * and sets -&gt;is_ready on success; returns -1 with -&gt;fd == -1 on
 * failure. */
int kftp_connect_file(knetFile *fp)
{
	int ret;
	if (fp-&gt;fd != -1) {
		netclose(fp-&gt;fd);
		fp-&gt;fd = -1;
		if (fp-&gt;no_reconnect) kftp_get_response(fp);
	}
	if (kftp_pasv_prep(fp) != 0) return -1; // no data endpoint to connect to
	if (fp-&gt;offset) {
		char tmp[32];
		sprintf(tmp, &quot;REST %lld\r\n&quot;, (long long)fp-&gt;offset);
		kftp_send_cmd(fp, tmp, 1);
	}
	/* RETR is sent without waiting for the reply: the data connection
	 * is opened first and the reply is read afterwards. */
	kftp_send_cmd(fp, fp-&gt;retr, 0);
	kftp_pasv_connect(fp);
	ret = kftp_get_response(fp);
	/* 125 and 150 are the two preliminary replies that announce the
	 * start of a transfer. */
	if ((ret != 150 &amp;&amp; ret != 125) || fp-&gt;fd == -1) {
		fprintf(stderr, &quot;[kftp_connect_file] %s\n&quot;, fp-&gt;response? fp-&gt;response : &quot;&quot;);
		if (fp-&gt;fd != -1) netclose(fp-&gt;fd);
		fp-&gt;fd = -1;
		return -1;
	}
	fp-&gt;is_ready = 1;
	return 0;
}

/**************************
 * HTTP specific routines *
 **************************/

knetFile *khttp_parse_url(const char *fn, const char *mode)
{
	knetFile *fp;
	char *p, *proxy, *q;
	int l;
	if (strstr(fn, &quot;http://&quot;) != fn) return 0;
	// set -&gt;http_host
	for (p = (char*)fn + 7; *p &amp;&amp; *p != '/'; ++p);
	l = p - fn - 7;
	fp = calloc(1, sizeof(knetFile));
	fp-&gt;http_host = calloc(l + 1, 1);
	strncpy(fp-&gt;http_host, fn + 7, l);
	fp-&gt;http_host[l] = 0;
	for (q = fp-&gt;http_host; *q &amp;&amp; *q != ':'; ++q);
	if (*q == ':') *q++ = 0;
	// get http_proxy
	proxy = getenv(&quot;http_proxy&quot;);
	// set -&gt;host, -&gt;port and -&gt;path
	if (proxy == 0) {
		fp-&gt;host = strdup(fp-&gt;http_host); // when there is no proxy, server name is identical to http_host name.
		fp-&gt;port = strdup(*q? q : &quot;80&quot;);
		fp-&gt;path = strdup(*p? p : &quot;/&quot;);
	} else {
		fp-&gt;host = (strstr(proxy, &quot;http://&quot;) == proxy)? strdup(proxy + 7) : strdup(proxy);
		for (q = fp-&gt;host; *q &amp;&amp; *q != ':'; ++q);
		if (*q == ':') *q++ = 0;
		fp-&gt;port = strdup(*q? q : &quot;80&quot;);
		fp-&gt;path = strdup(fn);
	}
	fp-&gt;type = KNF_TYPE_HTTP;
	fp-&gt;ctrl_fd = fp-&gt;fd = -1;
	fp-&gt;seek_offset = -1;
	return fp;
}

/* Open a new HTTP connection positioned at fp-&gt;offset: connect to
 * fp-&gt;host:fp-&gt;port, send a GET request (with a Range header when the
 * offset is non-zero) and consume the response header. If the server
 * ignores the range and answers 200, the leading fp-&gt;offset bytes of
 * the body are read and discarded. Returns 0 and sets -&gt;is_ready on
 * success; returns -1 with -&gt;fd == -1 on failure. */
int khttp_connect_file(knetFile *fp)
{
	enum { BUF_SIZE = 0x10000 }; // FIXME: I am lazy... But in principle, 64KB should be large enough.
	int ret, l = 0;
	char *buf = 0;
	if (fp-&gt;fd != -1) netclose(fp-&gt;fd);
	fp-&gt;fd = socket_connect(fp-&gt;host, fp-&gt;port);
	if (fp-&gt;fd == -1) return -1;
	buf = calloc(BUF_SIZE, 1);
	if (buf == 0) goto fail;
	/* build the request with bounded formatting: -&gt;path comes from the
	 * caller and may be arbitrarily long */
	if (fp-&gt;offset)
		l = snprintf(buf, BUF_SIZE, &quot;GET %s HTTP/1.0\r\nHost: %s\r\nRange: bytes=%lld-\r\n\r\n&quot;,
					 fp-&gt;path, fp-&gt;http_host, (long long)fp-&gt;offset);
	else
		l = snprintf(buf, BUF_SIZE, &quot;GET %s HTTP/1.0\r\nHost: %s\r\n\r\n&quot;, fp-&gt;path, fp-&gt;http_host);
	if (l &lt; 0 || l &gt;= BUF_SIZE) goto fail; // request does not fit
	if ((int)netwrite(fp-&gt;fd, buf, l) != l) goto fail;
	l = 0;
	/* read the HTTP header one byte at a time up to the blank line;
	 * stop on EOF, on error and when the buffer is full.
	 * FIXME: bad efficiency */
	while (l &lt; BUF_SIZE - 1 &amp;&amp; netread(fp-&gt;fd, buf + l, 1) &gt; 0) {
		if (buf[l] == '\n' &amp;&amp; l &gt;= 3 &amp;&amp; strncmp(buf + l - 3, &quot;\r\n\r\n&quot;, 4) == 0) break;
		++l;
	}
	buf[l] = 0;
	if (l &lt; 14) goto fail; // prematured header
	ret = (int)strtol(buf + 8, 0, 10); // HTTP return code follows the 8-character version field
	if (ret == 200 &amp;&amp; fp-&gt;offset) { // 200 (complete result); then skip beginning of the file
		off_t rest = fp-&gt;offset;
		while (rest &gt; 0) {
			off_t want = rest &lt; BUF_SIZE? rest : BUF_SIZE;
			off_t got = my_netread(fp-&gt;fd, buf, want);
			if (got &lt;= 0) goto fail; // file ends before the requested offset
			rest -= got;
		}
	} else if (ret != 206 &amp;&amp; ret != 200) {
		fprintf(stderr, &quot;[khttp_connect_file] fail to open file (HTTP code: %d).\n&quot;, ret);
		goto fail;
	}
	free(buf);
	fp-&gt;is_ready = 1;
	return 0;
fail:
	free(buf);
	netclose(fp-&gt;fd);
	fp-&gt;fd = -1;
	return -1;
}

/********************
 * Generic routines *
 ********************/

knetFile *knet_open(const char *fn, const char *mode)
{
	knetFile *fp = 0;
	if (mode[0] != 'r') {
		fprintf(stderr, &quot;[kftp_open] only mode \&quot;r\&quot; is supported.\n&quot;);
		return 0;
	}
	if (strstr(fn, &quot;ftp://&quot;) == fn) {
		fp = kftp_parse_url(fn, mode);
		if (fp == 0) return 0;
		if (kftp_connect(fp) == -1) {
			knet_close(fp);
			return 0;
		}
		kftp_connect_file(fp);
	} else if (strstr(fn, &quot;http://&quot;) == fn) {
		fp = khttp_parse_url(fn, mode);
		if (fp == 0) return 0;
		khttp_connect_file(fp);
	} else { // local file
#ifdef _WIN32
		/* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
		 * be undefined on some systems, although it is defined on my
		 * Mac and the Linux I have tested on. */
		int fd = open(fn, O_RDONLY | O_BINARY);
#else
		int fd = open(fn, O_RDONLY);
#endif
		if (fd == -1) {
			perror(&quot;open&quot;);
			return 0;
		}
		fp = (knetFile*)calloc(1, sizeof(knetFile));
		fp-&gt;type = KNF_TYPE_LOCAL;
		fp-&gt;fd = fd;
		fp-&gt;ctrl_fd = -1;
	}
	if (fp &amp;&amp; fp-&gt;fd == -1) {
		knet_close(fp);
		return 0;
	}
	return fp;
}

knetFile *knet_dopen(int fd, const char *mode)
{
	knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
	fp-&gt;type = KNF_TYPE_LOCAL;
	fp-&gt;fd = fd;
	return fp;
}

/* Read up to len bytes at the current offset into buf and advance
 * -&gt;offset by the number of bytes read, which is returned. For FTP and
 * HTTP files a new connection positioned at -&gt;offset is opened first
 * when -&gt;is_ready is 0 (i.e. after knet_seek()). Returns 0 when the
 * handle has no open descriptor or the connection cannot be
 * re-established. */
off_t knet_read(knetFile *fp, void *buf, off_t len)
{
	off_t l = 0;
	if (fp-&gt;fd == -1) return 0;
	if (fp-&gt;type == KNF_TYPE_FTP) {
		if (fp-&gt;is_ready == 0) {
			if (!fp-&gt;no_reconnect) kftp_reconnect(fp);
			kftp_connect_file(fp);
		}
	} else if (fp-&gt;type == KNF_TYPE_HTTP) {
		if (fp-&gt;is_ready == 0)
			khttp_connect_file(fp);
	}
	if (fp-&gt;fd == -1) return 0; // repositioning failed; nothing to read from
	if (fp-&gt;type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
		char *dst = (char*)buf; // arithmetic on void* is a compiler extension
		off_t rest = len, curr;
		while (rest &gt; 0) {
			curr = read(fp-&gt;fd, dst + l, rest);
			if (curr &lt;= 0) break; // 0 is EOF; -1 is an error and must not be counted
			l += curr; rest -= curr;
		}
	} else l = my_netread(fp-&gt;fd, buf, len);
	fp-&gt;offset += l;
	return l;
}

/* Move the read position of fp. Local files are repositioned with
 * lseek(). For FTP/HTTP files only SEEK_SET is accepted: the new offset
 * is recorded and -&gt;is_ready cleared, without contacting the server.
 * Returns 0 on success and -1 on failure. */
int knet_seek(knetFile *fp, off_t off, int whence)
{
	off_t where;

	if (whence == SEEK_SET &amp;&amp; off == fp-&gt;offset) return 0; // already there

	switch (fp-&gt;type) {
	case KNF_TYPE_LOCAL:
		/* Be aware that lseek() returns the offset after seeking,
		 * while fseek() returns zero on success. */
		where = lseek(fp-&gt;fd, off, whence);
		if (where == -1) {
			perror(&quot;lseek&quot;);
			return -1;
		}
		fp-&gt;offset = where;
		return 0;
	case KNF_TYPE_FTP:
	case KNF_TYPE_HTTP:
		if (whence != SEEK_SET) { // FIXME: we can surely allow SEEK_CUR and SEEK_END in future
			fprintf(stderr, &quot;[knet_seek] only SEEK_SET is supported for FTP/HTTP. Offset is unchanged.\n&quot;);
			return -1;
		}
		fp-&gt;offset = off;
		fp-&gt;is_ready = 0;
		return 0;
	default:
		return -1;
	}
}

/* Close the descriptors held by fp, release all strings it owns and
 * free fp itself. A null fp is accepted. Always returns 0. */
int knet_close(knetFile *fp)
{
	if (fp != 0) {
		if (fp-&gt;ctrl_fd != -1)
			netclose(fp-&gt;ctrl_fd); // FTP specific
		if (fp-&gt;fd != -1) {
			/* On Linux/Mac, netclose() is an alias of close(), but on
			 * Windows, it is an alias of closesocket(). */
			if (fp-&gt;type != KNF_TYPE_LOCAL) netclose(fp-&gt;fd);
			else close(fp-&gt;fd);
		}
		free(fp-&gt;host);
		free(fp-&gt;port);
		// FTP specific
		free(fp-&gt;response);
		free(fp-&gt;retr);
		// HTTP specific
		free(fp-&gt;path);
		free(fp-&gt;http_host);
		free(fp);
	}
	return 0;
}
</pre>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/648/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/648/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/648/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/648/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/648/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/648/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/648/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/648/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/648/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/648/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=648&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2009/08/02/read-files-on-ftphttp/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>The rde::hash_map Hash Library</title>
		<link>http://attractivechaos.wordpress.com/2008/10/13/the-rdehash_map-hash-library/</link>
		<comments>http://attractivechaos.wordpress.com/2008/10/13/the-rdehash_map-hash-library/#comments</comments>
		<pubDate>Mon, 13 Oct 2008 10:31:27 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=643</guid>
		<description><![CDATA[Recently I have put rde::hash_map, which implements an open addressing hash table with linear probing, to my benchmark. It is surprisingly fast. Studying its source codes reveals that it achieves the speed by caching the hash values and by reducing cache misses. Both ways increase the memory footprint.
In most of hash libraries, the hash value [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=643&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>Recently I have put rde::hash_map, which implements an open addressing hash table with linear probing, to <a href="http://attractivechaos.wordpress.com/2008/10/07/another-look-at-my-old-benchmark/">my benchmark</a>. It is surprisingly fast. Studying its source codes reveals that it achieves the speed by caching the hash values and by reducing cache misses. Both ways increase the memory footprint.</p>
<p>In most hash libraries, the hash value is hash(key)%n_buckets, which only contains log2(n_buckets) bits of information. In contrast, rde::hash_map stores the hash(key). Comparison between keys is first performed between hashes and then between keys. As a hash contains 30-bit information in rdestl, distinct keys can rarely have an identical hash, which saves a lot of calls to calculating hashes and will gain a big speedup on complex keys such as strings. However, storing the hash requires an additional 4 bytes, which can cost more given improperly aligned memory.</p>
<p>As rde::hash_map requires an additional 4 bytes in each bucket, it is able to mark a bucket as being empty or deleted with two bits of these 4 bytes, instead of using a flag array as in my khash or using special keys as in google dense hash table. As a result, rde::hash_map avoids the cache miss in khash AND the expensive key comparisons in google dense hash table, which makes it faster than both libraries on both integer and string keys. Also, like google dense hash_map, rde::hash_map packs key and value in a struct instead of putting keys and values in two arrays as in khash. This also saves a cache miss on data retrieval. However, packing key and value may bring memory overhead when the key type and the value type are unaligned in memory. Please see <a href="http://attractivechaos.wordpress.com/2008/09/12/the-google-hash-table-library/">my old post</a> for further discussions.</p>
<p>Interestingly, rde::hash_map chose linear probing. I once tried linear probing in my khash and it delivered slower speed than double hashing even on random input. Maybe I should try harder?</p>
<p>In conclusion, rde::hash_map achieves high speed at the cost of high memory consumption. It is a good candidate if you do not care too much about memory. My khash library first aims at a library with small memory foot print and then at speed. I would not be surprised to see it is not the fastest hash table library.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/643/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/643/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/643/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/643/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/643/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/643/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/643/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/643/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/643/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/643/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=643&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/10/13/the-rdehash_map-hash-library/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>A Generic Buffered Stream Wrapper</title>
		<link>http://attractivechaos.wordpress.com/2008/10/11/a-generic-buffered-stream-wrapper/</link>
		<comments>http://attractivechaos.wordpress.com/2008/10/11/a-generic-buffered-stream-wrapper/#comments</comments>
		<pubDate>Sat, 11 Oct 2008 21:20:29 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[development]]></category>
		<category><![CDATA[C]]></category>
		<category><![CDATA[myprog]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=629</guid>
		<description><![CDATA[In C programming, the main difference between low-level I/O functions (open/close/read/write) and stream-level I/O functions (fopen/fclose/fread/fwrite) is that stream-level functions are buffered. Presumably, low-level I/O functions will incur a disk operation on each read(). Although the kernel may cache this, we cannot rely too much on it. Disk operations are expensive and so low-level I/O [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=629&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>In C programming, the main difference between low-level I/O functions (open/close/read/write) and stream-level I/O functions (fopen/fclose/fread/fwrite) is that stream-level functions are buffered. Presumably, low-level I/O functions will incur a disk operation on each read(). Although the kernel may cache this, we cannot rely too much on it. Disk operations are expensive and so low-level I/O does not provide fgetc equivalent.</p>
<p>Stream-level I/O functions have a buffer. On reading, they load a block of data from disk to memory. If at a fgetc() call the data have been retrieved to memory, it will not incur a disk operation, which greatly improves the efficiency.</p>
<p>Stream-level I/O functions are part of the standard C library. Why do we need a new wrapper? Three reasons. First, when you work with an alternative I/O library (such as zlib or libbzip2) which does not come with buffered I/O routines, you probably need a buffered wrapper to make your code efficient. Second, using a generic wrapper makes your code more flexible when you want to change the type of input stream. For example, you may want to write a parser that works on a normal stream, a zlib-compressed stream and on a C string. Using a unified stream wrapper will simplify coding. Third, my feeling is that most of the stream-level I/O functions in stdio.h are not convenient given that they cannot enlarge a string automatically. In a lot of cases, I need to read one line but I do not know how long a line can be. Managing this case is not so hard, but doing this again and again is boring.</p>
<p>In the end, I came up with my own buffered wrapper for input streams. It is generic in that it works on all types of I/O streams with a read() call (or equivalent), or even on a C string. I show an example here without much explanation. I may expand this post in future. Source code can be found in my <a href="http://attractivechaos.wordpress.com/programs/">programs page</a>.</p>
<pre class="brush: cpp;">
#include &lt;fcntl.h&gt;
#include &lt;unistd.h&gt;
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#include &quot;kstream.h&quot;
// arguments: type of the stream handler,
//   function to read a block, size of the buffer
KSTREAM_INIT(int, read, 10)

int main()
{
	int fd;
	kstream_t *ks;
	kstring_t str;
	bzero(&amp;str, sizeof(kstring_t));
	fd = open(&quot;kstream.h&quot;, O_RDONLY);
	ks = ks_init(fd);
	while (ks_getuntil(ks, '\n', &amp;str, 0) &gt;= 0)
		printf(&quot;%s\n&quot;, str.s);
	ks_destroy(ks);
	free(str.s);
	close(fd);
	return 0;
}
</pre>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/629/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/629/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/629/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/629/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/629/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/629/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/629/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/629/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/629/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/629/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=629&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/10/11/a-generic-buffered-stream-wrapper/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>Another Look at my old Benchmark</title>
		<link>http://attractivechaos.wordpress.com/2008/10/07/another-look-at-my-old-benchmark/</link>
		<comments>http://attractivechaos.wordpress.com/2008/10/07/another-look-at-my-old-benchmark/#comments</comments>
		<pubDate>Tue, 07 Oct 2008 12:02:47 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[development]]></category>
		<category><![CDATA[benchmark]]></category>
		<category><![CDATA[C]]></category>
		<category><![CDATA[cpp]]></category>
		<category><![CDATA[myprog]]></category>
		<category><![CDATA[programming]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=586</guid>
		<description><![CDATA[This is a follow-up of my previous post. Here I change the table to several charts. Hope it seems more friendly to readers. You can find the links to these libraries in that table. Their source codes, including my testing code, are available here. You may also want to see my previous posts in the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=586&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>This is a follow-up of my previous post. Here I change <a href="http://attractivechaos.awardspace.com/udb.html">the table</a> to several charts. Hope it seems more friendly to readers. You can find the links to these libraries in that table. Their source codes, including my testing code, are available <a href="http://attractivechaos.awardspace.com/download/udb-latest.tar.bz2">here</a>. You may also want to see my previous posts in the last few days for my interpretation to the results.</p>
<p>On C string (char*) keys, I failed to use JE_rb_old and JE_rb_new to get the correct result on Mac and so they are not shown in the charts. I would really appreciate it if someone could give me the correct implementation using these libraries. In addition, tr1_unordered_map uses a lot of memory according to my program. The memory for string keys is faked.</p>
<p>For convenience, here are some brief descriptions of these libraries (in no particular order):</p>
<ul>
<li>google_dense and google_sparse: <a href="http://code.google.com/p/google-sparsehash/">google&#8217;s sparsehash library</a>. Google_dense is fast but memory hungry while google_sparse is the opposite.</li>
<li>sgi_hash_map and sgi_map: <a href="http://www.sgi.com/tech/stl/">SGI&#8217;s STL</a> that comes with g++-4. The backend of sgi_map is a three-pointer red-black tree.</li>
<li>tr1::unordered_map: GCC&#8217;s TR1 library that comes with g++-4. It implements a hash table.</li>
<li>rdestl::hash_map: from <a href="http://code.google.com/p/rdestl/">RDESTL</a>, another implementation of STL.</li>
<li><a href="http://uthash.sourceforge.net/">uthash</a>: a hash library in C</li>
<li>JG_btree: <a href="http://resnet.uoregon.edu/~gurney_j/jmpc/btree.html">John-Mark Gurney&#8217;s btree library</a>.</li>
<li>JE_rb_new, JE_rb_old, JE_trp_hash and JE_trp_prng: <a href="http://www.canonware.com/~ttt/2008/07/treaps-versus-red-black-trees.html">Jason Evans&#8217; binary search tree libraries</a>. JE_rb_new implements a left-leaning red-black tree; JE_rb_old a three-pointer red-black tree; both JE_trp_hash and JE_trp_prng implement treaps but with different strategies on randomness.</li>
<li>libavl_rb, libavl_prb, libavl_avl and libavl_bst: from <a href="http://www.stanford.edu/~blp/avl/">GNU libavl</a>. They implement a two-pointer red-black tree, a three-pointer red-black tree, an AVL tree and an unbalanced binary search tree, respectively.</li>
<li>NP_rbtree and NP_splaytree: <a href="http://www.freebsd.org/cgi/cvsweb.cgi/src/sys/sys/tree.h">Niels Provos&#8217; tree library</a> for FreeBSD. A three-pointer red-black tree and a splay tree.</li>
<li>TN_rbtree: <a href="http://www.darkridge.com/~jpr5/archive/alg/node21.html">Thomas Niemann&#8217;s red-black tree</a>. I ported it to C++.</li>
<li>sglib_rbtree: from <a href="http://sglib.sourceforge.net/">SGLIB</a>. It implements a two-pointer recursive red-black tree (all the other binary search trees are implemented without recursion).</li>
<li>libavl_avl_cpp, libavl_rb_cpp and libavl_rb_cpp2: incomplete C++ version of libavl (no iterator), ported by me. Libavl_rb_cpp2 further uses the same technique in JE_rb_new to save the color bit. Source codes available in the package.</li>
<li><a href="http://attractivechaos.awardspace.com/khash.h.html">khash</a> and <a href="http://attractivechaos.awardspace.com/kbtree.h.html">kbtree</a>: my hash table and B-tree implementation. kbtree is based on JG_btree.</li>
</ul>
<p><a href="http://klib.sourceforge.net/images/udb-int-cpu.png"><img class="alignnone size-full wp-image-622" title="udb-int-cpu" src="http://klib.sourceforge.net/images/udb-int-cpu.png" alt="" width="542" height="309" /></a></p>
<p><a href="http://klib.sourceforge.net/images/udb-int-mem.png"><img class="alignnone size-full wp-image-623" title="udb-int-mem" src="http://klib.sourceforge.net/images/udb-int-mem.png" alt="" width="542" height="309" /></a></p>
<p><a href="http://klib.sourceforge.net/images/udb-str-cpu.png"><img class="alignnone size-full wp-image-624" title="udb-str-cpu" src="http://klib.sourceforge.net/images/udb-str-cpu.png" alt="" width="542" height="308" /></a></p>
<p><a href="http://attractivechaos.files.wordpress.com/2008/10/udb-str-mem.png"><img class="alignnone size-full wp-image-625" title="udb-str-mem" src="http://klib.sourceforge.net/images/udb-str-mem.png" alt="" width="543" height="309" /></a></p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/586/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/586/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/586/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/586/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/586/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/586/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/586/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/586/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/586/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/586/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=586&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/10/07/another-look-at-my-old-benchmark/feed/</wfw:commentRss>
		<slash:comments>24</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>

		<media:content url="http://klib.sourceforge.net/images/udb-int-cpu.png" medium="image">
			<media:title type="html">udb-int-cpu</media:title>
		</media:content>

		<media:content url="http://klib.sourceforge.net/images/udb-int-mem.png" medium="image">
			<media:title type="html">udb-int-mem</media:title>
		</media:content>

		<media:content url="http://klib.sourceforge.net/images/udb-str-cpu.png" medium="image">
			<media:title type="html">udb-str-cpu</media:title>
		</media:content>

		<media:content url="http://klib.sourceforge.net/images/udb-str-mem.png" medium="image">
			<media:title type="html">udb-str-mem</media:title>
		</media:content>
	</item>
		<item>
		<title>Comparison of Binary Search Trees (BSTs)</title>
		<link>http://attractivechaos.wordpress.com/2008/10/02/comparison-of-binary-search-trees/</link>
		<comments>http://attractivechaos.wordpress.com/2008/10/02/comparison-of-binary-search-trees/#comments</comments>
		<pubDate>Thu, 02 Oct 2008 21:20:23 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=565</guid>
		<description><![CDATA[Again, this post is a follow-up of this page. Source code is available here.
AVL Tree vs. Red-Black Tree
If you google &#8220;avl vs. red-black&#8221;, the first returned page will largely answer your question. In summary, the height of an AVL tree is at most ~1.44log(N), lower than the maximum height of a red-black tree, ~2log(N). However, [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=565&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>Again, this post is a follow-up of <a href="http://attractivechaos.awardspace.com/udb.html">this page</a>. Source code is available <a href="http://attractivechaos.awardspace.com/download/udb-20080928.tar.bz2">here</a>.</p>
<p><strong>AVL Tree vs. Red-Black Tree</strong></p>
<p>If you google &#8220;avl vs. red-black&#8221;, the first returned page will largely answer your question. In summary, the height of an AVL tree is at most ~1.44log(N), lower than the maximum height of a red-black tree, ~2log(N). However, upon insertion, AVL tree may need O(log(N)) operations to rebalance the tree, but red-black tree requires O(1). This means AVL tree is probably faster on searching but slower on insertion. In addition, red-black tree is easier to be implemented as a <a href="http://en.wikipedia.org/wiki/Persistent_data_structure">persistent data structure</a>.</p>
<p>Practical analysis is not so clear as theoretical analysis. I think AVL and red-black may have quite similar overall performance. Firstly, although the maximum possible height of an AVL tree is better, both AVL and red-black trees are probably well balanced without reaching the maximum height frequently. Secondly, although AVL tree has an O(log(N)) loop upon insertion, it is a very tight loop with a small constant, while red-black tree may have a loop of small size with big constant (amortized O(1), though). I am not sure whether red-black tree really wins on insertion.</p>
<p>In <a href="http://stlavlmap.sourceforge.net/">Daniel&#8217;s benchmark</a>, AVL tree and red-black tree have quite similar speed (in consideration that Daniel implemented AVL tree). In my benchmark, libavl&#8217;s red-black tree and AVL tree also deliver similar speed.</p>
<p>On memory, a standard AVL tree requires two pointers and one balancing factor at each node, while a red-black tree can be implemented using two or three pointers plus a color bit (either the balancing factor or the color bit will cost the size of a pointer due to memory alignment). A red-black tree with two pointers is usually slower than a red-black tree with three pointers (libavl_rb_cpp vs. NP_rbtree), but the difference is marginal. In my benchmark, a red-black tree with two pointers has pretty similar performance in comparison to an AVL tree (libavl_rb_cpp vs. libavl_avl_cpp).</p>
<p>In all, my opinion is AVL and red-black are very similar in many aspects. Which to use does not matter too much if you are not implementing a persistent data structure.</p>
<p><strong>Splay Tree vs. AVL/RB Tree</strong></p>
<p>Search on a splay tree may be O(N) if elements are inserted in order. This &#8220;feature&#8221; alone pushes me away. Although wiki says it is possible to avoid the worst case, I am not aware of a practical implementation. On random input, splay tree is also slower than AVL and red-black trees.</p>
<p>One of the advantages of a splay tree is it only requires two pointers with no additional information. This is helpful when memory is critical, but I would rather use a modified red-black to achieve the same memory footprint. Another advantage of splay tree is it is self-adjusting. I have not tested this in my benchmark.</p>
<p><strong>Treap vs. AVL/RB Tree</strong></p>
<p>I do not know much about treap. It seems to me treap does not guarantee O(log(N)) search. Fortunately, in practice the worst O(N) case should almost never happen due to the random behaviour of a treap. In my benchmark, treap is slower than splay tree on random input. Probably it is not the best choice.</p>
<p><strong>Implementations</strong></p>
<p>As to red-black tree, SGI STL&#8217;s set/map is really a good one. You can find other C++ implementations (TN_rbtree libavl_rb_cpp) in my benchmark. They are also efficient, but incomplete. Libavl_rb_cpp is adapted from libavl_rb which only uses two pointers. NP_rbtree (FreeBSD&#8217;s tree.h) is the best choice for a C programmer.</p>
<p>As to AVL tree, I am not aware of good AVL implementations in either C or C++. Stlavlmap is supposed to be efficient, but I cannot compile it. WK_avl may be a good library, judged from its source code, but it is not easy to use. GM_avl uses recursion and so cannot be efficient. Libavl_avl_cpp is my ported version of libavl_avl. It is efficient but incomplete. You may finish it if you really like to try an AVL tree. BTW, libavl is in fact a very high-quality library, but using void* makes it inferior to others. Someone should improve its API using C macros or C++ template.</p>
<p><strong>Concluding Remarks</strong></p>
<p>For general purpose, red-black tree might still be the best choice. It is among the fastest binary search trees and more easily adapted to a persistent data structure. AVL tree is equally good when persistence is not needed (well, all my applications do not require persistence). At last, do not forget my previous post: in-memory B-tree can be both faster and more light-weighted than red-black/AVL trees.</p>
<p><strong>Update</strong></p>
<p><a href="http://www.sambal.org/2008-10/stl-lower_bound-set-vs-list/">Sambal</a> told me that on insertion, red-black only guarantees amortized O(1). The worst case can be O(log(N)) with large constant. Then the advantage of red-black tree on insertion is less obvious.</p>
<p>Update the claim about the memory usage of AVL and red-black tree.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/565/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/565/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/565/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=565&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/10/02/comparison-of-binary-search-trees/feed/</wfw:commentRss>
		<slash:comments>10</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>Using void* in Generic C Programming may be Inefficient</title>
		<link>http://attractivechaos.wordpress.com/2008/10/02/using-void-in-generic-c-programming-may-be-inefficient/</link>
		<comments>http://attractivechaos.wordpress.com/2008/10/02/using-void-in-generic-c-programming-may-be-inefficient/#comments</comments>
		<pubDate>Thu, 02 Oct 2008 16:03:40 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[Uncategorized]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=550</guid>
		<description><![CDATA[I have done a more thorough benchmark of various hash table and search tree implementations. I believe it is the best page from my blog so far. Unfortunately, few people have looked at this page according to the wordpress statistics. Possibly putting too much information in one page without enough explanations has scared people; or [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=550&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>I have done a more thorough benchmark of various hash table and search tree implementations. I believe it is the <strong>best page</strong> from my blog so far. Unfortunately, few people have looked at this page according to the wordpress statistics. Possibly putting too much information in one page without enough explanations has scared people; or the page is posted on another domain and people do not like to follow the link. Anyway, it might be good to discuss further in my wordpress blog. I will write a series of posts to extend that page. Please look at <a href="http://attractivechaos.awardspace.com/udb.html">here</a> for the whole page and <a href="http://attractivechaos.awardspace.com/download/udb-20080928.tar.bz2">here</a> for the source code.</p>
<p>Let&#8217;s start this series with generic programming in C.</p>
<p>We usually use void* to implement generic containers in C. To do it this way, we need to pay an overhead on retrieving data pointed to by a void* pointer. We often, but not always, need a function call for operations on void* data, such as copying, comparing or hashing. This adds additional overhead on speed. Furthermore, if the size of an object is small, we would rather put the whole object in the container instead of wasting the memory for an additional pointer: using void* may also bring memory overhead.</p>
<p>To see how large this void* effect is, I ported libavl&#8217;s AVL tree code from C to C++ (source code is available in the download link shown above). I just honestly translated void* to C++ template without applying any additional optimizations (well, Ben Pfaff does not leave much room for optimization, either). As you can see from <a href="http://attractivechaos.awardspace.com/udb.html">the table</a>, my C++ template version is both faster and more lightweight than the original C version. The conclusion is: using void* in this way may be inefficient.</p>
<p>There is another different, but similar, way to use void*. In implementing sort, B-trees or open addressing hash tables, we use arrays. It is possible to put an object itself in a void* array instead of putting its pointer into the array. We can do most of operations when we know the size of each object. For example, we can implement swap in this way:</p>
<pre class="brush: cpp;">
void swap(void *p, int obj_size, int i, int j) {
  void *tmp = malloc(obj_size); // for efficiency, we can preallocate tmp
  memcpy(tmp, p+obj_size*i, obj_size);
  memcpy(p+obj_size*i, p+obj_size*j, obj_size);
  memcpy(p+obj_size*j, tmp, obj_size);
  free(tmp);
}
</pre>
<p>and even implement a whole sorting algorithm with similar ideas. I believe libc&#8217;s qsort is implemented in this way. However, as we can see, we need to call memcpy() on each assignment. On large memory, memcpy() is probably efficient with the help of vectorization, but on small memory, it is slower than direct assignment as we have to retrieve obj_size and memcpy() does not know what obj_size could be. Also, as the compiler does not know what obj_size is at the compiling time, the compiler cannot optimize obj_size*50 to something like 50&lt;&lt;2 even if we know obj_size=4. In all, using void* in this way will not have overhead on memory, but will have on speed. This is why libc&#8217;s qsort is always slower than STL&#8217;s or my sorting implementations (see my Comparison of Internal Sorting Algorithms).</p>
<p>Here is the lesson. If we care about efficiency, using void* to achieve generic programming in C is a bad idea. C++ template does much better at this point and causes almost no overhead on instantiation. If we want to stick to C, I would suggest switching to macros as are used in my khash.h/kbtree.h/ksort.h (I learned this from FreeBSD&#8217;s tree.h). C macros mimics C++ template and does not cause more overhead than C++ template. However, C macros are hard to read or write. That is why I ported libavl&#8217;s AVL implementation to a C++ template instead of to C macros. It would be a pain.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/550/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/550/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/550/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/550/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/550/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/550/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/550/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/550/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/550/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/550/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=550&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/10/02/using-void-in-generic-c-programming-may-be-inefficient/feed/</wfw:commentRss>
		<slash:comments>5</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>Is There an Overhead to Retrieve an Element in a Struct?</title>
		<link>http://attractivechaos.wordpress.com/2008/10/01/is-there-an-overhead-to-retrieve-an-element-in-a-struct/</link>
		<comments>http://attractivechaos.wordpress.com/2008/10/01/is-there-an-overhead-to-retrieve-an-element-in-a-struct/#comments</comments>
		<pubDate>Wed, 01 Oct 2008 20:47:33 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[development]]></category>
		<category><![CDATA[benchmark]]></category>
		<category><![CDATA[C]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=541</guid>
		<description><![CDATA[I was wondering whether retrieving an element in a struct will incur additional overhead. And so I did the following experiment. Here the same array is sorted in two ways: with or without data retrieving from a struct. Both ways yield identical results. The question is whether the compiler knows the two ways are the [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=541&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>I was wondering whether retrieving an element in a struct will incur additional overhead. And so I did the following experiment. Here the same array is sorted in two ways: with or without data retrieving from a struct. Both ways yield identical results. The question is whether the compiler knows the two ways are the same and can achieve the same efficiency.</p>
<p>#include &lt;time.h&gt;<br />
#include &lt;stdlib.h&gt;<br />
#include &lt;stdio.h&gt;<br />
#include &quot;ksort.h&quot;</p>
<p>typedef struct {<br />
	int a;<br />
} myint_t;</p>
<p>#define myint_lt(_a, _b) ((_a).a &lt; (_b).a)</p>
<p>KSORT_INIT_GENERIC(int)<br />
KSORT_INIT(my, myint_t, myint_lt)</p>
<p>int main()<br />
{<br />
	int i, N = 10000000;<br />
	myint_t *a;<br />
	clock_t t;<br />
	a = (myint_t*)malloc(sizeof(myint_t) * N);<br />
	srand48(11);<br />
	for (i = 0; i != N; ++i) a[i].a = lrand48();<br />
	t = clock();<br />
	ks_introsort(int, N, (int*)a);<br />
	printf(&quot;%.3lf\n&quot;, (double)(clock() - t) / CLOCKS_PER_SEC);<br />
	srand48(11);<br />
	for (i = 0; i != N; ++i) a[i].a = lrand48();<br />
	t = clock();<br />
	ks_introsort(my, N, a);<br />
	printf(&quot;%.3lf\n&quot;, (double)(clock() - t) / CLOCKS_PER_SEC);<br />
	free(a);<br />
	return 0;<br />
}</p>
<p>Here are the timings with different compilers on different CPUs (the first value is without retrieving data from a struct, the second is with):</p>
<ul>
<li>Mac-Intel, gcc-4.0, -O2: 1.422 sec vs. 1.802 sec</li>
<li>Mac-Intel, gcc-4.2, -O2: 1.438 vs. 1.567</li>
<li>Mac-Intel, gcc-4.0, -O2 -fomit-frame-pointer: 1.425 vs. 1.675</li>
<li>Mac-Intel, gcc-4.2, -O2 -fomit-frame-pointer: 1.438 vs. 1.448</li>
<li>Linux-Intel, gcc-4.1, -O2: 1.600 vs. 1.520</li>
<li>Linux-Intel, gcc-4.1, -O2 -fomit-frame-pointer: 1.620 vs. 1.530</li>
<li>Linux-Intel, icc, -O2 -fomit-frame-pointer: 1.600 vs. 1.580</li>
</ul>
<p>The conclusion is that retrieving data from a struct may have marginal overhead in comparison to direct data access. However, a good compiler can avoid this and produce nearly optimal machine code. Using &#8220;-fomit-frame-pointer&#8221; may help for some machines, but not for others. In addition, it is a bit surprising to me that gcc-linux generates faster code for data retrieval in a struct. Swapping the two ways does not change the conclusion.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/541/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/541/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/541/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/541/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/541/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/541/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/541/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/541/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/541/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/541/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=541&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/10/01/is-there-an-overhead-to-retrieve-an-element-in-a-struct/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
		<item>
		<title>Further Discussion on Search Trees</title>
		<link>http://attractivechaos.wordpress.com/2008/09/28/futher-discussion-on-search-trees/</link>
		<comments>http://attractivechaos.wordpress.com/2008/09/28/futher-discussion-on-search-trees/#comments</comments>
		<pubDate>Sun, 28 Sep 2008 21:23:28 +0000</pubDate>
		<dc:creator>attractivechaos</dc:creator>
				<category><![CDATA[development]]></category>
		<category><![CDATA[benchmark]]></category>
		<category><![CDATA[C]]></category>
		<category><![CDATA[cpp]]></category>
		<category><![CDATA[myprog]]></category>
		<category><![CDATA[programming]]></category>

		<guid isPermaLink="false">http://attractivechaos.wordpress.com/?p=535</guid>
		<description><![CDATA[Over the weekend, I have done a more comprehensive benchmark of various libraries on search trees. Two AVL, seven red-black tree, one Splay tree, two treap implementations are involved, together with seven hash table libraries. As I need to present a big table, I have to write it in a free-style HTML page. You can [...]<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=535&subd=attractivechaos&ref=&feed=1" />]]></description>
			<content:encoded><![CDATA[<div class='snap_preview'><br /><p>Over the weekend, I have done a more comprehensive benchmark of various libraries on search trees. Two AVL, seven red-black tree, one Splay tree, two treap implementations are involved, together with seven hash table libraries. As I need to present a big table, I have to write it in a free-style HTML page. You can find the complete benchmark <a href="http://attractivechaos.awardspace.com/udb.html">here</a> and all the source codes <a href="http://attractivechaos.awardspace.com/download/udb-20080928.tar.bz2">here</a>. I only copy the &#8220;concluding remarks&#8221; in the benchmark page as follows:</p>
<ul>
<li>Hash table is preferred over search trees if we do not require order.</li>
<li>In applications similar to my example, a B-tree is better than most binary search trees in terms of both speed and memory.</li>
<li>AVL trees and red-black trees are the best general-purpose BSTs. They are very close in efficiency.</li>
<li>For pure C libraries, using macros is usually more efficient than using void* to achieve generic programming.</li>
</ul>
<p>You can find the results and much more discussion on <a href="http://attractivechaos.awardspace.com/udb.html">that page</a>. If you think the source code or the design of the benchmark can be improved, please leave comments here or send me an e-mail. In addition, I failed to use several libraries, so you can see some blanks in the table. I would also appreciate it if someone could show me how to use those libraries correctly.</p>
  <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gocomments/attractivechaos.wordpress.com/535/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/comments/attractivechaos.wordpress.com/535/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godelicious/attractivechaos.wordpress.com/535/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/delicious/attractivechaos.wordpress.com/535/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/gostumble/attractivechaos.wordpress.com/535/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/stumble/attractivechaos.wordpress.com/535/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/godigg/attractivechaos.wordpress.com/535/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/digg/attractivechaos.wordpress.com/535/" /></a> <a rel="nofollow" href="http://feeds.wordpress.com/1.0/goreddit/attractivechaos.wordpress.com/535/"><img alt="" border="0" src="http://feeds.wordpress.com/1.0/reddit/attractivechaos.wordpress.com/535/" /></a> <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=attractivechaos.wordpress.com&blog=4545823&post=535&subd=attractivechaos&ref=&feed=1" /></div>]]></content:encoded>
			<wfw:commentRss>http://attractivechaos.wordpress.com/2008/09/28/futher-discussion-on-search-trees/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:content url="http://0.gravatar.com/avatar/047ebc7bb9ff37a0da844413856e92cb?s=96&#38;d=identicon&#38;r=G" medium="image">
			<media:title type="html">attractivechaos</media:title>
		</media:content>
	</item>
	</channel>
</rss>