/* ----------------------------------------------------------------- */ /* Copyright (c) 2007 by Richard Harter */ /* */ /* Permission is hereby granted, free of charge, to any person */ /* obtaining a copy of this software and associated documentation */ /* files (the "Software"), to deal in the Software without */ /* restriction, including without limitation the rights to use, */ /* copy, modify, merge, publish, distribute, sublicense, and/or */ /* sell copies of the Software, and to permit persons to whom the */ /* Software is furnished to do so, subject to the following */ /* conditions: */ /* */ /* The above copyright notice and this permission notice shall be */ /* included in all copies or substantial portions of the */ /* Software. */ /* */ /* Derivative works shall include a notice that the software is a */ /* modified version of the copyrighted software. */ /* */ /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY */ /* KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE */ /* WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR */ /* PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR */ /* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER */ /* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR */ /* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* */ /* ----------------------------------------------------------------- */ /* Revision History */ /* */ /* April 22, 2007 - Base release 1.0.0 */ /* */ /* ----------------------------------------------------------------- */ #include #include #include #include "urtree_private.h" #define UCHAR unsigned char static void free_node (URT_HANDLE *,URT_NODE *); static URT_BAGS * get_bag (URT_HANDLE *,int,size_t); static URT_NODE * get_big_node (URT_HANDLE *,int,int); static int get_diff_pos (UCHAR *,UCHAR *,int,long); static URT_NODE * get_node (URT_HANDLE *); static void print_tree (URT_HANDLE *,FILE *,int,int,URT_NODE *); static URT_INTNODE * store_intnode(URT_HANDLE *,int); static URT_VALUE * store_data (URT_HANDLE *,URT_VALUE *); static URT_NODE * sub_node (URT_HANDLE *,URT_VALUE *,URT_VALUE *,int); static int mode_mask[16] = {0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,0x2000,0x4000,0x8000}; /**************************************************************/ /* External (public ) routines */ /* */ /* urtree_init..........Creates a UR tree root node */ /* urtree_close........ Deletes a UR tree */ /* urtree_search....... Finds a data element in a UR tree */ /* urtree_insert....... Adds a data element to a UR tree */ /* urtree_delete....... Deletes a data element from a UR tree */ /* urtree_print_forest. Prints all UR trees for a handle */ /* urtree_verify....... Verifies that two elements are equal */ /* */ /**************************************************************/ /* Internal (private) routines */ /* */ /* free_node......... Moves a node and its sibs to freelists */ /* get_bag........... Gets a bag and allocates its space */ /* get_big_node ..... Gets the root node for large elements */ /* get_diff_pos...... For two strings, find a pos that diffs */ /* get_node.......... Allocates space for a URT node */ /* print_tree........ Prints a UR tree for a given data size */ /* store_data ....... Store local copy of data descriptor */ /* store_intnode .... Allocates space for a URT node */ /* sub_node ......... Creates a new subordinate node */ /* */ /**************************************************************/ /**************************************************************/ /* */ /* urtree_init: */ /* This routine creates a UR tree handle initializes it. */ /* */ /**************************************************************/ URT_HANDLE * urtree_init(FILE * errf, int type) { URT_HANDLE *handle; FILE * errfile; int i; errfile = errf? errf : stderr; handle = malloc(sizeof (URT_HANDLE)); if(!handle){ fprintf(errfile,"Malloc failed in urtree_init\n"); exit(EXIT_FAILURE); } for (i=0;iroots[i] = 0; handle->node_freelist = 0; handle->data_bags = 0; handle->pair_bags = 0; handle->data_freelist = 0; handle->stkhdr.max = 0; handle->stkhdr.used = 0; handle->stkhdr.stack = 0; handle->big_data = 0; handle->bag_list = 0; handle->errfile = errfile; handle->node_bags = get_bag(handle, BAG_SIZE, sizeof (URT_NODE)); if ((type >= 0) && (type <=1)) handle->id_type = type; else { fprintf(errfile,"Improper ID type %d passed to urt_init\n",type); exit(EXIT_FAILURE); } return handle; } /**************************************************************/ /* */ /* urtree_close: */ /* This routine closes a UR tree. It deletes all storage */ /* for the tree. It returns 1 (true) if the tree was */ /* successfully closed and 0 (false) if it was not. */ /* */ /* Tree storage is in the form of linked bags. Each bag has */ /* two allocations, the bag structure and the space supplied */ /* by the bag. */ /* */ /**************************************************************/ int urtree_close(URT_HANDLE *handle) { URT_BAGS *bag; /* Ptr into the bags list */ URT_BAGS *next; /* The next ptr into the list */ if (!handle) return 0; for (bag=handle->bag_list;bag;bag = next) { next = bag->link; free(bag->space); free (bag); } if (handle->stkhdr.stack) free(handle->stkhdr.stack); free(handle); return 1; } /**************************************************************/ /* */ /* urtree_search: */ /* This routine searches a UR tree for a data element. If it */ /* doesn't find it, it returns 0. If it does find it, it */ /* fills the identification field from the value stored in */ /* the data descriptor in the tree. */ /* */ /**************************************************************/ URT_RET urtree_search(URT_HANDLE * root, UCHAR *key, long len) { URT_NODE * node; /* The current node being searched */ long index; /* The nibble data position index */ int bite; /* Byte in key pointed to by index */ int nibble; /* Nibble in key pointed to by index */ URT_RET result; /* Structure holding search results */ if (root->id_type) result.value.is_ptr = 0; else result.value.is_int = 0; if (len < 0) { fprintf(root->errfile, "Usage error: data length is %ld\n",len); exit(EXIT_FAILURE); } result.success = 0; node = (len < URT_MAX_SMALL_STRING_SIZE) ? root->roots[len] : get_big_node(root,len,0); if (!key || !node) return result; for (;;) { index = node->index >> 1; bite = key[index]; nibble = ((node->index & 1)? bite>>4 : bite) & 0xf; if (node->mode & mode_mask[nibble]) { node = node->un[nibble].kids; } else { if (urtree_verify(len, key, node->un[nibble].data->data)) { result.value = node->un[nibble].data->id; result.success = 1; return result; } return result; } } return result; } /**************************************************************/ /* */ /* urtree_insert: */ /* This routine adds a data element. First it searches to */ /* see if the element is already present. If it is, the */ /* attempt fails and 0 is returned. If it is not present, */ /* the element is added, the value field is updated, and the */ /* updated data descriptor is returned. */ /* */ /**************************************************************/ URT_RET urtree_insert(URT_HANDLE *handle, UCHAR *key, long len, VAL_ALT id, int override) { long diff_pos = 0; /* Position of a difference bite */ int bite; /* Byte in key pointed to by index */ int nibble; /* Nibble in key pointed to by index*/ long index; /* The nibble data position index */ URT_NODE *node = 0; /* Current node in tree search */ URT_RET result; /* Structure holding search results */ URT_VALUE *old_data; /* Existing data for split node */ URT_VALUE new_data; /* Data being added for split node */ int valeq; /* Boolean - true if values equal */ result.success = MISC_ERROR; result.value = id; if (!key || (len <0)) return result; new_data.id = id; new_data.len = len; new_data.data = key; if (len < URT_MAX_SMALL_STRING_SIZE) { node = handle->roots[len]; if (!node) { node = get_node(handle); handle->roots[len] = node; } } else node = get_big_node(handle,len,1); for (;;) { index = node->index >> 1; bite = key[index]; nibble = ((node->index & 1) ? bite >>4 : bite) & 0xf; if (! node->un[nibble].data) { node->un[nibble].data = store_data(handle,&new_data); node->mode = node->mode & (~mode_mask[nibble]); result.success = ADD_OK; result.value = id; return result; } else if ((node->mode) & mode_mask[nibble]) { node = node->un[nibble].kids; } else { diff_pos = get_diff_pos(node->un[nibble].data->data, key, len, index); if (diff_pos < 0) { if (override) { valeq = handle->id_type ? (node->un[nibble].data->id.is_ptr == id.is_ptr) : (node->un[nibble].data->id.is_int == id.is_int) ; result.success = valeq ? ADD_OK : BAD_INSERT; } else { result.success = ADD_OK; node->un[nibble].data->id = id; } result.value = node->un[nibble].data ->id; return result; } old_data = node->un[nibble].data; node->un[nibble].kids = sub_node(handle,old_data,&new_data,diff_pos); node->mode = node->mode | mode_mask[nibble]; result.success = ADD_OK; result.value = id; return result; } } return result; } /**************************************************************/ /* */ /* urtree_delete: */ /* This routine deletes a data element from the tree. It */ /* returns 1 (true) if the deletion was successful and 0 */ /* (false) if it was not. */ /* */ /* */ /**************************************************************/ int urtree_delete(URT_HANDLE * handle, unsigned char * key, long len, VAL_ALT value) { int bite; /* Byte in key pointed to by index */ int nibble; /* Nibble in key pointed to by index*/ int pibble = 0;/* Nibble pos from parent */ long index; /* The nibble data position index */ URT_NODE *node = 0; /* Current node in tree search */ URT_NODE *parent=0; /* Current node in tree search */ int valeq; /* Boolean - true if values equal */ int i; /* Utility loop index */ node = (len < URT_MAX_SMALL_STRING_SIZE) ? handle->roots[len] : get_big_node(handle,len,0); if (!key || !node) return 0; for (;;) { index = node->index >> 1; bite = key[index]; nibble = ((node->index & 1)? bite>>4 : bite) & 0xf; if (node->mode & mode_mask[nibble]) { parent = node; pibble = nibble; node = node->un[nibble].kids; continue; } valeq = handle->id_type ? (node->un[nibble].data->id.is_ptr == value.is_ptr) : (node->un[nibble].data->id.is_int == value.is_int) ; if (valeq && (urtree_verify(len, key, node->un[nibble].data->data ))) { int pos = 0; int count = 0; node->un[nibble].data = 0; for (i= 16;--i>=0;) { if (node->un[i].data) { pos = i; count++; } } if ((count == 1) && parent) { if (node->mode & mode_mask[pos]) { parent->un[pibble].kids = node->un[pos].kids; } else { parent->un[pibble].data = node->un[pos].data; parent->mode &= ~mode_mask[pibble]; } free_node(handle,node); } return 1; } break; } return 0; } /**************************************************************/ /* */ /* urtree_verify; */ /* This routine compares two data elements and reports */ /* 1 (true) if the are equal and 0 (false) if they are not. */ /* The code is optimized for the case where are equal. */ /* */ /**************************************************************/ int urtree_verify(long len, unsigned char *s0, unsigned char *s1) { int i; int val = 0; unsigned long *b0; unsigned long *b1; if (len >=(2*sizeof(unsigned long)-1)) { b0 = (unsigned long *)s0; b1 = (unsigned long *)s1; for (i = len/(2*sizeof(unsigned long));i>0;i--) { val |= (*b0++ ^ *b1++); val |= (*b0++ ^ *b1++); } if (val) return 0; s0 = (unsigned char *)b0; s1 = (unsigned char *)b1; len = len &(2*sizeof(unsigned long)-1); } for (i = len; --i>=0;) { val |= (s0[i] ^ s1[i]); } return !val; } /**************************************************************/ /* */ /* urtree_print_forest: */ /* This routine prints the full UR tree. There is a different*/ /* UR tree for each data element length. */ /* */ /**************************************************************/ void urtree_print_forest(FILE * ofptr, URT_HANDLE * handle) { int i; URT_NODE ** roots; roots = handle->roots; for (i=0;iun[0].link = handle->node_freelist; handle->node_freelist = node; } /**************************************************************/ /* */ /* get_bag: */ /* This routine allocates space for a new bag. The arguments */ /* are the number of elements in the bag and the size of an */ /* element. */ /* */ /**************************************************************/ static URT_BAGS * get_bag(URT_HANDLE * handle, int num, size_t size) { URT_BAGS *bag; bag = malloc(sizeof (URT_BAGS)); if (!bag) { fprintf(stderr,"Can't allocate space for a bag\n"); exit(EXIT_FAILURE); } bag->space = malloc(num *size); if (!bag->space) { fprintf(stderr,"Can't allocate space within a bag\n"); exit(EXIT_FAILURE); } bag->pos = num; bag->link = handle->bag_list; handle->bag_list = bag; return bag; } /**************************************************************/ /* */ /* get_big_node: */ /* This routine accepts an integer length as input. It returns*/ /* the root node for that length. */ /* */ /**************************************************************/ URT_NODE * get_big_node(URT_HANDLE * handle, int ival, int create_flag) { int bite; int nibble; int index; int len = 0; UCHAR * data; URT_NODE * node = 0; URT_NODE * new_node = 0; UCHAR * ref; URT_INTNODE *pair; int i; int old_bite; int new_bite; int old_nibble; int new_nibble; int depth = 0; data = (UCHAR *)&ival; if (!handle->big_data) { if (!create_flag) {return 0;} node = get_node(handle); handle->big_data = node; nibble = data[0] & 0xf; pair = store_intnode(handle,ival); pair->node = get_node(handle); node->un[nibble].pair = pair; node->index = 0; return node->un[nibble].pair->node; } node = handle->big_data; len = sizeof (ival); for (;;) { depth++; index = node->index >> 1; bite = data[index]; nibble = ((node->index & 1) ? bite >>4 : bite) & 0xf; pair = node->un[nibble].pair; if (! node->un[nibble].data) { if (!create_flag) {return 0;} node->un[nibble].pair = store_intnode(handle,ival); node->mode = node->mode & (~mode_mask[nibble]); return node->un[nibble].pair->node; } else if ((node->mode) & mode_mask[nibble]) { node = node->un[nibble].kids; } else { pair = node->un[nibble].pair; if (pair->ival == ival) return pair->node; if (!create_flag) {return 0;} ref = (UCHAR *)&(pair->ival); for (i=0;(imode = node->mode | mode_mask[nibble]; new_node = get_node(handle); node->un[nibble].kids = new_node; new_node->index = 2*i; old_bite = ref[i]; new_bite = data[i]; old_nibble = old_bite & 0xf; new_nibble = new_bite & 0xf; if (old_nibble == new_nibble) { old_nibble = (old_bite >> 4) &0xf; new_nibble = (new_bite >> 4) &0xf; new_node->index++; } new_node->un[old_nibble].pair = pair; new_node->un[new_nibble].pair = store_intnode(handle,ival); return new_node->un[nibble].pair->node; } } } /**************************************************************/ /* */ /* get_diff_pos: */ /* This routine compares two data elements and identifies */ /* a byte position at which they differ. Zero indexing is */ /* used. A value of -1 is returned if they are identical. */ /* */ /* This routine is similar to cs_verify; however the objective*/ /* is different. The current implemention finds the first */ /* byte at which the strings differ; this may not be the best */ /* strategy. */ /* */ /**************************************************************/ static int get_diff_pos(UCHAR *s0, UCHAR *s1, int len, long curpos) { int i; unsigned int * b0; unsigned int * b1; int j; int j_start; if (len >= 8) { b0 = (unsigned int *)s0; b1 = (unsigned int *)s1; for (i = 0;i<(len/4);i++) { if (b0[i]^b1[i]) break; } j_start = 4*i; } else j_start = 0; for (j = j_start;jnode_bags->pos > 1) { --(handle->node_bags->pos); node = (URT_NODE *)handle->node_bags->space; node += handle->node_bags->pos; } else if (handle->node_freelist) { node = handle->node_freelist; handle->node_freelist = handle->node_freelist->un[0].link; } else { bag = get_bag(handle, BAG_SIZE, sizeof (URT_NODE)); handle->node_bags = bag; --(handle->node_bags->pos); node = (URT_NODE *)handle->node_bags->space; node += handle->node_bags->pos; } node->index = 0; node->mode = 0; u = node->un; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; (*u++).data = 0; return node; } /**************************************************************/ /* */ /* urtree_print_tree: */ /* This routine prints the UR tree corresponding to a given */ /* data element length. It actually only handles one level */ /* of the tree per invocation. The tree is printed out by */ /* recursive calls. */ /* */ /**************************************************************/ void print_tree(URT_HANDLE *handle, FILE * ofptr, int level, int sibno, URT_NODE * node) { int i,j; if (!node) return; fprintf(ofptr,"(%d.%d.%d)\n",level,(node->index/2),(node->index &1)); for (i = 0; i < 16; i++) { if (!node->un[i].data) continue; for (j=0;j<(level+1);j++) fprintf(ofptr,"%3c",' '); fprintf(ofptr,"%x ",i); if (node->mode & mode_mask[i]) { print_tree(handle, ofptr, level+1,i,node->un[i].kids); continue; } if (handle->id_type) fprintf(ofptr,"(%p) ",node->un[i].data->id.is_ptr); else fprintf(ofptr,"(%d) ",node->un[i].data->id.is_int); for (j=0;jun[i].data->len;j++) { fprintf(ofptr,"%c",node->un[i].data->data[j]); } fprintf(ofptr,"\n"); } } /**************************************************************/ /* */ /* store_data: */ /* This routine creates a local copy of a data descriptor and */ /* stores it in space that can be deallocated by the close */ /* routine. */ /* */ /**************************************************************/ static URT_VALUE * store_data (URT_HANDLE * root, URT_VALUE * datum) { URT_VALUE *data; URT_BAGS *bag; if (!root->data_bags || (root->data_bags->pos <= 1)) { bag = get_bag(root, BAG_SIZE, sizeof (URT_VALUE)); root->data_bags = bag; } --(root->data_bags->pos); data = (URT_VALUE *)root->data_bags->space; data += root->data_bags->pos; data->len = datum->len; data->data = datum->data; data->id = datum->id; return data; } /**************************************************************/ /* */ /* store_intnode: */ /* This routine creates a pair structure. It also creates a */ /* node to serve as the root for a big length. The other */ /* input is the big length. */ /* */ /**************************************************************/ static URT_INTNODE * store_intnode (URT_HANDLE * root,int ival) { URT_INTNODE *pair; URT_BAGS *bag; if (!root->pair_bags || (root->pair_bags->pos <= 1)) { bag = get_bag(root, BAG_SIZE, sizeof (URT_INTNODE)); root->pair_bags = bag; } --(root->pair_bags->pos); pair = (URT_INTNODE *)root->pair_bags->space; pair += root->pair_bags->pos; pair->ival = ival; pair->node = get_node(root); return pair; } /**************************************************************/ /* */ /* sub_node: */ /* This routine takes two data sets and a diff pos and makes */ /* a new node containing the data sets at that position. */ /* */ /**************************************************************/ static URT_NODE * sub_node (URT_HANDLE * root, URT_VALUE * old, URT_VALUE * new, int pos) { URT_NODE * node; int old_bite; int new_bite; int old_nibble; int new_nibble; node = get_node(root); node->index = 2*pos; old_bite = old->data[pos]; new_bite = new->data[pos]; old_nibble = old_bite & 0xf; new_nibble = new_bite & 0xf; if (old_nibble == new_nibble) { old_nibble = (old_bite >> 4) &0xf; new_nibble = (new_bite >> 4) &0xf; node->index++; } node->un[old_nibble].data = old; node->un[new_nibble].data = store_data(root,new); return node; }