// cache_friend.c -- heapsort addressing with fractal clustering // Steve Witham sw at tiac not_this dot net 2009.10. // Compile with -DVANILLA or -DLOOP or -DINLINE or -DPAGE1023, etc. // Group heap entries in clusters of 3. One parent with two children, each // having two children, so a cluster of 3 entries has 2 * 2 = 4 grandchildren. // A cluster of 5 (one on top, four below) // clusters of 3 has 15 entries and 4 * 4 = 16 descendants. // A cluster of 17 clusters of 15 has 255 entries and 16 * 16 = 256 descendants. // A cluster of 257 clusters of 255 has 65535 entries and 65536 descendants. // and so on. #include #include #include #include #include #include #define raise(str){\ fprintf( stderr, (str) };\ exit(1);\ } #ifdef VANILLA #define ARRANGEMENT "vanilla" #define REALSZ(i) (i) #define PREV(i) ((i)-1) #define NEXT(i) ((i)+1) #define children(parent,c1,c2)\ { (c1)=(parent)*2+1; (c2)=(parent)*2+2; } #define parent(child,parent) (parent)=(((child)-1)/2) #endif #ifdef PAGE1023 /* Each page has entries 0..1022. Bottom row is 511..1022. Each bottom entry has top entries of two pages as children. */ #define ARRANGEMENT "pages of 1023" #define REALSZ(sz) ( ((sz)/1023) * 1024 + (sz)%1023 ) #define PREV(i) ( ((i)&0x3FF) == 0? (i)-2 : (i)-1 ) #define NEXT(i) ( ((i)&0x3FF) == 0x3FE? (i)+2 : (i)+1 ) // Given an address on a page, give parent of first entry on that page: #define page_parent(c) ( (((c)/1024-1)/2/512*1024) + (((c)/1024-1)/2%512) + 511 ) #define children(parent,c1,c2) {\ register int chp, chi; \ chp = (parent); \ chi = chp % 1024; \ if ( chi < 511 ) { \ chp = chp + chi + 1; \ (c1) = chp; \ (c2) = chp + 1; \ } else { \ if( chp >= (long) page_parent(1LL<<31) ) { \ (c1) = -1; (c2) = -1; \ } else { \ chp = ( chp + chi - 1021 ) * 1024; \ (c1) = chp; \ (c2) = chp + 1024; \ } \ } \ } #endif /* This one takes explaining. The first page has 1023 entries: 0..1022. Each entry in 511..1022, the bottom row of any page, has two children which are entries 1 and 2 on another page. So pages 1 and up have 1022 entries: 1..1022. */ #ifdef PAGE1022 #define ARRANGEMENT "pages of 1022" #define REALSZ(sz) ( (((sz)-1)/1022) * 1024 + ((sz)-1)%1022 + 1 ) #define PREV(i) ( ((i)&0x3FF) == 1 && (i)>1? (i)-3 : (i)-1 ) #define NEXT(i) ( ((i)&0x3FF) == 0x3FE? (i)+3 : (i)+1 ) #define page_parent(c) ( (((c)/1024-1)/512*1024) + (((c)/1024-1)%512) + 511 ) #define children(parent,c1,c2) {\ register int chp, chi; \ chp = (parent); \ chi = chp % 1024; \ if ( chi < 511 ) { \ chp = chp + chi; \ (c1) = chp + 1; \ (c2) = chp + 2; \ } else { \ if( chp >= (long) page_parent(1LL<<31) ) { \ (c1) = -1; (c2) = -1; \ } else { \ chp = ( chp + chi - 1020 ) * 512; \ (c1) = chp + 1; \ (c2) = chp + 2; \ } \ } \ } #endif /* 1022a is like 1022 but pairs are on even boundaries, so Page 0 has 1..1023, and other pages have 2..1023. It's basically 1022 shifted over by one entry. */ #ifdef PAGE1022a #define ARRANGEMENT "pages of 1022 alligned" #define REALSZ(sz) ( (((sz)-1)/1022) * 1024 + ((sz)-1)%1022 + 2 ) #define PREV(i) ( ((i)&0x3FF) == 2 && (i)>2? (i)-3 : (i)-1 ) #define ROOT (1) #define NEXT(i) ( ((i)&0x3FF) == 0x3FF? (i)+3 : (i)+1 ) #define page_parent(c) ( (((c)/1024-1)/512*1024) + (((c)/1024-1)%512) + 512 ) #define children(parent,c1,c2) {\ register int chp, chi; \ chp = (parent); \ chi = chp % 1024; \ if ( chi < 512 ) { \ chp = chp + chi; \ (c1) = chp; \ (c2) = chp + 1; \ } else { \ if( chp >= (long) page_parent(1LL<<31) ) { \ (c1) = -1; (c2) = -1; \ } else { \ chp = ( chp + chi - 1022 ) * 512; \ (c1) = chp + 2; \ (c2) = chp + 3; \ } \ } \ } #endif /* 510a is like 1022a except pages of 510. Each entry in 256..511, the bottom row of any page, has two children which are entries 2 and 3 on another page. So pages 1 and up have 510 entries: 2..511. */ #ifdef PAGE510a #define ARRANGEMENT "pages of 510 aligned" #define REALSZ(sz) ( (((sz)-1)/510) * 512 + ((sz)-1)%510 + 2 ) #define PREV(i) ( ((i)&0x1FF) == 2 && (i)>2? (i)-3 : (i)-1 ) #define ROOT (1) #define NEXT(i) ( ((i)&0x1FF) == 0x1FF? (i)+3 : (i)+1 ) #define page_parent(c) ( (((c)/512-1)/256*512) + (((c)/512-1)%256) + 256 ) #define children(parent,c1,c2) {\ register int chp, chi; \ chp = (parent); \ chi = chp % 512; \ if ( chi < 256 ) { \ chp = chp + chi; \ (c1) = chp; \ (c2) = chp + 1; \ } else { \ if( chp >= (long) page_parent(1LL<<31) ) { \ (c1) = -1; (c2) = -1; \ } else { \ chp = ( chp + chi - 510 ) * 256; \ (c1) = chp + 2; \ (c2) = chp + 3; \ } \ } \ } #endif /* Clumpy 510 alligned is pages of 510, but clumpy arrangement within pages. Each entry in 256..511, the bottom row of any page, has two children which are entries 2 and 3 on another page. So pages 1 and up have 510 entries: 2..511. */ #ifdef CLUMPY510a #define ARRANGEMENT "clumpy 510 aligned" #define REALSZ(sz) ( (((sz)-1)/510) * 512 + ((sz)-1)%510 + 2 ) #define PREV(i) ( ((i)&0x1FF) == 2 && (i)>2? (i)-3 : (i)-1 ) #define ROOT (1) #define NEXT(i) ( ((i)&0x1FF) == 0x1FF? (i)+3 : (i)+1 ) // parent_page is beginning of page with parent of c: #define parent_page(c) (((c)/512 - 1)/256 * 512 ) #define children(parent,c1,c2) { \ register int chp, chi, chj, chk; \ if( ( chp = (parent) ) == 1 ) { \ (c1) = 2; \ (c2) = 3; \ } else { \ chi = chp % 512 - 2; \ if( ( chj = chi % 6 ) < 2 ) { \ chp += chj; \ } else { \ if( ( chk = chi % 30 ) < 8 ) { \ chp += chj * 5 - 8; \ } else { \ if( chi < 30 ) { \ chp += 19 * chi + 10 * chj - 152; \ } else { \ if( chp >= (long) parent_page(1LL<<31) ) { \ chp = -4; \ } else { \ chp = ( ( 15 * chp + chi + 10*chj + 4*chk ) / 30 \ - 22 ) * 512; \ } \ } \ } \ } \ (c1) = chp + 2; \ (c2) = chp + 3; \ } \ } #endif // chp += 29 * chi - (chi/6) * 60 - 152; #ifdef LOOP #define ARRANGEMENT "clumpy loop" #define REALSZ(i) (i) #define PREV(i) ((i)-1) #define NEXT(i) ((i)+1) #define children(parent,c1,c2) {\ register int a,d,k,p;\ a = (parent);\ d = a % 3;\ if (d == 0) {\ (c1)=a+1; (c2)=a+2;\ } else {\ k = 1 + ( d - 1 ) * 2;\ a /= 3;\ p = 4;\ d = a % 5;\ while ( d != 0 ) {\ k += ( d - 1 ) * p;\ a /= ( p + 1 );\ p *= p;\ d = a % ( p + 1 );\ }\ a = (a+k)*(p-1);\ (c1) = a; (c2) = a + p - 1; \ } \ } #define parent(child,parent) {\ register int a, p, q, n;\ a=(child);\ if (a % 3 > 0) { \ (parent) = a / 3 * 3;\ } else {\ if (a == 0 ) {\ raise( "Address 0 has no parent." );\ }\ p = 4;\ q = 16;\ n = a % 15;\ while ( n == 0 ) { /* Find smallest enclosing clump child is not the top of. */ \ p = q;\ q *= q; \ n = a % ( q - 1 );\ a -= n;\ }\ /* Here a = top of the larger clump that includes both child and parent \ (q-1) = size of that larger clump\ n = child's address relative to a -- top of a foot clump\ (p-1) = size of head and foot clumps within the larger clump\ */ \ n = ( int(n) / (p-1) - 1 ) / 2; // no. of parent along bottom row of head clump\ // 0 <= n < (p/2) ... ( log2(p) - 1 ) bits \ q = 2;\ while ( q < p ) {\ a += ( n % q + 1 ) * ( q - 1 );\ n /= q;\ q *= q;\ }\ (parent) = a;\ }\ } #endif #ifdef INLINE #define ARRANGEMENT "clumpy" #define REALSZ(i) (i) #define PREV(i) ((i)-1) #define NEXT(i) ((i)+1) #define children(parent,c1,c2) {\ register int a, z, yz, xyz, wxyz;\ a = (parent);\ z = a % 3;\ if (z == 0) {\ (c1)=a+1; (c2)=a+2;\ } else {\ yz = a % 15;\ if ( yz < 3 ) {\ a += z * 5;\ (c1) = a - 3; (c2) = a;\ } else {\ xyz = a % 255;\ if ( xyz < 15 ) {\ a += z * 10 + yz * 19 - 75;\ (c1) = a; (c2) = a + 15;\ } else {\ wxyz = a % 65535;\ if ( wxyz < 255 ) {\ a += z * 170 + yz * 68 + xyz * 271 - 5355;\ (c1) = a; (c2) = a + 255;\ } else { /* if a % (2**32-1) < 65535 */ \ if ( a < 65535 ) {\ a += -18153195 + z*43690 + yz*17476 + xyz*4112 + wxyz*65791; \ (c1) = a; (c2) = a + 65535;\ } else {\ (c1) = -1; (c2) = -1; /* off the bottom */ \ } } } } }\ } #define parent(child,parent) { \ register int a, d; \ a = (child); \ d = a % 3; \ if (d > 0) { \ (parent) = a - d; \ } else { \ d = a % 15 - 3; \ if ( d >= 0 ) { \ (parent) = a - d + d / 6 - 2; \ } else { \ d = a % 255 - 15; \ if ( d >= 0 ) { \ (parent) = a - d + d / 30 + d / 60 - 11; \ } else { \ d = a % 65535 - 255; \ if ( d >= 0 ) { \ (parent) = a - d + d / 510 + d / 1020 + d / 4080 * 3 - 236; \ } else { \ if (a == 0) { \ raise( "Address 0 has no parent." ); \ } \ if ( a & 0x80000000 ) { \ raise Exception( "Not handling addresses > 2**31 = 1." ); \ } \ d = a - 65535; \ (parent) = d/131070 + d/262140 + d/1048560 * 3 + d/16776960 * 15 + 274; \ } } } }\ } #endif /************************* END OF ARRANGEMENTS ***********************/ #ifndef ROOT #define ROOT (0) #endif #define adjust_heap(adja,adj_j0,adjsz) { \ int adjleft, adjright, adj_j, adj_k, adjtmp; \ adj_j = (adj_j0); \ children(adj_j,adjleft,adjright); \ while ( adjleft >= ROOT && adjleft < (adjsz) ) { \ if ( adjright < adjsz && (adja)[ adjright ] > (adja)[ adjleft ] ) { \ adj_k = adjright; \ } else { \ adj_k = adjleft; \ } \ if( (adja)[ adj_j ] >= (adja)[ adj_k ] ) { \ break; \ } \ tmp = (adja)[adj_j]; (adja)[adj_j] = (adja)[adj_k]; (adja)[adj_k] = tmp; \ adj_j = adj_k; \ children(adj_j,adjleft,adjright); \ } \ } // printf( " " ); \ // printf( "(%d,%d:%d) ", adj_k/512, adj_k%512, (adja)[ adj_k ] ); \ // printf("\n" ); \ void my_heapsort( int *a, int rsz ) { int i, tmp; int c1, c2; // Form the heap: for( i = PREV(rsz); i >= ROOT; i=PREV(i) ) { adjust_heap( a, i, rsz ); } // Doh, check the heap. // for( i = ROOT; i <= PREV(rsz); i=NEXT(i) ) { // children(i,c1,c2); // if( c1 > rsz ) break; // assert( a[i] >= a[c1] ); // if( c2 > rsz ) break; // assert( a[i] >= a[c2] ); // } // Extract highest-first while maintaining heap: for( i = PREV(rsz); i > ROOT; i=PREV(i) ) { // printf( "i = %d ( %d*512+%d ), \n", // i, i/512, i%512 ); // printf( " ROOT = %10d <-- take this\n", a[ROOT] ); // printf( " a[i] = %10d\n", a[i] ); tmp = a[ ROOT ]; a[ ROOT ] = a[ i ]; a[ i ] = tmp; adjust_heap( a, ROOT, i ); if( a[i] < a[ROOT] ) { printf( "rsz = %d, i = %d ( %d*512+%d ), a[i] = %d, a[ROOT] = %d\n", rsz, i, i/512, i%512, a[i], a[ROOT] ); assert( a[i] >= a[ROOT] ); } } } unsigned seed = 0; unsigned congrnd32( void ) { seed = ( 0x63c63cd9 * seed + 0x9c39c33d ) & 0xFFFFFFFF; return seed; } unsigned congrnd31( void ) { seed = ( 0x63c63cd9 * seed + 0x9c39c33d ) & 0xFFFFFFFF; return (int) seed & 0x7fffFFFF; } void randomize_array( int *a, int sz, int rsz ) { int i, k; seed = 0; k = 0x7fffffff / ( sz * 2 ); for( i = ROOT; i < rsz; i = NEXT(i) ) { a[i] = congrnd31() / k; } } void show_array( int *a, int rsz ) { char nstr[ 20 ]; int tab, i; strcpy( nstr, " " ); printf( nstr ); tab = strlen( nstr ); for( i = ROOT; i < rsz; i=NEXT(i) ) { sprintf( nstr, "%d ", a[i] ); tab += strlen( nstr ); if( tab > 75 ) { break; } printf( nstr ); } printf( "...\n" ); } double doubletime( void ) { struct timeval tv; struct timezone tz; gettimeofday( &tv, &tz ); return( (double) tv.tv_sec + tv.tv_usec * .000001 ); } void test_my_heapsort( void ) { int sz, rsz, *array; double t0, t1, t2, t3, t4, t5, t1_2, t2_3, pwr; int p, i; int c1, c2, pp; // for( p = 1; p <= 552; p=NEXT(p) ) { // children(p,c1,c2); // printf( "%d => %d", p, c1 ); // #ifdef CLUMPY510a // pp = parent_page(c1); // printf( " parent %d", pp ); // #endif // printf( "\n" ); // } // for( pwr = 19; pwr < 29; pwr += 2 ) { // #ifdef VANILLA // for( pwr = 7; pwr <= 27; pwr += 2 ) { // #else // for( pwr = 7; pwr <= 27; pwr += 2 ) { // #endif for( pwr = 7.0; pwr < 23.25; pwr += .5 ) { sz = round( pow( 2.0, pwr ) ); t0 = doubletime(); rsz = REALSZ(sz); array = valloc( rsz * sizeof(int) ); assert( array != NULL ); t1_2 = t2_3 = 0.0; for( i = 0; t2_3 < 10.0; i++ ) { t1 = doubletime(); randomize_array( array, sz, rsz ); t2 = doubletime(); t1_2 += t2 - t1; if( i == 0 ) { printf( "size: %d alloc: %f ", sz, t1-t0 ); fflush( stdout ); } t2 = doubletime(); my_heapsort( array, rsz ); t3 = doubletime(); t2_3 += t3 - t2; } t1_2 /= i; t2_3 /= i; printf( "randomize: %f\n", t1_2 ); show_array( array, rsz ); printf( " %s: %f\n", ARRANGEMENT, t2_3 ); fflush( stdout ); } } int main() { test_my_heapsort(); }