/* sort_n -- Squozen Sort: sort numbers in a limited amount of space. sort_n [n m] output Reads n 32-bit unsigned numbers expressed in ascii decimal, one number per line, from stdin. There can be duplicates. The default is 1,000,000 longs. Sorts them in m bytes of memory not counting "overhead" and local variables. It uses no recursive procedures and no temporary files. The default is 2,000,000 bytes. Outputs ascii numbers to stdout. or else if m is too tight for n, the program says it can't do it. Compare to the Unix command: sort -n output The sorting and compression methods, and some tests, are described at http://www.tiac.net/~sw/2009/10/Squozen_sort Steve Witham ess double-you at remove_this tiac dot net ("FutureNerd" in some places.) 2009.01.17 */ #include #include #include #include // for memmove() #include #define CEIL_DIV(num,denom) ( ( (num) + (denom) - 1 ) / (denom) ) /* bytes_for_buffer -- Determine the maximum size of buffer needed for n numbers (gaps) totalling less than input_range, using nb binary bits per number (e.g. 1000000, 2^32, 12). Rationale for this calculation is in http://www.tiac.net/~sw/2009/10/Squozen_sort/squozen_code.html . */ size_t bytes_for_buffer( size_t n, long long input_range, int nb ) { long long n_unary_ones = CEIL_DIV( input_range, (long long) 1 << nb ); long long sz = n * ( 1 + nb ) + n_unary_ones; return CEIL_DIV( sz, 8 ); } // best_code_nb -- Determine the best number of binary bits per entry // to encode n numbers (gaps) totalling less than input_range // (e.g. 1000000, 2^32). This just slogs through all the // possible nb's and picks the best one. int best_code_nb( size_t n, long long input_range ) { int minmax_nb = 0; size_t minmax_sz = bytes_for_buffer( n, input_range, minmax_nb ); int nb; long long sz; for( nb = 1; nb < 64 && ((long long) 1<buf = buf; c->bits = 0; c->bits_in = 0; c->nb = nb; c->n = 0; c->prev = 0; } // send_lsbs -- Send the nbits<=32 least-significant bits of a 32-bit word. void send_lsbs( Compressor *c, unsigned long word, int nbits, char *buf_end ) { word <<= (32 - nbits); // Shift the msbs off the end. c->bits = ( c->bits << 32 | word ) >> (32 - nbits); c->bits_in += nbits; if( c->buf + CEIL_DIV( c->bits_in, 8 ) > buf_end ) { die( "send_lsbs: too many bits for the buffer." ); } if( c->bits_in >= 32 ) { word = c->bits >> (c->bits_in - 32); *(unsigned long *) c->buf = word; c->buf += sizeof( word ); c->bits_in -= 32; } } // compressor_flush -- Flush the remaining bits (< 32 of them). // send_lsbs already checked whether there's room for them. // Leave buf pointing just after the last byte written. void compressor_flush( Compressor *c ) { while( c->bits_in >= 8 ) { *c->buf++ = c->bits >> ( c->bits_in - 8 ); c->bits_in -= 8; } if( c->bits_in > 0 ) { *c->buf++ = c->bits << ( 8 - c->bits_in ); } } // compress -- Take difference from previous and encode. // [1...] 0 xxxx xxxx xxxx (c->nb x's) // If base = 1 << c->nb, then send // (gap/base) one bits, a zero, and (gap % base) as nb binary bits. void compress( Compressor *c, unsigned long word, char *buf_end ) { unsigned long n_ones, gap; if( word < c->prev ) { die( "compress: word < prev" ); } gap = word - c->prev; c->prev = word; for( n_ones = gap >> c->nb; n_ones >= 32; n_ones -= 32 ) { send_lsbs( c, 0xffffFFFF, 32, buf_end ); } send_lsbs( c, 0xffffFFFe, n_ones + 1, buf_end ); send_lsbs( c, gap, c->nb, buf_end ); c->n++; } // ===== Decompressor stuff. There's also only going to be one of these: typedef struct Decompressor { char *buf; char *data_end; // This is the address after the last data byte. char *buf_end; // This is the physical end. Data doesn't have to reach it. long long n; // number of entries in the buffer, can go < 0 size_t n_max; unsigned long long bits; unsigned long prev; int bits_in; int nb; } Decompressor; Decompressor decompressor; void decompressor_init( Decompressor *d, char *buf, char *data_end, char *buf_end, int n, int nb ) { if( (int) buf & (sizeof(long)-1) != 0 ) { die( "decompressor_init: buf should be word-aligned." ); } d->buf = buf; if( data_end < buf ) { die( "decompressor_init: data_end should be >= buf." ); } d->data_end = data_end; if( buf_end < data_end ) { die( "decompressor_init: buf_end should be >= data_end." ); } d->buf_end = buf_end; d->n = n; d->nb = nb; d->prev = 0; d->bits = 0; d->bits_in = 0; } #define LONGBITS (sizeof(long)*8) #define CHARBITS (sizeof(char)*8) #define decompressor_get_bits(d) { \ if( (d)->bits_in < LONGBITS ) { \ if( (d)->buf + sizeof(long) <= (d)->data_end) { \ (d)->bits = ((d)->bits << LONGBITS) | *(unsigned long *) (d)->buf; \ (d)->buf += sizeof(long); \ (d)->bits_in += LONGBITS; \ } else { \ while( (d)->buf < (d)->data_end ) { \ (d)->bits = ((d)->bits << CHARBITS) | *(unsigned char *) (d)->buf++; \ (d)->bits_in += CHARBITS; \ } } } } void decompressor_close( Decompressor *d ) { int okay = TRUE; decompressor_get_bits(d); if( d->bits_in < 0 || d->bits_in >= 8 || d->n != 0 ) { fprintf( stderr, "decompressor_close: bits_in = %d, n = %lld\n", d->bits_in, d->n ); exit( 1 ); } } // decompress sets *p_word and returns TRUE if there's a word, // returns FALSE at end of data, // or dies if data ends uncleanly. int decompress( Decompressor *d, unsigned long *p_word ) { int n_ones; unsigned long bits; int bits_in; if( d->n <= 0 ) { decompressor_close( d ); return( FALSE ); } decompressor_get_bits(d); if( d->bits_in <= 0 ) decompressor_close(d); // for the die for( n_ones = 0; d->bits_in >= 32 && ( ( d->bits >> (d->bits_in - 32)) & 0xFFFFffffULL ) == 0xFFFFffffULL; n_ones += 32 ) { d->bits_in -= 32; decompressor_get_bits(d); } // Now there are fewer than 32 one bits and (hopefully) a zero. bits_in = d->bits_in; if( bits_in > 32 ) { bits = d->bits >> ( bits_in - 32 ); } else { bits = d->bits << ( 32 - bits_in ); // including << 0 } if( bits_in >= 16 && bits >= 0xffff0000 ) { bits_in -= 16; bits <<= 16; } if( bits_in >= 8 && bits >= 0xff000000 ) { bits_in -= 8; bits <<= 8; } if( bits_in >= 4 && bits >= 0xf0000000 ) { bits_in -= 4; bits <<= 4; } if( bits_in >= 2 && bits >= 0xc0000000 ) { bits_in -= 2; bits <<= 2; } if( bits_in >= 1 && bits >= 0x80000000 ) { bits_in -= 1; } n_ones += d->bits_in - bits_in; d->bits_in = bits_in; d->bits_in--; // for the zero (if any, else d->bits_in == -1) decompressor_get_bits(d); if( d->bits_in < d->nb ) { die( "decompress: not enough \"binary bits\"" ); } bits = ( d->bits >> (d->bits_in - d->nb) ) & ( 0xFFFFffff >> ( 32 - d->nb ) ); d->prev += ( n_ones << d->nb ) + bits; d->bits_in -= d->nb; *p_word = d->prev; d->n--; return( TRUE ); } // ===== MERGE // When merge is called, old is set up to read the old data in the // big buffer. On return, old is re-set up to read the merged data. void merge( Decompressor *old, unsigned long *new_buf, unsigned long *new_buf_end, int recompress ) { size_t movement; size_t len; char *big_buf = old->buf; // Move data up against the end of the buffer, // and fix old to reflect that: movement = ( ( old->buf_end - old->data_end ) / sizeof(long) ) * sizeof(long); // Align the moved data on a word boundary. len = old->data_end - old->buf; memmove( old->buf + movement, old->buf, len ); old->buf += movement; old->data_end += movement; // Set up to compress starting at the beginning of the buffer: Compressor merged_itself; Compressor *merged = &merged_itself; compressor_init( merged, big_buf, old->nb ); int old_ready, new_ready; unsigned long old_word, word; size_t n = 0; old_ready = decompress( old, &old_word ); new_ready = new_buf < new_buf_end; while( old_ready || new_ready ) { if( !new_ready || ( old_ready && old_word <= *new_buf ) ) { word = old_word; old_ready = decompress( old, &old_word ); } else { word = *new_buf; new_ready = ++new_buf < new_buf_end; } if( recompress ) { compress( merged, word, old->buf /* don't overwrite this */ ); } else { printf( "%lu\n", word ); } n++; } decompressor_close( old ); compressor_flush( merged ); // Re-init old to read what was merged: decompressor_init( old, big_buf, merged->buf /* end of the merged data */, old->buf_end, merged->n, merged->nb ); } // ===== main reads args, setup calculates buffer sizes and mallocs, // read_nums reads input, action processes ===== // setup--Calculate nb (# of binary bits in code) & malloc buffers. // Sets up *big_d with nb and empty big buffer description. // void setup( Decompressor *big_d, size_t n, size_t n_bytes_allowed, long long input_range, unsigned long **plil_buf, size_t *plil_buf_n_max ) { size_t n_to_compress, prev_n_to_compress, big_buf_needs; decompressor_init( big_d, 0, 0, 0, 0, 0 ); if( n_bytes_allowed <= 0 ) { die( "setup: n_bytes_allowed <= 0" ); } // We don't have to compress the last lil_buf-full, but we don't // know how big lil_buf will be yet. Last-minute hack to take // advantage of this: loop till the buffer sizes stabilize: n_to_compress = n; prev_n_to_compress = n + 1; while( n_to_compress > 0 && n_to_compress < prev_n_to_compress ) { big_d->nb = best_code_nb( n_to_compress, input_range ); big_buf_needs = bytes_for_buffer( n_to_compress, input_range, big_d->nb ) + sizeof(long); // Allow for alignment in merge. if( big_buf_needs + sizeof(**plil_buf) > n_bytes_allowed ) { die( "setup: big_buf_needs too many bytes" ); } size_t leftover = n_bytes_allowed - big_buf_needs; *plil_buf_n_max = leftover / sizeof(**plil_buf); prev_n_to_compress = n_to_compress; // Don't have to compress the last little buffer full: if( *plil_buf_n_max >= n ) { n_to_compress = 0; // n_to_compress is a size_t: unsigned. } else { n_to_compress = n - *plil_buf_n_max; } } if( n_to_compress > 0 ) { big_d->n_max = n_to_compress; big_d->buf = malloc( big_buf_needs ); if( big_d->buf == NULL ) { die( "setup: couldn't malloc big_d->buf" ); } } else { big_d->n_max = 0; big_buf_needs = 0; big_d->buf = 0; *plil_buf_n_max = n_bytes_allowed / sizeof( **plil_buf ); } big_d->data_end = big_d->buf; big_d->buf_end = big_d->buf + big_buf_needs; *plil_buf = (unsigned long *) malloc( *plil_buf_n_max * sizeof( **plil_buf ) ); if( *plil_buf == NULL ) { die( "setup: couldn't malloc lil_buf" ); } } size_t read_nums( unsigned long *nums, size_t nums_n_max ) { char line[ 80 ]; int len; size_t nums_n = 0; char *num_end; static size_t line_no = 0; while( nums_n < nums_n_max && fgets( line, sizeof(line), stdin ) ) { if( strchr( line, '\n' ) == NULL ) { die( "read_nums: input line too long" ); } line_no++; nums[ nums_n ] = strtoul( line, &num_end, 0 ); if( num_end == line ) { fprintf( stderr, "Invalid number on line %lld: %s", (long long) line_no, line ); exit(1); } nums_n++; } return( nums_n ); } int cmpulong( const void *a, const void *b ) { if ( *(unsigned long *) a < *(unsigned long *) b ) return -1; else if( *(unsigned long *) a > *(unsigned long *) b ) return 1; else return 0; } double doubletime( void ) { struct timeval tv; struct timezone tz; gettimeofday( &tv, &tz ); return( (double) tv.tv_sec + tv.tv_usec * .000001 ); } void action( size_t n, size_t n_bytes_allowed, long long input_range ) { Decompressor big_d_itself; Decompressor *big_d = &big_d_itself; size_t big_d_size; unsigned long *lil_buf; size_t lil_buf_n_max; // number of longs that fit size_t lil_buf_n, n_to_read; int remerge; double start, between; double sort_time = 0.0; double merge_time = 0.0; double last_merge_time = 0.0; double read_time = 0.0; int pass = 0; setup( big_d, n, n_bytes_allowed, input_range, &lil_buf, &lil_buf_n_max ); if( big_d->n_max > 0 ) { big_d_size = big_d->buf_end - big_d->buf; fprintf( stderr, "Compressed %lu bytes using %d binary bits => %lu entries.\n", big_d_size, big_d->nb, big_d->n_max ); } fprintf( stderr, "Uncompressed %ld bytes => %ld entries\n", lil_buf_n_max * sizeof( *lil_buf ), lil_buf_n_max ); fprintf( stderr, "Will take %lu / %lu = %lu passes.\n", n, lil_buf_n_max, CEIL_DIV( n, lil_buf_n_max ) ); do { if( n <= lil_buf_n_max ) { n_to_read = n; } else { n_to_read = MIN( lil_buf_n_max, big_d->n_max - big_d->n ); } start = doubletime(); lil_buf_n = read_nums( lil_buf, n_to_read ); read_time += doubletime() - start; n -= lil_buf_n; remerge = ( n > 0 ); if( lil_buf_n > 0 ) { start = doubletime(); qsort( lil_buf, lil_buf_n, sizeof( *lil_buf ), cmpulong ); between = doubletime(); sort_time += between - start; merge( big_d, lil_buf, lil_buf + lil_buf_n, remerge ); if( remerge ) { merge_time += doubletime() - between; } else { last_merge_time = doubletime() - between; } fprintf( stderr, "after pass %d: %lu bytes for %lld entries\n", ++pass, big_d->data_end - big_d->buf, big_d->n ); } } while( n > 0 ); fprintf( stderr, "read %f sec\n", read_time ); fprintf( stderr, "sort %f sec\n", sort_time ); fprintf( stderr, "merge %f sec\n", merge_time ); fprintf( stderr, "last_merge %f sec\n", last_merge_time ); fprintf( stderr, "total %f sec\n", read_time + sort_time + merge_time + last_merge_time ); } void usage( char **argv ) { fprintf( stderr, "Usage: %s\n", argv[0] ); fprintf( stderr, " or: %s n m where n and m are unsigned longs.\n", argv[0] ); exit( 1 ); } main( int argc, char **argv ) { long long n, m; char *endptr1, *endptr2; if( argc == 1 ) { n = 1000000; m = 2000000; } else if ( argc == 3 ) { n = strtoull( argv[1], &endptr1, 0 ); m = strtoull( argv[2], &endptr2, 0 ); if( *endptr1 != '\0' || *endptr2 != '\0' ) { usage( argv ); } } else { usage( argv ); } action( n, m, (long long) 1 << 32 ); }