EPOCH@まつやま 2010 予選問題

id:w125さんの記事に触発され、こちらも予選問題のソースを晒すことにしました。
ちなみに、rankalee(id:simezi_tan)さんと一緒にチーム・えたふぉで参加予定です。

方針

まず、予選は次の4つの基準で競われます。
(1)正確さ
(2)実行速度
(3)メモリ使用量
(4)提出の早さ
このルールを、「正解してさえいれば、速ければ速いほどよい」というふうに読み替え、予選提出コードには次のようなコーディング規約を設けました。
(3)と(4)は無視です。

1. オール・インラインアセンブラなぜか本番のコンパイル時に最適化オプションをつけないようになっていたので、全てインラインアセンブラにすることで何倍か速くなります。(-O2付きだと、Cで書いたものもアセンブラで書いたものも数%しか変わりませんが、-O2なしだと信じられないぐらい変わります。)

2. ライブラリを使わない。(入出力はread()とwrite()で)入力ファイルが巨大なので、そもそもの問題を解く部分よりも、scanf()による文字列パース処理の方が圧倒的に重くなります。
入力ファイルの書式は限られているため、手でパースすることによりscanf()の処理の無駄を防ぐことができます。
また、STLなどを使わないことにより、内部的なnew/deleteの呼び出しを避けることができます。


では、各問に移ります。
ちなみに、Linux環境でないと以下のコードはコンパイルできません。あしからず。
#ifdef EFB_CHECK はデバッグ、#ifdef EFB_STAT はアルゴリズムの分析、#ifdef EFB_BENCH はベンチマークです。

1. 数列の構成は同じ?

素直な実装だと、set >を使うことで簡単に実装できます。
しかしこのままだと、内部的なノードの確保や比較演算でかなりのロスがでるため、次のような最適化を行ないます。
(1)setを999ビットの配列で表現する
(2)set<999ビットの配列> をハッシュマップで実装する
(3)全てのノードをグローバル変数として予め確保しておく
それに加え、入力のパースをread()に置き換えたのが次のソースです。

#include <stdio.h>

#define N (100000)
#define HASHSIZE (131072)
#define BUFSIZE (65536)

char buf[BUFSIZE] __attribute__((aligned(32)));
int head[HASHSIZE];
int bits[N][32] __attribute__((aligned(128)));
unsigned mask[4] __attribute__((aligned(16))) = { 0xffffffffu, 0xffffffffu, 0xffffffffu, 127, };

#ifdef EFB_CHECK
int c_n;

void input_dump() {
    int i, j;
    fprintf(stderr, "input\n");
    for(i=0; i<c_n; i++) {
        for(j=0; j<31; j++) {
            fprintf(stderr, "%x,", bits[i][j]);
        }
        fprintf(stderr, "%x,\n", bits[i][31]&127);
    }
}

void link_dump() {
    int i;
    fprintf(stderr, "head\n");
    for(i=0; i<HASHSIZE; i++) {
         int p=head[i];
         if(p) fprintf(stderr, "%d\n", (p-((int)bits))/128);
         else fprintf(stderr, "-1\n");
    }
    fprintf(stderr, "next\n");
    for(i=0; i<c_n; i++) {
         unsigned p=bits[i][31]&(-128);
         if(p) fprintf(stderr, "%d\n", (p-((int)bits))/128);
         else fprintf(stderr, "-1\n");
    }
}
#endif


// statistics
#ifdef EFB_STAT
#define IF_STAT(a) a
int s_reads, s_nrbyte;
int s_writes, s_nwbyte;

void stat_output() {
    fprintf(stderr, "stats\n");
    fprintf(stderr, "  read   : %d bytes read by %d read().\n",
            s_nrbyte, s_reads);
    fprintf(stderr, "  write  : %d bytes written by %d write().\n",
            s_nwbyte, s_writes);
}
#else
#define IF_STAT(a) ""
#endif


// benchmark
#ifdef EFB_BENCH
#define BENCH_MARK_HERE "call bench_mark\n\t"
#include <sys/time.h>
int markc, markv[10];

void bench_mark() {
    static struct timeval tv;
    gettimeofday(&tv, NULL);
    markv[markc++] = tv.tv_usec;
}

void bench_output() {
    int i;
    fprintf(stderr, "bentch: ");
    for(i=0; i<markc-1; i++) {
        if(i>0) fprintf(stderr, ", ");
        fprintf(stderr, "%d", markv[i+1]-markv[i]);
    }
    fprintf(stderr, "\n");
}
#else
#define BENCH_MARK_HERE ""
#endif


int main() {
    __asm__(
        "pushl   %%ebp\n\t"
        "movl    %%esp, %%ebp\n\t"
        "subl    $80, %%esp\n\t"
        "andl    $-16, %%esp\n\t"
        "movl    %%ebp, 60(%%esp)\n\t"
        BENCH_MARK_HERE
    ".Lread_first:\n\t"
        "movl    $buf, %%esi\n\t"
        "movl    $65536, %%ebx\n\t"
    ".Lread_first_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:-, ebp:-
        // use eax:x, ebx:nbyte, ecx:x, edx:x, esi:p, edi:-, ebp:-
        "xorl    %%eax, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t" // set fd
        "movl    %%esi, 4(%%esp)\n\t" // set buf
        "movl    %%ebx, 8(%%esp)\n\t" // set nbyte
        IF_STAT("addl $1, s_reads\n\t")
        "call    read\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lread_first_loop\n\t" // retry on error
        "test    %%eax, %%eax\n\t"
        "jz      .Lread_first_end\n\t"
        IF_STAT("addl %%eax, s_nrbyte\n\t")
        "addl    %%eax, %%esi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lread_first_loop\n\t"
    ".Lread_first_end:\n\t"
    ".Latoi:\n\t"
        "movl    $buf, %%esi\n\t"
        "movzbl  buf, %%ecx\n\t"
        "xorl    %%ebp, %%ebp\n\t"
        "subl    $48, %%ecx\n\t"
    ".Latoi_loop:\n\t"
        // req eax:-, ebx:-, ecx:d, edx:-, esi:p, edi:-, ebp:a
        // use eax:-, ebx:-, ecx:d, edx:t, esi:p, edi:-, ebp:a
        "leal    (%%ebp,%%ebp,4), %%edx\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%ecx,%%edx,2), %%ebp\n\t"
        "movzbl  (%%esi), %%ecx\n\t"
        "subl    $48, %%ecx\n\t"
        "jae     .Latoi_loop\n\t"
    ".Latoi_end:\n\t"
        "addl    $1, %%esi\n\t"
        "movl    %%ebp, 24(%%esp)\n\t" // store n
#ifdef EFB_CHECK
        "movl    %%ebp, c_n\n\t" // store c_n
#endif
    ".Lparse_arrays:\n\t"
        "movl    $bits-128, %%edi\n\t"
    ".Lparse_arrays_loop:\n\t"
        // req eax:d, ebx:-, ecx:c, edx:-, esi:p, edi:q, ebp:n
        // use eax:a, ebx:c, ecx:c, edx:t, esi:p, edi:q, ebp:i
    ".Lparse_arrays_loop_increment:\n\t"
        "movzbl  (%%esi), %%eax\n\t"
        "movzbl  1(%%esi), %%ecx\n\t"
        "addl    $128, %%edi\n\t"
        "subl    $48, %%eax\n\t"
        "subl    $1, %%ebp\n\t"
        "jb      .Lcreate_hashset\n\t"
    ".Lparse:\n\t"
        "addl    $1, %%esi\n\t"
        "subl    $48, %%ecx\n\t"
        "jb      .Lstore_bit\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%esi), %%ebx\n\t"
        "leal    (%%ecx,%%edx,2), %%eax\n\t"
        "subl    $48, %%ebx\n\t"
        "jb      .Lstore_bit\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%ebx,%%edx,2), %%eax\n\t"
    ".Lstore_bit:\n\t"
        "subl    $1, %%eax\n\t"
        "addl    $1, %%esi\n\t"
        "movl    %%eax, %%ecx\n\t"
        "movl    $1, %%edx\n\t"
        "shrl    $3, %%eax\n\t"
        "andl    $7, %%ecx\n\t"
        "shll    %%cl, %%edx\n\t"
        "orb     %%dl, (%%edi,%%eax)\n\t"
        "movzbl  (%%esi), %%eax\n\t"
        "movzbl  1(%%esi), %%ecx\n\t"
        "subl    $48, %%eax\n\t"
        "jae     .Lparse\n\t"
    ".Ltest_buf:\n\t"
        "addl    $3, %%esi\n\t"
        "cmpl    $buf+65536-1024, %%esi\n\t" // BUFSIZE-1024
        "jb      .Lparse_arrays_loop\n\t"
    ".Lread:\n\t"
        // req eax:-, ebx:-, ecx:-, edx:-, esi:p, edi:q, ebp:i
        // use eax:-, ebx:c, ecx:c, edx:t, esi:p, edi:q, ebp:i
        "movl    %%esi, %%ebx\n\t"
        "subl    $65536-1024, %%esi\n\t" // BUFSIZE-1024
        "andl    $-32, %%ebx\n\t"
        "movl    %%esi, 12(%%esp)\n\t" // store p
        "leal    1024-65536(%%ebx), %%esi\n\t" // 1024-BUFSIZE
    ".Lmemcpy_loop:\n\t"
        "movdqa  (%%ebx), %%xmm0\n\t"
        "movdqa  16(%%ebx), %%xmm1\n\t"
        "addl    $32, %%ebx\n\t"
        "movdqa  %%xmm0, (%%esi)\n\t"
        "movdqa  %%xmm1, 16(%%esi)\n\t"
        "addl    $32, %%esi\n\t"
        "cmpl    $buf+65536, %%ebx\n\t" // BUFSIZE
        "jne     .Lmemcpy_loop\n\t"
        "movl    $buf+1024, %%esi\n\t"
        "movl    $65536-1024, %%ebx\n\t" // BUFSIZE-1024
    ".Lread_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:-, ebp:-
        // use eax:x, ebx:nbyte, ecx:x, edx:x, esi:p, edi:t, ebp:-
        "xorl    %%eax, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t" // set fd
        "movl    %%esi, 4(%%esp)\n\t" // set buf
        "movl    %%ebx, 8(%%esp)\n\t" // set nbyte
        IF_STAT("addl $1, s_reads\n\t")
        "call    read\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lread_loop\n\t" // retry on error
        "test    %%eax, %%eax\n\t"
        "jz      .Lread_end\n\t"
        IF_STAT("addl %%eax, s_nrbyte\n\t")
        "addl    %%eax, %%esi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lread_loop\n\t"
    ".Lread_end:\n\t"
        "movl    12(%%esp), %%esi\n\t" // load p
        "jmp     .Lparse_arrays_loop\n\t"
    ".Lcreate_hashset:\n\t"
        BENCH_MARK_HERE
        "movdqa  mask, %%xmm3\n\t"
        "movl    $bits, %%esi\n\t"
        "movl    24(%%esp), %%ebp\n\t"
        "xorl    %%ebx, %%ebx\n\t"
    ".Lcreate_hashset_loop:\n\t"
        // req eax:-, ebx:hashsize, ecx:-, edx:-, esi:q, edi:-, ebp:i
    ".Lcalc_hash:\n\t"
        "movdqa  (%%esi), %%xmm0\n\t"
        "movdqa  16(%%esi), %%xmm1\n\t"
        "paddd   32(%%esi), %%xmm0\n\t"
        "paddd   48(%%esi), %%xmm1\n\t"
        "paddd   64(%%esi), %%xmm0\n\t"
        "paddd   80(%%esi), %%xmm1\n\t"
        "paddd   96(%%esi), %%xmm0\n\t"
        "paddd   112(%%esi), %%xmm1\n\t"
        "paddd   %%xmm1, %%xmm0\n\t"
        "pshufd  $0x4e, %%xmm0, %%xmm1\n\t"
        "paddd   %%xmm1, %%xmm0\n\t"
        "pshufd  $0xb1, %%xmm0, %%xmm1\n\t"
        "paddd   %%xmm1, %%xmm0\n\t"
        "movd    %%xmm0, %%eax\n\t"
        "andl    $131071, %%eax\n\t"
    ".Lhash_list_iterate:\n\t"
        "leal    head(,%%eax,4), %%edx\n\t"
        "movl    (%%edx), %%edi\n\t"
        "test    %%edi, %%edi\n\t"
        "jz      .Lhash_insert\n\t"
    ".Lhash_test_loop:\n\t"
        // req eax:-, ebx:hashsize, ecx:-, edx:pp, esi:q, edi:p, ebp:i
        "movdqa  (%%edi), %%xmm0\n\t"
        "movdqa  16(%%edi), %%xmm1\n\t"
        "pcmpeqd (%%esi), %%xmm0\n\t"
        "pcmpeqd 16(%%esi), %%xmm1\n\t"
        "pmovmskb    %%xmm0, %%eax\n\t"
        "pmovmskb    %%xmm1, %%ecx\n\t"
        "cmpl    $65535, %%eax\n\t"
        "jne     .Lhash_test_next\n\t"
        "cmpl    $65535, %%ecx\n\t"
        "jne     .Lhash_test_next\n\t"
        "movdqa  32(%%edi), %%xmm0\n\t"
        "movdqa  48(%%edi), %%xmm1\n\t"
        "pcmpeqd 32(%%esi), %%xmm0\n\t"
        "pcmpeqd 48(%%esi), %%xmm1\n\t"
        "pmovmskb    %%xmm0, %%eax\n\t"
        "pmovmskb    %%xmm1, %%ecx\n\t"
        "cmpl    $65535, %%eax\n\t"
        "jne     .Lhash_test_next\n\t"
        "cmpl    $65535, %%ecx\n\t"
        "jne     .Lhash_test_next\n\t"
        "movdqa  64(%%edi), %%xmm0\n\t"
        "movdqa  80(%%edi), %%xmm1\n\t"
        "pcmpeqd 64(%%esi), %%xmm0\n\t"
        "pcmpeqd 80(%%esi), %%xmm1\n\t"
        "pmovmskb    %%xmm0, %%eax\n\t"
        "pmovmskb    %%xmm1, %%ecx\n\t"
        "cmpl    $65535, %%eax\n\t"
        "jne     .Lhash_test_next\n\t"
        "cmpl    $65535, %%ecx\n\t"
        "jne     .Lhash_test_next\n\t"
        "movdqa  96(%%edi), %%xmm0\n\t"
        "movdqa  112(%%edi), %%xmm1\n\t"
        "pand    %%xmm3, %%xmm1\n\t"
        "pcmpeqd 96(%%esi), %%xmm0\n\t"
        "pcmpeqd 112(%%esi), %%xmm1\n\t"
        "pmovmskb    %%xmm0, %%eax\n\t"
        "pmovmskb    %%xmm1, %%ecx\n\t"
        "cmpl    $65535, %%eax\n\t"
        "jne     .Lhash_test_next\n\t"
        "cmpl    $65535, %%ecx\n\t"
        "je      .Lcreate_hashset_loop_increment\n\t"
    ".Lhash_test_next:\n\t"
        "leal    124(%%edi), %%edx\n\t"
        "movl    124(%%edi), %%edi\n\t"
        "xorl    %%ecx, %%ecx\n\t"
        "andl    $-128, %%edi\n\t"
        "jnz     .Lhash_test_loop\n\t"
    ".Lhash_insert:\n\t"
        "orl     %%esi, (%%edx)\n\t"
        "addl    $1, %%ebx\n\t"
    ".Lcreate_hashset_loop_increment:\n\t"
        "addl    $128, %%esi\n\t"
        "subl    $1, %%ebp\n\t"
        "ja      .Lcreate_hashset_loop\n\t"
        BENCH_MARK_HERE
        "movl    %%ebx, %%ecx\n\t"
    ".Lprint:\n\t"
        // req eax:-, ebx:-, ecx:hashsize, edx:-, esi:-, edi:-, ebp:-
        "xorl    %%ebx, %%ebx\n\t"
        "movl    $10, buf+16\n\t"
        "movl    $buf+16, %%edi\n\t"
        "movl    $-858993459, %%esi\n\t"
        "addl    $1, %%ebx\n\t"
    ".Ltostring_loop:\n\t"
        // req eax:-, ebx:l, ecx:a, edx:-,    esi:3435973837, edi:p, ebp:-
        // use eax:t, ebx:l, ecx:a, edx:a/10, esi:3435973837, edi:p, ebp:-
        "movl    %%ecx, %%eax\n\t"
        "mull    %%esi\n\t"
        "shrl    $3, %%edx\n\t"
        "leal    (%%edx,%%edx,4), %%eax\n\t"
        "addl    %%eax, %%eax\n\t"
        "subl    $1, %%edi\n\t"
        "subl    %%eax, %%ecx\n\t"
        "addl    $1, %%ebx\n\t"
        "addl    $48, %%ecx\n\t"
        "movb    %%cl, (%%edi)\n\t"
        "movl    %%edx, %%ecx\n\t"
        "test    %%ecx, %%ecx\n\t"
        "jnz     .Ltostring_loop\n\t"
    ".Lwrite_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:-, edi:p, ebp:-
        "movl    $1, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t"
        "movl    %%edi, 4(%%esp)\n\t"
        "movl    %%ebx, 8(%%esp)\n\t"
        IF_STAT("addl $1, s_writes\n\t")
        "call    write\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lwrite_loop\n\t" // retry on error
        IF_STAT("addl %%eax, s_nwbyte\n\t")
        "addl    %%eax, %%edi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lwrite_loop\n\t"
        BENCH_MARK_HERE
    ".Lclear_stack:\n\t"
        "movl    60(%%esp), %%esp\n\t"
        "popl    %%ebp\n\t"
    : : : "eax", "ebx", "ecx", "edx", "esi", "edi", "cc");
#ifdef EFB_CHECK
    input_dump();
    link_dump();
#endif
#ifdef EFB_STAT
    stat_output();
#endif
#ifdef EFB_BENCH
    bench_output();
#endif
    return 0;
}

2. 素数暗号

アリストテレスの篩を使うのが一般的だと思います。
素数の埋め込みをした方もいるようですが、それは諦めました。
2の倍数は無視、他の数を篩うのに使う可能性のある素数の埋め込み、メモリアクセスの局所化などの最適化を行ないました。

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define N (5000000)
#define PN (5001216)
#define SPAN (12288)
// assert(PN%SPAN==0)
#define P (445)
#define M (890)
#define RBUFSIZE (65536)
#define WBUFSIZE (65536)
char char_of[PN] __attribute__((aligned(4096)));
char rbuf[RBUFSIZE] __attribute__((aligned(16)));
char wbuf[WBUFSIZE];

int magi[M+2] = {3, 4, 5, 12, 7, 24, 11, 60, 13, 84, 17, 144, 19, 180, 23, 264, 29, 420, 31, 480, 37, 684, 41, 840, 43, 924, 47, 1104, 53, 1404, 59, 1740, 61, 1860, 67, 2244, 71, 2520, 73, 2664, 79, 3120, 83, 3444, 89, 3960, 97, 4704, 101, 5100, 103, 5304, 107, 5724, 109, 5940, 113, 6384, 127, 8064, 131, 8580, 137, 9384, 139, 9660, 149, 11100, 151, 11400, 157, 12324, 163, 13284, 167, 13944, 173, 14964, 179, 16020, 181, 16380, 191, 18240, 193, 18624, 197, 19404, 199, 19800, 211, 22260, 223, 24864, 227, 25764, 229, 26220, 233, 27144, 239, 28560, 241, 29040, 251, 31500, 257, 33024, 263, 34584, 269, 36180, 271, 36720, 277, 38364, 281, 39480, 283, 40044, 293, 42924, 307, 47124, 311, 48360, 313, 48984, 317, 50244, 331, 54780, 337, 56784, 347, 60204, 349, 60900, 353, 62304, 359, 64440, 367, 67344, 373, 69564, 379, 71820, 383, 73344, 389, 75660, 397, 78804, 401, 80400, 409, 83640, 419, 87780, 421, 88620, 431, 92880, 433, 93744, 439, 96360, 443, 98124, 449, 100800, 457, 104424, 461, 106260, 463, 107184, 467, 109044, 479, 114720, 487, 118584, 491, 120540, 499, 124500, 503, 126504, 509, 129540, 521, 135720, 523, 136764, 541, 146340, 547, 149604, 557, 155124, 563, 158484, 569, 161880, 571, 163020, 577, 166464, 587, 172284, 593, 175824, 599, 179400, 601, 180600, 607, 184224, 613, 187884, 617, 190344, 619, 191580, 631, 199080, 641, 205440, 643, 206724, 647, 209304, 653, 213204, 659, 217140, 661, 218460, 673, 226464, 677, 229164, 683, 233244, 691, 238740, 701, 245700, 709, 251340, 719, 258480, 727, 264264, 733, 268644, 739, 273060, 743, 276024, 751, 282000, 757, 286524, 761, 289560, 769, 295680, 773, 298764, 787, 309684, 797, 317604, 809, 327240, 811, 328860, 821, 337020, 823, 338664, 827, 341964, 829, 343620, 839, 351960, 853, 363804, 857, 367224, 859, 368940, 863, 372384, 877, 384564, 881, 388080, 883, 389844, 887, 393384, 907, 411324, 911, 414960, 919, 422280, 929, 431520, 937, 438984, 941, 442740, 947, 448404, 953, 454104, 967, 467544, 971, 471420, 977, 477264, 983, 483144, 991, 491040, 997, 497004, 1009, 509040, 1013, 513084, 1019, 519180, 1021, 521220, 1031, 531480, 1033, 533544, 1039, 539760, 1049, 550200, 1051, 552300, 1061, 562860, 1063, 564984, 1069, 571380, 1087, 590784, 1091, 595140, 1093, 597324, 1097, 601704, 1103, 608304, 1109, 614940, 1117, 623844, 1123, 630564, 1129, 637320, 1151, 662400, 1153, 664704, 1163, 676284, 1171, 685620, 1181, 697380, 1187, 704484, 1193, 711624, 1201, 721200, 1213, 735684, 1217, 740544, 1223, 747864, 1229, 755220, 1231, 757680, 1237, 765084, 1249, 780000, 1259, 792540, 1277, 815364, 1279, 817920, 1283, 823044, 1289, 830760, 1291, 833340, 1297, 841104, 1301, 846300, 1303, 848904, 1307, 854124, 1319, 869880, 1321, 872520, 1327, 880464, 1361, 926160, 1367, 934344, 1373, 942564, 1381, 953580, 1399, 978600, 1409, 992640, 1423, 1012464, 1427, 1018164, 1429, 1021020, 1433, 1026744, 1439, 1035360, 1447, 1046904, 1451, 1052700, 1453, 1055604, 1459, 1064340, 1471, 1081920, 1481, 1096680, 1483, 1099644, 1487, 1105584, 1489, 1108560, 1493, 1114524, 1499, 1123500, 1511, 1141560, 1523, 1159764, 1531, 1171980, 1543, 1190424, 1549, 1199700, 1553, 1205904, 1559, 1215240, 1567, 1227744, 1571, 1234020, 1579, 1246620, 1583, 1252944, 1597, 1275204, 1601, 1281600, 1607, 1291224, 1609, 1294440, 1613, 1300884, 1619, 1310580, 1621, 1313820, 1627, 1323564, 1637, 1339884, 1657, 1372824, 1663, 1382784, 1667, 1389444, 1669, 1392780, 1693, 1433124, 1697, 1439904, 1699, 1443300, 1709, 1460340, 1721, 1480920, 1723, 1484364, 1733, 1501644, 1741, 1515540, 1747, 1526004, 1753, 1536504, 1759, 1547040, 1777, 1578864, 1783, 1589544, 1787, 1596684, 1789, 1600260, 1801, 1621800, 1811, 1639860, 1823, 1661664, 1831, 1676280, 1847, 1705704, 1861, 1731660, 1867, 1742844, 1871, 1750320, 1873, 1754064, 1877, 1761564, 1879, 1765320, 1889, 1784160, 1901, 1806900, 1907, 1818324, 1913, 1829784, 1931, 1864380, 1933, 1868244, 1949, 1899300, 1951, 1903200, 1973, 1946364, 1979, 1958220, 1987, 1974084, 1993, 1986024, 1997, 1994004, 1999, 1998000, 2003, 2006004, 2011, 2022060, 2017, 2034144, 2027, 2054364, 2029, 2058420, 2039, 2078760, 2053, 2107404, 2063, 2127984, 2069, 2140380, 2081, 2165280, 2083, 2169444, 2087, 2177784, 2089, 2181960, 2099, 2202900, 2111, 2228160, 2113, 2232384, 2129, 2266320, 2131, 2270580, 2137, 2283384, 2141, 2291940, 2143, 2296224, 2153, 2317704, 2161, 2334960, 2179, 2374020, 2203, 2426604, 2207, 2435424, 2213, 2448684, 2221, 2466420, 2237, 2502084, 2239, 2506560, 2243, 2515524, 2251, 2533500, 2267, 2569644, 2269, 2574180, 2273, 2583264, 2281, 2601480, 2287, 2615184, 2293, 2628924, 2297, 2638104, 2309, 2665740, 2311, 2670360, 2333, 2721444, 2339, 2735460, 2341, 2740140, 2347, 2754204, 2351, 2763600, 2357, 2777724, 2371, 2810820, 2377, 2825064, 2381, 2834580, 2383, 2839344, 2389, 2853660, 2393, 2863224, 2399, 2877600, 2411, 2906460, 2417, 2920944, 2423, 2935464, 2437, 2969484, 2441, 2979240, 2447, 2993904, 2459, 3023340, 2467, 3043044, 2473, 3057864, 2477, 3067764, 2503, 3132504, 2521, 3177720, 2531, 3202980, 2539, 3223260, 2543, 3233424, 2549, 3248700, 2551, 3253800, 2557, 3269124, 2579, 3325620, 2591, 3356640, 2593, 3361824, 2609, 3403440, 2617, 3424344, 2621, 3434820, 2633, 3466344, 2647, 3503304, 2657, 3529824, 2659, 3535140, 2663, 3545784, 2671, 3567120, 2677, 3583164, 2683, 3599244, 2687, 3609984, 2689, 3615360, 2693, 3626124, 2699, 3642300, 2707, 3663924, 2711, 3674760, 2713, 3680184, 2719, 3696480, 2729, 3723720, 2731, 3729180, 2741, 3756540, 2749, 3778500, 2753, 3789504, 2767, 3828144, 2777, 3855864, 2789, 3889260, 2791, 3894840, 2797, 3911604, 2801, 3922800, 2803, 3928404, 2819, 3973380, 2833, 4012944, 2837, 4024284, 2843, 4041324, 2851, 4064100, 2857, 4081224, 2861, 4092660, 2879, 4144320, 2887, 4167384, 2897, 4196304, 2903, 4213704, 2909, 4231140, 2917, 4254444, 2927, 4283664, 2939, 4318860, 2953, 4360104, 2957, 4371924, 2963, 4389684, 2969, 4407480, 2971, 4413420, 2999, 4497000, 3001, 4503000, 3011, 4533060, 3019, 4557180, 3023, 4569264, 3037, 4611684, 3041, 4623840, 3049, 4648200, 3061, 4684860, 3067, 4703244, 3079, 4740120, 3083, 4752444, 3089, 4770960, 3109, 4832940, 3119, 4864080, 3121, 4870320, 3137, 4920384, 0, 0x7fffffff};

// check
#ifdef EFB_CHECK
int c_num;

void table_dump() {
    int i;
    for(i=0; i<N; i++) {
        fputc(char_of[i], stderr);
    }
    fputc('\n', stderr);
}

void num_dump() {
    fprintf(stderr, "%d\n", c_num);
}
#endif

// statistics
#ifdef EFB_STAT
#define IF_STAT(a) a
int s_reads, s_nrbyte;
int s_writes, s_nwbyte;

void stat_output() {
    fprintf(stderr, "stats\n");
    fprintf(stderr, "  read   : %d bytes read by %d read().\n",
            s_nrbyte, s_reads);
    fprintf(stderr, "  write  : %d bytes written by %d write().\n",
            s_nwbyte, s_writes);
}
#else
#define IF_STAT(a) ""
#endif

// benchmark
#ifdef EFB_BENCH
#define BENCH_MARK_HERE "call bench_mark\n\t"
int markc, markv[10];

void bench_mark() {
    static struct timeval tv;
    gettimeofday(&tv, NULL);
    markv[markc++] = tv.tv_usec;
}

void bench_output() {
    int i;
    fprintf(stderr, "bentch: ");
    for(i=0; i<markc-1; i++) {
        if(i>0) fprintf(stderr, ", ");
        fprintf(stderr, "%d", markv[i+1]-markv[i]);
    }
    fprintf(stderr, "\n");
}
#else
#define BENCH_MARK_HERE ""
#endif


int main() {
    __asm__(
    ".Lstack_init:\n\t"
        "pushl   %%ebp\n\t"
        "movl    %%esp, %%ebp\n\t"
        "subl    $80, %%esp\n\t"
        "andl    $-16, %%esp\n\t"
        "movl    %%ebp, 60(%%esp)\n\t" // store ebp
        BENCH_MARK_HERE
        "movl    $12288, %%edx\n\t" // SPAN
        "movl    $95, %%ebx\n\t"
        "movl    magi, %%ecx\n\t"
        "movl    magi+4, %%eax\n\t"
        "movl    $magi+8, %%edi\n\t"
    ".Lsieve_loop:\n\t"
        // req eax:b, ebx:95, ecx:d, edx:k, esi:-, edi:p, ebp:-
        "movb    %%bl, char_of(%%eax)\n\t"
        "addl    %%ecx, %%eax\n\t"
        "cmpl    %%eax, %%edx\n\t"
        "ja      .Lsieve_loop\n\t"
        "movl    %%eax, -4(%%edi)\n\t"
        "movl    (%%edi), %%ecx\n\t"
        "movl    4(%%edi), %%eax\n\t"
        "addl    $8, %%edi\n\t"
        "cmpl    %%eax, %%edx\n\t"
        "ja      .Lsieve_loop\n\t"
    ".Lsieve_loop_increment:\n\t"
        "movl    magi, %%ecx\n\t"
        "movl    magi+4, %%eax\n\t"
        "movl    $magi+8, %%edi\n\t"
        "addl    $12288, %%edx\n\t" // SPAN
        "cmpl    $5001216+12288, %%edx\n\t" // PN+SPAN
        "jne     .Lsieve_loop\n\t"
        BENCH_MARK_HERE
        "movb    $95, char_of\n\t"
        "movl    $char_of+1, %%ebx\n\t"
        "movl    $98, %%ecx\n\t"
    ".Lnumbering_loop_lower:"
        // req eax:-, ebx:p, ecx:c, edx:-, esi:-, edi:-, ebp:-
        // use eax:t, ebx:p, ecx:c, edx:-, esi:-, edi:-, ebp:-
        "movzbl  (%%ebx), %%eax\n\t"
        "addl    $1, %%ebx\n\t"
        "test    %%eax, %%eax\n\t"
        "jnz     .Lnumbering_loop_lower\n\t"
        "movb    %%cl, -1(%%ebx)\n\t"
        "addl    $1, %%ecx\n\t"
        "cmpl    $123, %%ecx\n\t"
        "jne     .Lnumbering_loop_lower\n\t"
        "movl    $65, %%ecx\n\t"
    ".Lnumbering_loop_upper:\n\t"
        "movzbl  (%%ebx), %%eax\n\t"
        "addl    $1, %%ebx\n\t"
        "test    %%eax, %%eax\n\t"
        "jnz     .Lnumbering_loop_upper\n\t"
        "movb    %%cl, -1(%%ebx)\n\t"
        "addl    $1, %%ecx\n\t"
        "cmpl    $91, %%ecx\n\t"
        "jne     .Lnumbering_loop_upper\n\t"
        "movl    $97, %%ecx\n\t"
        "cmpl    $char_of+5000000, %%ebx\n\t" // N
        "jb      .Lnumbering_loop_lower\n\t"
    ".Lnumbering_loop_end:\n\t"
#ifdef EFB_CHECK
        "call    table_dump\n\t"
#endif
        BENCH_MARK_HERE
    ".Lread_first:\n\t"
        "movl    $rbuf, %%esi\n\t"
        "movl    $65536, %%ebx\n\t" // RBUFSIZE
    ".Lread_first_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:w, ebp:-
        "xorl    %%eax, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t" // set fd
        "movl    %%esi, 4(%%esp)\n\t" // set buf
        "movl    %%ebx, 8(%%esp)\n\t" // set nbyte
        IF_STAT("addl $1, s_reads\n\t")
        "call    read\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lread_first_loop\n\t"
        IF_STAT("addl %%eax, s_nrbyte\n\t")
        "test    %%eax, %%eax\n\t"
        "jz      .Lread_first_loop_end\n\t"
        "addl    %%eax, %%esi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lread_first_loop\n\t"
    ".Lread_first_loop_end:\n\t"
    ".Lparse_init:\n\t"
        "movl    $rbuf, %%esi\n\t"
        "movl    $wbuf, %%edi\n\t"
    ".Lparse:\n\t"
        // req eax:-, ebx:-, ecx:-, edx:-, esi:p, edi:w, ebp:-
        "movzbl  (%%esi), %%eax\n\t"
        "movzbl  1(%%esi), %%ecx\n\t"
        "addl    $1, %%esi\n\t"
        "subl    $48, %%eax\n\t"
        "jb      .Lwrite_newline\n\t"
        "subl    $48, %%ecx\n\t"
        "jb      .Lparse_end\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%esi), %%ebx\n\t"
        "leal    (%%ecx,%%edx,2), %%eax\n\t"
        "subl    $48, %%ebx\n\t"
        "jb      .Lparse_end\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%esi), %%ecx\n\t"
        "leal    (%%ebx,%%edx,2), %%eax\n\t"
        "subl    $48, %%ecx\n\t"
        "jb      .Lparse_end\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%esi), %%ebx\n\t"
        "leal    (%%ecx,%%edx,2), %%eax\n\t"
        "subl    $48, %%ebx\n\t"
        "jb      .Lparse_end\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%esi), %%ecx\n\t"
        "leal    (%%ebx,%%edx,2), %%eax\n\t"
        "subl    $48, %%ecx\n\t"
        "jb      .Lparse_end\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%esi), %%ebx\n\t"
        "leal    (%%ecx,%%edx,2), %%eax\n\t"
        "subl    $48, %%ebx\n\t"
        "jb      .Lparse_end\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "addl    $1, %%esi\n\t"
        "leal    (%%ebx,%%edx,2), %%eax\n\t"
    ".Lparse_end:\n\t"
        "addl    $1, %%esi\n\t"
#ifdef EFB_CHECK
        "movl    %%eax, c_num\n\t"
        "call    num_dump\n\t"
        "movl    c_num, %%eax\n\t"
#endif
        "test    $1, %%eax\n\t"
        "jz      .Leven\n\t"
        "shrl    $1, %%eax\n\t"
        "movzbl  char_of(%%eax), %%ecx\n\t"
        "movb    %%cl, (%%edi)\n\t"
        "addl    $1, %%edi\n\t"
        "cmpl    $wbuf+65536, %%edi\n\t" // WBUFSIZE
        "je      .Lwrite\n\t"
        "cmpl    $rbuf+65536-16, %%esi\n\t" // RBUFSIZE-16
        "jae     .Lread\n\t"
        "jmp     .Lparse\n\t"
    ".Leven:\n\t"
        "movl    $95, %%edx\n\t"
        "movl    $97, %%ebx\n\t"
        "cmpl    $2, %%eax\n\t"
        "cmovel  %%ebx, %%edx\n\t"
        "movb    %%dl, (%%edi)\n\t"
        "addl    $1, %%edi\n\t"
        "cmpl    $wbuf+65536, %%edi\n\t" // WBUFSIZE
        "je      .Lwrite\n\t"
        "cmpl    $rbuf+65536-16, %%esi\n\t" // RBUFSIZE-16
        "jae     .Lread\n\t"
        "jmp     .Lparse\n\t"
    ".Lwrite:\n\t"
        // w==wbuf+WBUFSIZE
        "movl    $wbuf, %%edi\n\t"
        "movl    $65536, %%ebx\n\t" // WBUFSIZE
    ".Lwrite_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:w, ebp:-
        "movl    $1, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t"
        "movl    %%edi, 4(%%esp)\n\t"
        "movl    %%ebx, 8(%%esp)\n\t"
        IF_STAT("addl $1, s_writes\n\t")
        "call    write\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lwrite_loop\n\t"
        IF_STAT("addl %%eax, s_nwbyte\n\t")
        "addl    %%eax, %%edi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lwrite_loop\n\t"
    ".Lwrite_loop_end:\n\t"
        "movl    $wbuf, %%edi\n\t"
        "cmpl    $rbuf+65536-16, %%esi\n\t" // RBUFSIZE-16
        "jb      .Lparse\n\t"
    ".Lread:\n\t"
        "movdqa  rbuf+65536-16, %%xmm0\n\t" // RBUFSIZE-16
        "subl    $65536-16, %%esi\n\t" // RBUFSIZE-16
        "movdqa  %%xmm0, rbuf\n\t"
        "movl    $rbuf+16, %%ebp\n\t"
        "movl    $65536-16, %%ebx\n\t" // RBUFSIZE-16
    ".Lread_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:w, ebp:np
        "xorl    %%eax, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t" // set fd
        "movl    %%ebp, 4(%%esp)\n\t" // sed buf
        "movl    %%ebx, 8(%%esp)\n\t" // set nbyte
        IF_STAT("addl $1, s_reads\n\t")
        "call    read\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lread_loop\n\t"
        IF_STAT("addl %%eax, s_nrbyte\n\t")
        "test    %%eax, %%eax\n\t"
        "jz      .Lparse\n\t"
        "addl    %%eax, %%ebp\n\t"
        "subl    %%eax, %%ebx\n\t"
        "jbe     .Lparse\n\t"
        "jmp     .Lread_loop\n\t"
    ".Lread_loop_end:\n\t"
    ".Lwrite_newline:\n\t"
        "movl    $10, (%%edi)\n\t"
        "movl    %%edi, %%ebx\n\t"
        "movl    $wbuf, %%edi\n\t"
        "subl    $wbuf-1, %%ebx\n\t"
    ".Lwrite_newline_loop:\n\t"
        // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:-, edi:w, ebp:-
        "movl    $1, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t"
        "movl    %%edi, 4(%%esp)\n\t"
        "movl    %%ebx, 8(%%esp)\n\t"
        IF_STAT("addl $1, s_writes\n\t")
        "call    write\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lwrite_newline_loop\n\t"
        IF_STAT("addl %%eax, s_nwbyte\n\t")
        "addl    %%eax, %%edi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lwrite_newline_loop\n\t"
    ".Lwrite_newline_end:\n\t"
        BENCH_MARK_HERE
    ".Lclean_stack:\n\t"
        "movl    60(%%esp), %%esp\n\t"
        "popl    %%ebp\n\t"
    : : : "eax", "ebx", "ecx", "edx", "esi", "edi");
#ifdef EFB_STAT
    stat_output();
#endif
#ifdef EFB_BENCH
    bench_output();
#endif
    return 0;
}

3. n番目に大きい分数は?

Spaghetti Sourceで紹介されているアルゴリズムを使用しました。ただし、Spaghetti Sourceの実装は一部バグがあるので注意が必要です。
中央値の選択は、(左端の数+右端の数)/2という大雑把なことをしています。
ある意味、一番工夫のしどころがなかった問題でした。

#include <unistd.h>
#include <stdio.h>
#include <time.h>
#include <sys/time.h>

#define M (1000000)
#define BUFSIZE (16777216) // 2**24

double fv[M];
char* fp[M];
char buf[BUFSIZE];
const double HALF=0.5;


// check
#ifdef EFB_CHECK
#define MEM_DUMP_HERE "call mem_dump\n\t"
int c_N, c_n;
void mem_dump() {
    int i;
    fprintf(stderr, "mem_dump\n");
    fprintf(stderr, "N=%d, n=%d\n", c_N, c_n);
    for(i=0; i<c_N; i++) {
        fprintf(stderr, "%20.12f at %d\n", fv[i], (int)(fp[i]-buf));
    }
}
#else
#define MEM_DUMP_HERE ""
#endif


// statistics
#ifdef EFB_STAT
#define IF_STAT(a) a
int s_reads, s_nrbyte;
int s_writes, s_nwbyte;
int s_steps, s_lcmps, s_rcmps, s_swaps;

void stat_output() {
    fprintf(stderr, "stats\n");
    fprintf(stderr, "  read   : %d bytes read by %d read().\n",
            s_nrbyte, s_reads);
    fprintf(stderr, "  write  : %d bytes written by %d write().\n",
            s_nwbyte, s_writes);
    fprintf(stderr, "  select : %d steps, %d(%d:%d) compares and %d swaps.\n",
            s_steps, s_lcmps+s_rcmps, s_lcmps, s_rcmps, s_swaps);
}
#else
#define IF_STAT(a) ""
#endif


// benchmark
#ifdef EFB_BENCH
#define BENCH_MARK_HERE "call bench_mark\n\t"
int markc, markv[10];

void bench_mark() {
    static struct timeval tv;
    gettimeofday(&tv, NULL);
    markv[markc++] = tv.tv_usec;
}

void bench_output() {
    int i;
    fprintf(stderr, "bentch: ");
    for(i=0; i<markc-1; i++) {
        if(i>0) fprintf(stderr, ", ");
        fprintf(stderr, "%d", markv[i+1]-markv[i]);
    }
    fprintf(stderr, "\n");
}
#else
#define BENCH_MARK_HERE ""
#endif


int main() {
    __asm__(
    ".Lstack_init:\n\t"
        "pushl   %%ebp\n\t"
        "movl    %%esp, %%ebp\n\t"
        "subl    $80, %%esp\n\t"
        "andl    $-16, %%esp\n\t"
        "movl    %%ebp, 60(%%esp)\n\t" // store ebp
        BENCH_MARK_HERE
    ".Lread:\n\t"
        "movl    $buf, %%edi\n\t"
        "movl    $16777216, %%ebx\n\t"
    ".Lread_loop:\n\t"
        // req eax:-,   ebx:nbyte, ecx:-, edx:-, esi:-, edi:p, ebp:-
        // use eax:ret, ebx:nbyte, ecx:x, edx:x, esi:-, edi:p, ebp:-
        "xorl    %%eax, %%eax\n\t" // set fd
        "movl    %%eax, (%%esp)\n\t" // set fd
        "movl    %%edi, 4(%%esp)\n\t" // set buf
        "movl    %%ebx, 8(%%esp)\n\t" // set nbyte
        IF_STAT("addl $1, s_reads\n\t")
        "call    read\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lread_loop\n\t"
        IF_STAT("addl %%eax, s_nrbyte\n\t")
        "test    %%eax, %%eax\n\t"
        "jz      .Lread_loop_fin\n\t"
        "subl    %%eax, %%ebx\n\t"
        "addl    %%eax, %%edi\n\t"
        "jmp     .Lread_loop\n\t"
    ".Lread_loop_fin:\n\t"
        BENCH_MARK_HERE
        "movl    $buf, %%edi\n\t"
    ".Lparse_N:\n\t"
        "xorl    %%eax, %%eax\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
    ".Lparse_N_loop:\n\t"
        // req eax:a, ebx:-, ecx:c, edx:-, esi:-, edi:p, ebp:-
        // use eax:a, ebx:-, ecx:c, edx:t, esi:-, edi:p, ebp:-
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "addl    $1, %%edi\n\t"
        "leal    -48(%%ecx,%%edx,2), %%eax\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
        "test    $16, %%ecx\n\t"
        "jnz     .Lparse_N_loop\n\t"
    ".Lparse_N_loop_fin:\n\t"
        "movl    %%eax, 12(%%esp)\n\t" // store N
#ifdef EFB_CHECK
        "movl    %%eax, c_N\n\t" // store c_N
#endif
        "addl    $1, %%edi\n\t"
    ".Lparse_n:\n\t"
        "xorl    %%ebx, %%ebx\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
    ".Lparse_n_loop:\n\t"
        // req eax:N, ebx:a, ecx:c, edx:-, esi:-, edi:p, ebp:-
        // use eax:N, ebx:a, ecx:c, edx:t, esi:-, edi:p, ebp:-
        "leal    (%%ebx,%%ebx,4), %%edx\n\t"
        "addl    $1, %%edi\n\t"
        "leal    -48(%%ecx,%%edx,2), %%ebx\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
        "test    $16, %%ecx\n\t"
        "jnz     .Lparse_n_loop\n\t"
    ".Lparse_n_loop_fin:\n\t"
        "movl    %%ebx, 16(%%esp)\n\t" // store n
#ifdef EFB_CHECK
        "movl    %%ebx, c_n\n\t" // store c_n
#endif
        BENCH_MARK_HERE
        "addl    $1, %%edi\n\t"
    ".Lparse_fractions:\n\t"
        "xorl    %%ebp, %%ebp\n\t"
    ".Lparse_fractions_loop:\n\t"
        // req eax:-, ebx:-, ecx:-, edx:-, esi:-, edi:p, ebp:i
        "movl    %%edi, fp(,%%ebp,4)\n\t" // store fp[i]
    ".Lparse_a:\n\t"
        // use eax:a, ebx:s, ecx:c, edx:t, esi:t, edi:p, ebp:i
        "movzbl  (%%edi), %%eax\n\t"
        "movzbl  1(%%edi), %%edx\n\t"
        "addl    $1, %%edi\n\t"
        "xorl    %%ebx, %%ebx\n\t"
        "test    $16, %%eax\n\t"
        "setz    %%bl\n\t"
        "cmovz   %%edx, %%eax\n\t"
        "addl    %%ebx, %%edi\n\t"
        "subl    $48, %%eax\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
        "subl    $48, %%ecx\n\t"
        "jb      .Lparse_b\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%edi), %%esi\n\t"
        "leal    (%%ecx,%%edx,2), %%eax\n\t"
        "subl    $48, %%esi\n\t"
        "jb      .Lparse_b\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
        "leal    (%%esi,%%edx,2), %%eax\n\t"
        "subl    $48, %%ecx\n\t"
        "jb      .Lparse_b\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "movzbl  (%%edi), %%esi\n\t"
        "leal    (%%ecx,%%edx,2), %%eax\n\t"
        "subl    $48, %%esi\n\t"
        "jb      .Lparse_b\n\t"
        "leal    (%%eax,%%eax,4), %%edx\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%esi,%%edx,2), %%eax\n\t"
    ".Lparse_b:\n\t"
        // use eax:c, ebx:t, ecx:a, edx:t, esi:t, edi:p, ebp:i
        "addl    $1, %%edi\n\t"
        "movzbl  (%%edi), %%ecx\n\t"
        "movzbl  1(%%edi), %%esi\n\t"
        "addl    $1, %%edi\n\t"
        "xorl    %%edx, %%edx\n\t"
        "test    $16, %%ecx\n\t"
        "setz    %%dl\n\t"
        "cmovz   %%esi, %%ecx\n\t"
        "addl    %%edx, %%ebx\n\t"
        "addl    %%edx, %%edi\n\t"
        "movl    %%eax, %%esi\n\t"
        "negl    %%eax\n\t"
        "test    $1, %%ebx\n\t"
        "cmovnz  %%eax, %%esi\n\t"
        "movzbl  (%%edi), %%eax\n\t"
        "subl    $48, %%ecx\n\t"
        "cvtsi2sd    %%esi, %%xmm0\n\t"
        "subl    $48, %%eax\n\t"
        "jb      .Lstore_fraction_data\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%ecx,%%ecx,4), %%edx\n\t"
        "movzbl  (%%edi), %%esi\n\t"
        "leal    (%%eax,%%edx,2), %%ecx\n\t"
        "subl    $48, %%esi\n\t"
        "jb      .Lstore_fraction_data\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%ecx,%%ecx,4), %%edx\n\t"
        "movzbl  (%%edi), %%eax\n\t"
        "leal    (%%esi,%%edx,2), %%ecx\n\t"
        "subl    $48, %%eax\n\t"
        "jb      .Lstore_fraction_data\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%ecx,%%ecx,4), %%edx\n\t"
        "movzbl  (%%edi), %%esi\n\t"
        "leal    (%%eax,%%edx,2), %%ecx\n\t"
        "subl    $48, %%esi\n\t"
        "jb      .Lstore_fraction_data\n\t"
        "leal    (%%ecx,%%ecx,4), %%edx\n\t"
        "addl    $1, %%edi\n\t"
        "leal    (%%esi,%%edx,2), %%ecx\n\t"
    ".Lstore_fraction_data:\n\t"
        "cvtsi2sd    %%ecx, %%xmm1\n\t"
        "addl    $1, %%ebp\n\t"
        "divsd    %%xmm1, %%xmm0\n\t"
        "addl    $1, %%edi\n\t"
        "movsd    %%xmm0, fv-8(,%%ebp,8)\n\t"
        "cmpl    12(%%esp), %%ebp\n\t" // N
        "jne     .Lparse_fractions_loop\n\t"
        MEM_DUMP_HERE
        BENCH_MARK_HERE
    ".Lselect:\n\t"
        "movl    12(%%esp), %%edi\n\t"
        "movl    %%edi, %%ebp\n\t"
        "subl    16(%%esp), %%ebp\n\t"
        "xorl    %%esi, %%esi\n\t"
        "subl    $1, %%edi\n\t"
        "movsd   HALF, %%xmm3\n\t"
    ".Lselect_loop:\n\t"
        IF_STAT("addl $1, s_steps\n\t")
        "movsd   fv(,%%esi,8), %%xmm0\n\t"
        "leal    -1(%%esi), %%ebx\n\t"
        "addsd   fv(,%%edi,8), %%xmm0\n\t"
        "leal    1(%%edi), %%edx\n\t"
        "mulsd   %%xmm3, %%xmm0\n\t"
    ".Lpartition_loop:\n\t"
    ".Lleft_loop:\n\t"
        IF_STAT("addl $1, s_lcmps\n\t")
        "movsd   fv+8(,%%ebx,8), %%xmm1\n\t"
        "addl    $1, %%ebx\n\t"
        "ucomisd %%xmm1, %%xmm0\n\t"
        "ja      .Lleft_loop\n\t"
    ".Lright_loop:\n\t"
        IF_STAT("addl $1, s_rcmps\n\t")
        "movsd   fv-8(,%%edx,8), %%xmm2\n\t"
        "subl    $1, %%edx\n\t"
        "ucomisd %%xmm2, %%xmm0\n\t"
        "jb      .Lright_loop\n\t"
        "cmpl    %%ebx, %%edx\n\t"
        "jle     .Ldo_drop\n\t"
    ".Lswap:\n\t"
        IF_STAT("addl $1, s_swaps\n\t")
        "movl    fp(,%%ebx,4), %%eax\n\t"
        "movl    fp(,%%edx,4), %%ecx\n\t"
        "movsd   %%xmm1, fv(,%%edx,8)\n\t"
        "movsd   %%xmm2, fv(,%%ebx,8)\n\t"
        "movl    %%eax, fp(,%%edx,4)\n\t"
        "movl    %%ecx, fp(,%%ebx,4)\n\t"
        "jmp     .Lpartition_loop\n\t"
    ".Ldo_drop:\n\t"
        "subl    $1, %%ebx\n\t"
        "addl    $1, %%edx\n\t"
        "cmpl    %%ebp, %%ebx\n\t"
        "cmovge  %%ebx, %%edi\n\t"
        "cmovl   %%edx, %%esi\n\t"
    ".Lselect_loop_next:\n\t"
        "cmpl    %%esi, %%edi\n\t"
        "jg      .Lselect_loop\n\t"
        MEM_DUMP_HERE
        BENCH_MARK_HERE
    ".Lwrite:\n\t"
        "movl    12(%%esp), %%eax\n\t"
        "subl    16(%%esp), %%eax\n\t"
        "xorl    %%ebx, %%ebx\n\t"
        "movl    fp(,%%eax,4), %%edi\n\t"
    ".Lstrlen_loop:\n\t"
        "movzbl  (%%edi,%%ebx), %%ecx\n\t"
        "addl    $1, %%ebx\n\t"
        "cmpl    $10, %%ecx\n\t"
        "jne     .Lstrlen_loop\n\t"
    ".Lwrite_loop:\n\t"
        "movl    $1, %%eax\n\t"
        "movl    %%eax, (%%esp)\n\t" // set fd
        "movl    %%edi, 4(%%esp)\n\t" // set buf
        "movl    %%ebx, 8(%%esp)\n\t" // set nbyte
        IF_STAT("addl $1, s_writes\n\t")
        "call    write\n\t"
        "cmpl    $-1, %%eax\n\t"
        "je      .Lwrite_loop\n\t"
        IF_STAT("addl %%eax, s_nwbyte\n\t")
        "addl    %%eax, %%edi\n\t"
        "subl    %%eax, %%ebx\n\t"
        "ja      .Lwrite_loop\n\t"
    ".Lclean_stack:\n\t"
        "movl    60(%%esp), %%esp\n\t"
        "popl    %%ebp\n\t"
    : : : "eax", "ebx", "ecx", "edx", "esi", "edi");
#ifdef EFB_STAT
    stat_output();
#endif
#ifdef EFB_BENCH
    bench_output();
#endif
    return 0;
}

まとめ

アセンブラとしては、共通して(1)できるだけレジスタのみを使用(ローカル変数をメモリに置かない),(2)ループの展開,(3)SIMD命令の使用などの最適化を行なっています。
しかしまあ、可読性は最悪ですね。