EPOCH@まつやま 2010 予選問題
id:w125さんの記事に触発され、こちらも予選問題のソースを晒すことにしました。
ちなみに、rankalee(id:simezi_tan)さんと一緒にチーム・えたふぉで参加予定です。
方針
まず、予選は次の4つの基準で競われます。
(1)正確さ
(2)実行速度
(3)メモリ使用量
(4)提出の早さ
このルールを、「正解してさえいれば、速ければ速いほどよい」というふうに読み替え、予選提出コードには次のようなコーディング規約を設けました。
(3)と(4)は無視です。
1. オール・インラインアセンブラなぜか本番のコンパイル時に最適化オプションをつけないようになっていたので、全てインラインアセンブラにすることで何倍か速くなります。(-O2付きだと、Cで書いたものもアセンブラで書いたものも数%しか変わりませんが、-O2なしだと信じられないぐらい変わります。)
2. ライブラリを使わない。(入出力はread()とwrite()で)入力ファイルが巨大なので、そもそもの問題を解く部分よりも、scanf()による文字列パース処理の方が圧倒的に重くなります。
入力ファイルの書式は限られているため、手でパースすることによりscanf()の処理の無駄を防ぐことができます。
また、STLなどを使わないことにより、内部的なnew/deleteの呼び出しを避けることができます。
では、各問に移ります。
ちなみに、Linux環境でないと以下のコードはコンパイルできません。あしからず。
#ifdef EFB_CHECK はデバッグ、#ifdef EFB_STAT はアルゴリズムの分析、#ifdef EFB_BENCH はベンチマークです。
1. 数列の構成は同じ?
素直な実装だと、set
しかしこのままだと、内部的なノードの確保や比較演算でかなりのロスがでるため、次のような最適化を行ないます。
(1)set
(2)set<999ビットの配列> をハッシュマップで実装する
(3)全てのノードをグローバル変数として予め確保しておく
それに加え、入力のパースをread()に置き換えたのが次のソースです。
#include <stdio.h> #define N (100000) #define HASHSIZE (131072) #define BUFSIZE (65536) char buf[BUFSIZE] __attribute__((aligned(32))); int head[HASHSIZE]; int bits[N][32] __attribute__((aligned(128))); unsigned mask[4] __attribute__((aligned(16))) = { 0xffffffffu, 0xffffffffu, 0xffffffffu, 127, }; #ifdef EFB_CHECK int c_n; void input_dump() { int i, j; fprintf(stderr, "input\n"); for(i=0; i<c_n; i++) { for(j=0; j<31; j++) { fprintf(stderr, "%x,", bits[i][j]); } fprintf(stderr, "%x,\n", bits[i][31]&127); } } void link_dump() { int i; fprintf(stderr, "head\n"); for(i=0; i<HASHSIZE; i++) { int p=head[i]; if(p) fprintf(stderr, "%d\n", (p-((int)bits))/128); else fprintf(stderr, "-1\n"); } fprintf(stderr, "next\n"); for(i=0; i<c_n; i++) { unsigned p=bits[i][31]&(-128); if(p) fprintf(stderr, "%d\n", (p-((int)bits))/128); else fprintf(stderr, "-1\n"); } } #endif // statistics #ifdef EFB_STAT #define IF_STAT(a) a int s_reads, s_nrbyte; int s_writes, s_nwbyte; void stat_output() { fprintf(stderr, "stats\n"); fprintf(stderr, " read : %d bytes read by %d read().\n", s_nrbyte, s_reads); fprintf(stderr, " write : %d bytes written by %d write().\n", s_nwbyte, s_writes); } #else #define IF_STAT(a) "" #endif // benchmark #ifdef EFB_BENCH #define BENCH_MARK_HERE "call bench_mark\n\t" #include <sys/time.h> int markc, markv[10]; void bench_mark() { static struct timeval tv; gettimeofday(&tv, NULL); markv[markc++] = tv.tv_usec; } void bench_output() { int i; fprintf(stderr, "bentch: "); for(i=0; i<markc-1; i++) { if(i>0) fprintf(stderr, ", "); fprintf(stderr, "%d", markv[i+1]-markv[i]); } fprintf(stderr, "\n"); } #else #define BENCH_MARK_HERE "" #endif int main() { __asm__( "pushl %%ebp\n\t" "movl %%esp, %%ebp\n\t" "subl $80, %%esp\n\t" "andl $-16, %%esp\n\t" "movl %%ebp, 60(%%esp)\n\t" BENCH_MARK_HERE ".Lread_first:\n\t" "movl $buf, %%esi\n\t" "movl $65536, %%ebx\n\t" ".Lread_first_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:-, ebp:- // use eax:x, ebx:nbyte, ecx:x, edx:x, esi:p, edi:-, ebp:- "xorl %%eax, %%eax\n\t" "movl %%eax, (%%esp)\n\t" // set fd "movl %%esi, 4(%%esp)\n\t" // set buf "movl %%ebx, 8(%%esp)\n\t" // set nbyte IF_STAT("addl $1, s_reads\n\t") "call read\n\t" "cmpl $-1, %%eax\n\t" "je .Lread_first_loop\n\t" // retry on error "test %%eax, %%eax\n\t" "jz .Lread_first_end\n\t" IF_STAT("addl %%eax, s_nrbyte\n\t") "addl %%eax, %%esi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lread_first_loop\n\t" ".Lread_first_end:\n\t" ".Latoi:\n\t" "movl $buf, %%esi\n\t" "movzbl buf, %%ecx\n\t" "xorl %%ebp, %%ebp\n\t" "subl $48, %%ecx\n\t" ".Latoi_loop:\n\t" // req eax:-, ebx:-, ecx:d, edx:-, esi:p, edi:-, ebp:a // use eax:-, ebx:-, ecx:d, edx:t, esi:p, edi:-, ebp:a "leal (%%ebp,%%ebp,4), %%edx\n\t" "addl $1, %%esi\n\t" "leal (%%ecx,%%edx,2), %%ebp\n\t" "movzbl (%%esi), %%ecx\n\t" "subl $48, %%ecx\n\t" "jae .Latoi_loop\n\t" ".Latoi_end:\n\t" "addl $1, %%esi\n\t" "movl %%ebp, 24(%%esp)\n\t" // store n #ifdef EFB_CHECK "movl %%ebp, c_n\n\t" // store c_n #endif ".Lparse_arrays:\n\t" "movl $bits-128, %%edi\n\t" ".Lparse_arrays_loop:\n\t" // req eax:d, ebx:-, ecx:c, edx:-, esi:p, edi:q, ebp:n // use eax:a, ebx:c, ecx:c, edx:t, esi:p, edi:q, ebp:i ".Lparse_arrays_loop_increment:\n\t" "movzbl (%%esi), %%eax\n\t" "movzbl 1(%%esi), %%ecx\n\t" "addl $128, %%edi\n\t" "subl $48, %%eax\n\t" "subl $1, %%ebp\n\t" "jb .Lcreate_hashset\n\t" ".Lparse:\n\t" "addl $1, %%esi\n\t" "subl $48, %%ecx\n\t" "jb .Lstore_bit\n\t" "addl $1, %%esi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%esi), %%ebx\n\t" "leal (%%ecx,%%edx,2), %%eax\n\t" "subl $48, %%ebx\n\t" "jb .Lstore_bit\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "addl $1, %%esi\n\t" "leal (%%ebx,%%edx,2), %%eax\n\t" ".Lstore_bit:\n\t" "subl $1, %%eax\n\t" "addl $1, %%esi\n\t" "movl %%eax, %%ecx\n\t" "movl $1, %%edx\n\t" "shrl $3, %%eax\n\t" "andl $7, %%ecx\n\t" "shll %%cl, %%edx\n\t" "orb %%dl, (%%edi,%%eax)\n\t" "movzbl (%%esi), %%eax\n\t" "movzbl 1(%%esi), %%ecx\n\t" "subl $48, %%eax\n\t" "jae .Lparse\n\t" ".Ltest_buf:\n\t" "addl $3, %%esi\n\t" "cmpl $buf+65536-1024, %%esi\n\t" // BUFSIZE-1024 "jb .Lparse_arrays_loop\n\t" ".Lread:\n\t" // req eax:-, ebx:-, ecx:-, edx:-, esi:p, edi:q, ebp:i // use eax:-, ebx:c, ecx:c, edx:t, esi:p, edi:q, ebp:i "movl %%esi, %%ebx\n\t" "subl $65536-1024, %%esi\n\t" // BUFSIZE-1024 "andl $-32, %%ebx\n\t" "movl %%esi, 12(%%esp)\n\t" // store p "leal 1024-65536(%%ebx), %%esi\n\t" // 1024-BUFSIZE ".Lmemcpy_loop:\n\t" "movdqa (%%ebx), %%xmm0\n\t" "movdqa 16(%%ebx), %%xmm1\n\t" "addl $32, %%ebx\n\t" "movdqa %%xmm0, (%%esi)\n\t" "movdqa %%xmm1, 16(%%esi)\n\t" "addl $32, %%esi\n\t" "cmpl $buf+65536, %%ebx\n\t" // BUFSIZE "jne .Lmemcpy_loop\n\t" "movl $buf+1024, %%esi\n\t" "movl $65536-1024, %%ebx\n\t" // BUFSIZE-1024 ".Lread_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:-, ebp:- // use eax:x, ebx:nbyte, ecx:x, edx:x, esi:p, edi:t, ebp:- "xorl %%eax, %%eax\n\t" "movl %%eax, (%%esp)\n\t" // set fd "movl %%esi, 4(%%esp)\n\t" // set buf "movl %%ebx, 8(%%esp)\n\t" // set nbyte IF_STAT("addl $1, s_reads\n\t") "call read\n\t" "cmpl $-1, %%eax\n\t" "je .Lread_loop\n\t" // retry on error "test %%eax, %%eax\n\t" "jz .Lread_end\n\t" IF_STAT("addl %%eax, s_nrbyte\n\t") "addl %%eax, %%esi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lread_loop\n\t" ".Lread_end:\n\t" "movl 12(%%esp), %%esi\n\t" // load p "jmp .Lparse_arrays_loop\n\t" ".Lcreate_hashset:\n\t" BENCH_MARK_HERE "movdqa mask, %%xmm3\n\t" "movl $bits, %%esi\n\t" "movl 24(%%esp), %%ebp\n\t" "xorl %%ebx, %%ebx\n\t" ".Lcreate_hashset_loop:\n\t" // req eax:-, ebx:hashsize, ecx:-, edx:-, esi:q, edi:-, ebp:i ".Lcalc_hash:\n\t" "movdqa (%%esi), %%xmm0\n\t" "movdqa 16(%%esi), %%xmm1\n\t" "paddd 32(%%esi), %%xmm0\n\t" "paddd 48(%%esi), %%xmm1\n\t" "paddd 64(%%esi), %%xmm0\n\t" "paddd 80(%%esi), %%xmm1\n\t" "paddd 96(%%esi), %%xmm0\n\t" "paddd 112(%%esi), %%xmm1\n\t" "paddd %%xmm1, %%xmm0\n\t" "pshufd $0x4e, %%xmm0, %%xmm1\n\t" "paddd %%xmm1, %%xmm0\n\t" "pshufd $0xb1, %%xmm0, %%xmm1\n\t" "paddd %%xmm1, %%xmm0\n\t" "movd %%xmm0, %%eax\n\t" "andl $131071, %%eax\n\t" ".Lhash_list_iterate:\n\t" "leal head(,%%eax,4), %%edx\n\t" "movl (%%edx), %%edi\n\t" "test %%edi, %%edi\n\t" "jz .Lhash_insert\n\t" ".Lhash_test_loop:\n\t" // req eax:-, ebx:hashsize, ecx:-, edx:pp, esi:q, edi:p, ebp:i "movdqa (%%edi), %%xmm0\n\t" "movdqa 16(%%edi), %%xmm1\n\t" "pcmpeqd (%%esi), %%xmm0\n\t" "pcmpeqd 16(%%esi), %%xmm1\n\t" "pmovmskb %%xmm0, %%eax\n\t" "pmovmskb %%xmm1, %%ecx\n\t" "cmpl $65535, %%eax\n\t" "jne .Lhash_test_next\n\t" "cmpl $65535, %%ecx\n\t" "jne .Lhash_test_next\n\t" "movdqa 32(%%edi), %%xmm0\n\t" "movdqa 48(%%edi), %%xmm1\n\t" "pcmpeqd 32(%%esi), %%xmm0\n\t" "pcmpeqd 48(%%esi), %%xmm1\n\t" "pmovmskb %%xmm0, %%eax\n\t" "pmovmskb %%xmm1, %%ecx\n\t" "cmpl $65535, %%eax\n\t" "jne .Lhash_test_next\n\t" "cmpl $65535, %%ecx\n\t" "jne .Lhash_test_next\n\t" "movdqa 64(%%edi), %%xmm0\n\t" "movdqa 80(%%edi), %%xmm1\n\t" "pcmpeqd 64(%%esi), %%xmm0\n\t" "pcmpeqd 80(%%esi), %%xmm1\n\t" "pmovmskb %%xmm0, %%eax\n\t" "pmovmskb %%xmm1, %%ecx\n\t" "cmpl $65535, %%eax\n\t" "jne .Lhash_test_next\n\t" "cmpl $65535, %%ecx\n\t" "jne .Lhash_test_next\n\t" "movdqa 96(%%edi), %%xmm0\n\t" "movdqa 112(%%edi), %%xmm1\n\t" "pand %%xmm3, %%xmm1\n\t" "pcmpeqd 96(%%esi), %%xmm0\n\t" "pcmpeqd 112(%%esi), %%xmm1\n\t" "pmovmskb %%xmm0, %%eax\n\t" "pmovmskb %%xmm1, %%ecx\n\t" "cmpl $65535, %%eax\n\t" "jne .Lhash_test_next\n\t" "cmpl $65535, %%ecx\n\t" "je .Lcreate_hashset_loop_increment\n\t" ".Lhash_test_next:\n\t" "leal 124(%%edi), %%edx\n\t" "movl 124(%%edi), %%edi\n\t" "xorl %%ecx, %%ecx\n\t" "andl $-128, %%edi\n\t" "jnz .Lhash_test_loop\n\t" ".Lhash_insert:\n\t" "orl %%esi, (%%edx)\n\t" "addl $1, %%ebx\n\t" ".Lcreate_hashset_loop_increment:\n\t" "addl $128, %%esi\n\t" "subl $1, %%ebp\n\t" "ja .Lcreate_hashset_loop\n\t" BENCH_MARK_HERE "movl %%ebx, %%ecx\n\t" ".Lprint:\n\t" // req eax:-, ebx:-, ecx:hashsize, edx:-, esi:-, edi:-, ebp:- "xorl %%ebx, %%ebx\n\t" "movl $10, buf+16\n\t" "movl $buf+16, %%edi\n\t" "movl $-858993459, %%esi\n\t" "addl $1, %%ebx\n\t" ".Ltostring_loop:\n\t" // req eax:-, ebx:l, ecx:a, edx:-, esi:3435973837, edi:p, ebp:- // use eax:t, ebx:l, ecx:a, edx:a/10, esi:3435973837, edi:p, ebp:- "movl %%ecx, %%eax\n\t" "mull %%esi\n\t" "shrl $3, %%edx\n\t" "leal (%%edx,%%edx,4), %%eax\n\t" "addl %%eax, %%eax\n\t" "subl $1, %%edi\n\t" "subl %%eax, %%ecx\n\t" "addl $1, %%ebx\n\t" "addl $48, %%ecx\n\t" "movb %%cl, (%%edi)\n\t" "movl %%edx, %%ecx\n\t" "test %%ecx, %%ecx\n\t" "jnz .Ltostring_loop\n\t" ".Lwrite_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:-, edi:p, ebp:- "movl $1, %%eax\n\t" "movl %%eax, (%%esp)\n\t" "movl %%edi, 4(%%esp)\n\t" "movl %%ebx, 8(%%esp)\n\t" IF_STAT("addl $1, s_writes\n\t") "call write\n\t" "cmpl $-1, %%eax\n\t" "je .Lwrite_loop\n\t" // retry on error IF_STAT("addl %%eax, s_nwbyte\n\t") "addl %%eax, %%edi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lwrite_loop\n\t" BENCH_MARK_HERE ".Lclear_stack:\n\t" "movl 60(%%esp), %%esp\n\t" "popl %%ebp\n\t" : : : "eax", "ebx", "ecx", "edx", "esi", "edi", "cc"); #ifdef EFB_CHECK input_dump(); link_dump(); #endif #ifdef EFB_STAT stat_output(); #endif #ifdef EFB_BENCH bench_output(); #endif return 0; }
2. 素数暗号
アリストテレスの篩を使うのが一般的だと思います。
全素数の埋め込みをした方もいるようですが、それは諦めました。
2の倍数は無視、他の数を篩うのに使う可能性のある素数の埋め込み、メモリアクセスの局所化などの最適化を行ないました。
#include <stdio.h> #include <stdlib.h> #include <sys/time.h> #define N (5000000) #define PN (5001216) #define SPAN (12288) // assert(PN%SPAN==0) #define P (445) #define M (890) #define RBUFSIZE (65536) #define WBUFSIZE (65536) char char_of[PN] __attribute__((aligned(4096))); char rbuf[RBUFSIZE] __attribute__((aligned(16))); char wbuf[WBUFSIZE]; int magi[M+2] = {3, 4, 5, 12, 7, 24, 11, 60, 13, 84, 17, 144, 19, 180, 23, 264, 29, 420, 31, 480, 37, 684, 41, 840, 43, 924, 47, 1104, 53, 1404, 59, 1740, 61, 1860, 67, 2244, 71, 2520, 73, 2664, 79, 3120, 83, 3444, 89, 3960, 97, 4704, 101, 5100, 103, 5304, 107, 5724, 109, 5940, 113, 6384, 127, 8064, 131, 8580, 137, 9384, 139, 9660, 149, 11100, 151, 11400, 157, 12324, 163, 13284, 167, 13944, 173, 14964, 179, 16020, 181, 16380, 191, 18240, 193, 18624, 197, 19404, 199, 19800, 211, 22260, 223, 24864, 227, 25764, 229, 26220, 233, 27144, 239, 28560, 241, 29040, 251, 31500, 257, 33024, 263, 34584, 269, 36180, 271, 36720, 277, 38364, 281, 39480, 283, 40044, 293, 42924, 307, 47124, 311, 48360, 313, 48984, 317, 50244, 331, 54780, 337, 56784, 347, 60204, 349, 60900, 353, 62304, 359, 64440, 367, 67344, 373, 69564, 379, 71820, 383, 73344, 389, 75660, 397, 78804, 401, 80400, 409, 83640, 419, 87780, 421, 88620, 431, 92880, 433, 93744, 439, 96360, 443, 98124, 449, 100800, 457, 104424, 461, 106260, 463, 107184, 467, 109044, 479, 114720, 487, 118584, 491, 120540, 499, 124500, 503, 126504, 509, 129540, 521, 135720, 523, 136764, 541, 146340, 547, 149604, 557, 155124, 563, 158484, 569, 161880, 571, 163020, 577, 166464, 587, 172284, 593, 175824, 599, 179400, 601, 180600, 607, 184224, 613, 187884, 617, 190344, 619, 191580, 631, 199080, 641, 205440, 643, 206724, 647, 209304, 653, 213204, 659, 217140, 661, 218460, 673, 226464, 677, 229164, 683, 233244, 691, 238740, 701, 245700, 709, 251340, 719, 258480, 727, 264264, 733, 268644, 739, 273060, 743, 276024, 751, 282000, 757, 286524, 761, 289560, 769, 295680, 773, 298764, 787, 309684, 797, 317604, 809, 327240, 811, 328860, 821, 337020, 823, 338664, 827, 341964, 829, 343620, 839, 351960, 853, 363804, 857, 367224, 859, 368940, 863, 372384, 877, 384564, 881, 388080, 883, 389844, 887, 393384, 907, 411324, 911, 414960, 919, 422280, 929, 431520, 937, 438984, 941, 442740, 947, 448404, 953, 454104, 967, 467544, 971, 471420, 977, 477264, 983, 483144, 991, 491040, 997, 497004, 1009, 509040, 1013, 513084, 1019, 519180, 1021, 521220, 1031, 531480, 1033, 533544, 1039, 539760, 1049, 550200, 1051, 552300, 1061, 562860, 1063, 564984, 1069, 571380, 1087, 590784, 1091, 595140, 1093, 597324, 1097, 601704, 1103, 608304, 1109, 614940, 1117, 623844, 1123, 630564, 1129, 637320, 1151, 662400, 1153, 664704, 1163, 676284, 1171, 685620, 1181, 697380, 1187, 704484, 1193, 711624, 1201, 721200, 1213, 735684, 1217, 740544, 1223, 747864, 1229, 755220, 1231, 757680, 1237, 765084, 1249, 780000, 1259, 792540, 1277, 815364, 1279, 817920, 1283, 823044, 1289, 830760, 1291, 833340, 1297, 841104, 1301, 846300, 1303, 848904, 1307, 854124, 1319, 869880, 1321, 872520, 1327, 880464, 1361, 926160, 1367, 934344, 1373, 942564, 1381, 953580, 1399, 978600, 1409, 992640, 1423, 1012464, 1427, 1018164, 1429, 1021020, 1433, 1026744, 1439, 1035360, 1447, 1046904, 1451, 1052700, 1453, 1055604, 1459, 1064340, 1471, 1081920, 1481, 1096680, 1483, 1099644, 1487, 1105584, 1489, 1108560, 1493, 1114524, 1499, 1123500, 1511, 1141560, 1523, 1159764, 1531, 1171980, 1543, 1190424, 1549, 1199700, 1553, 1205904, 1559, 1215240, 1567, 1227744, 1571, 1234020, 1579, 1246620, 1583, 1252944, 1597, 1275204, 1601, 1281600, 1607, 1291224, 1609, 1294440, 1613, 1300884, 1619, 1310580, 1621, 1313820, 1627, 1323564, 1637, 1339884, 1657, 1372824, 1663, 1382784, 1667, 1389444, 1669, 1392780, 1693, 1433124, 1697, 1439904, 1699, 1443300, 1709, 1460340, 1721, 1480920, 1723, 1484364, 1733, 1501644, 1741, 1515540, 1747, 1526004, 1753, 1536504, 1759, 1547040, 1777, 1578864, 1783, 1589544, 1787, 1596684, 1789, 1600260, 1801, 1621800, 1811, 1639860, 1823, 1661664, 1831, 1676280, 1847, 1705704, 1861, 1731660, 1867, 1742844, 1871, 1750320, 1873, 1754064, 1877, 1761564, 1879, 1765320, 1889, 1784160, 1901, 1806900, 1907, 1818324, 1913, 1829784, 1931, 1864380, 1933, 1868244, 1949, 1899300, 1951, 1903200, 1973, 1946364, 1979, 1958220, 1987, 1974084, 1993, 1986024, 1997, 1994004, 1999, 1998000, 2003, 2006004, 2011, 2022060, 2017, 2034144, 2027, 2054364, 2029, 2058420, 2039, 2078760, 2053, 2107404, 2063, 2127984, 2069, 2140380, 2081, 2165280, 2083, 2169444, 2087, 2177784, 2089, 2181960, 2099, 2202900, 2111, 2228160, 2113, 2232384, 2129, 2266320, 2131, 2270580, 2137, 2283384, 2141, 2291940, 2143, 2296224, 2153, 2317704, 2161, 2334960, 2179, 2374020, 2203, 2426604, 2207, 2435424, 2213, 2448684, 2221, 2466420, 2237, 2502084, 2239, 2506560, 2243, 2515524, 2251, 2533500, 2267, 2569644, 2269, 2574180, 2273, 2583264, 2281, 2601480, 2287, 2615184, 2293, 2628924, 2297, 2638104, 2309, 2665740, 2311, 2670360, 2333, 2721444, 2339, 2735460, 2341, 2740140, 2347, 2754204, 2351, 2763600, 2357, 2777724, 2371, 2810820, 2377, 2825064, 2381, 2834580, 2383, 2839344, 2389, 2853660, 2393, 2863224, 2399, 2877600, 2411, 2906460, 2417, 2920944, 2423, 2935464, 2437, 2969484, 2441, 2979240, 2447, 2993904, 2459, 3023340, 2467, 3043044, 2473, 3057864, 2477, 3067764, 2503, 3132504, 2521, 3177720, 2531, 3202980, 2539, 3223260, 2543, 3233424, 2549, 3248700, 2551, 3253800, 2557, 3269124, 2579, 3325620, 2591, 3356640, 2593, 3361824, 2609, 3403440, 2617, 3424344, 2621, 3434820, 2633, 3466344, 2647, 3503304, 2657, 3529824, 2659, 3535140, 2663, 3545784, 2671, 3567120, 2677, 3583164, 2683, 3599244, 2687, 3609984, 2689, 3615360, 2693, 3626124, 2699, 3642300, 2707, 3663924, 2711, 3674760, 2713, 3680184, 2719, 3696480, 2729, 3723720, 2731, 3729180, 2741, 3756540, 2749, 3778500, 2753, 3789504, 2767, 3828144, 2777, 3855864, 2789, 3889260, 2791, 3894840, 2797, 3911604, 2801, 3922800, 2803, 3928404, 2819, 3973380, 2833, 4012944, 2837, 4024284, 2843, 4041324, 2851, 4064100, 2857, 4081224, 2861, 4092660, 2879, 4144320, 2887, 4167384, 2897, 4196304, 2903, 4213704, 2909, 4231140, 2917, 4254444, 2927, 4283664, 2939, 4318860, 2953, 4360104, 2957, 4371924, 2963, 4389684, 2969, 4407480, 2971, 4413420, 2999, 4497000, 3001, 4503000, 3011, 4533060, 3019, 4557180, 3023, 4569264, 3037, 4611684, 3041, 4623840, 3049, 4648200, 3061, 4684860, 3067, 4703244, 3079, 4740120, 3083, 4752444, 3089, 4770960, 3109, 4832940, 3119, 4864080, 3121, 4870320, 3137, 4920384, 0, 0x7fffffff}; // check #ifdef EFB_CHECK int c_num; void table_dump() { int i; for(i=0; i<N; i++) { fputc(char_of[i], stderr); } fputc('\n', stderr); } void num_dump() { fprintf(stderr, "%d\n", c_num); } #endif // statistics #ifdef EFB_STAT #define IF_STAT(a) a int s_reads, s_nrbyte; int s_writes, s_nwbyte; void stat_output() { fprintf(stderr, "stats\n"); fprintf(stderr, " read : %d bytes read by %d read().\n", s_nrbyte, s_reads); fprintf(stderr, " write : %d bytes written by %d write().\n", s_nwbyte, s_writes); } #else #define IF_STAT(a) "" #endif // benchmark #ifdef EFB_BENCH #define BENCH_MARK_HERE "call bench_mark\n\t" int markc, markv[10]; void bench_mark() { static struct timeval tv; gettimeofday(&tv, NULL); markv[markc++] = tv.tv_usec; } void bench_output() { int i; fprintf(stderr, "bentch: "); for(i=0; i<markc-1; i++) { if(i>0) fprintf(stderr, ", "); fprintf(stderr, "%d", markv[i+1]-markv[i]); } fprintf(stderr, "\n"); } #else #define BENCH_MARK_HERE "" #endif int main() { __asm__( ".Lstack_init:\n\t" "pushl %%ebp\n\t" "movl %%esp, %%ebp\n\t" "subl $80, %%esp\n\t" "andl $-16, %%esp\n\t" "movl %%ebp, 60(%%esp)\n\t" // store ebp BENCH_MARK_HERE "movl $12288, %%edx\n\t" // SPAN "movl $95, %%ebx\n\t" "movl magi, %%ecx\n\t" "movl magi+4, %%eax\n\t" "movl $magi+8, %%edi\n\t" ".Lsieve_loop:\n\t" // req eax:b, ebx:95, ecx:d, edx:k, esi:-, edi:p, ebp:- "movb %%bl, char_of(%%eax)\n\t" "addl %%ecx, %%eax\n\t" "cmpl %%eax, %%edx\n\t" "ja .Lsieve_loop\n\t" "movl %%eax, -4(%%edi)\n\t" "movl (%%edi), %%ecx\n\t" "movl 4(%%edi), %%eax\n\t" "addl $8, %%edi\n\t" "cmpl %%eax, %%edx\n\t" "ja .Lsieve_loop\n\t" ".Lsieve_loop_increment:\n\t" "movl magi, %%ecx\n\t" "movl magi+4, %%eax\n\t" "movl $magi+8, %%edi\n\t" "addl $12288, %%edx\n\t" // SPAN "cmpl $5001216+12288, %%edx\n\t" // PN+SPAN "jne .Lsieve_loop\n\t" BENCH_MARK_HERE "movb $95, char_of\n\t" "movl $char_of+1, %%ebx\n\t" "movl $98, %%ecx\n\t" ".Lnumbering_loop_lower:" // req eax:-, ebx:p, ecx:c, edx:-, esi:-, edi:-, ebp:- // use eax:t, ebx:p, ecx:c, edx:-, esi:-, edi:-, ebp:- "movzbl (%%ebx), %%eax\n\t" "addl $1, %%ebx\n\t" "test %%eax, %%eax\n\t" "jnz .Lnumbering_loop_lower\n\t" "movb %%cl, -1(%%ebx)\n\t" "addl $1, %%ecx\n\t" "cmpl $123, %%ecx\n\t" "jne .Lnumbering_loop_lower\n\t" "movl $65, %%ecx\n\t" ".Lnumbering_loop_upper:\n\t" "movzbl (%%ebx), %%eax\n\t" "addl $1, %%ebx\n\t" "test %%eax, %%eax\n\t" "jnz .Lnumbering_loop_upper\n\t" "movb %%cl, -1(%%ebx)\n\t" "addl $1, %%ecx\n\t" "cmpl $91, %%ecx\n\t" "jne .Lnumbering_loop_upper\n\t" "movl $97, %%ecx\n\t" "cmpl $char_of+5000000, %%ebx\n\t" // N "jb .Lnumbering_loop_lower\n\t" ".Lnumbering_loop_end:\n\t" #ifdef EFB_CHECK "call table_dump\n\t" #endif BENCH_MARK_HERE ".Lread_first:\n\t" "movl $rbuf, %%esi\n\t" "movl $65536, %%ebx\n\t" // RBUFSIZE ".Lread_first_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:w, ebp:- "xorl %%eax, %%eax\n\t" "movl %%eax, (%%esp)\n\t" // set fd "movl %%esi, 4(%%esp)\n\t" // set buf "movl %%ebx, 8(%%esp)\n\t" // set nbyte IF_STAT("addl $1, s_reads\n\t") "call read\n\t" "cmpl $-1, %%eax\n\t" "je .Lread_first_loop\n\t" IF_STAT("addl %%eax, s_nrbyte\n\t") "test %%eax, %%eax\n\t" "jz .Lread_first_loop_end\n\t" "addl %%eax, %%esi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lread_first_loop\n\t" ".Lread_first_loop_end:\n\t" ".Lparse_init:\n\t" "movl $rbuf, %%esi\n\t" "movl $wbuf, %%edi\n\t" ".Lparse:\n\t" // req eax:-, ebx:-, ecx:-, edx:-, esi:p, edi:w, ebp:- "movzbl (%%esi), %%eax\n\t" "movzbl 1(%%esi), %%ecx\n\t" "addl $1, %%esi\n\t" "subl $48, %%eax\n\t" "jb .Lwrite_newline\n\t" "subl $48, %%ecx\n\t" "jb .Lparse_end\n\t" "addl $1, %%esi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%esi), %%ebx\n\t" "leal (%%ecx,%%edx,2), %%eax\n\t" "subl $48, %%ebx\n\t" "jb .Lparse_end\n\t" "addl $1, %%esi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%esi), %%ecx\n\t" "leal (%%ebx,%%edx,2), %%eax\n\t" "subl $48, %%ecx\n\t" "jb .Lparse_end\n\t" "addl $1, %%esi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%esi), %%ebx\n\t" "leal (%%ecx,%%edx,2), %%eax\n\t" "subl $48, %%ebx\n\t" "jb .Lparse_end\n\t" "addl $1, %%esi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%esi), %%ecx\n\t" "leal (%%ebx,%%edx,2), %%eax\n\t" "subl $48, %%ecx\n\t" "jb .Lparse_end\n\t" "addl $1, %%esi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%esi), %%ebx\n\t" "leal (%%ecx,%%edx,2), %%eax\n\t" "subl $48, %%ebx\n\t" "jb .Lparse_end\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "addl $1, %%esi\n\t" "leal (%%ebx,%%edx,2), %%eax\n\t" ".Lparse_end:\n\t" "addl $1, %%esi\n\t" #ifdef EFB_CHECK "movl %%eax, c_num\n\t" "call num_dump\n\t" "movl c_num, %%eax\n\t" #endif "test $1, %%eax\n\t" "jz .Leven\n\t" "shrl $1, %%eax\n\t" "movzbl char_of(%%eax), %%ecx\n\t" "movb %%cl, (%%edi)\n\t" "addl $1, %%edi\n\t" "cmpl $wbuf+65536, %%edi\n\t" // WBUFSIZE "je .Lwrite\n\t" "cmpl $rbuf+65536-16, %%esi\n\t" // RBUFSIZE-16 "jae .Lread\n\t" "jmp .Lparse\n\t" ".Leven:\n\t" "movl $95, %%edx\n\t" "movl $97, %%ebx\n\t" "cmpl $2, %%eax\n\t" "cmovel %%ebx, %%edx\n\t" "movb %%dl, (%%edi)\n\t" "addl $1, %%edi\n\t" "cmpl $wbuf+65536, %%edi\n\t" // WBUFSIZE "je .Lwrite\n\t" "cmpl $rbuf+65536-16, %%esi\n\t" // RBUFSIZE-16 "jae .Lread\n\t" "jmp .Lparse\n\t" ".Lwrite:\n\t" // w==wbuf+WBUFSIZE "movl $wbuf, %%edi\n\t" "movl $65536, %%ebx\n\t" // WBUFSIZE ".Lwrite_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:w, ebp:- "movl $1, %%eax\n\t" "movl %%eax, (%%esp)\n\t" "movl %%edi, 4(%%esp)\n\t" "movl %%ebx, 8(%%esp)\n\t" IF_STAT("addl $1, s_writes\n\t") "call write\n\t" "cmpl $-1, %%eax\n\t" "je .Lwrite_loop\n\t" IF_STAT("addl %%eax, s_nwbyte\n\t") "addl %%eax, %%edi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lwrite_loop\n\t" ".Lwrite_loop_end:\n\t" "movl $wbuf, %%edi\n\t" "cmpl $rbuf+65536-16, %%esi\n\t" // RBUFSIZE-16 "jb .Lparse\n\t" ".Lread:\n\t" "movdqa rbuf+65536-16, %%xmm0\n\t" // RBUFSIZE-16 "subl $65536-16, %%esi\n\t" // RBUFSIZE-16 "movdqa %%xmm0, rbuf\n\t" "movl $rbuf+16, %%ebp\n\t" "movl $65536-16, %%ebx\n\t" // RBUFSIZE-16 ".Lread_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:p, edi:w, ebp:np "xorl %%eax, %%eax\n\t" "movl %%eax, (%%esp)\n\t" // set fd "movl %%ebp, 4(%%esp)\n\t" // sed buf "movl %%ebx, 8(%%esp)\n\t" // set nbyte IF_STAT("addl $1, s_reads\n\t") "call read\n\t" "cmpl $-1, %%eax\n\t" "je .Lread_loop\n\t" IF_STAT("addl %%eax, s_nrbyte\n\t") "test %%eax, %%eax\n\t" "jz .Lparse\n\t" "addl %%eax, %%ebp\n\t" "subl %%eax, %%ebx\n\t" "jbe .Lparse\n\t" "jmp .Lread_loop\n\t" ".Lread_loop_end:\n\t" ".Lwrite_newline:\n\t" "movl $10, (%%edi)\n\t" "movl %%edi, %%ebx\n\t" "movl $wbuf, %%edi\n\t" "subl $wbuf-1, %%ebx\n\t" ".Lwrite_newline_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:-, edi:w, ebp:- "movl $1, %%eax\n\t" "movl %%eax, (%%esp)\n\t" "movl %%edi, 4(%%esp)\n\t" "movl %%ebx, 8(%%esp)\n\t" IF_STAT("addl $1, s_writes\n\t") "call write\n\t" "cmpl $-1, %%eax\n\t" "je .Lwrite_newline_loop\n\t" IF_STAT("addl %%eax, s_nwbyte\n\t") "addl %%eax, %%edi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lwrite_newline_loop\n\t" ".Lwrite_newline_end:\n\t" BENCH_MARK_HERE ".Lclean_stack:\n\t" "movl 60(%%esp), %%esp\n\t" "popl %%ebp\n\t" : : : "eax", "ebx", "ecx", "edx", "esi", "edi"); #ifdef EFB_STAT stat_output(); #endif #ifdef EFB_BENCH bench_output(); #endif return 0; }
3. n番目に大きい分数は?
Spaghetti Sourceで紹介されているアルゴリズムを使用しました。ただし、Spaghetti Sourceの実装は一部バグがあるので注意が必要です。
中央値の選択は、(左端の数+右端の数)/2という大雑把なことをしています。
ある意味、一番工夫のしどころがなかった問題でした。
#include <unistd.h> #include <stdio.h> #include <time.h> #include <sys/time.h> #define M (1000000) #define BUFSIZE (16777216) // 2**24 double fv[M]; char* fp[M]; char buf[BUFSIZE]; const double HALF=0.5; // check #ifdef EFB_CHECK #define MEM_DUMP_HERE "call mem_dump\n\t" int c_N, c_n; void mem_dump() { int i; fprintf(stderr, "mem_dump\n"); fprintf(stderr, "N=%d, n=%d\n", c_N, c_n); for(i=0; i<c_N; i++) { fprintf(stderr, "%20.12f at %d\n", fv[i], (int)(fp[i]-buf)); } } #else #define MEM_DUMP_HERE "" #endif // statistics #ifdef EFB_STAT #define IF_STAT(a) a int s_reads, s_nrbyte; int s_writes, s_nwbyte; int s_steps, s_lcmps, s_rcmps, s_swaps; void stat_output() { fprintf(stderr, "stats\n"); fprintf(stderr, " read : %d bytes read by %d read().\n", s_nrbyte, s_reads); fprintf(stderr, " write : %d bytes written by %d write().\n", s_nwbyte, s_writes); fprintf(stderr, " select : %d steps, %d(%d:%d) compares and %d swaps.\n", s_steps, s_lcmps+s_rcmps, s_lcmps, s_rcmps, s_swaps); } #else #define IF_STAT(a) "" #endif // benchmark #ifdef EFB_BENCH #define BENCH_MARK_HERE "call bench_mark\n\t" int markc, markv[10]; void bench_mark() { static struct timeval tv; gettimeofday(&tv, NULL); markv[markc++] = tv.tv_usec; } void bench_output() { int i; fprintf(stderr, "bentch: "); for(i=0; i<markc-1; i++) { if(i>0) fprintf(stderr, ", "); fprintf(stderr, "%d", markv[i+1]-markv[i]); } fprintf(stderr, "\n"); } #else #define BENCH_MARK_HERE "" #endif int main() { __asm__( ".Lstack_init:\n\t" "pushl %%ebp\n\t" "movl %%esp, %%ebp\n\t" "subl $80, %%esp\n\t" "andl $-16, %%esp\n\t" "movl %%ebp, 60(%%esp)\n\t" // store ebp BENCH_MARK_HERE ".Lread:\n\t" "movl $buf, %%edi\n\t" "movl $16777216, %%ebx\n\t" ".Lread_loop:\n\t" // req eax:-, ebx:nbyte, ecx:-, edx:-, esi:-, edi:p, ebp:- // use eax:ret, ebx:nbyte, ecx:x, edx:x, esi:-, edi:p, ebp:- "xorl %%eax, %%eax\n\t" // set fd "movl %%eax, (%%esp)\n\t" // set fd "movl %%edi, 4(%%esp)\n\t" // set buf "movl %%ebx, 8(%%esp)\n\t" // set nbyte IF_STAT("addl $1, s_reads\n\t") "call read\n\t" "cmpl $-1, %%eax\n\t" "je .Lread_loop\n\t" IF_STAT("addl %%eax, s_nrbyte\n\t") "test %%eax, %%eax\n\t" "jz .Lread_loop_fin\n\t" "subl %%eax, %%ebx\n\t" "addl %%eax, %%edi\n\t" "jmp .Lread_loop\n\t" ".Lread_loop_fin:\n\t" BENCH_MARK_HERE "movl $buf, %%edi\n\t" ".Lparse_N:\n\t" "xorl %%eax, %%eax\n\t" "movzbl (%%edi), %%ecx\n\t" ".Lparse_N_loop:\n\t" // req eax:a, ebx:-, ecx:c, edx:-, esi:-, edi:p, ebp:- // use eax:a, ebx:-, ecx:c, edx:t, esi:-, edi:p, ebp:- "leal (%%eax,%%eax,4), %%edx\n\t" "addl $1, %%edi\n\t" "leal -48(%%ecx,%%edx,2), %%eax\n\t" "movzbl (%%edi), %%ecx\n\t" "test $16, %%ecx\n\t" "jnz .Lparse_N_loop\n\t" ".Lparse_N_loop_fin:\n\t" "movl %%eax, 12(%%esp)\n\t" // store N #ifdef EFB_CHECK "movl %%eax, c_N\n\t" // store c_N #endif "addl $1, %%edi\n\t" ".Lparse_n:\n\t" "xorl %%ebx, %%ebx\n\t" "movzbl (%%edi), %%ecx\n\t" ".Lparse_n_loop:\n\t" // req eax:N, ebx:a, ecx:c, edx:-, esi:-, edi:p, ebp:- // use eax:N, ebx:a, ecx:c, edx:t, esi:-, edi:p, ebp:- "leal (%%ebx,%%ebx,4), %%edx\n\t" "addl $1, %%edi\n\t" "leal -48(%%ecx,%%edx,2), %%ebx\n\t" "movzbl (%%edi), %%ecx\n\t" "test $16, %%ecx\n\t" "jnz .Lparse_n_loop\n\t" ".Lparse_n_loop_fin:\n\t" "movl %%ebx, 16(%%esp)\n\t" // store n #ifdef EFB_CHECK "movl %%ebx, c_n\n\t" // store c_n #endif BENCH_MARK_HERE "addl $1, %%edi\n\t" ".Lparse_fractions:\n\t" "xorl %%ebp, %%ebp\n\t" ".Lparse_fractions_loop:\n\t" // req eax:-, ebx:-, ecx:-, edx:-, esi:-, edi:p, ebp:i "movl %%edi, fp(,%%ebp,4)\n\t" // store fp[i] ".Lparse_a:\n\t" // use eax:a, ebx:s, ecx:c, edx:t, esi:t, edi:p, ebp:i "movzbl (%%edi), %%eax\n\t" "movzbl 1(%%edi), %%edx\n\t" "addl $1, %%edi\n\t" "xorl %%ebx, %%ebx\n\t" "test $16, %%eax\n\t" "setz %%bl\n\t" "cmovz %%edx, %%eax\n\t" "addl %%ebx, %%edi\n\t" "subl $48, %%eax\n\t" "movzbl (%%edi), %%ecx\n\t" "subl $48, %%ecx\n\t" "jb .Lparse_b\n\t" "addl $1, %%edi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%edi), %%esi\n\t" "leal (%%ecx,%%edx,2), %%eax\n\t" "subl $48, %%esi\n\t" "jb .Lparse_b\n\t" "addl $1, %%edi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%edi), %%ecx\n\t" "leal (%%esi,%%edx,2), %%eax\n\t" "subl $48, %%ecx\n\t" "jb .Lparse_b\n\t" "addl $1, %%edi\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "movzbl (%%edi), %%esi\n\t" "leal (%%ecx,%%edx,2), %%eax\n\t" "subl $48, %%esi\n\t" "jb .Lparse_b\n\t" "leal (%%eax,%%eax,4), %%edx\n\t" "addl $1, %%edi\n\t" "leal (%%esi,%%edx,2), %%eax\n\t" ".Lparse_b:\n\t" // use eax:c, ebx:t, ecx:a, edx:t, esi:t, edi:p, ebp:i "addl $1, %%edi\n\t" "movzbl (%%edi), %%ecx\n\t" "movzbl 1(%%edi), %%esi\n\t" "addl $1, %%edi\n\t" "xorl %%edx, %%edx\n\t" "test $16, %%ecx\n\t" "setz %%dl\n\t" "cmovz %%esi, %%ecx\n\t" "addl %%edx, %%ebx\n\t" "addl %%edx, %%edi\n\t" "movl %%eax, %%esi\n\t" "negl %%eax\n\t" "test $1, %%ebx\n\t" "cmovnz %%eax, %%esi\n\t" "movzbl (%%edi), %%eax\n\t" "subl $48, %%ecx\n\t" "cvtsi2sd %%esi, %%xmm0\n\t" "subl $48, %%eax\n\t" "jb .Lstore_fraction_data\n\t" "addl $1, %%edi\n\t" "leal (%%ecx,%%ecx,4), %%edx\n\t" "movzbl (%%edi), %%esi\n\t" "leal (%%eax,%%edx,2), %%ecx\n\t" "subl $48, %%esi\n\t" "jb .Lstore_fraction_data\n\t" "addl $1, %%edi\n\t" "leal (%%ecx,%%ecx,4), %%edx\n\t" "movzbl (%%edi), %%eax\n\t" "leal (%%esi,%%edx,2), %%ecx\n\t" "subl $48, %%eax\n\t" "jb .Lstore_fraction_data\n\t" "addl $1, %%edi\n\t" "leal (%%ecx,%%ecx,4), %%edx\n\t" "movzbl (%%edi), %%esi\n\t" "leal (%%eax,%%edx,2), %%ecx\n\t" "subl $48, %%esi\n\t" "jb .Lstore_fraction_data\n\t" "leal (%%ecx,%%ecx,4), %%edx\n\t" "addl $1, %%edi\n\t" "leal (%%esi,%%edx,2), %%ecx\n\t" ".Lstore_fraction_data:\n\t" "cvtsi2sd %%ecx, %%xmm1\n\t" "addl $1, %%ebp\n\t" "divsd %%xmm1, %%xmm0\n\t" "addl $1, %%edi\n\t" "movsd %%xmm0, fv-8(,%%ebp,8)\n\t" "cmpl 12(%%esp), %%ebp\n\t" // N "jne .Lparse_fractions_loop\n\t" MEM_DUMP_HERE BENCH_MARK_HERE ".Lselect:\n\t" "movl 12(%%esp), %%edi\n\t" "movl %%edi, %%ebp\n\t" "subl 16(%%esp), %%ebp\n\t" "xorl %%esi, %%esi\n\t" "subl $1, %%edi\n\t" "movsd HALF, %%xmm3\n\t" ".Lselect_loop:\n\t" IF_STAT("addl $1, s_steps\n\t") "movsd fv(,%%esi,8), %%xmm0\n\t" "leal -1(%%esi), %%ebx\n\t" "addsd fv(,%%edi,8), %%xmm0\n\t" "leal 1(%%edi), %%edx\n\t" "mulsd %%xmm3, %%xmm0\n\t" ".Lpartition_loop:\n\t" ".Lleft_loop:\n\t" IF_STAT("addl $1, s_lcmps\n\t") "movsd fv+8(,%%ebx,8), %%xmm1\n\t" "addl $1, %%ebx\n\t" "ucomisd %%xmm1, %%xmm0\n\t" "ja .Lleft_loop\n\t" ".Lright_loop:\n\t" IF_STAT("addl $1, s_rcmps\n\t") "movsd fv-8(,%%edx,8), %%xmm2\n\t" "subl $1, %%edx\n\t" "ucomisd %%xmm2, %%xmm0\n\t" "jb .Lright_loop\n\t" "cmpl %%ebx, %%edx\n\t" "jle .Ldo_drop\n\t" ".Lswap:\n\t" IF_STAT("addl $1, s_swaps\n\t") "movl fp(,%%ebx,4), %%eax\n\t" "movl fp(,%%edx,4), %%ecx\n\t" "movsd %%xmm1, fv(,%%edx,8)\n\t" "movsd %%xmm2, fv(,%%ebx,8)\n\t" "movl %%eax, fp(,%%edx,4)\n\t" "movl %%ecx, fp(,%%ebx,4)\n\t" "jmp .Lpartition_loop\n\t" ".Ldo_drop:\n\t" "subl $1, %%ebx\n\t" "addl $1, %%edx\n\t" "cmpl %%ebp, %%ebx\n\t" "cmovge %%ebx, %%edi\n\t" "cmovl %%edx, %%esi\n\t" ".Lselect_loop_next:\n\t" "cmpl %%esi, %%edi\n\t" "jg .Lselect_loop\n\t" MEM_DUMP_HERE BENCH_MARK_HERE ".Lwrite:\n\t" "movl 12(%%esp), %%eax\n\t" "subl 16(%%esp), %%eax\n\t" "xorl %%ebx, %%ebx\n\t" "movl fp(,%%eax,4), %%edi\n\t" ".Lstrlen_loop:\n\t" "movzbl (%%edi,%%ebx), %%ecx\n\t" "addl $1, %%ebx\n\t" "cmpl $10, %%ecx\n\t" "jne .Lstrlen_loop\n\t" ".Lwrite_loop:\n\t" "movl $1, %%eax\n\t" "movl %%eax, (%%esp)\n\t" // set fd "movl %%edi, 4(%%esp)\n\t" // set buf "movl %%ebx, 8(%%esp)\n\t" // set nbyte IF_STAT("addl $1, s_writes\n\t") "call write\n\t" "cmpl $-1, %%eax\n\t" "je .Lwrite_loop\n\t" IF_STAT("addl %%eax, s_nwbyte\n\t") "addl %%eax, %%edi\n\t" "subl %%eax, %%ebx\n\t" "ja .Lwrite_loop\n\t" ".Lclean_stack:\n\t" "movl 60(%%esp), %%esp\n\t" "popl %%ebp\n\t" : : : "eax", "ebx", "ecx", "edx", "esi", "edi"); #ifdef EFB_STAT stat_output(); #endif #ifdef EFB_BENCH bench_output(); #endif return 0; }