# NOTE(review): the transcript had a stray "cat /tmp/a.c" immediately after
# the heredoc opener below.  Inside the heredoc it would become part of a.c
# and break the build, so it is kept here as a comment only -- confirm
# against the original posting.
cat > a.c <<\EOF
/* Benchmark driver: dlopen() a fixed list of shared objects, in order.  */
#include <stddef.h>
#include <dlfcn.h>

const char *libs[] = {
  "libvclplug_gtk680lx.so", "libvclplug_gen680lx.so", "libnss_files.so.2",
  "libGL.so.1", "servicemgr.uno.so", "shlibloader.uno.so",
  "simplereg.uno.so", "nestedreg.uno.so", "typemgr.uno.so",
  "implreg.uno.so", "security.uno.so", "libreg.so.3", "libstore.so.3",
  "regtypeprov.uno.so", "configmgr2.uno.so", "typeconverter.uno.so",
  "gconfbe1.uno.so", "behelper.uno.so", "sax.uno.so", "localebe1.uno.so",
  "uriproc.uno.so", "libspl680lx.so", "libucb1.so", "ucpgvfs1.uno.so",
  "libgcc3_uno.so", "libpackage2.so", "libfileacc.so", "libuui680lx.so",
  "libfilterconfig1.so", "libdtransX11680lx.so", "i18npool.uno.so",
  "liblocaledata_en.so", "fsstorage.uno.so", "libxstor.so",
  "libdbtools680lx.so", "libcups.so.2", "libgnutls.so.13",
  "libgcrypt.so.11", "libgpg-error.so.0", "libmcnttype.so",
  "libucpchelp1.so", "svtmisc.uno.so"
};

int
main (int argc, char **argv)
{
  size_t i;
  void *h;
  int flags = RTLD_LAZY;

  /* A first argument starting with 'g' ("global") adds RTLD_GLOBAL.
     Anything else, or no argument at all, leaves the flags at RTLD_LAZY;
     the argc test keeps a bare "./a" from dereferencing a NULL argv[1].  */
  if (argc > 1 && argv[1][0] == 'g')
    flags |= RTLD_GLOBAL;

  /* The returned handles are neither checked nor closed: the program only
     exists to run the dlopen calls that the loops below time.  */
  for (i = 0; i < sizeof (libs) / sizeof (libs[0]); ++i)
    h = dlopen (libs[i], flags);
  return 0;
}
EOF
gcc -g -O2 -o a a.c -Wl,-rpath,/usr/lib64/openoffice.org2.0/program/ \
-L/usr/lib64/openoffice.org2.0/program/ -lsoffice -lsw680lx -lsvx680lx -lstdc++ -lm -shared-libgcc
for V in local global; do for M in '' 'export LD_X=1' 'export LD_BIND_NOW=1' 'export LD_X=1 LD_BIND_NOW=1'; \
do ( for i in 1 2 3 4; do eval $M; time ./a $V; done 2>&1 > /dev/null | \
awk 'BEGIN { printf "'"$V $M"'\t" } /^real/ { printf "%s ", $2 } END { printf "\n" }' ); done; done
take4
local 0m0.180s 0m0.175s 0m0.174s 0m0.174s
local export LD_X=1 0m0.591s 0m0.593s 0m0.593s 0m0.603s
local export LD_BIND_NOW=1 0m0.306s 0m0.303s 0m0.309s 0m0.303s
local export LD_X=1 LD_BIND_NOW=1 0m1.186s 0m1.188s 0m1.146s 0m1.149s
global 0m0.203s 0m0.198s 0m0.201s 0m0.198s
global export LD_X=1 0m0.669s 0m0.686s 0m0.673s 0m0.672s
global export LD_BIND_NOW=1 0m0.354s 0m0.349s 0m0.347s 0m0.348s
global export LD_X=1 LD_BIND_NOW=1 0m1.334s
0m1.347s 0m1.356s 0m1.379s for V in local global; do for M in '' 'export LD_X=1' 'export LD_BIND_NOW=1' 'export LD_X=1 LD_BIND_NOW=1'; \ do ( echo "$V $M"; eval $M; valgrind --tool=cachegrind ./a $V 2>&1 > /dev/null | sed -n '/== I refs/,$p' ); \ done; done take4 local ==22333== I refs: 201,078,072 ==22333== I1 misses: 11,304 ==22333== L2i misses: 9,911 ==22333== I1 miss rate: 0.00% ==22333== L2i miss rate: 0.00% ==22333== ==22333== D refs: 74,797,589 (58,315,275 rd + 16,482,314 wr) ==22333== D1 misses: 3,404,397 ( 3,256,012 rd + 148,385 wr) ==22333== L2d misses: 489,312 ( 398,103 rd + 91,209 wr) ==22333== D1 miss rate: 4.5% ( 5.5% + 0.9% ) ==22333== L2d miss rate: 0.6% ( 0.6% + 0.5% ) ==22333== ==22333== L2 refs: 3,415,701 ( 3,267,316 rd + 148,385 wr) ==22333== L2 misses: 499,223 ( 408,014 rd + 91,209 wr) ==22333== L2 miss rate: 0.1% ( 0.1% + 0.5% ) local export LD_X=1 ==22337== I refs: 311,492,449 ==22337== I1 misses: 11,299 ==22337== L2i misses: 10,321 ==22337== I1 miss rate: 0.00% ==22337== L2i miss rate: 0.00% ==22337== ==22337== D refs: 128,005,658 (98,076,224 rd + 29,929,434 wr) ==22337== D1 misses: 9,559,670 ( 9,395,598 rd + 164,072 wr) ==22337== L2d misses: 3,022,405 ( 2,916,644 rd + 105,761 wr) ==22337== D1 miss rate: 7.4% ( 9.5% + 0.5% ) ==22337== L2d miss rate: 2.3% ( 2.9% + 0.3% ) ==22337== ==22337== L2 refs: 9,570,969 ( 9,406,897 rd + 164,072 wr) ==22337== L2 misses: 3,032,726 ( 2,926,965 rd + 105,761 wr) ==22337== L2 miss rate: 0.6% ( 0.7% + 0.3% ) local export LD_BIND_NOW=1 ==22341== I refs: 389,236,554 ==22341== I1 misses: 10,837 ==22341== L2i misses: 9,717 ==22341== I1 miss rate: 0.00% ==22341== L2i miss rate: 0.00% ==22341== ==22341== D refs: 148,524,589 (115,298,089 rd + 33,226,500 wr) ==22341== D1 misses: 6,929,319 ( 6,754,195 rd + 175,124 wr) ==22341== L2d misses: 657,766 ( 559,064 rd + 98,702 wr) ==22341== D1 miss rate: 4.6% ( 5.8% + 0.5% ) ==22341== L2d miss rate: 0.4% ( 0.4% + 0.2% ) ==22341== ==22341== L2 refs: 6,940,156 ( 6,765,032 rd + 
175,124 wr) ==22341== L2 misses: 667,483 ( 568,781 rd + 98,702 wr) ==22341== L2 miss rate: 0.1% ( 0.1% + 0.2% ) local export LD_X=1 LD_BIND_NOW=1 ==22345== I refs: 622,558,758 ==22345== I1 misses: 10,829 ==22345== L2i misses: 9,927 ==22345== I1 miss rate: 0.00% ==22345== L2i miss rate: 0.00% ==22345== ==22345== D refs: 262,006,683 (199,686,725 rd + 62,319,958 wr) ==22345== D1 misses: 20,358,555 ( 20,138,179 rd + 220,376 wr) ==22345== L2d misses: 6,288,581 ( 6,175,041 rd + 113,540 wr) ==22345== D1 miss rate: 7.7% ( 10.0% + 0.3% ) ==22345== L2d miss rate: 2.4% ( 3.0% + 0.1% ) ==22345== ==22345== L2 refs: 20,369,384 ( 20,149,008 rd + 220,376 wr) ==22345== L2 misses: 6,298,508 ( 6,184,968 rd + 113,540 wr) ==22345== L2 miss rate: 0.7% ( 0.7% + 0.1% ) global ==22349== I refs: 216,049,983 ==22349== I1 misses: 11,443 ==22349== L2i misses: 10,043 ==22349== I1 miss rate: 0.00% ==22349== L2i miss rate: 0.00% ==22349== ==22349== D refs: 82,419,601 (64,203,003 rd + 18,216,598 wr) ==22349== D1 misses: 5,154,627 ( 5,004,907 rd + 149,720 wr) ==22349== L2d misses: 500,196 ( 408,675 rd + 91,521 wr) ==22349== D1 miss rate: 6.2% ( 7.7% + 0.8% ) ==22349== L2d miss rate: 0.6% ( 0.6% + 0.5% ) ==22349== ==22349== L2 refs: 5,166,070 ( 5,016,350 rd + 149,720 wr) ==22349== L2 misses: 510,239 ( 418,718 rd + 91,521 wr) ==22349== L2 miss rate: 0.1% ( 0.1% + 0.5% ) global export LD_X=1 ==22353== I refs: 337,282,635 ==22353== I1 misses: 11,425 ==22353== L2i misses: 10,469 ==22353== I1 miss rate: 0.00% ==22353== L2i miss rate: 0.00% ==22353== ==22353== D refs: 141,116,515 (107,952,083 rd + 33,164,432 wr) ==22353== D1 misses: 12,018,123 ( 11,852,619 rd + 165,504 wr) ==22353== L2d misses: 3,503,191 ( 3,396,933 rd + 106,258 wr) ==22353== D1 miss rate: 8.5% ( 10.9% + 0.4% ) ==22353== L2d miss rate: 2.4% ( 3.1% + 0.3% ) ==22353== ==22353== L2 refs: 12,029,548 ( 11,864,044 rd + 165,504 wr) ==22353== L2 misses: 3,513,660 ( 3,407,402 rd + 106,258 wr) ==22353== L2 miss rate: 0.7% ( 0.7% + 0.3% ) global 
export LD_BIND_NOW=1 ==22359== I refs: 416,388,279 ==22359== I1 misses: 10,963 ==22359== L2i misses: 9,841 ==22359== I1 miss rate: 0.00% ==22359== L2i miss rate: 0.00% ==22359== ==22359== D refs: 162,316,098 (125,953,786 rd + 36,362,312 wr) ==22359== D1 misses: 10,111,441 ( 9,932,496 rd + 178,945 wr) ==22359== L2d misses: 675,000 ( 575,984 rd + 99,016 wr) ==22359== D1 miss rate: 6.2% ( 7.8% + 0.4% ) ==22359== L2d miss rate: 0.4% ( 0.4% + 0.2% ) ==22359== ==22359== L2 refs: 10,122,404 ( 9,943,459 rd + 178,945 wr) ==22359== L2 misses: 684,841 ( 585,825 rd + 99,016 wr) ==22359== L2 miss rate: 0.1% ( 0.1% + 0.2% ) global export LD_X=1 LD_BIND_NOW=1 ==22363== I refs: 668,842,809 ==22363== I1 misses: 10,942 ==22363== L2i misses: 10,052 ==22363== I1 miss rate: 0.00% ==22363== L2i miss rate: 0.00% ==22363== ==22363== D refs: 285,593,622 (217,433,359 rd + 68,160,263 wr) ==22363== D1 misses: 24,793,665 ( 24,570,244 rd + 223,421 wr) ==22363== L2d misses: 7,189,812 ( 7,075,786 rd + 114,026 wr) ==22363== D1 miss rate: 8.6% ( 11.3% + 0.3% ) ==22363== L2d miss rate: 2.5% ( 3.2% + 0.1% ) ==22363== ==22363== L2 refs: 24,804,607 ( 24,581,186 rd + 223,421 wr) ==22363== L2 misses: 7,199,864 ( 7,085,838 rd + 114,026 wr) ==22363== L2 miss rate: 0.7% ( 0.7% + 0.1% ) for V in local global; do for M in '' '-E LD_X=1' '-E LD_BIND_NOW=1' '-E LD_X=1 -E LD_BIND_NOW=1'; \ do ( echo "$V $M"; ./timing $M ./a $V ); done; done take4 local Strip out best and worst realtime result minimum: 0.171330000 sec real / 0.000059879 sec CPU maximum: 0.183637000 sec real / 0.000140912 sec CPU average: 0.173522535 sec real / 0.000065315 sec CPU stdev : 0.000694442 sec real / 0.000000770 sec CPU local -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.575774000 sec real / 0.000053405 sec CPU maximum: 0.595938000 sec real / 0.000079537 sec CPU average: 0.587580821 sec real / 0.000075442 sec CPU stdev : 0.004159043 sec real / 0.000001989 sec CPU local -E LD_BIND_NOW=1 
optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.299670000 sec real / 0.000055573 sec CPU maximum: 0.306395000 sec real / 0.000086738 sec CPU average: 0.302749821 sec real / 0.000073164 sec CPU stdev : 0.001761517 sec real / 0.000003183 sec CPU local -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 1.162265000 sec real / 0.000055101 sec CPU maximum: 1.209383000 sec real / 0.000073149 sec CPU average: 1.189671071 sec real / 0.000065435 sec CPU stdev : 0.007947359 sec real / 0.000000731 sec CPU global Strip out best and worst realtime result minimum: 0.195825000 sec real / 0.000064238 sec CPU maximum: 0.198755000 sec real / 0.000069930 sec CPU average: 0.197095571 sec real / 0.000065329 sec CPU stdev : 0.000657043 sec real / 0.000000924 sec CPU global -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.658855000 sec real / 0.000052899 sec CPU maximum: 0.693884000 sec real / 0.000084485 sec CPU average: 0.676305857 sec real / 0.000076110 sec CPU stdev : 0.004805411 sec real / 0.000003079 sec CPU global -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.342948000 sec real / 0.000054621 sec CPU maximum: 0.353243000 sec real / 0.000080357 sec CPU average: 0.347779142 sec real / 0.000073702 sec CPU stdev : 0.002035106 sec real / 0.000003696 sec CPU global -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 1.314006000 sec real / 0.000053499 sec CPU maximum: 1.400049000 sec real / 0.000070065 sec CPU average: 1.346385214 sec real / 0.000065079 sec CPU stdev : 0.012294494 sec real / 0.000000950 sec CPU /usr/sbin/prelink -vmR ./a for V in local global; do for M in '' 'export LD_X=1' 'export LD_BIND_NOW=1' 'export LD_X=1 LD_BIND_NOW=1'; \ do ( for i in 1 2 3 4; do eval $M; time ./a $V; done 2>&1 > /dev/null | \ awk 'BEGIN { printf "'"$V $M"'\t" } /^real/ { 
printf "%s ", $2 } END { printf "\n" }' ); done; done take4 local 0m0.110s 0m0.090s 0m0.090s 0m0.089s local export LD_X=1 0m0.287s 0m0.279s 0m0.285s 0m0.277s local export LD_BIND_NOW=1 0m0.147s 0m0.145s 0m0.143s 0m0.143s local export LD_X=1 LD_BIND_NOW=1 0m0.547s 0m0.535s 0m0.533s 0m0.527s global 0m0.115s 0m0.112s 0m0.113s 0m0.113s global export LD_X=1 0m0.368s 0m0.374s 0m0.360s 0m0.356s global export LD_BIND_NOW=1 0m0.187s 0m0.185s 0m0.185s 0m0.185s global export LD_X=1 LD_BIND_NOW=1 0m0.686s 0m0.690s 0m0.667s 0m0.666s # valgrind --tool=cachegrind stats not provided for prelinked testcase, # as valgrind apparently uses LD_PRELOAD internally and thus prevents # prelinking. for V in local global; do for M in '' '-E LD_X=1' '-E LD_BIND_NOW=1' '-E LD_X=1 -E LD_BIND_NOW=1'; \ do ( echo "$V $M"; ./timing $M ./a $V ); done; done take4 local Strip out best and worst realtime result minimum: 0.088558000 sec real / 0.000062737 sec CPU maximum: 0.090857000 sec real / 0.000075048 sec CPU average: 0.089202000 sec real / 0.000064855 sec CPU stdev : 0.000453942 sec real / 0.000002985 sec CPU local -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.274995000 sec real / 0.000055459 sec CPU maximum: 0.291724000 sec real / 0.000085829 sec CPU average: 0.285347750 sec real / 0.000073140 sec CPU stdev : 0.004344181 sec real / 0.000003294 sec CPU local -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.142244000 sec real / 0.000052852 sec CPU maximum: 0.145730000 sec real / 0.000074901 sec CPU average: 0.143050857 sec real / 0.000063817 sec CPU stdev : 0.000588257 sec real / 0.000000572 sec CPU local -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.516412000 sec real / 0.000052547 sec CPU maximum: 0.551295000 sec real / 0.000080505 sec CPU average: 0.536137714 sec real / 0.000072925 sec CPU stdev : 0.008204213 sec real / 0.000003287 sec CPU global 
Strip out best and worst realtime result minimum: 0.111122000 sec real / 0.000057277 sec CPU maximum: 0.113199000 sec real / 0.000072477 sec CPU average: 0.111739535 sec real / 0.000063569 sec CPU stdev : 0.000365911 sec real / 0.000002389 sec CPU global -E LD_X=1 optarg="LD_X=1" Strip out best and worst realtime result minimum: 0.356423000 sec real / 0.000054153 sec CPU maximum: 0.385610000 sec real / 0.000077109 sec CPU average: 0.369770464 sec real / 0.000072567 sec CPU stdev : 0.005942862 sec real / 0.000003175 sec CPU global -E LD_BIND_NOW=1 optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.182965000 sec real / 0.000054313 sec CPU maximum: 0.189492000 sec real / 0.000068199 sec CPU average: 0.184005428 sec real / 0.000063653 sec CPU stdev : 0.000723054 sec real / 0.000001477 sec CPU global -E LD_X=1 -E LD_BIND_NOW=1 optarg="LD_X=1" optarg="LD_BIND_NOW=1" Strip out best and worst realtime result minimum: 0.650583000 sec real / 0.000052562 sec CPU maximum: 0.700625000 sec real / 0.000077906 sec CPU average: 0.680901214 sec real / 0.000073774 sec CPU stdev : 0.011186022 sec real / 0.000002110 sec CPU