This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: Pass 4 (non-)optimization speedup


On 07/13/2009 03:35 PM, Mark Wielaard wrote:
> This is really a shame, gcc is really a lot slower compiling with -O2.
> It would be good to find some other way to make pass 4 quicker by
> default.

Another approach mentioned on IRC is trying to simplify the code that we
present to GCC.

I took a stab at consolidating the probe locking code, patch attached.
It makes each probe body contain just a static array of locks needed,
and then call to a single function that actually loops over the locks.

This will only be a win for scripts with many distinct probe bodies,
like topsys.stp.  For that script, I see pass-4 time go from 5 seconds
to 4.4 seconds.  In runtime, it's hard to get consistent measurement,
but the average probe time is about 2-3% slower.

So, is that overhead worth 1/2 second compile time?  Can anyone see ways
to improve this patch, or suggest other areas that could benefit from focus?

Josh
commit d8a8b06d763d006be5efe0ec53c7b5a41df30223
Author: Josh Stone <jistone@redhat.com>
Date:   Mon Jul 13 13:22:11 2009 -0700

    make probe locking data-driven

diff --git a/translate.cxx b/translate.cxx
index 9c90106..a35be58 100644
--- a/translate.cxx
+++ b/translate.cxx
@@ -1590,14 +1590,7 @@ c_unparser::emit_probe (derived_probe* v)
       // initialize frame pointer
       o->newline() << "struct " << v->name << "_locals * __restrict__ l =";
       o->newline(1) << "& c->locals[0]." << v->name << ";";
-      o->newline(-1) << "(void) l;"; // make sure "l" is marked used
-
-      o->newline() << "#ifdef STP_TIMING";
-      o->newline() << "c->statp = & time_" << v->basest()->name << ";";
-      o->newline() << "#endif";
-
-      // emit probe local initialization block
-      v->emit_probe_local_init(o);
+      o->indent (-1);
 
       // emit all read/write locks for global variables
       varuse_collecting_visitor vut;
@@ -1607,6 +1600,15 @@ c_unparser::emit_probe (derived_probe* v)
 	  emit_locks (vut);
 	}
 
+      o->newline() << "(void) l;"; // make sure "l" is marked used
+
+      o->newline() << "#ifdef STP_TIMING";
+      o->newline() << "c->statp = & time_" << v->basest()->name << ";";
+      o->newline() << "#endif";
+
+      // emit probe local initialization block
+      v->emit_probe_local_init(o);
+
       // initialize locals
       for (unsigned j=0; j<v->locals.size(); j++)
         {
@@ -1658,11 +1660,8 @@ c_unparser::emit_probe (derived_probe* v)
 void
 c_unparser::emit_locks(const varuse_collecting_visitor& vut)
 {
-  o->newline() << "{";
-  o->newline(1) << "unsigned numtrylock = 0;";
-  o->newline() << "(void) numtrylock;";
+  bool has_locks = false;
 
-  string last_locked_var;
   for (unsigned i = 0; i < session->globals.size(); i++)
     {
       vardecl* v = session->globals[i];
@@ -1694,37 +1693,36 @@ c_unparser::emit_locks(const varuse_collecting_visitor& vut)
 	    continue;
 	}
 
-      string lockcall =
-        string (write_p ? "write" : "read") +
-        "_trylock (& global.s_" + v->name + "_lock)";
+      if (!has_locks)
+        {
+          o->newline() << "static const probe_lock_t locks[] = {";
+          o->indent(1);
+          has_locks = true;
+        }
 
-      o->newline() << "while (! " << lockcall
-                   << "&& (++numtrylock < MAXTRYLOCK))";
-      o->newline(1) << "ndelay (TRYLOCKDELAY);";
-      o->newline(-1) << "if (unlikely (numtrylock >= MAXTRYLOCK)) {";
-      o->newline(1) << "atomic_inc (& skipped_count);";
+      o->newline() << "{";
+      o->newline(1) << ".lock = & global.s_" << c_varname (v->name) << "_lock,";
       o->newline() << "#ifdef STP_TIMING";
-      o->newline() << "atomic_inc (& global.s_" << c_varname (v->name) << "_lock_skip_count);";
+      o->newline() << ".skip = & global.s_" << c_varname (v->name) << "_lock_skip_count,";
       o->newline() << "#endif";
-      // The following works even if i==0.  Note that using
-      // globals[i-1]->name is wrong since that global may not have
-      // been lockworthy by this probe.
-      o->newline() << "goto unlock_" << last_locked_var << ";";
-      o->newline(-1) << "}";
-
-      last_locked_var = v->name;
+      o->newline() << ".write_p = " << (write_p ? 1 : 0) << ",";
+      o->newline(-1) << "},";
     }
 
-  o->newline() << "if (0) goto unlock_;";
-
-  o->newline(-1) << "}";
+  if (has_locks)
+    {
+      o->newline(-1) << "};";
+      o->newline() << "if (!probe_trylock(locks, ARRAY_SIZE(locks)))";
+      o->newline(1) << "goto unlocked;";
+      o->indent(-1);
+    }
 }
 
 
 void
 c_unparser::emit_unlocks(const varuse_collecting_visitor& vut)
 {
-  unsigned numvars = 0;
+  bool has_locks = false;
 
   if (session->verbose>1)
     clog << current_probe->name << " locks ";
@@ -1751,32 +1749,24 @@ c_unparser::emit_unlocks(const varuse_collecting_visitor& vut)
 	    continue;
 	}
 
-      numvars ++;
-      o->newline(-1) << "unlock_" << v->name << ":";
-      o->indent(1);
-
+      has_locks = true;
       if (session->verbose>1)
         clog << v->name << "[" << (read_p ? "r" : "")
              << (write_p ? "w" : "")  << "] ";
-
-      if (write_p) // emit write lock
-        o->newline() << "write_unlock (& global.s_" << v->name << "_lock);";
-      else // (read_p && !write_p) : emit read lock
-        o->newline() << "read_unlock (& global.s_" << v->name << "_lock);";
-
-      // fall through to next variable; thus the reverse ordering
     }
 
-  // emit plain "unlock" label, used if the very first lock failed.
-  o->newline(-1) << "unlock_: ;";
-  o->indent(1);
-
-  if (numvars) // is there a chance that any lock attempt failed?
+  if (has_locks) // is there a chance that any lock attempt failed?
     {
       // Formerly, we checked skipped_count > MAXSKIPPED here, and set
       // SYSTEMTAP_SESSION_ERROR if so.  But now, this check is shared
       // via common_probe_entryfn_epilogue().
 
+      o->newline() << "probe_unlock(locks, ARRAY_SIZE(locks));";
+
+      // emit plain "unlocked" label, used if any locks failed
+      o->newline(-1) << "unlocked: ;";
+      o->indent(1);
+
       if (session->verbose>1)
         clog << endl;
     }
@@ -5223,6 +5213,46 @@ translate_pass (systemtap_session& s)
 	}
       s.op->assert_0_indent();
 
+      s.op->newline() << "typedef struct {";
+      s.op->newline(1) << "rwlock_t *lock;";
+      s.op->newline() << "#ifdef STP_TIMING";
+      s.op->newline() << "atomic_t *skip;";
+      s.op->newline() << "#endif";
+      s.op->newline() << "int write_p;";
+      s.op->newline(-1) << "} probe_lock_t;";
+      s.op->newline() << "static void";
+      s.op->newline() << "probe_unlock(const probe_lock_t *locks, unsigned n)";
+      s.op->newline() << "{";
+      s.op->newline(1) << "while (n--)";
+      s.op->newline(1) << "if (locks[n].write_p)";
+      s.op->newline(1) << "write_unlock(locks[n].lock);";
+      s.op->newline(-1) << "else";
+      s.op->newline(1) << "read_unlock(locks[n].lock);";
+      s.op->newline(-3) << "}";
+      s.op->newline() << "static int";
+      s.op->newline() << "probe_trylock(const probe_lock_t *locks, unsigned n)";
+      s.op->newline() << "{";
+      s.op->newline(1) << "unsigned i, numtrylock = 0;";
+      s.op->newline(0) << "for (i = 0; i < n; ++i) {";
+      s.op->newline(1) << "if (locks[i].write_p)";
+      s.op->newline(1) << "while (!write_trylock(locks[i].lock) && (++numtrylock < MAXTRYLOCK))";
+      s.op->newline(1) << "ndelay (TRYLOCKDELAY);";
+      s.op->newline(-2) << "else";
+      s.op->newline(1) << "while (!read_trylock(locks[i].lock) && (++numtrylock < MAXTRYLOCK))";
+      s.op->newline(1) << "ndelay (TRYLOCKDELAY);";
+      s.op->newline(-2) << "if (unlikely (numtrylock >= MAXTRYLOCK)) {";
+      s.op->newline(1) << "atomic_inc (& skipped_count);";
+      s.op->newline() << "#ifdef STP_TIMING";
+      s.op->newline() << "atomic_inc (locks[i].skip);";
+      s.op->newline() << "#endif";
+      s.op->newline() << "probe_unlock(locks, i);";
+      s.op->newline() << "return 0;";
+      s.op->newline(-1) << "}";
+      s.op->newline(-1) << "}";
+      s.op->newline() << "return 1;";
+      s.op->newline(-1) << "}";
+      s.op->assert_0_indent();
+
       // Run a varuse_collecting_visitor over probes that need global
       // variable locks.  We'll use this information later in
       // emit_locks()/emit_unlocks().

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]