diff --git a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp
@@ -228,9 +228,8 @@
 // Implementation of MonitorAccessStubs
 
 MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
-: MonitorAccessStub(obj_reg, lock_reg)
+: MonitorAccessStub(obj_reg, lock_reg, info)
 {
-  _info = new CodeEmitInfo(info);
 }
 
 
@@ -267,6 +266,9 @@
     exit_id = Runtime1::monitorexit_nofpu_id;
   }
   __ call(RuntimeAddress(Runtime1::entry_for(exit_id)));
+  if (_info != NULL) {
+    ce->add_non_safepoint_debug_info_here(_info);
+  }
   __ jmp(_continuation);
 }
 
diff --git a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp
@@ -449,7 +449,7 @@
   MonitorExitStub* stub = NULL;
   if (method()->is_synchronized()) {
     monitor_address(0, FrameMap::rax_opr);
-    stub = new MonitorExitStub(FrameMap::rax_opr, true, 0);
+    stub = new MonitorExitStub(FrameMap::rax_opr, true, 0, NULL);
     __ unlock_object(rdi, rsi, rax, *stub->entry());
     __ bind(*stub->continuation());
   }
diff --git a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp
@@ -372,7 +372,7 @@
   LIR_Opr lock = new_register(T_INT);
   LIR_Opr obj_temp = new_register(T_INT);
   set_no_result(x);
-  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no());
+  monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no(), state_for(x, x->state(), true));
 }
 
 
diff --git a/src/cpu/x86/vm/frame_x86.cpp b/src/cpu/x86/vm/frame_x86.cpp
--- a/src/cpu/x86/vm/frame_x86.cpp
+++ b/src/cpu/x86/vm/frame_x86.cpp
@@ -148,6 +148,10 @@
       sender_pc = (address) *(sender_sp-1);
     }
 
+    if (SharedRuntime::is_memento_stack_trace_return_handler(sender_pc)) {
+      sender_pc = thread->memento_original_return_address();
+    }
+
 
     // If the potential sender is the interpreter then we can do some more checking
     if (Interpreter::contains(sender_pc)) {
@@ -165,7 +169,7 @@
 
       // construct the potential sender
 
-      frame sender(sender_sp, saved_fp, sender_pc);
+      frame sender(thread, sender_sp, saved_fp, sender_pc);
 
       return sender.is_interpreted_frame_valid(thread);
 
@@ -203,7 +207,7 @@
 
       // construct the potential sender
 
-      frame sender(sender_sp, saved_fp, sender_pc);
+      frame sender(thread, sender_sp, saved_fp, sender_pc);
 
       // Validate the JavaCallWrapper an entry frame must have
       address jcw = (address)sender.entry_frame_call_wrapper();
@@ -273,6 +277,11 @@
     tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
                   pc_addr, *pc_addr, pc);
   }
+  assert(!SharedRuntime::is_memento_stack_trace_return_handler(pc), "new return address must not be memento return handler");
+  if (SharedRuntime::is_memento_stack_trace_return_handler(*pc_addr)) {
+    pc_addr = &(thread->memento_original_return_address());
+    assert(*pc_addr != NULL, "memento original return address must be set");
+  }
   // Either the return address is the original one or we are going to
   // patch in the same address that's already there.
   assert(_pc == *pc_addr || pc == *pc_addr, "must be");
@@ -372,10 +381,10 @@
   map->clear();
   assert(map->include_argument_oops(), "should be set by clear");
   if (jfa->last_Java_pc() != NULL ) {
-    frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+    frame fr(map->thread(), jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
     return fr;
   }
-  frame fr(jfa->last_Java_sp(), jfa->last_Java_fp());
+  frame fr(map->thread(), jfa->last_Java_sp(), jfa->last_Java_fp());
   return fr;
 }
 
@@ -466,7 +475,7 @@
   }
 #endif // COMPILER2
 
-  return frame(sender_sp, unextended_sp, link(), sender_pc());
+  return frame(map->thread(), sender_sp, unextended_sp, link(), sender_pc());
 }
 
 
@@ -503,7 +512,7 @@
   }
 
   assert(sender_sp != sp(), "must have changed");
-  return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+  return frame(map->thread(), sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
 }
 
 
@@ -523,7 +532,7 @@
   }
   // Must be native-compiled frame, i.e. the marshaling code for native
   // methods that exists in the core system.
-  return frame(sender_sp(), link(), sender_pc());
+  return frame(map->thread(), sender_sp(), link(), sender_pc());
 }
 
 
diff --git a/src/cpu/x86/vm/frame_x86.hpp b/src/cpu/x86/vm/frame_x86.hpp
--- a/src/cpu/x86/vm/frame_x86.hpp
+++ b/src/cpu/x86/vm/frame_x86.hpp
@@ -181,11 +181,11 @@
  public:
   // Constructors
 
-  frame(intptr_t* sp, intptr_t* fp, address pc);
+  frame(Thread* thread, intptr_t* sp, intptr_t* fp, address pc);
 
-  frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
+  frame(Thread* thread, intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
 
-  frame(intptr_t* sp, intptr_t* fp);
+  frame(Thread* thread, intptr_t* sp, intptr_t* fp);
 
   // accessors for the instance variables
   // Note: not necessarily the real 'frame pointer' (see real_fp)
diff --git a/src/cpu/x86/vm/frame_x86.inline.hpp b/src/cpu/x86/vm/frame_x86.inline.hpp
--- a/src/cpu/x86/vm/frame_x86.inline.hpp
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp
@@ -40,7 +40,11 @@
   _deopt_state = unknown;
 }
 
-inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+inline frame::frame(Thread* thread, intptr_t* sp, intptr_t* fp, address pc) {
+  if (thread != NULL && thread->is_Java_thread() && SharedRuntime::is_memento_stack_trace_return_handler(pc)) {
+    pc = ((JavaThread*) thread)->memento_original_return_address();
+  }
+
   _sp = sp;
   _unextended_sp = sp;
   _fp = fp;
@@ -56,9 +60,15 @@
   } else {
     _deopt_state = not_deoptimized;
   }
+
+  assert(!SharedRuntime::is_memento_stack_trace_return_handler(_pc), "original return address not resolvable");
 }
 
-inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
+inline frame::frame(Thread* thread, intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
+  if (thread != NULL && thread->is_Java_thread() && SharedRuntime::is_memento_stack_trace_return_handler(pc)) {
+    pc = ((JavaThread*) thread)->memento_original_return_address();
+  }
+
   _sp = sp;
   _unextended_sp = unextended_sp;
   _fp = fp;
@@ -75,14 +85,20 @@
   } else {
     _deopt_state = not_deoptimized;
   }
+
+  assert(!SharedRuntime::is_memento_stack_trace_return_handler(_pc), "original return address not resolvable");
 }
 
-inline frame::frame(intptr_t* sp, intptr_t* fp) {
+inline frame::frame(Thread* thread, intptr_t* sp, intptr_t* fp) {
   _sp = sp;
   _unextended_sp = sp;
   _fp = fp;
   _pc = (address)(sp[-1]);
 
+  if (thread != NULL && thread->is_Java_thread() && SharedRuntime::is_memento_stack_trace_return_handler(_pc)) {
+    _pc = ((JavaThread*) thread)->memento_original_return_address();
+  }
+
   // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
   // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
   // unlucky the junk value could be to a zombied method and we'll die on the
@@ -104,6 +120,8 @@
   } else {
     _deopt_state = not_deoptimized;
   }
+
+  assert(!SharedRuntime::is_memento_stack_trace_return_handler(_pc), "original return address not resolvable");
 }
 
 // Accessors
@@ -312,4 +330,44 @@
   *result_adr = obj;
 }
 
+inline address* frame::raw_sender_pc_addr() {
+  address* sender_pc;
+
+  if (is_interpreted_frame()) {
+    sender_pc = sender_pc_addr();
+    assert(interpreter_frame_sender_sp() > (intptr_t*) sender_pc, "sender_sp should be below return address");
+  } else {
+    assert(_cb != NULL, "code blob is required");
+    assert(is_compiled_frame() || is_native_frame() || is_stub_frame(), "unexpected frame type");
+
+    // frame owned by optimizing compiler
+    int frame_size = _cb->frame_size();
+    assert(frame_size > 0, "must have non-zero frame size");
+    intptr_t* sender_sp = unextended_sp() + frame_size;
+
+    // On Intel the return_address is always the word on the stack
+    sender_pc = ((address*) sender_sp) - 1;
+  }
+  assert(CodeCache::contains(*sender_pc), "must be in code cache");
+
+  return sender_pc;
+}
+
+inline void frame::memento_mark(Thread* thread) {
+  address& original_return_address = thread->memento_original_return_address();
+  assert(original_return_address == NULL, "only 1 frame can be patched per thread");
+
+  address* sender_pc = raw_sender_pc_addr();
+  original_return_address = *sender_pc;
+  *sender_pc = SharedRuntime::get_memento_stack_trace_return_handler();
+}
+
+inline bool frame::is_memento_marked(Thread* thread) {
+  bool memento_marked = *raw_sender_pc_addr() == SharedRuntime::get_memento_stack_trace_return_handler();
+  if (memento_marked) {
+    assert(thread->memento_original_return_address() != NULL, "original return address must be set if frame is patched");
+  }
+  return memento_marked;
+}
+
 #endif // CPU_X86_VM_FRAME_X86_INLINE_HPP
diff --git a/src/cpu/x86/vm/macroAssembler_x86.cpp b/src/cpu/x86/vm/macroAssembler_x86.cpp
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp
@@ -2033,7 +2033,7 @@
     cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
     bind(DONE_LABEL);
   } else {
-    Label DONE_LABEL, Stacked, CheckSucc;
+    Label DONE_LABEL, Stacked;
 
     // Critically, the biased locking test must have precedence over
     // and appear before the (box->dhw == 0) recursive stack-lock test.
@@ -2227,6 +2227,8 @@
        bind (CheckSucc);
     }
 #else // _LP64
+    Label LGoSlowPath;
+
     // It's inflated
     movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
     xorptr(boxReg, r15_thread);
@@ -2234,50 +2236,23 @@
     jccb  (Assembler::notZero, DONE_LABEL);
     movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2));
     orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2));
-    jccb  (Assembler::notZero, CheckSucc);
+    orptr (boxReg, Address (tmpReg, ObjectMonitor::trace_exit_stack_offset_in_bytes()-2));
+    jccb  (Assembler::notZero, LGoSlowPath);
     movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
     jmpb  (DONE_LABEL);
 
-    if ((EmitSync & 65536) == 0) {
-      Label LSuccess, LGoSlowPath ;
-      bind  (CheckSucc);
-      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
-      jccb  (Assembler::zero, LGoSlowPath);
-
-      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
-      // the explicit ST;MEMBAR combination, but masm doesn't currently support
-      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
-      // are all faster when the write buffer is populated.
-      movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD);
-      if (os::is_MP()) {
-         lock (); addl (Address(rsp, 0), 0);
-      }
-      cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD);
-      jccb  (Assembler::notZero, LSuccess);
-
-      movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
-      if (os::is_MP()) { lock(); }
-      cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
-      jccb  (Assembler::notEqual, LSuccess);
-      // Intentional fall-through into slow-path
-
-      bind  (LGoSlowPath);
-      orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
-      jmpb  (DONE_LABEL);
-
-      bind  (LSuccess);
-      testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
-      jmpb  (DONE_LABEL);
-    }
+    // There used to be a fast path here for the case where _succ was set,
+    // but event tracing requires that an event is always generated when
+    // threads are blocked on the monitor, so we always take the slow path.
+    bind  (LGoSlowPath);
+    orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
+    jmpb  (DONE_LABEL);
 
     bind  (Stacked);
     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
     if (os::is_MP()) { lock(); }
     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
 
-    if (EmitSync & 65536) {
-       bind (CheckSucc);
-    }
 #endif
     bind(DONE_LABEL);
     // Avoid branch to branch on AMD processors
diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
--- a/src/cpu/x86/vm/methodHandles_x86.cpp
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp
@@ -523,9 +523,9 @@
       // Robust search of trace_calling_frame (independant of inlining).
       // Assumes saved_regs comes from a pusha in the trace_calling_frame.
       assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?");
-      frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
+      frame trace_calling_frame = os::get_sender_for_C_frame(p, &cur_frame);
       while (trace_calling_frame.fp() < saved_regs) {
-        trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
+        trace_calling_frame = os::get_sender_for_C_frame(p, &trace_calling_frame);
       }
 
       // safely create a frame and call frame::describe
@@ -541,7 +541,7 @@
         // modified to support the current or future non walkable
         // frames (but this is more intrusive and is not considered as
         // part of this RFE, which will instead use a simpler output).
-        frame dump_frame = frame(dump_sp, dump_fp);
+        frame dump_frame = frame(p, dump_sp, dump_fp);
         dump_frame.describe(values, 1);
       } else {
         // Stack may not be walkable (invalid PC above FP):
diff --git a/src/cpu/x86/vm/runtime_x86_64.cpp b/src/cpu/x86/vm/runtime_x86_64.cpp
--- a/src/cpu/x86/vm/runtime_x86_64.cpp
+++ b/src/cpu/x86/vm/runtime_x86_64.cpp
@@ -38,9 +38,49 @@
 #include "vmreg_x86.inline.hpp"
 #endif
 
+#define __ masm->
+
+#ifdef COMPILER2
+
+// A wrapper for complete_monitor_unlocking_C() which sets last_Java_sp and
+// last_Java_pc to reasonable values so that stack walks become possible.
+void OptoRuntime::generate_complete_monitor_unlocking_wrapper() {
+  // Allocate space for the code
+  ResourceMark rm;
+  // Setup code generation tools
+  CodeBuffer buffer("complete_monitor_unlocking_wrapper", 256, 128);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  address start = __ pc();
+
+  assert_different_registers(rscratch1, c_rarg0, c_rarg1); // don't touch arguments
+
+  __ lea(rscratch1, Address(rsp, wordSize));
+  __ movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), rscratch1);
+
+  __ movptr(rscratch1, Address(rsp, 0));
+  __ movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), rscratch1);
+
+  __ subptr(rsp, wordSize); // align stack for call
+  __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
+  __ addptr(rsp, wordSize);
+
+  __ reset_last_Java_frame(false, true);
+
+  __ ret(0);
+
+  // Make sure all code is generated
+  masm->flush();
+
+  // Install the wrapper stub
+  _complete_monitor_unlocking_wrapper = RuntimeStub::new_runtime_stub(
+      buffer.name(), &buffer, CodeOffsets::frame_never_safe, 2 * wordSize, NULL, false);
+}
 
 // This file should really contain the code for generating the OptoRuntime
 // exception_blob. However that code uses SimpleRuntimeFrame which only
 // exists in sharedRuntime_x86_64.cpp. When there is a sharedRuntime_<arch>.hpp
 // file and SimpleRuntimeFrame is able to move there then the exception_blob
 // code will move here where it belongs.
+
+#endif // COMPILER2
diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
@@ -3965,6 +3965,59 @@
   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
 }
 
+//
+// generate_memento_stack_trace_blob - trampoline for memento stack trace
+//
+void SharedRuntime::generate_memento_stack_trace_blob() {
+  // allocate space for the code
+  ResourceMark rm;
+
+  CodeBuffer buffer("memento_stack_trace_blob", 1000, 512);
+  MacroAssembler* masm = new MacroAssembler(&buffer);
+
+  // Entry point for function returns
+  int return_offset = __ offset();
+
+  __ movptr(rscratch1, Address(r15_thread, Thread::memento_original_return_address_offset()));
+  __ movptr(Address(r15_thread, Thread::memento_original_return_address_offset()), (int32_t) 0);
+  __ jmp(rscratch1);
+
+  // Exception handler entry point
+  int exception_offset = __ offset();
+
+  // rdx holds the return address for the handler lookup; overwrite it with the original return address
+  __ movptr(rdx, Address(r15_thread, Thread::memento_original_return_address_offset()));
+  __ movptr(Address(r15_thread, Thread::memento_original_return_address_offset()), (int32_t) 0);
+
+  // In between activations - previous activation type unknown yet
+  // compute continuation point - the continuation point expects
+  // the following registers set up:
+  //
+  // rax: exception
+  // rdx: return address/pc that threw exception
+  // rsp: expression stack of caller
+  // rbp: rbp of caller
+  __ push(rax);                                  // save exception
+  __ push(rdx);                                  // save return address
+  __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), r15_thread, rdx);
+  __ mov(rbx, rax);                              // save exception handler
+  __ pop(rdx);                                   // restore return address
+  __ pop(rax);                                   // restore exception
+  // Note that an "issuing PC" is actually the next PC after the call
+  __ jmp(rbx);                                   // jump to exception handler of caller
+
+  // -------------
+  // make sure all code is generated
+  masm->flush();
+
+  // return the blob
+  // frame_size_words or bytes??
+  RuntimeStub* stub = RuntimeStub::new_runtime_stub("memento_stack_trace_blob", &buffer, return_offset, 0, NULL, true);
+
+  _memento_stack_trace_return_handler    = stub->code_begin() + return_offset;
+  _memento_stack_trace_exception_handler = stub->code_begin() + exception_offset;
+}
+
 
 #ifdef COMPILER2
 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
diff --git a/src/os/bsd/vm/osThread_bsd.hpp b/src/os/bsd/vm/osThread_bsd.hpp
--- a/src/os/bsd/vm/osThread_bsd.hpp
+++ b/src/os/bsd/vm/osThread_bsd.hpp
@@ -62,10 +62,8 @@
   sigset_t  caller_sigmask() const       { return _caller_sigmask; }
   void    set_caller_sigmask(sigset_t sigmask)  { _caller_sigmask = sigmask; }
 
-#ifndef PRODUCT
-  // Used for debugging, return a unique integer for each thread.
+  // unique integer for each thread
   intptr_t thread_identifier() const   { return (intptr_t)_pthread_id; }
-#endif
 
 #ifdef ASSERT
   // We expect no reposition failures so kill vm if we get one.
diff --git a/src/os/linux/vm/osThread_linux.hpp b/src/os/linux/vm/osThread_linux.hpp
--- a/src/os/linux/vm/osThread_linux.hpp
+++ b/src/os/linux/vm/osThread_linux.hpp
@@ -51,10 +51,9 @@
   sigset_t  caller_sigmask() const       { return _caller_sigmask; }
   void    set_caller_sigmask(sigset_t sigmask)  { _caller_sigmask = sigmask; }
 
-#ifndef PRODUCT
-  // Used for debugging, return a unique integer for each thread.
+  // unique integer for each thread
   int thread_identifier() const   { return _thread_id; }
-#endif
+
 #ifdef ASSERT
   // We expect no reposition failures so kill vm if we get one.
   //
diff --git a/src/os/linux/vm/os_linux.cpp b/src/os/linux/vm/os_linux.cpp
--- a/src/os/linux/vm/os_linux.cpp
+++ b/src/os/linux/vm/os_linux.cpp
@@ -61,6 +61,7 @@
 #include "services/attachListener.hpp"
 #include "services/memTracker.hpp"
 #include "services/runtimeService.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/decoder.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/events.hpp"
@@ -4207,7 +4208,7 @@
 
   // For JSR166. Unpark even if interrupt status already was set
   if (thread->is_Java_thread())
-    ((JavaThread*)thread)->parker()->unpark();
+    ((JavaThread*)thread)->parker()->unpark((JavaThread*)thread);
 
   ParkEvent * ev = thread->_ParkEvent ;
   if (ev != NULL) ev->unpark() ;
@@ -5861,25 +5862,41 @@
   // Ideally we'd do something useful while spinning, such
   // as calling unpackTime().
 
+  Thread* thread = Thread::current();
+  assert(thread == AssociatedWith, "must be associated thread");
+  assert(thread->is_Java_thread(), "must be Java thread");
+  JavaThread *jt = (JavaThread *)thread;
+
+  if (EnableEventTracing && EnableEventTracingParkEvents && thread->park_priority() >= 0) {
+    TraceEvents::write_thread_park_begin(jt, isAbsolute, time);
+  }
+
+  // constructed before the ThreadBlockInVM below so the event is never written at a safepoint (destruction order)
+  TraceEventThreadParkEnd event(thread);
+
   // Optional fast-path check:
   // Return immediately if a permit is available.
   // We depend on Atomic::xchg() having full barrier semantics
   // since we are doing a lock-free update to _counter.
-  if (Atomic::xchg(0, &_counter) > 0) return;
-
-  Thread* thread = Thread::current();
-  assert(thread->is_Java_thread(), "Must be JavaThread");
-  JavaThread *jt = (JavaThread *)thread;
+  {
+    intptr_t unpark_seq = Atomic::xchg_ptr(0, &_counter);
+    if (unpark_seq > 0) {
+      event.fill(unpark_seq, TraceTypes::park_immediate_fast);
+      return;
+    }
+  }
 
   // Optional optimization -- avoid state transitions if there's an interrupt pending.
   // Check interrupt before trying to wait
   if (Thread::is_interrupted(thread, false)) {
+    event.fill(-1, TraceTypes::park_interrupted_fast);
     return;
   }
 
   // Next, demultiplex/decode time arguments
   timespec absTime;
   if (time < 0 || (isAbsolute && time == 0) ) { // don't wait at all
+    event.fill(-1, TraceTypes::park_no_wait_time);
     return;
   }
   if (time > 0) {
@@ -5897,12 +5914,18 @@
 
   // Don't wait if cannot get lock since interference arises from
   // unblocking.  Also. check interrupt before trying wait
-  if (Thread::is_interrupted(thread, false) || pthread_mutex_trylock(_mutex) != 0) {
+  if (Thread::is_interrupted(thread, false)) {
+    event.fill(-1, TraceTypes::park_interrupted_slow);
+    return;
+  }
+  if (pthread_mutex_trylock(_mutex) != 0) {
+    event.fill(-1, TraceTypes::park_locked);
     return;
   }
 
   int status ;
   if (_counter > 0)  { // no wait needed
+    event.fill(_counter, TraceTypes::park_immediate_slow);
     _counter = 0;
     status = pthread_mutex_unlock(_mutex);
     assert (status == 0, "invariant") ;
@@ -5945,6 +5968,20 @@
   pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
 #endif
 
+  TraceTypes::park_return_code return_code;
+  switch (status) {
+    case 0:
+      return_code = TraceTypes::park_normal;
+      break;
+    case ETIMEDOUT:
+      return_code = TraceTypes::park_timedout;
+      break;
+    default:
+      assert(false, "unexpected pthread_cond_(timed)wait return code");
+      return_code = TraceTypes::park_unknown;
+  }
+  event.fill(_counter, return_code);
+
   _counter = 0 ;
   status = pthread_mutex_unlock(_mutex) ;
   assert_status(status == 0, status, "invariant") ;
@@ -5958,12 +5995,20 @@
   }
 }
 
-void Parker::unpark() {
-  int s, status ;
+void Parker::unpark(JavaThread *thread) {
+  int status;
   status = pthread_mutex_lock(_mutex);
   assert (status == 0, "invariant") ;
-  s = _counter;
-  _counter = 1;
+  if (thread != AssociatedWith) {
+    status = pthread_mutex_unlock(_mutex);
+    assert (status == 0, "invariant") ;
+    return;
+  }
+  TraceEventThreadUnpark e(thread);
+  intptr_t s = e.seq();
+  assert(s > 0, "seq must be greater than zero");
+  s = Atomic::xchg_ptr(s, &_counter);
+  e.set_chained_seq(s);
   if (s < 1) {
     // thread might be parked
     if (_cur_index != -1) {
@@ -5980,11 +6025,11 @@
         assert (status == 0, "invariant");
       }
     } else {
-      pthread_mutex_unlock(_mutex);
+      status = pthread_mutex_unlock(_mutex);
       assert (status == 0, "invariant") ;
     }
   } else {
-    pthread_mutex_unlock(_mutex);
+    status = pthread_mutex_unlock(_mutex);
     assert (status == 0, "invariant") ;
   }
 }
diff --git a/src/os/posix/vm/os_posix.cpp b/src/os/posix/vm/os_posix.cpp
--- a/src/os/posix/vm/os_posix.cpp
+++ b/src/os/posix/vm/os_posix.cpp
@@ -81,6 +81,7 @@
 
   int frame_idx = 0;
   int num_of_frames;  // number of frames captured
+  Thread* thread = Thread::current();
   frame fr = os::current_frame();
   while (fr.pc() && frame_idx < frames) {
     if (toSkip > 0) {
@@ -92,7 +93,7 @@
         ||fr.sender_pc() == NULL || fr.cb() != NULL) break;
 
     if (fr.sender_pc() && !os::is_first_C_frame(&fr)) {
-      fr = os::get_sender_for_C_frame(&fr);
+      fr = os::get_sender_for_C_frame(thread, &fr);
     } else {
       break;
     }
diff --git a/src/os/windows/vm/osThread_windows.hpp b/src/os/windows/vm/osThread_windows.hpp
--- a/src/os/windows/vm/osThread_windows.hpp
+++ b/src/os/windows/vm/osThread_windows.hpp
@@ -43,10 +43,9 @@
   HANDLE interrupt_event() const                   { return _interrupt_event; }
   void set_interrupt_event(HANDLE interrupt_event) { _interrupt_event = interrupt_event; }
 
-#ifndef PRODUCT
-  // Used for debugging, return a unique integer for each thread.
+  // unique integer for each thread
   int thread_identifier() const                    { return _thread_id; }
-#endif
+
 #ifdef ASSERT
   // We expect no reposition failures so kill vm if we get one
   //
diff --git a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp
--- a/src/os_cpu/linux_x86/vm/os_linux_x86.cpp
+++ b/src/os_cpu/linux_x86/vm/os_linux_x86.cpp
@@ -164,17 +164,17 @@
   return epc;
 }
 
-frame os::fetch_frame_from_context(void* ucVoid) {
+frame os::fetch_frame_from_context(Thread* thread, void* ucVoid) {
   intptr_t* sp;
   intptr_t* fp;
   ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp);
-  return frame(sp, fp, epc.pc());
+  return frame(thread, sp, fp, epc.pc());
 }
 
 // By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get
 // turned off by -fomit-frame-pointer,
-frame os::get_sender_for_C_frame(frame* fr) {
-  return frame(fr->sender_sp(), fr->link(), fr->sender_pc());
+frame os::get_sender_for_C_frame(Thread* thread, frame* fr) {
+  return frame(thread, fr->sender_sp(), fr->link(), fr->sender_pc());
 }
 
 intptr_t* _get_previous_fp() {
@@ -192,15 +192,17 @@
 
 
 frame os::current_frame() {
+  Thread* thread = Thread::current();
   intptr_t* fp = _get_previous_fp();
-  frame myframe((intptr_t*)os::current_stack_pointer(),
+  frame myframe(thread,
+                (intptr_t*)os::current_stack_pointer(),
                 (intptr_t*)fp,
                 CAST_FROM_FN_PTR(address, os::current_frame));
   if (os::is_first_C_frame(&myframe)) {
     // stack is not walkable
     return frame();
   } else {
-    return os::get_sender_for_C_frame(&myframe);
+    return os::get_sender_for_C_frame(thread, &myframe);
   }
 }
 
diff --git a/src/os_cpu/linux_x86/vm/thread_linux_x86.cpp b/src/os_cpu/linux_x86/vm/thread_linux_x86.cpp
--- a/src/os_cpu/linux_x86/vm/thread_linux_x86.cpp
+++ b/src/os_cpu/linux_x86/vm/thread_linux_x86.cpp
@@ -65,11 +65,11 @@
       return false;
     }
 
-    frame ret_frame(ret_sp, ret_fp, addr.pc());
+    frame ret_frame(this, ret_sp, ret_fp, addr.pc());
     if (!ret_frame.safe_for_sender(jt)) {
 #ifdef COMPILER2
       // C2 uses ebp as a general register see if NULL fp helps
-      frame ret_frame2(ret_sp, NULL, addr.pc());
+      frame ret_frame2(this, ret_sp, NULL, addr.pc());
       if (!ret_frame2.safe_for_sender(jt)) {
         // nothing else to try if the frame isn't good
         return false;
diff --git a/src/os_cpu/linux_x86/vm/thread_linux_x86.hpp b/src/os_cpu/linux_x86/vm/thread_linux_x86.hpp
--- a/src/os_cpu/linux_x86/vm/thread_linux_x86.hpp
+++ b/src/os_cpu/linux_x86/vm/thread_linux_x86.hpp
@@ -33,10 +33,10 @@
   frame pd_last_frame() {
     assert(has_last_Java_frame(), "must have last_Java_sp() when suspended");
     if (_anchor.last_Java_pc() != NULL) {
-      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
+      return frame(this, _anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc());
     } else {
       // This will pick up pc from sp
-      return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp());
+      return frame(this, _anchor.last_Java_sp(), _anchor.last_Java_fp());
     }
   }
 
diff --git a/src/share/vm/c1/c1_CodeStubs.hpp b/src/share/vm/c1/c1_CodeStubs.hpp
--- a/src/share/vm/c1/c1_CodeStubs.hpp
+++ b/src/share/vm/c1/c1_CodeStubs.hpp
@@ -302,12 +302,15 @@
  protected:
   LIR_Opr _obj_reg;
   LIR_Opr _lock_reg;
+  CodeEmitInfo* _info;
 
  public:
-  MonitorAccessStub(LIR_Opr obj_reg, LIR_Opr lock_reg) {
+  MonitorAccessStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) {
     _obj_reg  = obj_reg;
     _lock_reg  = lock_reg;
+    _info = (info != NULL) ? new CodeEmitInfo(info) : NULL;
   }
+  virtual CodeEmitInfo* info() const             { return _info; }
 
 #ifndef PRODUCT
   virtual void print_name(outputStream* out) const { out->print("MonitorAccessStub"); }
@@ -316,14 +319,10 @@
 
 
 class MonitorEnterStub: public MonitorAccessStub {
- private:
-  CodeEmitInfo* _info;
-
  public:
   MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info);
 
   virtual void emit_code(LIR_Assembler* e);
-  virtual CodeEmitInfo* info() const             { return _info; }
   virtual void visit(LIR_OpVisitState* visitor) {
     visitor->do_input(_obj_reg);
     visitor->do_input(_lock_reg);
@@ -341,8 +340,8 @@
   int  _monitor_ix;
 
  public:
-  MonitorExitStub(LIR_Opr lock_reg, bool compute_lock, int monitor_ix)
-    : MonitorAccessStub(LIR_OprFact::illegalOpr, lock_reg),
+  MonitorExitStub(LIR_Opr lock_reg, bool compute_lock, int monitor_ix, CodeEmitInfo* info)
+    : MonitorAccessStub(LIR_OprFact::illegalOpr, lock_reg, info),
       _compute_lock(compute_lock), _monitor_ix(monitor_ix) { }
   virtual void emit_code(LIR_Assembler* e);
   virtual void visit(LIR_OpVisitState* visitor) {
@@ -352,6 +351,15 @@
     } else {
       visitor->do_input(_lock_reg);
     }
+    /* Do NOT call this; it is not needed to emit non-safepoint debug info.
+     * Calling it makes the compiler assume there is a safepoint associated
+     * with this stub, but there is none, which causes C1 crashes in Eclipse
+     * and other OSGi applications.
+
+    if (_info != NULL) {
+      visitor->do_slow_case(_info);
+    }
+    */
   }
 #ifndef PRODUCT
   virtual void print_name(outputStream* out) const { out->print("MonitorExitStub"); }
diff --git a/src/share/vm/c1/c1_IR.cpp b/src/share/vm/c1/c1_IR.cpp
--- a/src/share/vm/c1/c1_IR.cpp
+++ b/src/share/vm/c1/c1_IR.cpp
@@ -218,6 +218,23 @@
 }
 
 
+void CodeEmitInfo::record_non_safepoint_debug_info(DebugInformationRecorder* recorder, int pc_offset) {
+  if (recorder->recording_non_safepoints()) {
+    recorder->add_non_safepoint(pc_offset);
+    record_scope_non_safepoint_debug_info(recorder, pc_offset, scope(), stack());
+    recorder->end_non_safepoint(pc_offset);
+  }
+}
+
+
+void CodeEmitInfo::record_scope_non_safepoint_debug_info(DebugInformationRecorder* recorder, int pc_offset, IRScope *scope, ValueStack *state) {
+  if (scope->caller() != NULL) {
+    record_scope_non_safepoint_debug_info(recorder, pc_offset, scope->caller(), state->caller_state());
+  }
+  recorder->describe_scope(pc_offset, scope->method(), state->bci(), false);
+}
+
+
 void CodeEmitInfo::add_register_oop(LIR_Opr opr) {
   assert(_oop_map != NULL, "oop map must already exist");
   assert(opr->is_single_cpu(), "should not call otherwise");
diff --git a/src/share/vm/c1/c1_IR.hpp b/src/share/vm/c1/c1_IR.hpp
--- a/src/share/vm/c1/c1_IR.hpp
+++ b/src/share/vm/c1/c1_IR.hpp
@@ -259,6 +259,8 @@
   FrameMap*     frame_map() const                { return scope()->compilation()->frame_map(); }
   Compilation*  compilation() const              { return scope()->compilation(); }
 
+  void record_scope_non_safepoint_debug_info(DebugInformationRecorder* recorder, int pc_offset, IRScope *scope, ValueStack *state);
+
  public:
 
   // use scope from ValueStack
@@ -277,6 +279,7 @@
 
   void add_register_oop(LIR_Opr opr);
   void record_debug_info(DebugInformationRecorder* recorder, int pc_offset);
+  void record_non_safepoint_debug_info(DebugInformationRecorder* recorder, int pc_offset);
 
   bool     is_method_handle_invoke() const { return _is_method_handle_invoke;     }
   void set_is_method_handle_invoke(bool x) {        _is_method_handle_invoke = x; }
diff --git a/src/share/vm/c1/c1_LIRAssembler.cpp b/src/share/vm/c1/c1_LIRAssembler.cpp
--- a/src/share/vm/c1/c1_LIRAssembler.cpp
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp
@@ -121,7 +121,8 @@
  , _frame_map(c->frame_map())
  , _current_block(NULL)
  , _pending_non_safepoint(NULL)
- , _pending_non_safepoint_offset(0)
+ , _pending_non_safepoint_begin_offset(0)
+ , _pending_non_safepoint_end_offset(0)
 {
   _slow_case_stubs = new CodeStubList();
 }
@@ -365,7 +366,7 @@
   if (src == NULL)  return;
   int pc_offset = code_offset();
   if (_pending_non_safepoint == src) {
-    _pending_non_safepoint_offset = pc_offset;
+    _pending_non_safepoint_end_offset = pc_offset;
     return;
   }
   ValueStack* vstack = debug_info(src);
@@ -373,10 +374,13 @@
   if (_pending_non_safepoint != NULL) {
     // Got some old debug info.  Get rid of it.
     if (debug_info(_pending_non_safepoint) == vstack) {
-      _pending_non_safepoint_offset = pc_offset;
+      _pending_non_safepoint_end_offset = pc_offset;
       return;
     }
-    if (_pending_non_safepoint_offset < pc_offset) {
+    if (_pending_non_safepoint_end_offset < pc_offset) {
+      record_non_safepoint_debug_info();
+    } else if (_pending_non_safepoint_begin_offset < pc_offset) {
+      _pending_non_safepoint_end_offset = pc_offset - 1;
       record_non_safepoint_debug_info();
     }
     _pending_non_safepoint = NULL;
@@ -384,7 +388,8 @@
   // Remember the debug info.
   if (pc_offset > compilation()->debug_info_recorder()->last_pc_offset()) {
     _pending_non_safepoint = src;
-    _pending_non_safepoint_offset = pc_offset;
+    _pending_non_safepoint_begin_offset = pc_offset;
+    _pending_non_safepoint_end_offset = pc_offset;
   }
 }
 
@@ -408,7 +413,7 @@
 }
 
 void LIR_Assembler::record_non_safepoint_debug_info() {
-  int         pc_offset = _pending_non_safepoint_offset;
+  int         pc_offset = _pending_non_safepoint_end_offset;
   ValueStack* vstack    = debug_info(_pending_non_safepoint);
   int         bci       = vstack->bci();
 
@@ -449,6 +454,11 @@
   append_code_stub(stub);
 }
 
+void LIR_Assembler::add_non_safepoint_debug_info_here(CodeEmitInfo* info) {
+  flush_debug_info(code_offset());
+  info->record_non_safepoint_debug_info(compilation()->debug_info_recorder(), code_offset());
+}
+
 void LIR_Assembler::emit_rtcall(LIR_OpRTCall* op) {
   rt_call(op->result_opr(), op->addr(), op->arguments(), op->tmp(), op->info());
 }
diff --git a/src/share/vm/c1/c1_LIRAssembler.hpp b/src/share/vm/c1/c1_LIRAssembler.hpp
--- a/src/share/vm/c1/c1_LIRAssembler.hpp
+++ b/src/share/vm/c1/c1_LIRAssembler.hpp
@@ -45,7 +45,8 @@
   BlockBegin*        _current_block;
 
   Instruction*       _pending_non_safepoint;
-  int                _pending_non_safepoint_offset;
+  int                _pending_non_safepoint_begin_offset;
+  int                _pending_non_safepoint_end_offset;
 
   Label              _unwind_handler_entry;
 
@@ -62,8 +63,13 @@
   // non-safepoint debug info management
   void flush_debug_info(int before_pc_offset) {
     if (_pending_non_safepoint != NULL) {
-      if (_pending_non_safepoint_offset < before_pc_offset)
+      if (_pending_non_safepoint_end_offset < before_pc_offset) {
         record_non_safepoint_debug_info();
+      } else if (_pending_non_safepoint_begin_offset < before_pc_offset) {
+        // stretch as far as possible before the conflict
+        _pending_non_safepoint_end_offset = before_pc_offset - 1;
+        record_non_safepoint_debug_info();
+      }
       _pending_non_safepoint = NULL;
     }
   }
@@ -146,6 +152,7 @@
   void emit_static_call_stub();
   void append_code_stub(CodeStub* op);
   void add_call_info_here(CodeEmitInfo* info)                              { add_call_info(code_offset(), info); }
+  void add_non_safepoint_debug_info_here(CodeEmitInfo* info);
 
   // code patterns
   int  emit_exception_handler();
diff --git a/src/share/vm/c1/c1_LIRGenerator.cpp b/src/share/vm/c1/c1_LIRGenerator.cpp
--- a/src/share/vm/c1/c1_LIRGenerator.cpp
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp
@@ -434,7 +434,7 @@
         // all locals are dead on exit from the synthetic unlocker
         liveness.clear();
       } else {
-        assert(x->as_MonitorEnter() || x->as_ProfileInvoke(), "only other cases are MonitorEnter and ProfileInvoke");
+        assert(x->as_MonitorEnter() || x->as_MonitorExit() || x->as_ProfileInvoke(), "only other cases are MonitorEnter, MonitorExit and ProfileInvoke");
       }
     }
     if (!liveness.is_valid()) {
@@ -653,12 +653,12 @@
 }
 
 
-void LIRGenerator::monitor_exit(LIR_Opr object, LIR_Opr lock, LIR_Opr new_hdr, LIR_Opr scratch, int monitor_no) {
+void LIRGenerator::monitor_exit(LIR_Opr object, LIR_Opr lock, LIR_Opr new_hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info) {
   if (!GenerateSynchronizationCode) return;
   // setup registers
   LIR_Opr hdr = lock;
   lock = new_hdr;
-  CodeStub* slow_path = new MonitorExitStub(lock, UseFastLocking, monitor_no);
+  CodeStub* slow_path = new MonitorExitStub(lock, UseFastLocking, monitor_no, info);
   __ load_stack_address_monitor(monitor_no, lock);
   __ unlock_object(hdr, object, lock, scratch, slow_path);
 }
diff --git a/src/share/vm/c1/c1_LIRGenerator.hpp b/src/share/vm/c1/c1_LIRGenerator.hpp
--- a/src/share/vm/c1/c1_LIRGenerator.hpp
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp
@@ -325,7 +325,7 @@
   void logic_op   (Bytecodes::Code code, LIR_Opr dst_reg, LIR_Opr left, LIR_Opr right);
 
   void monitor_enter (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info_for_exception, CodeEmitInfo* info);
-  void monitor_exit  (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no);
+  void monitor_exit  (LIR_Opr object, LIR_Opr lock, LIR_Opr hdr, LIR_Opr scratch, int monitor_no, CodeEmitInfo* info);
 
   void new_instance    (LIR_Opr  dst, ciInstanceKlass* klass, bool is_unresolved, LIR_Opr  scratch1, LIR_Opr  scratch2, LIR_Opr  scratch3,  LIR_Opr scratch4, LIR_Opr  klass_reg, CodeEmitInfo* info);
 
diff --git a/src/share/vm/classfile/classLoaderData.cpp b/src/share/vm/classfile/classLoaderData.cpp
--- a/src/share/vm/classfile/classLoaderData.cpp
+++ b/src/share/vm/classfile/classLoaderData.cpp
@@ -64,6 +64,7 @@
 #include "utilities/growableArray.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/ostream.hpp"
+#include "evtrace/traceEvents.hpp"
 #if INCLUDE_TRACE
 #include "trace/tracing.hpp"
 #endif
@@ -306,6 +307,9 @@
   _unloading = true;
 
   // Tell serviceability tools these classes are unloading
+  if (EnableEventTracing) {
+    TraceManager::class_loader_is_unloading(this); // also writes an event
+  }
   classes_do(InstanceKlass::notify_unload_class);
 
   if (TraceClassLoaderData) {
diff --git a/src/share/vm/classfile/javaClasses.cpp b/src/share/vm/classfile/javaClasses.cpp
--- a/src/share/vm/classfile/javaClasses.cpp
+++ b/src/share/vm/classfile/javaClasses.cpp
@@ -50,6 +50,7 @@
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/vframe.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/preserveException.hpp"
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
@@ -937,6 +938,16 @@
   compute_optional_offset(_park_blocker_offset, k, vmSymbols::park_blocker_name(), vmSymbols::object_signature());
   compute_optional_offset(_park_event_offset, k, vmSymbols::park_event_name(),
  vmSymbols::long_signature());
+
+  int park_last_global_seq_offset_offset = 0;
+  compute_offset(park_last_global_seq_offset_offset, k, vmSymbols::park_last_global_seq_offset_name(), vmSymbols::long_signature());
+  k->java_mirror()->long_field_put(park_last_global_seq_offset_offset, in_bytes(Thread::park_last_global_seq_offset()));
+  int park_priority_offset_offset = 0;
+  compute_offset(park_priority_offset_offset, k, vmSymbols::park_priority_offset_name(), vmSymbols::long_signature());
+  k->java_mirror()->long_field_put(park_priority_offset_offset, in_bytes(Thread::park_priority_offset()));
+  int nesting_level_offset_offset = 0;
+  compute_offset(nesting_level_offset_offset, k, vmSymbols::nesting_level_offset_name(), vmSymbols::long_signature());
+  k->java_mirror()->long_field_put(nesting_level_offset_offset, in_bytes(Thread::nesting_level_offset()));
 }
 
 
diff --git a/src/share/vm/classfile/systemDictionary.cpp b/src/share/vm/classfile/systemDictionary.cpp
--- a/src/share/vm/classfile/systemDictionary.cpp
+++ b/src/share/vm/classfile/systemDictionary.cpp
@@ -496,10 +496,11 @@
   assert(calledholdinglock,"must hold lock for notify");
   assert((!(lockObject() == _system_loader_lock_obj) && !is_parallelCapable(lockObject)), "unexpected double_lock_wait");
   ObjectSynchronizer::notifyall(lockObject, THREAD);
-  intptr_t recursions =  ObjectSynchronizer::complete_exit(lockObject, THREAD);
+  intptr_t saved_recursions, saved_trace_exit_stack;
+  ObjectSynchronizer::complete_exit(lockObject, &saved_recursions, &saved_trace_exit_stack, THREAD);
   SystemDictionary_lock->wait();
   SystemDictionary_lock->unlock();
-  ObjectSynchronizer::reenter(lockObject, recursions, THREAD);
+  ObjectSynchronizer::reenter(lockObject, saved_recursions, saved_trace_exit_stack, THREAD);
   SystemDictionary_lock->lock();
 }
 
diff --git a/src/share/vm/classfile/vmSymbols.hpp b/src/share/vm/classfile/vmSymbols.hpp
--- a/src/share/vm/classfile/vmSymbols.hpp
+++ b/src/share/vm/classfile/vmSymbols.hpp
@@ -371,6 +371,9 @@
   template(exclusive_owner_thread_name,               "exclusiveOwnerThread")                     \
   template(park_blocker_name,                         "parkBlocker")                              \
   template(park_event_name,                           "nativeParkEventPointer")                   \
+  template(park_last_global_seq_offset_name,          "parkLastGlobalSeqOffset")                  \
+  template(park_priority_offset_name,                 "parkPriorityOffset")                       \
+  template(nesting_level_offset_name,                 "nestingLevelOffset")                       \
   template(cache_field_name,                          "cache")                                    \
   template(value_name,                                "value")                                    \
   template(offset_name,                               "offset")                                   \
@@ -581,6 +584,12 @@
   template(addThreadDumpForMonitors_signature,         "(Ljava/lang/management/ThreadInfo;[Ljava/lang/Object;[I)V") \
   template(addThreadDumpForSynchronizers_signature,    "(Ljava/lang/management/ThreadInfo;[Ljava/lang/Object;)V")   \
                                                                                                                   \
+  template(sun_evtracing_TraceReaderThread,            "sun/evtracing/TraceReaderThread")                         \
+  template(TraceReaderThread_constructor_signature,    "(Lsun/evtracing/TraceBufferQueue;Lsun/evtracing/TraceBufferQueue;)V") \
+  template(sun_evtracing_TraceBufferQueue,             "sun/evtracing/TraceBufferQueue")                          \
+  template(TraceBufferQueue_from_handle_method_name,   "fromHandle")                                              \
+  template(TraceBufferQueue_from_handle_method_signature, "(J)Lsun/evtracing/TraceBufferQueue;")                  \
+                                                                                                                  \
   /* JVMTI/java.lang.instrument support and VM Attach mechanism */                                                \
   template(sun_misc_VMSupport,                         "sun/misc/VMSupport")                                      \
   template(appendToClassPathForInstrumentation_name,   "appendToClassPathForInstrumentation")                     \
diff --git a/src/share/vm/code/debugInfoRec.cpp b/src/share/vm/code/debugInfoRec.cpp
--- a/src/share/vm/code/debugInfoRec.cpp
+++ b/src/share/vm/code/debugInfoRec.cpp
@@ -80,10 +80,10 @@
 };
 
 static inline bool compute_recording_non_safepoints() {
-  if (JvmtiExport::should_post_compiled_method_load()
+  if ((EnableEventTracing || JvmtiExport::should_post_compiled_method_load())
       && FLAG_IS_DEFAULT(DebugNonSafepoints)) {
     // The default value of this flag is taken to be true,
-    // if JVMTI is looking at nmethod codes.
+    // if event tracing is enabled or JVMTI is looking at nmethod codes.
     // We anticipate that JVMTI may wish to participate in profiling.
     return true;
   }
diff --git a/src/share/vm/code/nmethod.cpp b/src/share/vm/code/nmethod.cpp
--- a/src/share/vm/code/nmethod.cpp
+++ b/src/share/vm/code/nmethod.cpp
@@ -43,6 +43,7 @@
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 #include "utilities/xmlstream.hpp"
+#include "evtrace/traceManager.hpp"
 #ifdef SHARK
 #include "shark/sharkCompiler.hpp"
 #endif
@@ -1687,6 +1688,11 @@
   }
 
   assert(_method != NULL && !is_unloaded(), "just checking");
+
+  if (EnableEventTracing) {
+    TraceManager::nmethod_is_unloading(this);
+  }
+
   DTRACE_METHOD_UNLOAD_PROBE(method());
 
   // If a JVMTI agent has enabled the CompiledMethodUnload event then
diff --git a/src/share/vm/evtrace/traceBuffer.hpp b/src/share/vm/evtrace/traceBuffer.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceBuffer.hpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEBUFFER_HPP
+#define SHARE_VM_EVTRACE_TRACEBUFFER_HPP
+
+#include "memory/allocation.hpp"
+
+class Thread;
+
+class TraceBuffer: public CHeapObj<mtEventTracing> {
+public:
+  TraceBuffer(size_t capacity);
+  virtual ~TraceBuffer() { }
+
+  void reset();
+  bool reserve(size_t nbytes);
+  size_t capacity() const;
+  size_t filled_size() const;
+  size_t remaining_capacity() const;
+
+  void* operator new(size_t size, size_t capacity) throw();
+
+  TraceBuffer  *queue_next;
+  u1           *top;
+  const u1     *end;
+  Thread       *owner;
+  s8            owner_id;
+  u1            data[0]; // follows here
+};
+
+#include "evtrace/traceBuffer.inline.hpp"
+
+#endif /* SHARE_VM_EVTRACE_TRACEBUFFER_HPP */
diff --git a/src/share/vm/evtrace/traceBuffer.inline.hpp b/src/share/vm/evtrace/traceBuffer.inline.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceBuffer.inline.hpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEBUFFER_INLINE_HPP
+#define SHARE_VM_EVTRACE_TRACEBUFFER_INLINE_HPP
+
+#include "runtime/thread.hpp"
+
+inline TraceBuffer::TraceBuffer(size_t capacity)
+  : queue_next(NULL),
+    end(data + capacity)
+{
+  reset();
+}
+
+inline void TraceBuffer::reset() {
+  top = data;
+  owner = NULL;
+  owner_id = 0;
+  DEBUG_ONLY(for (uint8_t *p = data; p != end; p++) *p = 0;)
+}
+
+inline bool TraceBuffer::reserve(size_t nbytes) {
+  assert(top >= data, "top before data area");
+  assert(top <= end, "top after data area");
+  assert(owner != NULL, "has no owner");
+  assert(Thread::current() == owner, "current thread is not the owner");
+
+  u1 *new_top = top + nbytes;
+  if (new_top > end) {
+    return false;
+  }
+  top = new_top;
+  return true;
+}
+
+inline void* TraceBuffer::operator new(size_t size, size_t capacity) throw () {
+  return CHeapObj<mtEventTracing>::operator new(size + capacity, CALLER_PC);
+}
+
+inline size_t TraceBuffer::capacity() const {
+  return (size_t) (end - data);
+}
+
+inline size_t TraceBuffer::filled_size() const {
+  return (size_t) (top - data);
+}
+
+inline size_t TraceBuffer::remaining_capacity() const {
+  return (size_t) (end - top);
+}
+
+#endif /* SHARE_VM_EVTRACE_TRACEBUFFER_INLINE_HPP */
diff --git a/src/share/vm/evtrace/traceBufferQueue.cpp b/src/share/vm/evtrace/traceBufferQueue.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceBufferQueue.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceBufferQueue.hpp"
+
+#include "evtrace/traceBuffer.hpp"
+
+class TraceBufferQueue::SpinLocker: public StackObj {
+private:
+  TraceBufferQueue * const _queue;
+
+public:
+  SpinLocker(TraceBufferQueue *q, const char *s) : _queue(q) {
+    Thread::SpinAcquire(&_queue->_mtx, s);
+  }
+
+  ~SpinLocker() {
+    Thread::SpinRelease(&_queue->_mtx);
+  }
+};
+
+TraceBufferQueue::TraceBufferQueue()
+: _head(NULL),
+  _tail(NULL),
+  _mtx(0),
+  _enqueue_ops(0),
+  _dequeue_ops(0)
+{
+}
+
+TraceBufferQueue::~TraceBufferQueue() {
+}
+
+size_t TraceBufferQueue::count() {
+  SpinLocker sl(this, "TraceBufferQueue - count");
+  return (_enqueue_ops - _dequeue_ops);
+}
+
+TraceBuffer * TraceBufferQueue::try_dequeue() {
+  TraceBuffer *buffer = NULL;
+
+  {
+    SpinLocker sl(this, "TraceBufferQueue - try_dequeue");
+    if (_head != NULL) {
+      buffer = _head;
+      _head = _head->queue_next;
+      if (_head == NULL) {
+        _tail = NULL;
+      }
+      _dequeue_ops++;
+    }
+  }
+
+  if (buffer != NULL) {
+    buffer->queue_next = NULL;
+  }
+  return buffer;
+}
+
+void TraceBufferQueue::enqueue(TraceBuffer * const buffer) {
+  assert(buffer != NULL, "sanity");
+  assert(buffer->queue_next == NULL, "queue linkage");
+
+  SpinLocker sl(this, "TraceBufferQueue - enqueue");
+  if (_tail != NULL) {
+    _tail->queue_next = buffer;
+  } else {
+    _head = buffer;
+  }
+  _tail = buffer;
+  _enqueue_ops++;
+}
diff --git a/src/share/vm/evtrace/traceBufferQueue.hpp b/src/share/vm/evtrace/traceBufferQueue.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceBufferQueue.hpp
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEBUFFERQUEUE_HPP
+#define SHARE_VM_EVTRACE_TRACEBUFFERQUEUE_HPP
+
+#include "memory/allocation.hpp"
+
+class TraceBuffer;
+
+class TraceBufferQueue: public CHeapObj<mtEventTracing> {
+private:
+  volatile jlong _enqueue_ops;
+  volatile jlong _dequeue_ops;
+  volatile int _mtx;
+  TraceBuffer * volatile _head;
+  TraceBuffer * volatile _tail;
+
+  class SpinLocker;
+
+public:
+  TraceBufferQueue();
+  virtual ~TraceBufferQueue();
+
+  bool is_empty() { return (_head == NULL); }
+  size_t count();
+
+  void enqueue(TraceBuffer *buffer);
+  TraceBuffer * try_dequeue();
+
+  jlong fill_mark()   { return _dequeue_ops; }
+  bool has_dequeued_at_mark(jlong mark) { return (_dequeue_ops >= mark); }
+
+  jlong enqueue_ops() { return _enqueue_ops; }
+  jlong dequeue_ops() { return _dequeue_ops; }
+};
+
+#endif /* SHARE_VM_EVTRACE_TRACEBUFFERQUEUE_HPP */
diff --git a/src/share/vm/evtrace/traceEvents.cpp b/src/share/vm/evtrace/traceEvents.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceEvents.cpp
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceEvents.hpp"
+
+#include "evtrace/traceWriter.hpp"
+#include "evtrace/traceManager.hpp"
+#include "evtrace/traceMetadata.hpp"
+
+#include "runtime/vframe.hpp"
+
+void TraceEvents::initialize() {
+  assert(_event_max <= UINT8_MAX, "event type does not fit");
+  assert(_park_return_code_max <= UINT8_MAX, "park return code does not fit");
+  assert(_safepoint_reason_max <= UINT8_MAX, "safepoint reason does not fit");
+  assert(_monitor_enter_wait_max <= UINT8_MAX, "monitor wait code does not fit");
+  assert(_monitor_entered_flags_max <= UINT8_MAX, "monitor entered flags do not fit");
+}
+
+inline bool TraceEvents::can_write() {
+  return TraceManager::is_initialized();
+}
+
+inline TraceTypes::timestamp TraceEvents::time_now() {
+  return TraceManager::metadata()->time_now();
+}
+
+inline TraceTypes::thread_id TraceEvents::thread_id_for(Thread* t) {
+  return TraceManager::metadata()->thread_id(t);
+}
+
+inline TraceTypes::object_id TraceEvents::object_id_for(oop obj) {
+  return TraceManager::metadata()->object_id(obj);
+}
+
+inline TraceTypes::objmonitor_id TraceEvents::objmonitor_id_for(ObjectMonitor* om) {
+  return TraceManager::metadata()->objmonitor_id(om);
+}
+
+inline TraceTypes::object_id TraceEvents::objmonitor_object_id_for(ObjectMonitor* om) {
+  return TraceManager::metadata()->objmonitor_object_id(om);
+}
+
+inline TraceTypes::classloader_id TraceEvents::classloader_id_for(ClassLoaderData* cld) {
+  return TraceManager::metadata()->classloader_id(cld);
+}
+
+inline TraceTypes::method_id TraceEvents::method_id_for(Method* m) {
+  bool added;
+  method_id mid = TraceManager::metadata()->method_id(m, added);
+  assert(!added, "must have been known");
+  return mid;
+}
+
+void TraceEvents::write_thread_start() {
+  if (!can_write())
+    return;
+
+  Thread *t = Thread::current();
+  assert(t != NULL, "thread not attached");
+
+  ResourceMark rm;
+  const char *name = NULL;
+  if (t->is_Java_thread()) {
+    name = ((JavaThread *) t)->get_thread_name();
+  } else if (t->is_Named_thread()) {
+    name = ((NamedThread *) t)->name();
+  }
+
+  const size_t name_nbytes = TraceWriter::nbytes_for_utf8str(name, 64);
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + name_nbytes);
+  wr.put_event_type(event_thread_start);
+  wr.put_timestamp(time_now());
+  wr.put_utf8str(name, name_nbytes);
+}
+
+void TraceEvents::write_thread_name_change(Thread *t) {
+  assert(t != NULL, "null thread");
+
+  if (!can_write())
+    return;
+
+  ResourceMark rm;
+  const char *name = NULL;
+  if (t->is_Java_thread()) {
+    name = ((JavaThread *) t)->get_thread_name();
+  }
+
+  const size_t name_nbytes = TraceWriter::nbytes_for_utf8str(name, 64);
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(thread_id) + name_nbytes);
+  wr.put_event_type(event_thread_name_change);
+  wr.put_timestamp(time_now());
+  wr.put_thread_id(thread_id_for(t));
+  wr.put_utf8str(name, name_nbytes);
+}
+
+void TraceEvents::write_thread_state_change(Thread *t) {
+  assert(t != NULL, "null thread");
+
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(thread_id) + sizeof(thread_state));
+  wr.put_event_type(event_thread_state_change);
+  wr.put_timestamp(time_now());
+  wr.put_thread_id(thread_id_for(t));
+  wr.put_thread_state(TraceManager::metadata()->thread_state(t));
+}
+
+void TraceEvents::write_thread_interrupt(Thread *t) {
+  assert(t != NULL, "null thread");
+
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(thread_id));
+  wr.put_event_type(event_thread_interrupt);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(TraceManager::metadata()->next_global_seq());
+  wr.put_thread_id(thread_id_for(t));
+}
+
+void TraceEvents::write_thread_exit() {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp));
+  wr.put_event_type(event_thread_exit);
+  wr.put_timestamp(time_now());
+}
+
+void TraceEvents::write_thread_park_begin(JavaThread *t, bool is_absolute, timestamp park_time) {
+  assert(t != NULL, "null thread");
+
+  if (!can_write())
+    return;
+
+  Klass* klass = NULL;
+  object_id oid = 0;
+  {
+    oop blocker = t->current_park_blocker();
+    if (blocker != NULL) {
+      klass = blocker->klass();
+      oid = object_id_for(blocker);
+      assert(oid != 0, "just checking");
+    }
+  } // blocker oop may not be valid anymore after object_id_for()
+
+  // recheck necessary, because object_id_for can cause a safepoint check
+  if (!can_write())
+    return;
+
+  class_id cid = 0;
+  if (klass != NULL) {
+    cid = retrieve_class_id_or_write_metadata(klass);
+    assert(cid != 0, "just checking");
+  }
+
+  assert(t->has_last_Java_frame(), "must have");
+  stack_id stid = retrieve_stack_id_or_write_metadata(t);
+
+  int nesting_level = t->nesting_level();
+  if (nesting_level > UINT8_MAX) {
+    nesting_level = UINT8_MAX;
+  }
+
+  No_Safepoint_Verifier nsv(true, false);
+  seq_num seq = TraceManager::metadata()->next_global_seq();
+  t->set_park_last_global_seq(seq);
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(object_id) + sizeof(class_id) + sizeof(stack_id) + sizeof(u1) + sizeof(u1) + sizeof(timestamp));
+  wr.put_event_type(event_thread_park_begin);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(seq);
+  wr.put_object_id(oid);
+  wr.put_class_id(cid);
+  wr.put_stack_id(stid);
+  wr.put_u1(nesting_level);
+  wr.put_u1(is_absolute);
+  wr.put_timestamp(park_time);
+}
+
+void TraceEvents::write_thread_park_end(Thread *t, seq_num seq, seq_num unpark_seq, park_return_code return_code) {
+  assert(t != NULL, "null thread");
+  assert(return_code > _park_return_code_min && return_code < _park_return_code_max, "invalid return_code");
+
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(seq_num) + sizeof(u1));
+  wr.put_event_type(event_thread_park_end);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(seq);
+  wr.put_seq_num(unpark_seq);
+  wr.put_u1(return_code);
+}
+
+void TraceEvents::write_thread_unpark(thread_id thread, seq_num seq, seq_num chained_seq) {
+  if (!can_write())
+    return;
+
+  stack_id stid = 0;
+  Thread *t = Thread::current();
+  if (t->is_Java_thread()) {
+    JavaThread *jt = (JavaThread*) t;
+    assert(jt->has_last_Java_frame(), "must have");
+    stid = retrieve_stack_id_or_write_metadata(jt);
+  }
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(seq_num) + sizeof(thread_id) + sizeof(stack_id));
+  wr.put_event_type(event_thread_unpark);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(seq);
+  wr.put_seq_num(chained_seq);
+  wr.put_thread_id(thread);
+  wr.put_stack_id(stid);
+}
+
+void TraceEvents::write_monitor_inflate(ObjectMonitor* m, seq_num seq) {
+  if (!can_write())
+    return;
+
+  class_id cid = retrieve_class_id_or_write_metadata(((oop)m->object())->klass());
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(objmonitor_id) + sizeof(object_id) + sizeof(class_id));
+  wr.put_event_type(event_monitor_inflate);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(seq);
+  wr.put_objmonitor_id(objmonitor_id_for(m));
+  wr.put_object_id(objmonitor_object_id_for(m));
+  wr.put_class_id(cid);
+}
+
+void TraceEvents::write_monitor_deflate(ObjectMonitor* m) {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(objmonitor_id));
+  wr.put_event_type(event_monitor_deflate);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(m->next_trace_seq());
+  wr.put_objmonitor_id(objmonitor_id_for(m));
+}
+
+TraceTypes::class_id TraceEvents::retrieve_class_id_or_write_metadata(Klass *klass) {
+  assert(can_write(), "caller's responsibility");
+
+  bool is_new;
+  class_id id = TraceManager::metadata()->class_id(klass, is_new);
+  if (is_new) {
+    ResourceMark rm;
+    const char *name = klass->name()->as_utf8();
+
+    const size_t name_nbytes = TraceWriter::nbytes_for_utf8str(name, 256);
+    TraceWriter wr(sizeof(event_type) + sizeof(class_id) + sizeof(classloader_id) + name_nbytes);
+    wr.put_event_type(event_class_metadata);
+    wr.put_class_id(id);
+    wr.put_classloader_id(classloader_id_for(klass->class_loader_data()));
+    wr.put_utf8str(name, name_nbytes);
+  }
+  return id;
+}
+
+TraceTypes::method_id TraceEvents::retrieve_method_id_or_write_metadata(Method *method) {
+  assert(can_write(), "caller's responsibility");
+
+  bool is_new;
+  method_id id = TraceManager::metadata()->method_id(method, is_new);
+  if (is_new) {
+    class_id holder_id = retrieve_class_id_or_write_metadata(method->method_holder());
+
+    ResourceMark rm;
+    const char *name = method->name()->as_utf8(),
+               *sig  = method->signature()->as_utf8();
+
+    const size_t name_nbytes = TraceWriter::nbytes_for_utf8str(name, 256),
+                 sig_nbytes  = TraceWriter::nbytes_for_utf8str(sig, 1024);
+    TraceWriter wr(sizeof(event_type) + sizeof(method_id) + sizeof(class_id) + name_nbytes + sig_nbytes);
+    wr.put_event_type(event_method_metadata);
+    wr.put_method_id(id);
+    wr.put_class_id(holder_id);
+    wr.put_utf8str(name, name_nbytes);
+    wr.put_utf8str(sig, sig_nbytes);
+  }
+  return id;
+}
+
+TraceTypes::stack_id TraceEvents::retrieve_stack_id_or_write_metadata(JavaThread *t, stack_id preallocated_id) {
+  class StatsUpdate : StackObj {
+  public:
+    bool  truncated;
+    jlong frames, reused_memento_frames;
+    StatsUpdate() : truncated(false), frames(0), reused_memento_frames(0) {}
+    ~StatsUpdate() {
+      TraceManager::update_stack_trace_stats(truncated, frames, reused_memento_frames);
+    }
+  };
+
+  assert(can_write(), "caller's responsibility");
+
+  if (!EnableEventTracingStackTraces) {
+    assert(preallocated_id == 0, "no preallocation allowed");
+    return (TraceTypes::stack_id) 0;
+  }
+
+  StatsUpdate stats;
+
+  bool have_memento = (t->memento_original_return_address() != NULL);
+  const CachedTraceStack *memento_stack = NULL;
+  if (have_memento) {
+    // we have patched a stack frame earlier and it has not returned yet
+    memento_stack = t->memento_stack_trace();
+    // NOTE: after metadata has been purged, memento stack trace is NULL (safe to ignore)
+  }
+
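+  // Walk the stack from the top frame. If a frame was memento-marked during an
+  // earlier walk and its cached (memento) stack trace is still available, stop
+  // at the marked frame and splice in the cached frames below it instead of
+  // walking them again; if no frame is currently marked, mark the frame at
+  // depth EventTracingStackMementoFrame so the next walk can reuse this trace.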
+  TraceStackBuilder sb;
+  bool new_memento = false;
+
+  RegisterMap rm(t, false);
+  frame fr = t->last_frame();
+  int index = -1;
+  while (!fr.is_first_frame() && !sb.is_full()) {
+    if ((fr.cb() != NULL && fr.cb()->is_nmethod()) || fr.is_interpreted_frame()) {
+      index++;
+      sb.add_frame(&fr);
+
+      if (memento_stack != NULL) {
+        // if the memento stack trace is truncated, we may only reuse it when at
+        // least as many frames sit above the patched frame as when it was marked;
+        // otherwise frames would be missing at the bottom
+        if (!memento_stack->is_truncated() || index >= EventTracingStackMementoFrame) {
+          if (fr.is_memento_marked(t)) {
+            break;
+          }
+        }
+      } else if (!have_memento && index == EventTracingStackMementoFrame) {
+        fr.memento_mark(t);
+        new_memento = true;
+      }
+    }
+    fr = fr.sender(&rm);
+  }
+
+  if (!fr.is_first_frame() && sb.is_full()) {
+    sb.set_truncated();
+  }
+
+  CompositeTraceStack cps(sb);
+
+  if (memento_stack != NULL && !fr.is_first_frame()) {
+    // reached memento frame
+
+#ifdef ASSERT
+    // another stack walk for validation
+    TraceStackBuilder sb0;
+    RegisterMap rm0(t, false);
+    frame fr0 = t->last_frame();
+    while (!fr0.is_first_frame() && !sb0.is_full()) {
+      if (fr0.is_interpreted_frame() || fr0.is_compiled_frame() || fr0.is_native_frame()) {
+        sb0.add_frame(&fr0);
+      }
+      fr0 = fr0.sender(&rm0);
+    }
+    if (!fr0.is_first_frame()) {
+      assert(sb0.is_full(), "incomplete stack walk");
+      sb0.set_truncated();
+    }
+    CompositeTraceStack cps0(sb0);
+#endif
+
+    if (index == EventTracingStackMementoFrame && sb.range_equals(0, memento_stack, 0, index + 1)) {
+      // top frames are equal, exact match
+      assert(cps0.equals(memento_stack), "sanity");
+      stats.frames = memento_stack->count();
+      stats.reused_memento_frames = stats.frames - sb.count(); // still had to walk the top frames
+      stats.truncated = memento_stack->is_truncated();
+      return memento_stack->id();
+    }
+
+    // complete with frames from cache
+    cps.set_bottom(memento_stack, EventTracingStackMementoFrame + 1);
+    stats.reused_memento_frames = cps.count() - sb.count();
+
+    assert(cps.equals(cps0), "sanity");
+  }
+
+  stats.frames = cps.count();
+  stats.truncated = cps.is_truncated();
+
+  bool is_known;
+  const CachedTraceStack *cached = TraceManager::metadata()->get_or_try_add_stack(cps, is_known, preallocated_id);
+  TraceTypes::stack_id id;
+  if (cached != NULL) {
+    id = cached->id();
+    assert(is_known || preallocated_id == 0 || cached->id() == preallocated_id, "sanity");
+  } else {
+    // insert failed (full)
+    id = preallocated_id;
+    if (id == 0) {
+      // allocate a one-time id
+      // FIXME: wasteful, since get_or_try_add_stack() likely also allocated an id and threw it away
+      id = TraceManager::metadata()->next_stack_id();
+    }
+  }
+  if (!is_known) {
+    write_stack_metadata(id, cps);
+  } else if (preallocated_id != 0) {
+    write_identical_stacks_metadata(preallocated_id, cached->id());
+  }
+  if (new_memento) {
+    t->set_memento_stack_trace(cached);
+  }
+  return id;
+}
+
+void TraceEvents::write_stack_metadata(stack_id id, const CompositeTraceStack &ts) {
+  assert(can_write(), "caller's responsibility");
+
+  // TODO: with heavy inlining, stack traces can become so large
+  // that they no longer fit in a single trace buffer
+
+  size_t count = 0;
+
+  // make methods known
+  TraceStackVframeIterator it(ts);
+  while (it.has_next()) {
+    it.next();
+    retrieve_class_id_or_write_metadata(it.method()->method_holder());
+    retrieve_method_id_or_write_metadata(it.method());
+    count++;
+  }
+  it.reset();
+
+  TraceWriter wr(sizeof(event_type) + sizeof(stack_id) + sizeof(u1) + sizeof(u2) + count * (sizeof(method_id) + sizeof(method_bci)));
+  wr.put_event_type(event_stack_metadata);
+  wr.put_stack_id(id);
+  wr.put_u1(ts.is_truncated() ? 1 : 0);
+  assert(count == (u2) count, "must fit");
+  wr.put_u2(count);
+  while (it.has_next()) {
+    it.next();
+    wr.put_method_id(method_id_for(it.method()));
+    wr.put_method_bci(it.bci());
+  }
+}
+
+void TraceEvents::write_identical_stacks_metadata(stack_id id, stack_id known) {
+  assert(can_write(), "caller's responsibility");
+
+  TraceWriter wr(sizeof(event_type) + sizeof(stack_id) + sizeof(stack_id));
+  wr.put_event_type(event_identical_stacks_metadata);
+  wr.put_stack_id(id);
+  wr.put_stack_id(known);
+}
+
+void TraceEvents::write_class_loader_unload(ClassLoaderData* cld) {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(classloader_id));
+  wr.put_event_type(event_class_loader_unload);
+  wr.put_classloader_id(classloader_id_for(cld));
+}
+
+void TraceEvents::write_monitor_contended_enter(ObjectMonitor *m, monitor_enter_wait wait) {
+  if (!can_write())
+    return;
+
+  stack_id stid = 0;
+  if (JavaThread::current()->has_last_Java_frame()) {
+    stid = retrieve_stack_id_or_write_metadata(JavaThread::current());
+  }
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(objmonitor_id) + sizeof(stack_id) + sizeof(u1));
+  wr.put_event_type(event_monitor_contended_enter);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(m->next_trace_seq());
+  wr.put_objmonitor_id(objmonitor_id_for(m));
+  wr.put_stack_id(stid);
+  wr.put_u1(wait);
+}
+
+void TraceEvents::write_monitor_contended_entered(ObjectMonitor *m, monitor_entered_flags flags) {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(objmonitor_id) + sizeof(u1));
+  wr.put_event_type(event_monitor_contended_entered);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(m->next_trace_seq());
+  wr.put_objmonitor_id(objmonitor_id_for(m));
+  wr.put_u1(flags);
+}
+
+void TraceEvents::write_monitor_contended_exited(ObjectMonitor *m, seq_num seq, stack_id preallocated_stack_id, bool resolve_stack) {
+  if (!can_write())
+    return;
+
+  TraceTypes::stack_id stid = preallocated_stack_id;
+  if (resolve_stack) {
+    if (JavaThread::current()->has_last_Java_frame()) {
+      stid = retrieve_stack_id_or_write_metadata(JavaThread::current(), preallocated_stack_id);
+    } else if (preallocated_stack_id != 0) {
+      // we preallocated a stack id, so we must resolve it to the unknown stack
+      write_identical_stacks_metadata(preallocated_stack_id, 0);
+    }
+  }
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(objmonitor_id) + sizeof(stack_id));
+  wr.put_event_type(event_monitor_contended_exited);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(seq);
+  wr.put_objmonitor_id(objmonitor_id_for(m));
+  wr.put_stack_id(stid);
+}
+
+void TraceEvents::write_monitor_dummy(ObjectMonitor *m, seq_num seq) {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(seq_num) + sizeof(objmonitor_id));
+  wr.put_event_type(event_monitor_dummy);
+  wr.put_seq_num(seq);
+  wr.put_objmonitor_id(objmonitor_id_for(m));
+}
+
+void TraceEvents::write_safepoint_begin(safepoint_reason reason) {
+  assert(reason > _safepoint_reason_min && reason < _safepoint_reason_max, "invalid reason");
+
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(u1));
+  wr.put_event_type(event_safepoint_begin);
+  wr.put_timestamp(time_now());
+  wr.put_u1((u1) reason);
+}
+
+void TraceEvents::write_safepoint_end(u4 vmops_processed) {
+  if (!can_write())
+    return;
+
+  if (vmops_processed > UINT8_MAX)
+    vmops_processed = UINT8_MAX;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(u1));
+  wr.put_event_type(event_safepoint_end);
+  wr.put_timestamp(time_now());
+  wr.put_u1((u1) vmops_processed);
+}
+
+void TraceEvents::write_vm_end() {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp));
+  wr.put_event_type(event_vm_end);
+  wr.put_timestamp(time_now());
+}
+
+void TraceEvents::write_metadata_reset() {
+  if (!can_write())
+    return;
+
+  TraceWriter wr(sizeof(event_type));
+  wr.put_event_type(event_metadata_reset);
+}
+
+void TraceEvents::write_group(JavaThread* t, seq_num park_global_seq_begin_ref, oop source) {
+  assert(t != NULL, "null thread");
+  assert(source != NULL, "null source");
+
+  if (!can_write())
+    return;
+
+  Klass* klass = source->klass();
+  object_id oid = object_id_for(source);
+  source = NULL; // source oop may not be valid anymore after object_id_for()
+  assert(oid != 0, "just checking");
+
+  // recheck necessary, because object_id_for() can cause a safepoint check
+  if (!can_write())
+    return;
+
+  class_id cid = retrieve_class_id_or_write_metadata(klass);
+  assert(cid != 0, "just checking");
+
+  TraceWriter wr(sizeof(event_type) + sizeof(timestamp) + sizeof(seq_num) + sizeof(seq_num) + sizeof(object_id) + sizeof(class_id));
+  wr.put_event_type(event_group);
+  wr.put_timestamp(time_now());
+  wr.put_seq_num(TraceManager::metadata()->next_global_seq());
+  wr.put_seq_num(park_global_seq_begin_ref);
+  wr.put_object_id(oid);
+  wr.put_class_id(cid);
+}
+
+void TraceEventThreadParkEnd::do_write() {
+  TraceEvents::write_thread_park_end(_thread, _seq, _unpark_seq, _return_code);
+}
+
+void TraceEventThreadUnpark::do_write() {
+  assert(_chained_seq != -1, "chained_seq not set");
+  TraceEvents::write_thread_unpark(_thread_id, _seq, _chained_seq);
+}
diff --git a/src/share/vm/evtrace/traceEvents.hpp b/src/share/vm/evtrace/traceEvents.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceEvents.hpp
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEEVENTS_HPP
+#define SHARE_VM_EVTRACE_TRACEEVENTS_HPP
+
+#include "evtrace/traceManager.hpp"
+#include "evtrace/traceMetadata.hpp"
+
+#include "memory/allocation.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/thread.hpp"
+
+class TraceWriter;
+class Klass;
+class ClassLoaderData;
+
+class TraceEvents: private TraceTypes // for mere convenience
+{
+public:
+  static void initialize();
+
+  static void write_thread_start();
+  static void write_thread_name_change(Thread *t);
+  static void write_thread_state_change(Thread *t);
+  static void write_thread_interrupt(Thread *t);
+  static void write_thread_exit();
+
+  static void write_thread_park_begin(JavaThread *t, bool is_absolute, timestamp park_time);
+  static void write_thread_park_end(Thread *t, seq_num seq, seq_num unpark_seq, park_return_code return_code);
+  static void write_thread_unpark(thread_id thread, seq_num seq, seq_num chained_seq);
+
+  static void write_monitor_inflate(ObjectMonitor *m, seq_num seq);
+  static void write_monitor_deflate(ObjectMonitor *m);
+  static void write_monitor_contended_enter(ObjectMonitor *m, monitor_enter_wait wait);
+  static void write_monitor_contended_entered(ObjectMonitor *m, monitor_entered_flags flags);
+  static void write_monitor_contended_exited(ObjectMonitor *m, seq_num seq, stack_id preallocated_stack_id, bool resolve_stack);
+  static void write_monitor_dummy(ObjectMonitor *m, seq_num seq);
+
+  static void write_class_loader_unload(ClassLoaderData *cld);
+
+  static void write_safepoint_begin(safepoint_reason reason);
+  static void write_safepoint_end(u4 vmops_processed);
+
+  static void write_vm_end();
+
+  static void write_metadata_reset();
+
+  static void write_group(JavaThread* t, seq_num park_seq_begin_ref, oop source);
+
+private:
+  TraceEvents() { }
+
+  static bool can_write();
+
+  static class_id  retrieve_class_id_or_write_metadata(Klass *k);
+  static method_id retrieve_method_id_or_write_metadata(Method *m);
+  static stack_id  retrieve_stack_id_or_write_metadata(JavaThread *t, stack_id preallocated_id = 0);
+  static void      write_stack_metadata(stack_id id, const CompositeTraceStack &ts);
+  static void      write_identical_stacks_metadata(stack_id id, stack_id known);
+
+  static timestamp        time_now();
+  static thread_id        thread_id_for(Thread *t);
+  static object_id        object_id_for(oop obj);
+  static objmonitor_id    objmonitor_id_for(ObjectMonitor *om);
+  static object_id        objmonitor_object_id_for(ObjectMonitor *om);
+  static classloader_id   classloader_id_for(ClassLoaderData *cld);
+  static method_id        method_id_for(Method *m);
+};
+
+class TraceEventThreadParkEnd: public StackObj {
+private:
+  No_Safepoint_Verifier         _nsv;
+  bool                          _enabled;
+  bool                          _filled;
+  Thread                       *_thread;
+  TraceTypes::seq_num           _seq;
+  TraceTypes::seq_num           _unpark_seq;
+  TraceTypes::park_return_code  _return_code;
+
+  void do_write();
+
+public:
+  TraceEventThreadParkEnd(Thread *t)
+  : _nsv(false, false), _filled(false), _thread(t)
+  {
+    assert(t != NULL, "null thread");
+    _enabled = EnableEventTracing && EnableEventTracingParkEvents && t->park_priority() >= 0;
+  }
+
+  void fill(TraceTypes::seq_num unpark_seq, TraceTypes::park_return_code return_code) {
+    assert(!_filled, "already filled");
+    _nsv.enable();
+    _enabled = _enabled && TraceManager::is_initialized();
+    if (_enabled) {
+      _seq = TraceManager::metadata()->next_global_seq();
+      _thread->set_park_last_global_seq(_seq);
+      _unpark_seq = unpark_seq;
+      _return_code = return_code;
+    }
+    _filled = true;
+  }
+
+  ~TraceEventThreadParkEnd() {
+    assert(_filled, "must have been filled");
+    if (_enabled) {
+      do_write();
+    }
+  }
+};
+
+class TraceEventThreadUnpark: public StackObj {
+private:
+  No_Safepoint_Verifier  _nsv;
+  bool                   _enabled;
+  TraceTypes::thread_id  _thread_id;
+  TraceTypes::seq_num    _seq;
+  TraceTypes::seq_num    _chained_seq;
+
+  void do_write();
+
+public:
+  TraceEventThreadUnpark(Thread *t) : _nsv(true, false), _chained_seq(-1) {
+    assert(t != NULL, "null thread");
+    _enabled = EnableEventTracing && EnableEventTracingParkEvents && TraceManager::is_initialized();
+    if (_enabled) {
+      _thread_id = TraceManager::metadata()->thread_id(t);
+      _seq = TraceManager::metadata()->next_global_seq();
+    } else {
+      _thread_id = 0;
+      _seq = 1;
+    }
+  }
+
+  ~TraceEventThreadUnpark() {
+    if (_enabled) {
+      do_write();
+    }
+  }
+
+  TraceTypes::seq_num seq() { return _seq; }
+
+  void   set_chained_seq(TraceTypes::seq_num chained_seq) { _chained_seq = chained_seq; }
+};
+
+class TraceEventMonitorContendedExited: public StackObj {
+private:
+  No_Safepoint_Verifier _nsv;
+  bool                  _enabled;
+  ObjectMonitor        *_monitor;
+  TraceTypes::seq_num   _seq;
+  TraceTypes::stack_id *_stack_id_at;
+  TraceTypes::stack_id  _preallocated_stack_id;
+  bool                  _resolve_stack;
+
+public:
+  TraceEventMonitorContendedExited(ObjectMonitor *m)
+  : _nsv(true, false), _enabled(false), _monitor(m), _seq(0), _stack_id_at(NULL), _resolve_stack(true)
+  {
+    if (EnableEventTracing && EventTracingStrictMonitorEventOrder) {
+      _seq = _monitor->next_trace_seq();
+    }
+  }
+
+  ~TraceEventMonitorContendedExited() {
+    if (_enabled) {
+      TraceTypes::stack_id id = 0;
+      if (_stack_id_at != NULL) {
+        if (*_stack_id_at == 0 && EnableEventTracingStackTraces) {
+          *_stack_id_at = TraceManager::metadata()->next_stack_id();
+        }
+        id = *_stack_id_at;
+      }
+      TraceEvents::write_monitor_contended_exited(_monitor, _seq, id, _resolve_stack);
+    } else if (EnableEventTracing && EventTracingStrictMonitorEventOrder && TraceManager::is_initialized()) {
+      assert(!_resolve_stack || _stack_id_at == NULL || *_stack_id_at == 0,
+             "event must be enabled if there is a stack id to resolve");
+
+      // must consume eagerly acquired sequence number
+      TraceEvents::write_monitor_dummy(_monitor, _seq);
+    }
+  }
+
+  void set_use_or_preallocate_stack_id_at(TraceTypes::stack_id *p) {
+    assert (_stack_id_at == NULL, "set only once");
+    _stack_id_at = p;
+  }
+
+  void set_use_stack_id(TraceTypes::stack_id id) {
+    _preallocated_stack_id = id;
+    set_use_or_preallocate_stack_id_at(&_preallocated_stack_id);
+  }
+
+  void set_resolve_stack(bool resolve) {
+    _resolve_stack = resolve;
+  }
+
+  void enable() {
+    if (EnableEventTracing && TraceManager::is_initialized()) {
+      if (!_enabled && !EventTracingStrictMonitorEventOrder) {
+        // lazily acquire sequence number, racing with other threads which try
+        // to spin-acquire the monitor and then write contended-entered events
+        _seq = _monitor->next_trace_seq();
+      }
+      _enabled = true;
+    }
+  }
+};
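+
+// A minimal usage sketch (illustrative only; the monitor exit path in the VM is
+// what actually drives this event):
+//
+//   TraceEventMonitorContendedExited event(monitor);
+//   if (had_contention) {     // hypothetical condition
+//     event.enable();         // acquires a sequence number unless strict ordering
+//   }                         // already did so in the constructor
+//   // ... release the monitor ...
+//   // the destructor writes the contended-exited event, or a dummy event to
+//   // consume an eagerly acquired sequence number under strict ordering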
+
+// for convenience, so this is the only file to include for writing events
+#include "evtrace/traceWriter.hpp"
+#include "evtrace/traceMacros.hpp"
+
+#endif /* SHARE_VM_EVTRACE_TRACEEVENTS_HPP */
diff --git a/src/share/vm/evtrace/traceJavaBridge.cpp b/src/share/vm/evtrace/traceJavaBridge.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceJavaBridge.cpp
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceJavaBridge.hpp"
+
+#include "evtrace/traceBuffer.hpp"
+#include "evtrace/traceBufferQueue.hpp"
+#include "evtrace/traceEvents.hpp"
+#include "evtrace/traceWriter.hpp"
+#include "evtrace/traceReaderThread.hpp"
+
+#include "prims/jni.h"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/javaCalls.hpp"
+
+#define TEVT_ENTRY(result_type, header) \
+  JVM_ENTRY(result_type, header)
+
+#define TEVT_END JVM_END
+
+TEVT_ENTRY(jobject, TEvT_DequeueBuffer(JNIEnv *env, jobject self, jlong queue_handle, const jboolean should_block))
+  TraceBufferQueue *queue = (TraceBufferQueue *) queue_handle;
+
+  TraceBuffer *buffer;
+
+  if (thread->is_TraceReader_thread()) {
+    // we do this to avoid interrupting the thread at a bad time
+    ((TraceReaderThread *) thread)->set_is_polling_queue(true);
+  }
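+  // Poll with a capped exponential backoff (4 ms, doubling up to 128 ms) so an
+  // empty queue does not busy-wait and shutdown (is_initialized() turning
+  // false) is still noticed promptly.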
+  jlong delay = 4;
+  for (;;) {
+    buffer = queue->try_dequeue();
+    if (buffer != NULL || !should_block || !TraceManager::is_initialized()) {
+      break;
+    }
+
+    os::sleep(thread, delay, true);
+    if (delay <= 64) {
+      delay = 2 * delay;
+    }
+  }
+  if (thread->is_TraceReader_thread()) {
+    ((TraceReaderThread *) thread)->set_is_polling_queue(false);
+  }
+
+  jobject bufobj = NULL;
+  if (buffer != NULL) {
+    ThreadToNativeFromVM ttn(thread);
+
+    jlong handle = (jlong) ((uintptr_t) buffer); // sign extension paranoia
+    size_t length = buffer->filled_size();
+    jobject bytebuf = env->NewDirectByteBuffer(buffer->data, length);
+    jclass clazz = env->FindClass("sun/evtracing/TraceBuffer");
+    jmethodID method = env->GetMethodID(clazz, "<init>", "(JLjava/nio/ByteBuffer;J)V");
+    assert(method != NULL, "must be successful");
+    bufobj = env->NewObject(clazz, method, handle, bytebuf, buffer->owner_id);
+    assert(bufobj != NULL, "must succeed");
+  }
+  return bufobj;
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_EnqueueBuffer(JNIEnv *env, jobject self, jlong queue_handle, jlong buffer_handle))
+  TraceBufferQueue *queue = (TraceBufferQueue *) queue_handle;
+  queue->enqueue((TraceBuffer *) buffer_handle);
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_ResetAndEnqueueBuffer(JNIEnv *env, jobject self, jlong queue_handle, jlong buffer_handle))
+  TraceBuffer *buffer = (TraceBuffer *) buffer_handle;
+  buffer->reset();
+  TraceBufferQueue *queue = (TraceBufferQueue *) queue_handle;
+  queue->enqueue((TraceBuffer *) buffer_handle);
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_FreeBuffer(JNIEnv *env, jobject self, jlong buffer_handle))
+  TraceBuffer *buffer = (TraceBuffer *) buffer_handle;
+  TraceManager::free_buffer(buffer);
+TEVT_END
+
+TEVT_ENTRY(jlong, TEvT_QueueCount(JNIEnv *env, jobject self, jlong queue_handle))
+  TraceBufferQueue *queue = (TraceBufferQueue *) queue_handle;
+  return (jlong) queue->count();
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_WriteGroupEvent(JNIEnv *env, jobject self, jlong park_global_seq_begin_ref, jobject source))
+  TraceEvents::write_group(thread, park_global_seq_begin_ref, instanceOop(JNIHandles::resolve_non_null(source)));
+TEVT_END
+
+TEVT_ENTRY(jstring, TEvT_GetConfiguration(JNIEnv *env, jobject self))
+  jstring config = NULL;
+  if (EventTracingConfiguration != NULL && EventTracingConfiguration[0] != '\0') {
+    ThreadToNativeFromVM ttn(thread);
+    config = env->NewStringUTF(EventTracingConfiguration);
+  }
+  return config;
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_PutNativeStatistics(JNIEnv *env, jobject self, jobject map))
+  JavaTraceStatistics jts(env, thread, instanceHandle(instanceOop(JNIHandles::resolve_non_null(map))), CHECK);
+  TraceManager::write_stats(&jts);
+TEVT_END
+
+JavaTraceStatistics::JavaTraceStatistics(JNIEnv* env, JavaThread* thread, instanceHandle map, TRAPS)
+: _env(env),
+  THREAD(THREAD),
+  _hashmap_obj(map)
+{
+}
+
+void JavaTraceStatistics::add_entry(const char* name, jdouble value) {
+  assert(name != NULL, "must not be NULL");
+
+  // assume a UTF-8 string to avoid the native transition required by java_lang_String::create_from_platform_dependent_str
+  Handle name_str = java_lang_String::create_from_str(name, CHECK);
+  oop value_oop = java_lang_boxing_object::create(T_DOUBLE, (jvalue*) &value, CHECK);
+  Handle value_handle(THREAD, value_oop);
+
+  JavaValue result(T_OBJECT);
+  JavaCalls::call_virtual(&result,
+                          _hashmap_obj,
+                          _hashmap_obj->klass(),
+                          vmSymbols::put_name(),
+                          vmSymbols::object_object_object_signature(),
+                          name_str,
+                          value_handle,
+                          CHECK);
+}
+
+TEVT_ENTRY(void, TEvT_ResetNativeStatistics(JNIEnv *env, jobject self))
+  TraceManager::reset_stats();
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_ResetNativeMetadata(JNIEnv *env, jobject self))
+  TraceManager::reset_metadata();
+TEVT_END
+
+TEVT_ENTRY(void, TEvT_ReclaimBuffers(JNIEnv *env, jobject self, jboolean wait_until_processed))
+  TraceManager::reclaim_buffers_in_safepoint(wait_until_processed);
+TEVT_END
+
+#define CC (char*)  /*cast a literal from (const char*)*/
+#define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)
+
+static JNINativeMethod tevtmethods[] = {
+  {CC"dequeueBuffer",             CC"(JZ)Lsun/evtracing/TraceBuffer;",   FN_PTR(TEvT_DequeueBuffer)},
+  {CC"enqueueBuffer",             CC"(JJ)V",                             FN_PTR(TEvT_EnqueueBuffer)},
+  {CC"resetAndEnqueueBuffer",     CC"(JJ)V",                             FN_PTR(TEvT_ResetAndEnqueueBuffer)},
+  {CC"freeBuffer",                CC"(J)V",                              FN_PTR(TEvT_FreeBuffer)},
+  {CC"queueCount",                CC"(J)J",                              FN_PTR(TEvT_QueueCount)   },
+  {CC"writeGroupEvent",           CC"(JLjava/lang/Object;)V",            FN_PTR(TEvT_WriteGroupEvent)},
+  {CC"getConfiguration",          CC"()Ljava/lang/String;",              FN_PTR(TEvT_GetConfiguration)},
+  {CC"putNativeStatistics",       CC"(Ljava/util/Map;)V",                FN_PTR(TEvT_PutNativeStatistics)},
+  {CC"resetNativeStatistics",     CC"()V",                               FN_PTR(TEvT_ResetNativeStatistics)},
+  {CC"resetNativeMetadata",       CC"()V",                               FN_PTR(TEvT_ResetNativeMetadata)},
+  {CC"reclaimBuffers",            CC"(Z)V",                              FN_PTR(TEvT_ReclaimBuffers)},
+};
+
+JVM_ENTRY(jboolean, JVM_RegisterEventTracingMethods(JNIEnv* env, jclass tevtclass))
+  if (EnableEventTracing) {
+    ThreadToNativeFromVM ttnfv(thread);
+    int ok = env->RegisterNatives(tevtclass, tevtmethods, sizeof(tevtmethods) / sizeof(JNINativeMethod));
+    guarantee(ok == 0, "register event tracing natives");
+  }
+  return EnableEventTracing;
+JVM_END
diff --git a/src/share/vm/evtrace/traceJavaBridge.hpp b/src/share/vm/evtrace/traceJavaBridge.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceJavaBridge.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEJAVABRIDGE_HPP
+#define SHARE_VM_EVTRACE_TRACEJAVABRIDGE_HPP
+
+#include "evtrace/traceManager.hpp"
+
+class JavaTraceStatistics: public TraceStatistics {
+private:
+  JNIEnv* _env;
+  TRAPS;
+  instanceHandle _hashmap_obj;
+
+public:
+  JavaTraceStatistics(JNIEnv* env, JavaThread* thread, instanceHandle map_obj, TRAPS);
+
+  virtual void add_entry(const char* name, jdouble value);
+};
+
+#endif /* SHARE_VM_EVTRACE_TRACEJAVABRIDGE_HPP */
diff --git a/src/share/vm/evtrace/traceMacros.hpp b/src/share/vm/evtrace/traceMacros.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceMacros.hpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEMACROS_HPP
+#define SHARE_VM_EVTRACE_TRACEMACROS_HPP
+
+#include "runtime/atomic.hpp"
+
+#define EVTRACE_DECLARE_TRACKED_CLASS_FIELDS \
+  jint _tracing_known;
+
+#define EVTRACE_INIT_TRACKED_CLASS \
+  do { _tracing_known = 0; } while (0)
+
+#define EVTRACE_DEFINE_TRACKED_CLASS_METHODS                 \
+  bool is_tracing_known() const {                            \
+    return _tracing_known;                                   \
+  }                                                          \
+                                                             \
+  bool atomic_mark_tracing_known() {                         \
+    return (Atomic::cmpxchg(1, &_tracing_known, 0) == 0);    \
+  }                                                          \
+                                                             \
+  void reset_tracing_known() {                               \
+    /* permitted at safepoint only, no CAS required */       \
+    _tracing_known = 0;                                      \
+  }
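+
+// A minimal sketch of how a tracked class is expected to use these macros (the
+// class name below is hypothetical; actual users are wired up elsewhere):
+//
+//   class TrackedThing {
+//     EVTRACE_DECLARE_TRACKED_CLASS_FIELDS
+//   public:
+//     TrackedThing() { EVTRACE_INIT_TRACKED_CLASS; }
+//     EVTRACE_DEFINE_TRACKED_CLASS_METHODS
+//   };
+//
+//   // the first caller of atomic_mark_tracing_known() wins and emits metadata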
+
+#endif /* SHARE_VM_EVTRACE_TRACEMACROS_HPP */
diff --git a/src/share/vm/evtrace/traceManager.cpp b/src/share/vm/evtrace/traceManager.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceManager.cpp
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceManager.hpp"
+
+#include "evtrace/traceBuffer.hpp"
+#include "evtrace/traceBufferQueue.hpp"
+#include "evtrace/traceReaderThread.hpp"
+#include "evtrace/traceEvents.hpp"
+
+#include "runtime/javaCalls.hpp"
+#include "runtime/thread.hpp"
+
+volatile bool      TraceManager::_is_initialized = false;
+TraceReaderThread *TraceManager::_thread         = NULL;
+Monitor           *TraceManager::_thread_mtx     = NULL;
+volatile bool      TraceManager::_thread_running = false;
+TraceBufferQueue  *TraceManager::_flush_queue    = NULL;
+TraceBufferQueue  *TraceManager::_free_queue     = NULL;
+TraceMetadata     *TraceManager::_metadata       = NULL;
+bool               TraceManager::_classes_unloaded_in_current_safepoint = false;
+DEBUG_ONLY(int     TraceManager::_current_safepoint_counter);
+
+// statistics
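+// (0xdead serves as a "not yet reset" sentinel; reset_stats() installs the real
+// initial values once tracing has been initialized)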
+volatile jlong TraceManager::_buffer_count             = 0xdead;
+volatile jlong TraceManager::_max_buffer_count         = 0xdead;
+volatile jlong TraceManager::_allocated_buffer_count   = 0xdead;
+volatile jlong TraceManager::_submitted_trace_bytes    = 0xdead;
+jlong          TraceManager::_reclaimed_trace_bytes    = 0xdead;
+volatile jlong TraceManager::_reclaim_time_nanos       = 0xdead;
+jlong          TraceManager::_stored_free_enqueue_ops  = 0xdead;
+jlong          TraceManager::_stored_free_dequeue_ops  = 0xdead;
+jlong          TraceManager::_stored_flush_enqueue_ops = 0xdead;
+jlong          TraceManager::_stored_flush_dequeue_ops = 0xdead;
+volatile jlong TraceManager::_total_stack_traces       = 0xdead;
+volatile jlong TraceManager::_truncated_stack_traces   = 0xdead;
+volatile jlong TraceManager::_reused_memento_stack_traces = 0xdead;
+volatile jlong TraceManager::_total_stack_frames       = 0xdead;
+volatile jlong TraceManager::_reused_memento_stack_frames = 0xdead;
+
+void TraceManager::initialize() {
+  assert(!_is_initialized, "already initialized");
+  assert(_flush_queue == NULL, "flush queue exists");
+  assert(_free_queue == NULL, "free queue exists");
+  assert(_thread == NULL, "thread exists");
+  assert(!_thread_running, "thread running");
+  assert(_metadata == NULL, "metadata exists");
+  assert(!_classes_unloaded_in_current_safepoint, "must be unset");
+
+  // nothing should be allocated before this point
+  // NOTE: requires -XX:NativeMemoryTracking=summary or =detail
+  assert(MallocMemorySummary::as_snapshot()->by_type(mtEventTracing)->malloc_size() == 0, "premature event tracing allocations");
+
+  guarantee(EventTracingStackDepthLimit <= TRACE_STACK_MAX_FRAMES, "stack depth limit is too high");
+  if (EventTracingStackDepthLimit == 0) {
+    EnableEventTracingStackTraces = false;
+  }
+
+  TraceEvents::initialize();
+
+  _metadata = new TraceMetadata();
+
+  _thread_mtx = new Monitor(Mutex::nonleaf + 1, "TraceManager::_thread_mtx");
+
+  _buffer_count = 0;
+  _flush_queue = new TraceBufferQueue();
+
+  if (EnableEventTracingBufferReuse) {
+    _free_queue = new TraceBufferQueue();
+
+    for (uintx i = 0; i < EventTracingPreallocatedBuffers; i++) {
+      _free_queue->enqueue(allocate_buffer());
+    }
+    assert((jlong)_free_queue->count() == (jlong) EventTracingPreallocatedBuffers, "sanity");
+  }
+
+  _is_initialized = true;
+
+  reset_stats();
+}
+
+Handle TraceManager::create_queue_object(TraceBufferQueue *q, instanceKlassHandle klass, TRAPS) {
+  JavaValue result(T_OBJECT);
+  result.set_jobject(NULL);
+  if (q != NULL) {
+    JavaCallArguments args;
+    args.push_long((jlong) q);
+    JavaCalls::call_static(&result,
+                           klass,
+                           vmSymbols::TraceBufferQueue_from_handle_method_name(),
+                           vmSymbols::TraceBufferQueue_from_handle_method_signature(),
+                           &args,
+                           CHECK_NH);
+  }
+  return Handle(THREAD, (oop) result.get_jobject());
+}
+
+void TraceManager::start_threads(TRAPS) {
+  assert(_thread == NULL && !_thread_running, "trace reader thread exists");
+
+  Klass* qk = SystemDictionary::resolve_or_fail(vmSymbols::sun_evtracing_TraceBufferQueue(), true, CHECK);
+  instanceKlassHandle q_klass(THREAD, qk);
+  q_klass->initialize(CHECK);
+
+  Handle flushq = create_queue_object(_flush_queue, q_klass, CHECK);
+  Handle freeq = create_queue_object(_free_queue, q_klass, CHECK);
+
+  _thread = TraceReaderThread::start(flushq(), freeq(), THREAD);
+  _thread_running = true;
+}
+
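+// Flushes each thread's non-empty trace buffer into the flush queue at a
+// safepoint (all buffers when deinitializing). Unless deinitializing, the
+// thread receives a replacement buffer from the free queue when buffer reuse
+// is enabled, or NULL otherwise.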
+class TraceManager::ReclaimTraceBuffersClosure : public ThreadClosure {
+  bool  _deinitialize;
+  jlong _reclaimed_bytes;
+
+public:
+  ReclaimTraceBuffersClosure(bool deinit)
+  : _deinitialize(deinit), _reclaimed_bytes(0)
+  {
+  }
+
+  ~ReclaimTraceBuffersClosure() { }
+
+  virtual void do_thread(Thread *t) {
+    assert(t != NULL, "null thread");
+
+    TraceBuffer *buffer = t->trace_buffer();
+    if (buffer != NULL && (_deinitialize || buffer->filled_size() > 0)) {
+      TraceManager::pre_submit_buffer(buffer);
+      _reclaimed_bytes += buffer->filled_size();
+      TraceManager::_flush_queue->enqueue(buffer);
+
+      buffer = NULL;
+      if (!_deinitialize && EnableEventTracingBufferReuse) {
+        buffer = TraceManager::_free_queue->try_dequeue();
+        if (buffer != NULL) {
+          buffer->owner = t;
+        }
+      }
+      t->set_trace_buffer(buffer);
+    }
+  }
+
+  jlong reclaimed_bytes() { return _reclaimed_bytes; }
+};
+
+class TraceManager::VM_ReclaimTraceBuffers : public VM_Operation {
+  bool  _deinitialize;
+  jlong _queue_fill_mark;
+
+public:
+  VM_ReclaimTraceBuffers(bool deinit) : _deinitialize(deinit) {}
+  VMOp_Type type() const { return VMOp_ReclaimTraceBuffers; }
+  void doit();
+
+  jlong queue_fill_mark() { return _queue_fill_mark; }
+};
+
+void TraceManager::VM_ReclaimTraceBuffers::doit() {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sanity");
+
+  if (_deinitialize) {
+    TraceEvents::write_vm_end();
+
+    // stop threads from acquiring buffers
+    TraceManager::_is_initialized = false;
+  }
+
+  TraceManager::ReclaimTraceBuffersClosure cl(_deinitialize);
+  Threads::threads_do(&cl);
+
+  _queue_fill_mark = TraceManager::_flush_queue->fill_mark();
+  TraceManager::_reclaimed_trace_bytes += cl.reclaimed_bytes();
+
+  if (_thread != NULL && _thread->is_polling_queue()) {
+    // reader thread is sleeping in poll loop, interrupt
+
+    // we don't want to interrupt the thread elsewhere, for example during
+    // NIO channel operations, when an interrupt shuts down the channel
+    Thread::interrupt(_thread);
+  }
+}
+
+void TraceManager::reclaim_buffers_in_safepoint(bool wait_until_processed) {
+  assert_initialized();
+
+  jlong reclaim_start = os::javaTimeNanos();
+
+  jlong fill_mark = do_reclaim_buffers_in_safepoint(false);
+
+  if (wait_until_processed) {
+    // NOTE: this doesn't actually wait until the last buffer has been
+    //       *processed*, only until the reader thread has *dequeued*
+    //       the last reclaimed buffer (usually when the second-to-last
+    //       buffer has been processed)
+    while (!_flush_queue->has_dequeued_at_mark(fill_mark)) {
+      os::sleep(Thread::current(), 1, true);
+    }
+  }
+
+  if (EnableEventTracingDiagnostics) {
+    // FIXME: counter value depends on whether callers set wait_until_processed
+    jlong duration = os::javaTimeNanos() - reclaim_start;
+    Atomic::add_ptr(duration, &_reclaim_time_nanos);
+  }
+}
+
+jlong TraceManager::do_reclaim_buffers_in_safepoint(bool deinit) {
+  VM_ReclaimTraceBuffers op(deinit);
+  VMThread::execute(&op);
+
+  return op.queue_fill_mark();
+}
+
+class TraceManager::VM_ResetTraceMetadata : public VM_Operation {
+public:
+  VM_ResetTraceMetadata() {}
+  VMOp_Type type() const { return VMOp_ResetTraceMetadata; }
+  void doit();
+  bool allow_nested_vm_operations() const { return true; }
+};
+
+void TraceManager::VM_ResetTraceMetadata::doit() {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sanity");
+
+  VM_ReclaimTraceBuffers reclaim_op(false);
+  VMThread::execute(&reclaim_op);
+
+  if (_thread != NULL && _thread->is_polling_queue() && _thread->trace_buffer() != NULL) {
+    // The ReclaimTraceBuffers VM op has interrupted the reader thread, which
+    // generated a ThreadInterrupt event containing a sequence number.
+    // Therefore, this event must be submitted before the metadata reset event.
+
+    submit_buffer(Thread::current()->trace_buffer());
+    Thread::current()->set_trace_buffer(NULL);
+  }
+
+  TraceManager::_metadata->purge_all();
+
+  TraceEvents::write_metadata_reset();
+  if (Thread::current()->trace_buffer() != NULL) {
+    submit_buffer(Thread::current()->trace_buffer());
+    Thread::current()->set_trace_buffer(NULL);
+  }
+}
+
+void TraceManager::reset_metadata() {
+  VM_ResetTraceMetadata op;
+  VMThread::execute(&op);
+}
+
+void TraceManager::nmethod_is_unloading(const nmethod *nm) {
+  if (_is_initialized) {
+    _metadata->purge_unloading_nmethod(nm);
+  }
+}
+
+void TraceManager::class_loader_is_unloading(ClassLoaderData *loader) {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sanity");
+
+  if (!_is_initialized)
+    return;
+
+  // When classes and methods are unloaded during garbage collection, their
+  // memory is reclaimed and other classes and methods can be loaded in their
+  // place. Since we use addresses as unique identifiers, we need to observe
+  // such unload events and purge any references to unloaded metadata from
+  // our records.
+  //
+  // Trace analysis must be aware of unloads as well, since any trace
+  // events after an unload can use the same identifiers to refer to different
+  // data. Therefore, we reclaim the trace buffers from all threads at this
+  // point. We then write one or multiple unload events to the trace buffer of
+  // the VM thread, and finally enqueue that buffer before the end of the
+  // safepoint. Therefore, when processing the buffers in the order in which
+  // they are enqueued, trace analysis encounters the necessary unload events
+  // before any events after the unload.
+
+  if (!_classes_unloaded_in_current_safepoint) {
+    // G1 doesn't allow nested VM ops
+    VM_ReclaimTraceBuffers(false).doit();
+    _classes_unloaded_in_current_safepoint = true;
+    DEBUG_ONLY(_current_safepoint_counter = SafepointSynchronize::_safepoint_counter);
+  } else {
+    assert(_current_safepoint_counter == SafepointSynchronize::_safepoint_counter, "different safepoint");
+  }
+
+  TraceEvents::write_class_loader_unload(loader);
+
+  _metadata->purge_unloading_classes(loader);
+}
+
+void TraceManager::do_work_before_safepoint_end() {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sanity");
+
+  if (_is_initialized) {
+    VM_ReclaimTraceBuffers(false).doit(); // don't bother with VM op
+
+    if (_classes_unloaded_in_current_safepoint) {
+      assert(_current_safepoint_counter == SafepointSynchronize::_safepoint_counter, "different safepoint");
+      _classes_unloaded_in_current_safepoint = false;
+    }
+
+    _metadata->do_maintenance();
+  }
+}
+
+void TraceManager::thread_is_exiting(TraceReaderThread* t) {
+  guarantee(!_is_initialized, "thread exiting prematurely");
+  guarantee(t == _thread, "unexpected thread");
+  guarantee(_thread_running, "not running?");
+
+  MonitorLockerEx ml(_thread_mtx);
+  _thread_running = false;
+  ml.notify_all();
+}
+
+void TraceManager::finish_and_destroy(TRAPS) {
+  assert_initialized();
+  assert(_thread != NULL, "thread not initialized");
+
+  jlong reclaim_start = os::javaTimeNanos();
+
+  // This only sets a flag; the reader thread checks it once its queue
+  // operations start failing after the reclaim op below
+  _thread->shutdown(CHECK);
+
+  _thread_mtx->lock();
+
+  do_reclaim_buffers_in_safepoint(true);
+
+  while (_thread_running) {
+    _thread_mtx->wait(); // wait for trace reader thread to call back
+  }
+  _thread_mtx->unlock();
+  delete _thread_mtx;
+  _thread_mtx = NULL;
+  _thread = NULL; // NOTE: thread deletes itself on exit
+
+  if (EnableEventTracingDiagnostics) {
+    _reclaim_time_nanos += (os::javaTimeNanos() - reclaim_start);
+  }
+
+  // We keep these around to provide statistics to agents after deinitialization (during shutdown)
+  // FIXME: we should fill a single TraceStatistics object instead of all those variables
+  if (EnableEventTracingBufferReuse) {
+    _stored_free_enqueue_ops =  _free_queue->enqueue_ops()  - _stored_free_enqueue_ops;
+    _stored_free_dequeue_ops =  _free_queue->dequeue_ops()  - _stored_free_dequeue_ops;
+  }
+  _stored_flush_enqueue_ops = _flush_queue->enqueue_ops() - _stored_flush_enqueue_ops;
+  _stored_flush_dequeue_ops = _flush_queue->dequeue_ops() - _stored_flush_dequeue_ops;
+
+  assert(_flush_queue->is_empty(), "flush queue not empty");
+  delete _flush_queue;
+  _flush_queue = NULL;
+
+  if (EnableEventTracingBufferReuse) {
+    while (!_free_queue->is_empty()) {
+      TraceBuffer *buffer = _free_queue->try_dequeue();
+      assert(buffer != NULL, "null buffer from non-empty queue");
+      free_buffer(buffer);
+    }
+    delete _free_queue;
+    _free_queue = NULL;
+  }
+  assert(_free_queue == NULL, "sanity");
+
+  delete _metadata;
+  _metadata = NULL;
+
+  // all event tracing memory must be deallocated here
+  // NOTE: requires -XX:NativeMemoryTracking=summary or =detail
+  assert(MallocMemorySummary::as_snapshot()->by_type(mtEventTracing)->malloc_size() == 0, "memory leak");
+}
+
+void TraceManager::write_stats(TraceStatistics *stats) {
+  assert(!_is_initialized || _flush_queue != NULL, "sanity");
+
+  if (EnableEventTracingBufferReuse) {
+    jlong free_enqueue_ops, free_dequeue_ops;
+    free_enqueue_ops =  _is_initialized ? _free_queue->enqueue_ops()  - _stored_free_enqueue_ops  : _stored_free_enqueue_ops;
+    free_dequeue_ops =  _is_initialized ? _free_queue->dequeue_ops()  - _stored_free_dequeue_ops  : _stored_free_dequeue_ops;
+    stats->add_entry("free_enqueue_ops", free_enqueue_ops);
+    stats->add_entry("free_dequeue_ops", free_dequeue_ops);
+  }
+
+  jlong flush_enqueue_ops, flush_dequeue_ops;
+  flush_enqueue_ops = _is_initialized ? _flush_queue->enqueue_ops() - _stored_flush_enqueue_ops : _stored_flush_enqueue_ops;
+  flush_dequeue_ops = _is_initialized ? _flush_queue->dequeue_ops() - _stored_flush_dequeue_ops : _stored_flush_dequeue_ops;
+  stats->add_entry("flush_enqueue_ops", flush_enqueue_ops);
+  stats->add_entry("flush_dequeue_ops", flush_dequeue_ops);
+
+  stats->add_entry("mean_buffer_capacity", EventTracingBufferCapacity);
+  stats->add_entry("reclaimed_trace_bytes", _reclaimed_trace_bytes);
+
+  if (EnableEventTracingDiagnostics) {
+    stats->add_entry("submitted_trace_bytes", _submitted_trace_bytes);
+    stats->add_entry("reclaim_time_nanos", _reclaim_time_nanos);
+    stats->add_entry("stack_cache_lookups", _metadata->stack_cache_lookups());
+    stats->add_entry("stack_cache_lookup_misses", _metadata->stack_cache_lookup_misses());
+    stats->add_entry("stack_cache_lookup_collisions", _metadata->stack_cache_lookup_collisions());
+    stats->add_entry("stack_cache_probes", _metadata->stack_cache_probes());
+    stats->add_entry("stack_cache_probe_collisions", _metadata->stack_cache_probe_collisions());
+    stats->add_entry("stack_cache_purge_millis", _metadata->stack_cache_purge_millis());
+    stats->add_entry("stack_cache_maintenance_millis", _metadata->stack_cache_maintenance_millis());
+    stats->add_entry("total_stack_traces", _total_stack_traces);
+    stats->add_entry("truncated_stack_traces", _truncated_stack_traces);
+    stats->add_entry("reused_memento_stack_traces", _reused_memento_stack_traces);
+    stats->add_entry("total_stack_frames", _total_stack_frames);
+    stats->add_entry("reused_memento_stack_frames", _reused_memento_stack_frames);
+    stats->add_entry("buffer_count", _buffer_count);
+    stats->add_entry("max_buffer_count", _max_buffer_count);
+    stats->add_entry("allocated_buffer_count", _allocated_buffer_count);
+  }
+}
+
+void TraceManager::reset_stats() {
+  assert(!_is_initialized || _flush_queue != NULL, "sanity");
+
+  if (_is_initialized) {
+    if (EnableEventTracingBufferReuse) {
+      _stored_free_enqueue_ops  = _free_queue->enqueue_ops();
+      _stored_free_dequeue_ops  = _free_queue->dequeue_ops();
+    }
+
+    _stored_flush_enqueue_ops = _flush_queue->enqueue_ops();
+    _stored_flush_dequeue_ops = _flush_queue->dequeue_ops();
+  }
+
+  _reclaimed_trace_bytes = 0;
+
+  if (EnableEventTracingDiagnostics) {
+    _submitted_trace_bytes = 0;
+    _reclaim_time_nanos = 0;
+    _total_stack_traces = 0;
+    _truncated_stack_traces = 0;
+    _reused_memento_stack_traces = 0;
+    _total_stack_frames = 0;
+    _reused_memento_stack_frames = 0;
+    _max_buffer_count = _buffer_count;
+    _allocated_buffer_count = 0;
+
+    _metadata->reset_stack_cache_stats();
+  }
+}
diff --git a/src/share/vm/evtrace/traceManager.hpp b/src/share/vm/evtrace/traceManager.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceManager.hpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEMANAGER_HPP
+#define SHARE_VM_EVTRACE_TRACEMANAGER_HPP
+
+#include "runtime/handles.hpp"
+#include "utilities/exceptions.hpp"
+
+class TraceBuffer;
+class TraceBufferQueue;
+class TraceReaderThread;
+class TraceMetadata;
+class Monitor;
+
+class TraceStatistics {
+public:
+  virtual ~TraceStatistics() {}
+
+  virtual void add_entry(const char* name, jdouble value) = 0;
+};
+
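+// Central coordinator of the event tracing subsystem: owns the trace buffer
+// queues, the trace reader thread and the trace metadata, and maintains
+// diagnostic statistics. All members are static; the class is only used through
+// its static interface.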
+class TraceManager {
+private:
+  static void assert_initialized();
+
+  static volatile bool      _is_initialized;
+  static TraceReaderThread *_thread;
+  static volatile bool      _thread_running;
+  static Monitor           *_thread_mtx;
+  static TraceBufferQueue  *_free_queue;
+  static TraceBufferQueue  *_flush_queue;
+  static TraceMetadata     *_metadata;
+  static bool               _classes_unloaded_in_current_safepoint;
+  DEBUG_ONLY(static int     _current_safepoint_counter);
+
+  // statistics
+  static volatile jlong _buffer_count, _max_buffer_count, _allocated_buffer_count;
+  static volatile jlong _submitted_trace_bytes;
+  static jlong          _reclaimed_trace_bytes;
+  static volatile jlong _reclaim_time_nanos;
+  static jlong          _stored_free_dequeue_ops, _stored_free_enqueue_ops;
+  static jlong          _stored_flush_dequeue_ops, _stored_flush_enqueue_ops;
+  static volatile jlong _total_stack_traces, _truncated_stack_traces, _reused_memento_stack_traces, _total_stack_frames, _reused_memento_stack_frames;
+
+  static Handle create_queue_object(TraceBufferQueue *q, instanceKlassHandle klass, TRAPS);
+
+  static jlong do_reclaim_buffers_in_safepoint(bool deinit);
+  static void pre_submit_buffer(TraceBuffer *buffer);
+
+  static TraceBuffer *allocate_buffer();
+
+public:
+  class VM_ReclaimTraceBuffers;
+  class VM_ResetTraceMetadata;
+  class VM_ResetTraceStatistics;
+  class ReclaimTraceBuffersClosure;
+
+  static void initialize();
+  static bool is_initialized();
+  static void start_threads(TRAPS);
+  static void reclaim_buffers_in_safepoint(bool wait_until_processed);
+  static void finish_and_destroy(TRAPS);
+
+  static TraceBuffer * request_buffer();
+  static void submit_buffer(TraceBuffer *buffer);
+  static void free_buffer(TraceBuffer *buffer);
+
+  static TraceMetadata * metadata();
+  static void reset_metadata();
+
+  static void do_work_before_safepoint_end();
+
+  static void nmethod_is_unloading(const nmethod *nm);
+  static void class_loader_is_unloading(ClassLoaderData *loader);
+
+  static void thread_is_exiting(TraceReaderThread *t);
+
+  // statistics
+  static void write_stats(TraceStatistics *stats);
+  static void reset_stats();
+  static void update_stack_trace_stats(bool truncated, jlong total_frames, jlong memento_frames);
+};
+
+#include "evtrace/traceManager.inline.hpp"
+
+#endif /* SHARE_VM_EVTRACE_TRACEMANAGER_HPP */
diff --git a/src/share/vm/evtrace/traceManager.inline.hpp b/src/share/vm/evtrace/traceManager.inline.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceManager.inline.hpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEMANAGER_INLINE_HPP
+#define SHARE_VM_EVTRACE_TRACEMANAGER_INLINE_HPP
+
+#include "evtrace/traceBuffer.hpp"
+#include "evtrace/traceBufferQueue.hpp"
+#include "evtrace/traceMetadata.hpp"
+
+inline bool TraceManager::is_initialized() {
+  return _is_initialized;
+}
+
+inline void TraceManager::assert_initialized() {
+  assert(_is_initialized, "not initialized");
+  assert(_free_queue != NULL || !EnableEventTracingBufferReuse, "freeQueue not initialized");
+  assert(_flush_queue != NULL, "flushQueue not initialized");
+}
+
+inline TraceBuffer *TraceManager::allocate_buffer() {
+  // Trade-off: small buffer sizes reduce the time it takes to process a buffer
+  // and make it available again, but cause more queue operations (and possible
+  // contention). Large buffers cause less queue contention, but are wasteful
+  // for short-lived threads, and reclaim operations that wait for buffers to be
+  // processed can take significantly longer.
+  size_t capacity = EventTracingBufferCapacity;
+  if (EnableEventTracingRandomizedBufferCapacity) {
+    // randomize buffer sizes so that threads doing similar work do not submit
+    // and request buffers all at the same time
+    capacity = round_to(capacity / 2 + ((size_t) os::random()) % capacity, 128);
+  }
+  TraceBuffer *buffer = new (capacity) TraceBuffer(capacity);
+  if (EnableEventTracingDiagnostics) {
+    Atomic::inc_ptr(&_allocated_buffer_count);
+    jlong count = Atomic::add_ptr(1, &_buffer_count);
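+    // lock-free update of the high-water mark: retry while our new count is
+    // still above the published maximum and the CAS has not succeeded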
+    jlong max;
+    do {
+      max = _max_buffer_count;
+    } while (count > max && Atomic::cmpxchg(count, &_max_buffer_count, max) != max);
+  }
+  return buffer;
+}
+
+inline void TraceManager::free_buffer(TraceBuffer *buffer) {
+  assert(buffer != NULL, "sanity");
+  delete buffer;
+
+  if (EnableEventTracingDiagnostics) {
+    Atomic::dec_ptr(&_buffer_count);
+  }
+}
+
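+// Returns a buffer for the calling thread to write events into, preferring a
+// recycled buffer from the free queue when buffer reuse is enabled.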
+inline TraceBuffer *TraceManager::request_buffer() {
+  assert_initialized();
+
+  TraceBuffer *buffer = NULL;
+  if (EnableEventTracingBufferReuse) {
+    buffer = _free_queue->try_dequeue();
+  }
+  if (buffer == NULL) {
+    buffer = allocate_buffer();
+  }
+  return buffer;
+}
+
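+// Stamps the buffer with its owner's thread id and detaches it from the owning
+// thread before it is handed over to the flush queue.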
+inline void TraceManager::pre_submit_buffer(TraceBuffer *buffer) {
+  assert(buffer != NULL, "no buffer given");
+  assert(buffer->owner_id == 0, "must not be set at this point");
+  buffer->owner_id = _metadata->thread_id(buffer->owner);
+  buffer->owner = NULL;
+}
+
+inline void TraceManager::submit_buffer(TraceBuffer *buffer) {
+  assert_initialized();
+  assert(buffer != NULL, "buffer is NULL");
+
+  TraceManager::pre_submit_buffer(buffer);
+  size_t bytes = buffer->filled_size();
+  _flush_queue->enqueue(buffer);
+
+  if (EnableEventTracingDiagnostics) {
+    Atomic::add(bytes, &_submitted_trace_bytes);
+  }
+}
+
+inline TraceMetadata *TraceManager::metadata() {
+  assert_initialized();
+  return _metadata;
+}
+
+inline void TraceManager::update_stack_trace_stats(bool truncated, jlong total_frames, jlong memento_frames) {
+  assert_initialized();
+  if (EnableEventTracingDiagnostics) {
+    assert(total_frames > 0 && memento_frames >= 0, "sanity");
+    Atomic::inc_ptr(&_total_stack_traces);
+    if (truncated) {
+      Atomic::inc_ptr(&_truncated_stack_traces);
+    }
+    Atomic::add_ptr(total_frames, &_total_stack_frames);
+    if (memento_frames != 0) {
+      assert(memento_frames < total_frames, "sanity");
+      Atomic::inc_ptr(&_reused_memento_stack_traces);
+      Atomic::add_ptr(memento_frames, &_reused_memento_stack_frames);
+    }
+  }
+}
+
+#endif /* SHARE_VM_EVTRACE_TRACEMANAGER_INLINE_HPP */
diff --git a/src/share/vm/evtrace/traceMetadata.cpp b/src/share/vm/evtrace/traceMetadata.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceMetadata.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceMetadata.hpp"
+
+TraceMetadata::TraceMetadata() {
+  _stack_cache = new TraceStackCache(this);
+  _last_stack_id = 0;
+  _last_global_seq = 0;
+}
+
+TraceMetadata::~TraceMetadata() {
+  delete _stack_cache;
+}
+
+class TraceMetadata::MarkKlassUnknownClosure: public KlassClosure {
+  static void mark_unknown(Method *m) {
+    m->reset_tracing_known();
+  }
+
+public:
+  virtual void do_klass(Klass *k) {
+    k->reset_tracing_known();
+    if (k->oop_is_instance()) {
+      InstanceKlass::cast(k)->methods_do(mark_unknown);
+    }
+  }
+};
+
+class TraceMetadata::ClearInvalidatedMementoStacksClosure: public ThreadClosure {
+  virtual void do_thread(Thread *t) {
+    assert(t != NULL, "null thread");
+    if (t->memento_stack_trace() != NULL && !t->memento_stack_trace()->is_valid()) {
+      t->set_memento_stack_trace(NULL);
+    }
+  }
+};
+
+void TraceMetadata::purge_all() {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "must be done in VM thread at safepoint");
+
+  MarkKlassUnknownClosure mark_unknown;
+  ClassLoaderDataGraph::classes_do(&mark_unknown);
+
+  _stack_cache->purge_all();
+}
+
+void TraceMetadata::do_maintenance() {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "must be done in VM thread at safepoint");
+
+  if (_stack_cache->has_invalid_stacks()) {
+    ClearInvalidatedMementoStacksClosure clear_invalid;
+    Threads::threads_do(&clear_invalid);
+  }
+
+  _stack_cache->do_maintenance();
+  assert(!_stack_cache->has_invalid_stacks(), "sanity");
+}
diff --git a/src/share/vm/evtrace/traceMetadata.hpp b/src/share/vm/evtrace/traceMetadata.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceMetadata.hpp
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEMETADATA_HPP
+#define SHARE_VM_EVTRACE_TRACEMETADATA_HPP
+
+#include "evtrace/traceStack.hpp"
+#include "evtrace/traceTypes.hpp"
+
+#include "memory/allocation.hpp"
+#include "oops/oopsHierarchy.hpp"
+
+class ObjectMonitor;
+class Klass;
+class Method;
+
+class TraceMetadata: public CHeapObj<mtEventTracing> {
+private:
+  TraceStackCache *_stack_cache;
+  volatile intptr_t _last_stack_id;
+  volatile intptr_t _last_global_seq;
+
+  class MarkKlassUnknownClosure;
+
+  class ClearInvalidatedMementoStacksClosure;
+
+public:
+  TraceMetadata();
+  virtual ~TraceMetadata();
+
+  TraceTypes::timestamp         time_now();
+  TraceTypes::thread_id         thread_id(Thread *t);
+  TraceTypes::object_id         object_id(oop obj);
+  TraceTypes::objmonitor_id     objmonitor_id(ObjectMonitor *m);
+  TraceTypes::object_id         objmonitor_object_id(ObjectMonitor *m);
+  TraceTypes::thread_state      thread_state(Thread *t);
+  TraceTypes::classloader_id    classloader_id(ClassLoaderData *c);
+  TraceTypes::class_id          class_id(Klass *k, bool &added);
+  TraceTypes::method_id         method_id(Method *m, bool &added);
+  const CachedTraceStack *      get_or_try_add_stack(CompositeTraceStack &ts, bool &known, TraceTypes::stack_id preallocated_id = 0);
+  TraceTypes::stack_id          next_stack_id();
+  TraceTypes::seq_num           next_global_seq();
+
+  void  purge_all();
+  void  purge_unloading_classes(ClassLoaderData *loader);
+  void  purge_unloading_nmethod(const nmethod *nm);
+
+  void  do_maintenance();
+
+  jlong stack_cache_lookups()            { return _stack_cache->lookups();            }
+  jlong stack_cache_lookup_misses()      { return _stack_cache->lookup_misses();      }
+  jlong stack_cache_lookup_collisions()  { return _stack_cache->lookup_collisions();  }
+  jlong stack_cache_probes()             { return _stack_cache->probes();             }
+  jlong stack_cache_probe_collisions()   { return _stack_cache->probe_collisions();   }
+  jlong stack_cache_purge_millis()       { return _stack_cache->purge_millis();       }
+  jlong stack_cache_maintenance_millis() { return _stack_cache->maintenance_millis(); }
+  void reset_stack_cache_stats()         { _stack_cache->reset_stats();               }
+};
+
+#include "evtrace/traceMetadata.inline.hpp"
+
+#endif /* SHARE_VM_EVTRACE_TRACEMETADATA_HPP */
diff --git a/src/share/vm/evtrace/traceMetadata.inline.hpp b/src/share/vm/evtrace/traceMetadata.inline.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceMetadata.inline.hpp
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEMETADATA_INLINE_HPP
+#define SHARE_VM_EVTRACE_TRACEMETADATA_INLINE_HPP
+
+#include "runtime/thread.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/objectMonitor.inline.hpp"
+#include "jvmtifiles/jvmtiEnv.hpp"
+
+inline TraceTypes::timestamp TraceMetadata::time_now() {
+  return (TraceTypes::timestamp) os::javaTimeNanos();
+}
+
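+// Java threads are identified by their java.lang.Thread id; other VM threads are
+// distinguished by their negated OS thread id.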
+inline TraceTypes::thread_id TraceMetadata::thread_id(Thread *t) {
+  assert(t != NULL, "thread is NULL");
+  if (t->is_Java_thread()) {
+    JavaThread *jt = (JavaThread *) t;
+    return (TraceTypes::thread_id) java_lang_Thread::thread_id(jt->threadObj());
+  }
+  return (TraceTypes::thread_id) -t->osthread()->thread_identifier();
+}
+
+inline TraceTypes::object_id TraceMetadata::object_id(oop obj) {
+  assert(obj != NULL, "object is NULL");
+  return (TraceTypes::object_id) obj->identity_hash();
+}
+
+inline TraceTypes::objmonitor_id TraceMetadata::objmonitor_id(ObjectMonitor* m) {
+  assert(m != NULL, "object monitor is NULL");
+  // NOTE: object monitors are native objects and are not moved by the garbage collector, so
+  // they can be identified by their address. However, monitors are allocated from pools and
+  // recycled after an object dies or on deflation. We must take this lifecycle into account
+  // when associating object monitors with objects.
+  return (TraceTypes::objmonitor_id) (intptr_t) m;
+}
+
+inline TraceTypes::object_id TraceMetadata::objmonitor_object_id(ObjectMonitor* m) {
+  assert(m != NULL, "object monitor is NULL");
+  if (m->header()->has_no_hash()) {
+    oop obj = (oop) m->object();
+    return (TraceTypes::object_id) obj->identity_hash();
+  }
+  return (TraceTypes::object_id) m->header()->hash();
+}
+
+inline TraceTypes::thread_state TraceMetadata::thread_state(Thread *t) {
+  assert(t != NULL, "thread is NULL");
+  assert(t->is_Java_thread(), "only Java threads supported");
+
+  // just use JVMTI thread states
+  JavaThread *jt = (JavaThread *) t;
+  TraceTypes::thread_state state = 0;
+  if (jt->threadObj() != NULL) { // provides most state bits
+    state = (TraceTypes::thread_state) java_lang_Thread::get_thread_status(jt->threadObj());
+  }
+  // additional state bits
+  if (jt->is_ext_suspended() || jt->is_external_suspend()) { // same as is_being_ext_suspended() but without locking
+    state |= JVMTI_THREAD_STATE_SUSPENDED;
+  }
+  JavaThreadState jts = jt->thread_state();
+  if (jts == _thread_in_native) {
+    state |= JVMTI_THREAD_STATE_IN_NATIVE;
+  }
+  OSThread* osThread = jt->osthread();
+  if (osThread != NULL && osThread->interrupted()) {
+    state |= JVMTI_THREAD_STATE_INTERRUPTED;
+  }
+  return state;
+}
+
+inline TraceTypes::classloader_id TraceMetadata::classloader_id(ClassLoaderData* cld) {
+  return (TraceTypes::classloader_id) (intptr_t) cld;
+}
+
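+// 'added' is true only for the single call that transitions the klass (or method,
+// below) from unknown to known; concurrent callers observe false.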
+inline TraceTypes::class_id TraceMetadata::class_id(Klass *klass, bool &added) {
+  added = false;
+  if (!klass->is_tracing_known()) {
+    added = klass->atomic_mark_tracing_known();
+  }
+  return (TraceTypes::class_id) (intptr_t) klass;
+}
+
+inline TraceTypes::method_id TraceMetadata::method_id(Method* method, bool& added) {
+  assert(method->method_holder()->is_tracing_known(), "klass must be known");
+
+  added = false;
+  if (!method->is_tracing_known()) {
+    added = method->atomic_mark_tracing_known();
+  }
+  return (TraceTypes::method_id) (intptr_t) method;
+}
+
+inline TraceTypes::stack_id TraceMetadata::next_stack_id() {
+  assert(EnableEventTracingStackTraces, "stack traces not enabled");
+  return (TraceTypes::stack_id) Atomic::add_ptr(1, &_last_stack_id);
+}
+
+inline TraceTypes::seq_num TraceMetadata::next_global_seq() {
+  return (TraceTypes::seq_num) Atomic::add_ptr(1, &_last_global_seq);
+}
+
+inline const CachedTraceStack * TraceMetadata::get_or_try_add_stack(CompositeTraceStack &ts, bool &known, TraceTypes::stack_id preallocated_id) {
+  return _stack_cache->get_or_try_add(ts, known, preallocated_id);
+}
+
+inline void TraceMetadata::purge_unloading_classes(ClassLoaderData *loader) {
+  _stack_cache->purge_unloading_classes(loader);
+}
+
+inline void TraceMetadata::purge_unloading_nmethod(const nmethod *nm) {
+  _stack_cache->purge_unloading_nmethod(nm);
+}
+
+#endif /* SHARE_VM_EVTRACE_TRACEMETADATA_INLINE_HPP */
diff --git a/src/share/vm/evtrace/traceReaderThread.cpp b/src/share/vm/evtrace/traceReaderThread.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceReaderThread.cpp
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceReaderThread.hpp"
+
+#include "evtrace/traceManager.hpp"
+
+#include "runtime/javaCalls.hpp"
+
+TraceReaderThread::TraceReaderThread(oop obj)
+: JavaThread(trace_reader_thread_entry),
+  _object(obj),
+  _is_polling_queue(false)
+{
+}
+
+TraceReaderThread::~TraceReaderThread() {
+  TraceManager::thread_is_exiting(this);
+}
+
+void TraceReaderThread::trace_reader_thread_entry(JavaThread* thread, TRAPS) {
+  TraceReaderThread *self = (TraceReaderThread *) thread;
+  JavaValue result(T_VOID);
+  Klass* klass = SystemDictionary::resolve_or_fail(vmSymbols::sun_evtracing_TraceReaderThread(), true, CHECK);
+  JavaCalls::call_virtual(&result,
+                          self->_object,
+                          klass,
+                          vmSymbols::run_method_name(),
+                          vmSymbols::void_method_signature(),
+                          CHECK);
+}
+
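+// Allocates the Java-level TraceReaderThread and its java.lang.Thread object (a
+// daemon thread in the system thread group), then creates and starts the native
+// thread under the Threads_lock.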
+TraceReaderThread* TraceReaderThread::start(Handle flushq, Handle freeq, TRAPS) {
+  Klass* rt_klass = SystemDictionary::resolve_or_fail(vmSymbols::sun_evtracing_TraceReaderThread(), true, CHECK_NULL);
+  instanceKlassHandle rt_khandle(THREAD, rt_klass);
+  rt_khandle->initialize(CHECK_NULL);
+  instanceHandle rt_obj = rt_khandle->allocate_instance_handle(CHECK_NULL);
+
+  JavaValue result(T_VOID);
+  JavaCalls::call_special(&result,
+                          rt_obj,
+                          rt_khandle,
+                          vmSymbols::object_initializer_name(),
+                          vmSymbols::TraceReaderThread_constructor_signature(),
+                          flushq,
+                          freeq,
+                          CHECK_NULL);
+
+  Klass* t_klass = SystemDictionary::resolve_or_fail(vmSymbols::java_lang_Thread(), true, CHECK_NULL);
+  instanceKlassHandle t_khandle(THREAD, t_klass);
+  instanceHandle t_obj = t_khandle->allocate_instance_handle(CHECK_NULL);
+
+  const char thread_name[] = "Trace Reader Thread";
+  Handle string = java_lang_String::create_from_str(thread_name, CHECK_NULL);
+
+  // Initialize thread_oop to put it into the system threadGroup
+  Handle thread_group(THREAD, Universe::system_thread_group());
+  JavaCalls::call_special(&result,
+                          t_obj,
+                          t_khandle,
+                          vmSymbols::object_initializer_name(),
+                          vmSymbols::threadgroup_string_void_signature(),
+                          thread_group,
+                          string,
+                          CHECK_NULL);
+
+  TraceReaderThread *trt = NULL;
+  {
+    MutexLocker mu(Threads_lock);
+    trt = new TraceReaderThread(rt_obj());
+
+    if (trt == NULL || trt->osthread() == NULL) {
+      vm_exit_during_initialization("java.lang.OutOfMemoryError", "unable to create new native thread");
+    }
+    java_lang_Thread::set_thread(t_obj(), trt);
+    java_lang_Thread::set_daemon(t_obj());
+
+    trt->set_threadObj(t_obj());
+    Threads::add(trt);
+    Thread::start(trt);
+  }
+  return trt;
+}
+
+void TraceReaderThread::shutdown(TRAPS) {
+  Klass* rt_klass = SystemDictionary::resolve_or_fail(vmSymbols::sun_evtracing_TraceReaderThread(), true, CHECK);
+  instanceKlassHandle rt_khandle(THREAD, rt_klass);
+
+  JavaValue result(T_VOID);
+  JavaCalls::call_virtual(&result,
+                          Handle(_object),
+                          rt_khandle,
+                          vmSymbols::shutdown_method_name(),
+                          vmSymbols::void_method_signature(),
+                          CHECK);
+}
+
+void TraceReaderThread::oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf) {
+  JavaThread::oops_do(f, cld_f, cf);
+
+  f->do_oop(&_object);
+}
diff --git a/src/share/vm/evtrace/traceReaderThread.hpp b/src/share/vm/evtrace/traceReaderThread.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceReaderThread.hpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEREADERTHREAD_HPP
+#define SHARE_VM_EVTRACE_TRACEREADERTHREAD_HPP
+
+#include "runtime/thread.hpp"
+
+class TraceReaderThread: public JavaThread {
+private:
+  oop _object;
+  volatile bool _is_polling_queue;
+
+  TraceReaderThread(oop obj);
+
+  static void trace_reader_thread_entry(JavaThread* thread, TRAPS);
+
+public:
+  virtual ~TraceReaderThread();
+
+  virtual bool is_TraceReader_thread() const { return true; }
+
+  void oops_do(OopClosure* f, CLDClosure* cld_f, CodeBlobClosure* cf);
+
+  static TraceReaderThread * start(Handle flushq, Handle freeq, TRAPS);
+
+  bool is_polling_queue() { return _is_polling_queue; }
+  void set_is_polling_queue(bool polling) {
+    assert(_is_polling_queue != polling, "flip only");
+    _is_polling_queue = polling;
+  }
+
+  void shutdown(TRAPS);
+};
+
+#endif /* SHARE_VM_EVTRACE_TRACEREADERTHREAD_HPP */
diff --git a/src/share/vm/evtrace/traceStack.cpp b/src/share/vm/evtrace/traceStack.cpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceStack.cpp
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evtrace/traceStack.hpp"
+
+#include "evtrace/traceEvents.hpp"
+#include "evtrace/traceMetadata.hpp"
+
+#include "runtime/vframe.hpp"
+#include "runtime/init.hpp"
+
+//
+// TraceStackBuilder
+//
+
+void TraceStackBuilder::add(const TraceStackFrame &f) {
+  assert(!is_full(), "already full");
+  _frames[_count] = f;
+  _hash ^= f.hash();
+  _count++;
+}
+
+void TraceStackBuilder::add_frame(const frame *fr) {
+  TraceStackFrame f;
+  f.is_compiled = fr->cb()->is_nmethod();
+  if (f.is_compiled) {
+    assert(fr->is_compiled_frame() || fr->is_native_frame(), "unsupported frame type");
+    f.compiled.pc = fr->pc();
+    f.compiled.nm = (nmethod *) fr->cb();
+  } else {
+    assert(fr->is_interpreted_frame(), "unsupported frame type");
+    f.interpreted.method = fr->interpreter_frame_method();
+    f.interpreted.bci = fr->interpreter_frame_bci();
+  }
+  add(f);
+}
+
+bool TraceStackBuilder::range_equals(size_t offset, const CachedTraceStack *cts, size_t cts_offset, size_t count) const {
+  for (size_t i = 0; i < count; i++) {
+    if (!frame_at(offset + i)->equals(*cts->frame_at(cts_offset + i))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+//
+// CachedTraceStack
+//
+
+CachedTraceStack *CachedTraceStack::create(TraceTypes::stack_id id, const CompositeTraceStack &ts) {
+  assert (in_bytes(byte_offset_of(CachedTraceStack, _frames)) == sizeof(CachedTraceStack),
+      "variable-sized frame array must be last field");
+  return new (ts.count()) CachedTraceStack(id, ts);
+}
+
+void* CachedTraceStack::operator new(size_t size, size_t nframes) throw () {
+  return CHeapObj<mtEventTracing>::operator new(size + sizeof(_frames[0]) * nframes, CALLER_PC);
+}
+
+void CachedTraceStack::operator delete(void* p) {
+  CHeapObj<mtEventTracing>::operator delete(p);
+}
+
+CachedTraceStack::CachedTraceStack(TraceTypes::stack_id id, const CompositeTraceStack& ts)
+  : _id(id),
+    _count(ts.count()),
+    _hash(ts.hash()),
+    _truncated(ts.is_truncated()),
+    _valid(true)
+{
+  for (size_t i = 0; i < ts.count(); i++) {
+    _frames[i] = *ts.frame_at(i);
+  }
+}
+
+bool CachedTraceStack::has_interpreted_method_from_classloader(const ClassLoaderData* loader) const {
+  assert(is_valid(), "sanity");
+  for (size_t i = 0; i < _count; i++) {
+    if (!_frames[i].is_compiled && _frames[i].interpreted.method->method_holder()->class_loader_data() == loader) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool CachedTraceStack::has_nmethod(const nmethod *nm) const {
+  assert(is_valid(), "sanity");
+  for (size_t i = 0; i < _count; i++) {
+    if (_frames[i].is_compiled && _frames[i].compiled.nm == nm) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool CachedTraceStack::range_equals(size_t offset, const CachedTraceStack *other, size_t other_offset, size_t count) const {
+  for (size_t i = 0; i < count; i++) {
+    if (!frame_at(offset + i)->equals(*other->frame_at(other_offset + i))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+//
+// CompositeTraceStack
+//
+
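+// Combines the freshly walked top frames with a cached stack used as the bottom
+// portion, recomputing count, truncation flag and hash so that the hash is the
+// XOR of the hashes of exactly the frames that remain visible.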
+void CompositeTraceStack::set_bottom(const CachedTraceStack *bottom, size_t offset) {
+  _hash = _top.hash();
+  _count = _top.count();
+  _truncated = _top.is_truncated();
+
+  _bottom = bottom;
+  _bottom_offset = offset;
+  if (bottom != NULL) {
+    if (_top.count() < EventTracingStackDepthLimit) {
+      assert(!_top.is_truncated(), "missing frames between top and bottom");
+      _count += bottom->count() - offset;
+      _truncated = _truncated || bottom->is_truncated();
+      if (_count > EventTracingStackDepthLimit) {
+        _truncated = true;
+        _count = EventTracingStackDepthLimit;
+      }
+      _hash ^= bottom->hash();
+      // drop frames before offset from hash
+      for (size_t i = 0; i < offset; i++) {
+        _hash ^= bottom->frame_at(i)->hash();
+      }
+      // drop truncated frames from hash
+      for (size_t i = EventTracingStackDepthLimit - _top.count() + offset; i < _bottom->count(); i++) {
+        _hash ^= bottom->frame_at(i)->hash();
+      }
+    } else {
+      _truncated = _truncated || (bottom->count() > offset);
+    }
+  }
+
+#ifdef ASSERT
+  intptr_t h = 0;
+  for (size_t i = 0; i < count(); i++) {
+    h ^= frame_at(i)->hash();
+  }
+  assert(h == hash(), "hash mismatch");
+#endif
+}
+
+bool CompositeTraceStack::equals(const CachedTraceStack* cts) const {
+  if (hash() != cts->hash() || count() != cts->count() || is_truncated() != cts->is_truncated()) {
+    return false;
+  }
+  return _top.range_equals(0, cts, 0, _top.count())
+      && (_bottom == NULL || _bottom->range_equals(_bottom_offset, cts, _top.count(), count() - _top.count()));
+}
+
+bool CompositeTraceStack::equals(const CompositeTraceStack &other) const {
+  if (hash() != other.hash() || count() != other.count() || is_truncated() != other.is_truncated()) {
+    return false;
+  }
+  for (size_t i = 0; i < count(); i++) {
+    if (!frame_at(i)->equals(*other.frame_at(i))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+//
+// TraceStackCache
+//
+
+TraceStackCache::TraceStackCache(TraceMetadata *tm)
+{
+  _metadata = tm;
+
+  _table = NULL;
+  _count = 0;
+  _has_invalid_stacks = false;
+
+  _lookup_counter = _lookup_miss_counter = _lookup_collision_counter = _probe_counter = _probe_collision_counter = 0;
+
+  _size = (1 << 12);
+  assert(is_power_of_2(_size), "sanity");
+  _table = (CachedTraceStack **) os::malloc(_size * sizeof(_table[0]), mtEventTracing, CURRENT_PC);
+  memset(_table, 0, _size * sizeof(_table[0]));
+}
+
+TraceStackCache::~TraceStackCache() {
+  for (size_t i = 0; i < _size; i++) {
+    CachedTraceStack *p = _table[i];
+    while (p != NULL) {
+      CachedTraceStack *next = p->cache_next();
+      delete p;
+      p = next;
+    }
+  }
+  os::free(_table, mtEventTracing);
+}
+
+void TraceStackCache::add_for_rehash(CachedTraceStack *cts) {
+  const size_t mask = (_size - 1);
+  intptr_t index = cts->hash() & mask;
+  cts->set_cache_next(_table[index]);
+  _table[index] = cts;
+  _count++;
+}
+
+inline void TraceStackCache::update_counters_after_lookup(bool present, jlong probes, jlong collisions) {
+  if (EnableEventTracingDiagnostics) {
+    Atomic::inc_ptr(&_lookup_counter);
+    Atomic::add_ptr(probes, &_probe_counter);
+    if (!present) {
+      Atomic::inc_ptr(&_lookup_miss_counter);
+    }
+    if (collisions > 0) {
+      Atomic::inc_ptr(&_lookup_collision_counter);
+      Atomic::add_ptr(collisions, &_probe_collision_counter);
+    }
+  }
+}
+
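+// Looks up the composite stack in the hash table; if it is absent, a new
+// CachedTraceStack is created and inserted at the head of its bucket with a CAS.
+// On contention, the bucket chain is re-examined before retrying.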
+const CachedTraceStack * TraceStackCache::get_or_try_add(const CompositeTraceStack &ts, bool &known, TraceTypes::stack_id preallocated_id) {
+  jlong probes = 0, collisions = 0;
+
+  CachedTraceStack *created = NULL;
+
+  const size_t mask = (_size - 1);
+  const size_t index = ts.hash() & mask;
+  // XXX: probably need barriers here on non-x86
+  for(;;) {
+    CachedTraceStack *head = _table[index];
+    if (head == NULL) {
+      probes++;
+    }
+    CachedTraceStack *p = head;
+    while (p != NULL) {
+      probes++;
+      if (ts.hash() == p->hash()) {
+        if (ts.equals(p)) {
+          delete created;
+          known = true;
+          update_counters_after_lookup(true, probes, collisions);
+          return p;
+        } else {
+          collisions++;
+        }
+      }
+      p = p->cache_next();
+    }
+    // not found
+    if (created == NULL) {
+      TraceTypes::stack_id id = preallocated_id;
+      if (id == 0) {
+        id = _metadata->next_stack_id();
+      }
+      created = CachedTraceStack::create(id, ts);
+    }
+    created->set_cache_next(head);
+    if (Atomic::cmpxchg_ptr(created, &_table[index], head) == head) {
+      Atomic::inc_ptr(&_count);
+      known = false;
+      update_counters_after_lookup(false, probes, collisions);
+      return created;
+    }
+    // head of collision chain changed: walk the entire chain again in the next
+    // iteration to check whether the stack trace has been inserted by another
+    // thread (checking the new head alone is not enough, since multiple threads
+    // may have inserted entries)
+  }
+}
+
+class TraceStackCache::CachedTraceStackPredicate {
+public:
+  virtual bool test(CachedTraceStack *cts) = 0;
+};
+
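+// Walks all buckets and invalidates every valid stack that matches the predicate;
+// the invalidated entries themselves are not freed here, but during a later
+// do_maintenance().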
+inline void TraceStackCache::purge_matches(TraceStackCache::CachedTraceStackPredicate *pr) {
+  if (EnableEventTracingDiagnostics) {
+    _purge_timer.start();
+  }
+
+  for (size_t i = 0; i < _size; i++) {
+    CachedTraceStack *p = _table[i];
+    while (p != NULL) {
+      if (p->is_valid() && pr->test(p)) {
+        p->invalidate();
+        _has_invalid_stacks = true;
+      }
+      p = p->cache_next();
+    }
+  }
+
+  if (EnableEventTracingDiagnostics) {
+    _purge_timer.stop();
+  }
+}
+
+class TraceStackCache::UnloadingClassPredicate : public TraceStackCache::CachedTraceStackPredicate {
+  const ClassLoaderData *_loader;
+public:
+  UnloadingClassPredicate(const ClassLoaderData *loader) : _loader(loader) { }
+  virtual bool test(CachedTraceStack *stack) {
+    return stack->has_interpreted_method_from_classloader(_loader);
+  }
+};
+
+void TraceStackCache::purge_unloading_classes(const ClassLoaderData* loader) {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "must be done in VM thread at safepoint");
+
+  // NOTE: this only purges stack traces with *interpreted* frames that refer to
+  // unloading classes. nmethods with code from unloaded classes are unloaded in
+  // a later step, at which point we also purge the stack traces that contain
+  // those nmethods.
+  UnloadingClassPredicate pr(loader);
+  purge_matches(&pr);
+}
+
+class TraceStackCache::UnloadingNmethodPredicate : public TraceStackCache::CachedTraceStackPredicate {
+  const nmethod *_nmethod;
+public:
+  UnloadingNmethodPredicate(const nmethod *nm) : _nmethod(nm) { }
+  virtual bool test(CachedTraceStack *stack) {
+    return stack->has_nmethod(_nmethod);
+  }
+};
+
+void TraceStackCache::purge_unloading_nmethod(const nmethod *nm) {
+  UnloadingNmethodPredicate pr(nm);
+  purge_matches(&pr);
+}
+
+class TraceStackCache::AnyPredicate : public TraceStackCache::CachedTraceStackPredicate {
+public:
+  virtual bool test(CachedTraceStack *stack) {
+    return true;
+  }
+};
+
+void TraceStackCache::purge_all() {
+  AnyPredicate pr;
+  purge_matches(&pr);
+}
+
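+// Grows the table once the load factor exceeds ~0.7 and rehashes all entries,
+// deleting stacks that have been invalidated since the last maintenance.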
+void TraceStackCache::do_maintenance() {
+  assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "must be done in VM thread at safepoint");
+
+  bool should_grow = (_count / (float) _size > 0.7f);
+  if (should_grow || has_invalid_stacks()) {
+    if (EnableEventTracingDiagnostics) {
+      _maintenance_timer.start();
+    }
+
+    CachedTraceStack **old_table = _table;
+    size_t old_capacity = _size;
+
+    if (should_grow) {
+      _size <<= 1;
+      assert(is_power_of_2(_size), "sanity");
+    }
+    _count = 0;
+    _table = (CachedTraceStack **) os::malloc(_size * sizeof(_table[0]), mtEventTracing, CURRENT_PC);
+    memset(_table, 0, _size * sizeof(_table[0]));
+    for (size_t i = 0; i < old_capacity; i++) {
+      CachedTraceStack *p = old_table[i];
+      while (p != NULL) {
+        CachedTraceStack *next = p->cache_next();
+        if (p->is_valid()) {
+          add_for_rehash(p);
+        } else {
+          delete p;
+        }
+        p = next;
+      }
+    }
+    os::free(old_table, mtEventTracing);
+
+    if (EnableEventTracingDiagnostics) {
+      _maintenance_timer.stop();
+    }
+
+    _has_invalid_stacks = false;
+  }
+}
+
+void TraceStackCache::reset_stats() {
+  _lookup_counter = _lookup_miss_counter = _lookup_collision_counter = _probe_counter = _probe_collision_counter = 0;
+
+  // the timers are only updated at a safepoint, so resetting them here should be
+  // safe whether or not we are at one
+  _purge_timer.reset();
+  _maintenance_timer.reset();
+}
diff --git a/src/share/vm/evtrace/traceStack.hpp b/src/share/vm/evtrace/traceStack.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceStack.hpp
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACESTACK_HPP
+#define SHARE_VM_EVTRACE_TRACESTACK_HPP
+
+#include "evtrace/traceTypes.hpp"
+
+#include "memory/allocation.hpp"
+#include "runtime/timer.hpp"
+
+class vframeStreamCommon;
+class Method;
+class CachedTraceStack;
+class TraceMetadata;
+
+#define TRACE_STACK_MAX_FRAMES 128
+
+class CompositeTraceStack;
+
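+// A single captured frame: either a compiled frame identified by its nmethod and
+// pc, or an interpreted frame identified by its Method and bci.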
+struct TraceStackFrame {
+  bool is_compiled;
+  union {
+    struct {
+      nmethod *nm;
+      address  pc;
+    } compiled;
+    struct {
+      Method *method;
+      int     bci;
+    } interpreted;
+  };
+
+  intptr_t hash() const;
+  bool equals(const TraceStackFrame& other) const;
+};
+
+class TraceStackBuilder: StackObj {
+private:
+  TraceStackFrame _frames[TRACE_STACK_MAX_FRAMES];
+  size_t   _count;
+  bool     _truncated;
+  intptr_t _hash;
+
+  void add(const TraceStackFrame &f);
+
+public:
+  TraceStackBuilder();
+
+  void add_frame(const frame *fr);
+  void set_truncated() { _truncated = true; }
+
+  const TraceStackFrame *frame_at(size_t index) const;
+
+  size_t   count() const        { return _count;     }
+  bool     is_full() const      { return (_count == EventTracingStackDepthLimit); }
+  bool     is_truncated() const { return _truncated; }
+  intptr_t hash() const         { return _hash;      }
+
+  bool     range_equals(size_t offset, const CachedTraceStack *cts, size_t cts_offset, size_t count) const;
+};
+
+class CachedTraceStack: CHeapObj<mtEventTracing> {
+public:
+  static CachedTraceStack *create(TraceTypes::stack_id id, const CompositeTraceStack &ts);
+
+  const TraceStackFrame *frame_at(size_t index) const;
+
+  TraceTypes::stack_id  id() const { return _id;        }
+  size_t   count() const           { return _count;     }
+  bool     is_truncated() const    { return _truncated; }
+  intptr_t hash() const            { return _hash;      }
+
+  bool     has_interpreted_method_from_classloader(const ClassLoaderData *loader) const;
+  bool     has_nmethod(const nmethod *nm) const;
+  bool     range_equals(size_t offset, const CachedTraceStack *other, size_t other_offset, size_t count) const;
+
+  bool     is_valid() const { return _valid; }
+  void     invalidate();
+
+  CachedTraceStack *cache_next();
+  void     set_cache_next(CachedTraceStack *next);
+
+  void operator delete(void* p);
+
+private:
+  void* operator new(size_t size, size_t nframes) throw ();
+  CachedTraceStack(TraceTypes::stack_id id, const CompositeTraceStack &ts);
+
+  CachedTraceStack * volatile _cache_next;
+
+  const TraceTypes::stack_id  _id;
+  const intptr_t              _hash;
+  const size_t                _count;
+  const bool                  _truncated;
+  bool                        _valid;
+  TraceStackFrame             _frames[0];
+};
+
+class CompositeTraceStack: StackObj {
+private:
+  const TraceStackBuilder &_top;
+  const CachedTraceStack  *_bottom;
+  size_t    _bottom_offset;
+  intptr_t  _hash;
+  size_t    _count;
+  bool      _truncated;
+
+public:
+  CompositeTraceStack(TraceStackBuilder &top);
+
+  void set_bottom(const CachedTraceStack *cts, size_t offset);
+
+  bool equals(const CachedTraceStack *cts) const;
+  bool equals(const CompositeTraceStack &other) const;
+
+  const TraceStackFrame *frame_at(size_t index) const;
+
+  intptr_t hash() const         { return _hash;      }
+  size_t   count() const        { return _count;     }
+  bool     is_truncated() const { return _truncated; }
+};
+
+class TraceStackVframeIterator: StackObj {
+private:
+  const CompositeTraceStack &_ts;
+  int      _index;
+  int      _decode_offset;
+  Method  *_method;
+  int      _bci;
+
+  void fill_from_compiled_frame();
+
+public:
+  TraceStackVframeIterator(const CompositeTraceStack &ts);
+
+  bool has_next();
+  void next();
+  void reset();
+
+  Method *method() { return _method; }
+  int     bci()    { return _bci;    }
+};
+
+class TraceStackCache: public CHeapObj<mtEventTracing> {
+private:
+  TraceMetadata *_metadata;
+
+  CachedTraceStack **_table;
+  volatile intptr_t  _count;
+  size_t             _size;
+  volatile bool      _has_invalid_stacks;
+
+  // statistics
+  elapsedTimer   _purge_timer;
+  elapsedTimer   _maintenance_timer;
+  volatile jlong _lookup_counter;
+  volatile jlong _lookup_miss_counter;
+  volatile jlong _lookup_collision_counter;
+  volatile jlong _probe_counter;
+  volatile jlong _probe_collision_counter;
+
+  void add_for_rehash(CachedTraceStack *cts);
+
+  class CachedTraceStackPredicate;
+  class UnloadingClassPredicate;
+  class UnloadingNmethodPredicate;
+  class AnyPredicate;
+  void purge_matches(CachedTraceStackPredicate *pr);
+
+  void update_counters_after_lookup(bool present, jlong probes, jlong collisions);
+
+public:
+  TraceStackCache(TraceMetadata *tm);
+  virtual ~TraceStackCache();
+
+  const CachedTraceStack * get_or_try_add(const CompositeTraceStack &ts, bool &known, TraceTypes::stack_id preallocated_id = 0);
+
+  void purge_unloading_classes(const ClassLoaderData *loader);
+  void purge_unloading_nmethod(const nmethod *nm);
+  void purge_all();
+
+  bool has_invalid_stacks() const { return _has_invalid_stacks; }
+  void do_maintenance();
+
+  jlong lookups()            { return _lookup_counter;                   }
+  jlong lookup_misses()      { return _lookup_miss_counter;              }
+  jlong lookup_collisions()  { return _lookup_collision_counter;         }
+  jlong probes()             { return _probe_counter;                    }
+  jlong probe_collisions()   { return _probe_collision_counter;          }
+  jlong purge_millis()       { return _purge_timer.milliseconds();       }
+  jlong maintenance_millis() { return _maintenance_timer.milliseconds(); }
+  void reset_stats();
+};
+
+#include "evtrace/traceStack.inline.hpp"
+
+#endif /* SHARE_VM_EVTRACE_TRACESTACK_HPP */
diff --git a/src/share/vm/evtrace/traceStack.inline.hpp b/src/share/vm/evtrace/traceStack.inline.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceStack.inline.hpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACESTACK_INLINE_HPP
+#define SHARE_VM_EVTRACE_TRACESTACK_INLINE_HPP
+
+#include "runtime/vframe.hpp"
+#include "services/memTracker.hpp"
+
+//
+// TraceStackFrame
+//
+
+inline intptr_t TraceStackFrame::hash() const {
+  intptr_t h = 0;
+  if (is_compiled) {
+    h = (intptr_t) compiled.pc;
+  } else {
+    h = 31 * (intptr_t) interpreted.method + interpreted.bci;
+  }
+  h = 31 * h + (is_compiled ? 1 : 0);
+  return h;
+}
+
+inline bool TraceStackFrame::equals(const TraceStackFrame &other) const {
+  bool eql = false;
+  if (is_compiled == other.is_compiled) {
+    if (is_compiled) {
+      eql = (compiled.pc == other.compiled.pc);
+      assert(!eql || compiled.nm == other.compiled.nm, "sanity");
+    } else {
+      eql = (interpreted.method == other.interpreted.method && interpreted.bci == other.interpreted.bci);
+    }
+  }
+  return eql;
+}
+
+//
+// TraceStackBuilder
+//
+
+inline TraceStackBuilder::TraceStackBuilder() {
+  assert(EventTracingStackDepthLimit <= TRACE_STACK_MAX_FRAMES, "stack depth limit too high");
+  _hash  = 0;
+  _count = 0;
+  _truncated = false;
+}
+
+inline const TraceStackFrame* TraceStackBuilder::frame_at(size_t index) const {
+  assert(index < _count, "range");
+  return &_frames[index];
+}
+
+//
+// CachedTraceStack
+//
+
+inline const TraceStackFrame *CachedTraceStack::frame_at(size_t index) const {
+  assert(index < _count, "range");
+  return &_frames[index];
+}
+
+inline void CachedTraceStack::invalidate() {
+  assert(_valid, "only once");
+  _valid = false;
+}
+
+inline CachedTraceStack *CachedTraceStack::cache_next() {
+  return _cache_next;
+}
+
+inline void CachedTraceStack::set_cache_next(CachedTraceStack *next) {
+  _cache_next = next;
+}
+
+//
+// CompositeTraceStack
+//
+
+inline CompositeTraceStack::CompositeTraceStack(TraceStackBuilder& top)
+: _top(top)
+{
+  set_bottom(NULL, 0);
+}
+
+inline const TraceStackFrame *CompositeTraceStack::frame_at(size_t index) const {
+  assert (index < count(), "range");
+  if (index < _top.count()) {
+    return _top.frame_at(index);
+  }
+  return _bottom->frame_at(index - _top.count() + _bottom_offset);
+}
+
+//
+// TraceStackVframeIterator
+//
+
+inline TraceStackVframeIterator::TraceStackVframeIterator(const CompositeTraceStack& ts)
+: _ts(ts)
+{
+  reset();
+}
+
+inline bool TraceStackVframeIterator::has_next() {
+  return (_decode_offset == DebugInformationRecorder::serialized_null && _index + 1 == (int)_ts.count());
+}
+
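+// Decodes one inlined scope from the nmethod's debug information stream and
+// advances the decode offset to the caller scope.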
+inline void TraceStackVframeIterator::fill_from_compiled_frame() {
+  assert(_decode_offset != DebugInformationRecorder::serialized_null, "sanity");
+
+  const TraceStackFrame *frame = _ts.frame_at(_index);
+  assert(frame->is_compiled, "sanity");
+
+  DebugInfoReadStream buffer(frame->compiled.nm, _decode_offset);
+  int sender_offset = buffer.read_int();
+  _method           = buffer.read_method();
+  _bci              = buffer.read_bci();
+
+  _decode_offset = sender_offset;
+}
+
+inline void TraceStackVframeIterator::next() {
+  assert(!has_next(), "at end");
+  if (_decode_offset != DebugInformationRecorder::serialized_null) {
+    fill_from_compiled_frame();
+  } else {
+    _index++;
+    const TraceStackFrame *frame = _ts.frame_at(_index);
+    if (frame->is_compiled) {
+      nmethod *nm = frame->compiled.nm;
+      // Use pc_desc_near() because extra (non-safepoint) debug information is typically shared
+      // by several instructions and only emitted at the last instruction. Note that this COULD
+      // lead to wrong stack traces when there isn't any debug information for the current location
+      // and we pick up that of some other location which might even have different inlining.
+      // This should rarely occur and we have modified C1 and C2 so at least it shouldn't happen
+      // for monitorexit.
+      PcDesc *desc = nm->pc_desc_near(frame->compiled.pc);
+      if (!nm->is_native_method() && desc != NULL && desc->scope_decode_offset() != DebugInformationRecorder::serialized_null) {
+        _decode_offset = desc->scope_decode_offset();
+        fill_from_compiled_frame();
+      } else {
+        // Either this is a native method or we don't have a valid PcDesc. The latter can happen
+        // when the compiler did not emit debug information for the current location. This occurs
+        // for the instructions to exit the monitor in return and exception/unwind handlers of
+        // synchronized methods (which don't have an explicit monitorexit bytecode instruction).
+        // This does not seem to occur with inlined synchronized methods, so we just report the
+        // top-level method without a bci for this frame.
+        _method        = nm->method();
+        _bci           = 0;
+        _decode_offset = DebugInformationRecorder::serialized_null;
+      }
+    } else {
+      _method        = frame->interpreted.method;
+      _bci           = frame->interpreted.bci;
+      _decode_offset = DebugInformationRecorder::serialized_null;
+    }
+  }
+}
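+
+// Minimal usage sketch (illustrative; the method()/bci() accessor names and the visit()
+// consumer are assumptions, not part of this file):
+//
+//   TraceStackVframeIterator it(cts);
+//   while (!it.has_next()) {        // has_next() returns true once iteration is exhausted
+//     it.next();
+//     visit(it.method(), it.bci());
+//   }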
+
+inline void TraceStackVframeIterator::reset() {
+  _index = -1;
+  _decode_offset = DebugInformationRecorder::serialized_null;
+  _method = NULL;
+  _bci = -1;
+}
+
+#endif /* SHARE_VM_EVTRACE_TRACESTACK_INLINE_HPP */
diff --git a/src/share/vm/evtrace/traceTypes.hpp b/src/share/vm/evtrace/traceTypes.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceTypes.hpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACETYPES_HPP
+#define SHARE_VM_EVTRACE_TRACETYPES_HPP
+
+#include "utilities/globalDefinitions.hpp"
+
+#define TRACE_TYPES_DO(fun)         \
+          fun(u1, event_type      ) \
+          fun(s8, timestamp       ) \
+          fun(s8, seq_num         ) \
+          fun(s8, thread_id       ) \
+          fun(s4, thread_state    ) \
+          fun(s4, object_id       ) \
+          fun(s8, objmonitor_id   ) \
+          fun(s8, classloader_id  ) \
+          fun(s8, class_id        ) \
+          fun(s8, method_id       ) \
+          fun(s4, method_bci      ) \
+          fun(s8, stack_id        )
+
+// Scalar trace stream types
+class TraceTypes {
+public:
+
+#define TRACE_TYPE_DECLARE(primitive, name) \
+  typedef primitive name;
+TRACE_TYPES_DO(TRACE_TYPE_DECLARE)
+#undef TRACE_TYPE_DECLARE
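+// For illustration, the expansion above produces plain typedefs such as
+//   typedef u1 event_type;
+//   typedef s8 timestamp;
+// giving each trace field a semantic name while keeping a fixed-width representation.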
+
+  enum park_return_code {
+    _park_return_code_min = -1,
+    park_normal,
+    park_timedout,
+    park_immediate_fast,
+    park_interrupted_fast,
+    park_no_wait_time,
+    park_interrupted_slow,
+    park_locked,
+    park_immediate_slow,
+    park_unknown,
+    _park_return_code_max,
+  };
+
+  enum safepoint_reason {
+    _safepoint_reason_min = -1,
+    safepoint_periodic,
+    safepoint_for_vm_op,
+    _safepoint_reason_max,
+  };
+
+  enum monitor_enter_wait {
+    _monitor_enter_wait_min = -1,
+    enter_no_wait,
+    enter_after_wait_notify,
+    enter_after_wait_timeout,
+    enter_after_wait_other, // interrupt or spurious
+    _monitor_enter_wait_max,
+  };
+
+  enum monitor_entered_flags {
+    _monitor_entered_flags_min = -1,
+    entered_flags_none = 0,
+    entered_queued = (1 << 0),
+    entered_parked = (1 << 1),
+    _monitor_entered_flags_max,
+  };
+
+  enum event {
+    _event_min = -1,
+    event_thread_start,
+    event_thread_name_change,
+    event_thread_state_change,
+    event_thread_interrupt,
+    event_thread_exit,
+    event_thread_park_begin,
+    event_thread_park_end,
+    event_thread_unpark,
+    event_monitor_inflate,
+    event_monitor_deflate,
+    event_monitor_contended_enter,
+    event_monitor_contended_entered,
+    event_monitor_contended_exited,
+    event_monitor_dummy,
+    event_class_metadata,
+    event_method_metadata,
+    event_stack_metadata,
+    event_identical_stacks_metadata,
+    event_class_loader_unload,
+    event_safepoint_begin,
+    event_safepoint_end,
+    event_vm_end,
+    event_metadata_reset,
+    event_group,
+    _event_max,
+  };
+
+protected:
+  TraceTypes() { }
+};
+
+#endif /* SHARE_VM_EVTRACE_TRACETYPES_HPP */
diff --git a/src/share/vm/evtrace/traceWriter.hpp b/src/share/vm/evtrace/traceWriter.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceWriter.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEWRITER_HPP
+#define SHARE_VM_EVTRACE_TRACEWRITER_HPP
+
+#include "memory/allocation.hpp"
+#include "memory/gcLocker.hpp"
+
+class TraceWriterBase: public StackObj {
+private:
+  u1  *_writer_top;
+  No_Safepoint_Verifier _nsv;
+
+  void assert_reserved(size_t nbytes);
+  void reserve(size_t nbytes);
+
+protected:
+  TraceWriterBase(size_t nbytes);
+  ~TraceWriterBase();
+
+public:
+  void put_u1(u1 v);
+  void put_u2(u2 v);
+  void put_u4(u4 v);
+  void put_u8(u8 v);
+  void put_s4(s4 v);
+  void put_s8(s8 v);
+  void put_data(void *data, size_t length);
+  void put_utf8str(const char *s, size_t bytes);
+  static size_t nbytes_for_utf8str(const char *s, size_t maxbytes);
+};
+
+class TraceWriter: public TraceWriterBase {
+public:
+  TraceWriter(size_t nbytes) : TraceWriterBase(nbytes) { }
+
+#define TRACE_TYPE_DEFINE_PUT_METHOD(primitive, name) \
+  void put_##name(TraceTypes::name val) { put_##primitive(val); }
+TRACE_TYPES_DO(TRACE_TYPE_DEFINE_PUT_METHOD)
+#undef TRACE_TYPE_DEFINE_PUT_METHOD
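+// For illustration, the expansion above generates one typed writer per scalar field, e.g.
+//   void put_event_type(TraceTypes::event_type val) { put_u1(val); }
+//   void put_timestamp(TraceTypes::timestamp val)   { put_s8(val); }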
+};
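+
+// Hypothetical usage sketch; the field order and byte count are illustrative only,
+// not an actual event layout:
+//
+//   TraceWriter w(sizeof(TraceTypes::event_type) + sizeof(TraceTypes::timestamp));
+//   w.put_event_type(TraceTypes::event_thread_park_begin);
+//   w.put_timestamp(now);   // 'now' stands in for a real timestamp value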
+
+#include "evtrace/traceWriter.inline.hpp"
+
+#endif /* SHARE_VM_EVTRACE_TRACEWRITER_HPP */
diff --git a/src/share/vm/evtrace/traceWriter.inline.hpp b/src/share/vm/evtrace/traceWriter.inline.hpp
new file mode 100644
--- /dev/null
+++ b/src/share/vm/evtrace/traceWriter.inline.hpp
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2014, 2015, Dynatrace and/or its affiliates. All rights reserved.
+ *
+ * This file is part of the Lock Contention Tracing Subsystem for the HotSpot
+ * Virtual Machine, which is developed at Christian Doppler Laboratory on
+ * Monitoring and Evolution of Very-Large-Scale Software Systems. Please
+ * contact us at <http://mevss.jku.at/> if you need additional information
+ * or have any questions.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef SHARE_VM_EVTRACE_TRACEWRITER_INLINE_HPP
+#define SHARE_VM_EVTRACE_TRACEWRITER_INLINE_HPP
+
+#include "evtrace/traceBuffer.hpp"
+#include "evtrace/traceManager.hpp"
+
+//
+// TraceWriterBase
+//
+
+inline TraceWriterBase::TraceWriterBase(size_t nbytes)
+: _writer_top(NULL),
+  _nsv(true, false)
+{
+  assert(!SafepointSynchronize::is_at_safepoint() || Thread::current()->is_VM_thread(),
+      "only the VM thread may write events during a safepoint");
+  assert(!Thread::current()->trace_active(), "tracing already in use");
+  debug_only(Thread::current()->toggle_trace_active();)
+  reserve(nbytes);
+}
+
+inline TraceWriterBase::~TraceWriterBase() {
+  assert(_writer_top == NULL || Thread::current()->trace_buffer() == NULL
+      || _writer_top == Thread::current()->trace_buffer()->top,
+      "must have used up reserved space");
+  assert(Thread::current()->trace_active(), "tracing must be active");
+  debug_only(Thread::current()->toggle_trace_active();)
+}
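+
+// Note: the byte count passed to the constructor must exactly match the bytes that are
+// subsequently written through the put_* calls; the destructor asserts this in debug builds.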
+
+inline void TraceWriterBase::assert_reserved(size_t nbytes) {
+  assert(_writer_top != NULL, "no space reserved");
+  assert(Thread::current()->trace_buffer() != NULL, "thread has no trace buffer");
+  assert(_writer_top + nbytes <= Thread::current()->trace_buffer()->top, "not enough space reserved");
+}
+
+inline void TraceWriterBase::reserve(size_t nbytes) {
+  if (!TraceManager::is_initialized()) {
+    assert(_writer_top == NULL, "have pointer into buffer, but tracing is not initialized");
+    return;
+  }
+
+  assert(_writer_top == NULL || Thread::current()->trace_buffer() == NULL
+      || _writer_top == Thread::current()->trace_buffer()->top,
+      "must finish writing before reserving more space");
+
+  // NOTE: our No_Safepoint_Verifier ensures that no safepoint can happen
+  // between submitting/requesting a buffer and setting it as the thread's
+  // trace buffer
+
+  TraceBuffer *buf = Thread::current()->trace_buffer();
+  if (buf != NULL) {
+    _writer_top = buf->top;
+    if (!buf->reserve(nbytes)) {
+      // does not fit
+      _writer_top = NULL;
+      TraceManager::submit_buffer(Thread::current()->trace_buffer());
+      Thread::current()->set_trace_buffer(NULL);
+      buf = NULL;
+    }
+  }
+  if (buf == NULL && TraceManager::is_initialized()) {
+    buf = TraceManager::request_buffer();
+    buf->owner = Thread::current();
+    _writer_top = buf->top;
+    guarantee(buf->reserve(nbytes), "newly requested buffer does not fit the event");
+    Thread::current()->set_trace_buffer(buf);
+  }
+}
+
+inline void TraceWriterBase::put_u1(u1 v) {
+  assert_reserved(1);
+  *_writer_top++ = v;
+}
+
+inline void TraceWriterBase::put_u2(u2 v) {
+  assert_reserved(2);
+  *(u2 *) _writer_top = v;
+  _writer_top += 2;
+}
+
+inline void TraceWriterBase::put_u4(u4 v) {
+  assert_reserved(4);
+  *(u4 *) _writer_top = v;
+  _writer_top += 4;
+}
+
+inline void TraceWriterBase::put_u8(u8 v) {
+  assert_reserved(8);
+  *(u8 *) _writer_top = v;
+  _writer_top += 8;
+}
+
+inline void TraceWriterBase::put_s4(s4 v) {
+  assert_reserved(4);
+  *(s4 *) _writer_top = v;
+  _writer_top += 4;
+}
+
+inline void TraceWriterBase::put_s8(s8 v) {
+  assert_reserved(8);
+  *(s8 *) _writer_top = v;
+  _writer_top += 8;
+}
+
+inline void TraceWriterBase::put_data(void *data, size_t length) {
+  assert_reserved(length);
+  memcpy(_writer_top, data, length);
+  _writer_top += length;
+}
+
+inline void TraceWriterBase::put_utf8str(const char *s, size_t bytes) {
+  assert_reserved(bytes);
+
+  assert(bytes >= 2, "invalid length");
+  bytes -= 2;
+  assert(bytes == (size_t) ((s2) bytes), "string length must fit in u2");
+  *(s2 *) _writer_top = (s2) bytes;
+  _writer_top += 2;
+  if (s != NULL) {
+    memcpy(_writer_top, s, bytes);
+    _writer_top += bytes;
+  }
+}
+
+inline size_t TraceWriterBase::nbytes_for_utf8str(const char* s, size_t maxbytes) {
+  assert(s != NULL, "null string");
+
+  size_t nbytes = 0;
+  if (s != NULL) {
+    jchar c;
+    const char *end = s + maxbytes;
+    for (;;) {
+      const char *q = s;
+      s = UTF8::next(s, &c);
+      if (c == 0 || s >= end) {
+        break;
+      }
+      nbytes += (size_t) (s - q);
+    }
+    assert(nbytes == (u2) nbytes, "string length must fit in u2");
+  }
+  nbytes += 2; // for u2 with length
+  return nbytes;
+}
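+
+// Illustrative pairing of the two UTF-8 helpers above ('name' and 'max_name_bytes' are
+// placeholders):
+//
+//   size_t len = TraceWriterBase::nbytes_for_utf8str(name, max_name_bytes);
+//   // ... include len in the event's reserved size, then:
+//   writer.put_utf8str(name, len);
+//
+// The stream layout is a 2-byte length prefix followed by that many UTF-8 bytes.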
+
+#endif /* SHARE_VM_EVTRACE_TRACEWRITER_INLINE_HPP */
diff --git a/src/share/vm/memory/allocation.hpp b/src/share/vm/memory/allocation.hpp
--- a/src/share/vm/memory/allocation.hpp
+++ b/src/share/vm/memory/allocation.hpp
@@ -154,8 +154,9 @@
   mtChunk             = 0x0C,  // chunk that holds content of arenas
   mtTest              = 0x0D,  // Test type for verifying NMT
   mtTracing           = 0x0E,  // memory used for Tracing
-  mtNone              = 0x0F,  // undefined
-  mt_number_of_types  = 0x10   // number of memory types (mtDontTrack
+  mtEventTracing      = 0x0F,  // memory used for event tracing
+  mtNone              = 0x10,  // undefined
+  mt_number_of_types  = 0x11   // number of memory types (mtDontTrack
                                  // is not included as validate type)
 };
 
diff --git a/src/share/vm/memory/gcLocker.hpp b/src/share/vm/memory/gcLocker.hpp
--- a/src/share/vm/memory/gcLocker.hpp
+++ b/src/share/vm/memory/gcLocker.hpp
@@ -224,22 +224,36 @@
 #ifdef ASSERT
   No_Safepoint_Verifier(bool activated = true, bool verifygc = true ) :
     No_GC_Verifier(verifygc),
-    _activated(activated) {
+    _activated(false) {
     _thread = Thread::current();
-    if (_activated) {
-      _thread->_allow_allocation_count++;
-      _thread->_allow_safepoint_count++;
+    if (activated) {
+      enable();
     }
   }
 
+  void enable() {
+    assert(!_activated, "expected");
+    _thread->_allow_allocation_count++;
+    _thread->_allow_safepoint_count++;
+    _activated = true;
+  }
+
+  void disable() {
+    assert(_activated, "expected");
+    _thread->_allow_allocation_count--;
+    _thread->_allow_safepoint_count--;
+    _activated = false;
+  }
+
   ~No_Safepoint_Verifier() {
     if (_activated) {
-      _thread->_allow_allocation_count--;
-      _thread->_allow_safepoint_count--;
+      disable();
     }
   }
 #else
   No_Safepoint_Verifier(bool activated = true, bool verifygc = true) : No_GC_Verifier(verifygc){}
+  void enable() {}
+  void disable() {}
   ~No_Safepoint_Verifier() {}
 #endif
 };
diff --git a/src/share/vm/oops/klass.cpp b/src/share/vm/oops/klass.cpp
--- a/src/share/vm/oops/klass.cpp
+++ b/src/share/vm/oops/klass.cpp
@@ -185,6 +185,7 @@
   set_next_sibling(NULL);
   set_next_link(NULL);
   TRACE_INIT_ID(this);
+  EVTRACE_INIT_TRACKED_CLASS;
 
   set_prototype_header(markOopDesc::prototype());
   set_biased_lock_revocation_count(0);
diff --git a/src/share/vm/oops/klass.hpp b/src/share/vm/oops/klass.hpp
--- a/src/share/vm/oops/klass.hpp
+++ b/src/share/vm/oops/klass.hpp
@@ -33,6 +33,7 @@
 #include "oops/metadata.hpp"
 #include "oops/oop.hpp"
 #include "trace/traceMacros.hpp"
+#include "evtrace/traceMacros.hpp"
 #include "utilities/accessFlags.hpp"
 #include "utilities/macros.hpp"
 #if INCLUDE_ALL_GCS
@@ -172,6 +173,8 @@
 
   TRACE_DEFINE_KLASS_TRACE_ID;
 
+  EVTRACE_DECLARE_TRACKED_CLASS_FIELDS;
+
   // Remembered sets support for the oops in the klasses.
   jbyte _modified_oops;             // Card Table Equivalent (YC/CMS support)
   jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
@@ -596,6 +599,8 @@
 
   TRACE_DEFINE_KLASS_METHODS;
 
+  EVTRACE_DEFINE_TRACKED_CLASS_METHODS;
+
   // garbage collection support
   virtual void oops_do(OopClosure* cl);
 
diff --git a/src/share/vm/oops/method.cpp b/src/share/vm/oops/method.cpp
--- a/src/share/vm/oops/method.cpp
+++ b/src/share/vm/oops/method.cpp
@@ -107,6 +107,8 @@
   }
 
   NOT_PRODUCT(set_compiled_invocation_count(0);)
+
+  EVTRACE_INIT_TRACKED_CLASS;
 }
 
 // Release Method*.  The nmethod will be gone when we get here because
diff --git a/src/share/vm/oops/method.hpp b/src/share/vm/oops/method.hpp
--- a/src/share/vm/oops/method.hpp
+++ b/src/share/vm/oops/method.hpp
@@ -37,6 +37,7 @@
 #include "oops/typeArrayOop.hpp"
 #include "utilities/accessFlags.hpp"
 #include "utilities/growableArray.hpp"
+#include "evtrace/traceMacros.hpp"
 
 // A Method* represents a Java method.
 //
@@ -117,6 +118,8 @@
                     _dont_inline      : 1,
                                       : 3;
 
+  EVTRACE_DECLARE_TRACKED_CLASS_FIELDS;
+
 #ifndef PRODUCT
   int               _compiled_invocation_count;  // Number of nmethod invocations so far (for perf. debugging)
 #endif
@@ -786,6 +789,9 @@
   void set_dont_inline(bool x)      {        _dont_inline = x;      }
   bool  is_hidden()                 { return _hidden;               }
   void set_hidden(bool x)           {        _hidden = x;           }
+
+  EVTRACE_DEFINE_TRACKED_CLASS_METHODS;
+
   ConstMethod::MethodType method_type() const {
       return _constMethod->method_type();
   }
diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp
--- a/src/share/vm/opto/macro.cpp
+++ b/src/share/vm/opto/macro.cpp
@@ -2388,7 +2388,7 @@
   // Optimize test; set region slot 2
   Node *slow_path = opt_bits_test(ctrl, region, 2, funlock, 0, 0);
 
-  CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box );
+  CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), OptoRuntime::complete_monitor_unlocking_wrapper()->entry_point(), "complete_monitor_unlocking_wrapper", slow_path, obj, box );
 
   extract_call_projections(call);
 
diff --git a/src/share/vm/opto/output.cpp b/src/share/vm/opto/output.cpp
--- a/src/share/vm/opto/output.cpp
+++ b/src/share/vm/opto/output.cpp
@@ -984,7 +984,8 @@
 class NonSafepointEmitter {
   Compile*  C;
   JVMState* _pending_jvms;
-  int       _pending_offset;
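+  // pc-offset range over which _pending_jvms has been observed unchanged; the
+  // non-safepoint debug record is emitted at the end of this range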
+  int       _pending_begin_offset;
+  int       _pending_end_offset;
 
   void emit_non_safepoint();
 
@@ -992,7 +993,8 @@
   NonSafepointEmitter(Compile* compile) {
     this->C = compile;
     _pending_jvms = NULL;
-    _pending_offset = 0;
+    _pending_begin_offset = 0;
+    _pending_end_offset = 0;
   }
 
   void observe_instruction(Node* n, int pc_offset) {
@@ -1003,17 +1005,22 @@
     if (_pending_jvms != NULL &&
         _pending_jvms->same_calls_as(nn->jvms())) {
       // Repeated JVMS?  Stretch it up here.
-      _pending_offset = pc_offset;
+      _pending_end_offset = pc_offset;
     } else {
-      if (_pending_jvms != NULL &&
-          _pending_offset < pc_offset) {
-        emit_non_safepoint();
+      if (_pending_jvms != NULL) {
+        if (_pending_end_offset < pc_offset) {
+          emit_non_safepoint();
+        } else if (_pending_begin_offset < pc_offset) {
+          // stretch as far as possible before the conflict
+          _pending_end_offset = pc_offset - 1;
+          emit_non_safepoint();
+        }
       }
       _pending_jvms = NULL;
       if (pc_offset > C->debug_info()->last_pc_offset()) {
         // This is the only way _pending_jvms can become non-NULL:
         _pending_jvms = nn->jvms();
-        _pending_offset = pc_offset;
+        _pending_begin_offset = _pending_end_offset = pc_offset;
       }
     }
   }
@@ -1022,7 +1029,7 @@
   void observe_safepoint(JVMState* jvms, int pc_offset) {
     if (_pending_jvms != NULL &&
         !_pending_jvms->same_calls_as(jvms) &&
-        _pending_offset < pc_offset) {
+        _pending_end_offset < pc_offset) {
       emit_non_safepoint();
     }
     _pending_jvms = NULL;
@@ -1038,7 +1045,7 @@
 
 void NonSafepointEmitter::emit_non_safepoint() {
   JVMState* youngest_jvms = _pending_jvms;
-  int       pc_offset     = _pending_offset;
+  int       pc_offset     = _pending_end_offset;
 
   // Clear it now:
   _pending_jvms = NULL;
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
--- a/src/share/vm/opto/runtime.cpp
+++ b/src/share/vm/opto/runtime.cpp
@@ -127,6 +127,8 @@
 
 ExceptionBlob* OptoRuntime::_exception_blob;
 
+RuntimeStub* OptoRuntime::_complete_monitor_unlocking_wrapper;
+
 // This should be called in an assertion at the start of OptoRuntime routines
 // which are entered from compiled code (all of them)
 #ifdef ASSERT
@@ -148,6 +150,8 @@
 
   generate_exception_blob();
 
+  generate_complete_monitor_unlocking_wrapper();
+
   // Note: tls: Means fetching the return oop out of the thread-local storage
   //
   //   variable/name                       type-function-gen              , runtime method                  ,fncy_jp, tls,save_args,retpc
diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp
--- a/src/share/vm/opto/runtime.hpp
+++ b/src/share/vm/opto/runtime.hpp
@@ -198,6 +198,9 @@
   static ExceptionBlob*       _exception_blob;
   static void generate_exception_blob();
 
+  static RuntimeStub*         _complete_monitor_unlocking_wrapper;
+  static void generate_complete_monitor_unlocking_wrapper();
+
   static void register_finalizer(oopDesc* obj, JavaThread* thread);
 
   // zaping dead locals, either from Java frames or from native frames
@@ -252,6 +255,8 @@
 
   static ExceptionBlob*    exception_blob()                      { return _exception_blob; }
 
+  static RuntimeStub*      complete_monitor_unlocking_wrapper()  { return _complete_monitor_unlocking_wrapper; }
+
   // Leaf routines helping with method data update
   static void profile_receiver_type_C(DataLayout* data, oopDesc* receiver);
 
diff --git a/src/share/vm/prims/forte.cpp b/src/share/vm/prims/forte.cpp
--- a/src/share/vm/prims/forte.cpp
+++ b/src/share/vm/prims/forte.cpp
@@ -179,7 +179,7 @@
 
   // This PcDesc is useful however we must adjust the frame's pc
   // so that the vframeStream lookups will use this same pc
-  fr->set_pc(pc_desc->real_pc(nm));
+  fr->set_pc(thread, pc_desc->real_pc(nm));
   return true;
 }
 
diff --git a/src/share/vm/prims/jni.cpp b/src/share/vm/prims/jni.cpp
--- a/src/share/vm/prims/jni.cpp
+++ b/src/share/vm/prims/jni.cpp
@@ -76,6 +76,7 @@
 #include "services/memTracker.hpp"
 #include "services/runtimeService.hpp"
 #include "trace/tracing.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
@@ -5208,6 +5209,10 @@
     // Tracks the time application was running before GC
     RuntimeService::record_application_start();
 
+    if (EnableEventTracing) {
+      TraceEvents::write_thread_start();
+    }
+
     // Notify JVMTI
     if (JvmtiExport::should_post_thread_life()) {
        JvmtiExport::post_thread_start(thread);
@@ -5423,6 +5428,10 @@
   java_lang_Thread::set_thread_status(thread->threadObj(),
               java_lang_Thread::RUNNABLE);
 
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_start();
+  }
+
   // Notify the debugger
   if (JvmtiExport::should_post_thread_life()) {
     JvmtiExport::post_thread_start(thread);
@@ -5517,6 +5526,10 @@
   // middel of a safepoint operation
   ThreadStateTransition::transition_from_native(thread, _thread_in_vm);
 
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_exit();
+  }
+
   // XXX: Note that JavaThread::exit() call below removes the guards on the
   // stack pages set up via enable_stack_{red,yellow}_zone() calls
   // above in jni_AttachCurrentThread. Unfortunately, while the setting
diff --git a/src/share/vm/prims/jvm.cpp b/src/share/vm/prims/jvm.cpp
--- a/src/share/vm/prims/jvm.cpp
+++ b/src/share/vm/prims/jvm.cpp
@@ -66,6 +66,7 @@
 #include "services/management.hpp"
 #include "services/threadService.hpp"
 #include "trace/tracing.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/copy.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/dtrace.hpp"
@@ -3345,6 +3346,9 @@
   ResourceMark rm(THREAD);
   oop java_thread = JNIHandles::resolve_non_null(jthread);
   JavaThread* thr = java_lang_Thread::thread(java_thread);
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_name_change(thr);
+  }
   // Thread naming only supported for the current thread, doesn't work for
   // target threads.
   if (Thread::current() == thr && !thr->has_attached_via_jni()) {
diff --git a/src/share/vm/prims/nativeLookup.cpp b/src/share/vm/prims/nativeLookup.cpp
--- a/src/share/vm/prims/nativeLookup.cpp
+++ b/src/share/vm/prims/nativeLookup.cpp
@@ -126,6 +126,7 @@
   void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
   void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
   void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
+  void JNICALL JVM_RegisterEventTracingMethods(JNIEnv *env, jclass evtclass);
 }
 
 #define CC (char*)  /* cast a literal from (const char*) */
@@ -136,6 +137,7 @@
   { CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
   { CC"Java_sun_misc_Perf_registerNatives",                        NULL, FN_PTR(JVM_RegisterPerfMethods)         },
   { CC"Java_sun_hotspot_WhiteBox_registerNatives",                 NULL, FN_PTR(JVM_RegisterWhiteBoxMethods)     },
+  { CC"Java_sun_evtracing_EventTracing_registerNatives",           NULL, FN_PTR(JVM_RegisterEventTracingMethods) },
 };
 
 static address lookup_special_native(char* jni_name) {
diff --git a/src/share/vm/prims/unsafe.cpp b/src/share/vm/prims/unsafe.cpp
--- a/src/share/vm/prims/unsafe.cpp
+++ b/src/share/vm/prims/unsafe.cpp
@@ -1252,9 +1252,12 @@
 UNSAFE_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread))
   UnsafeWrapper("Unsafe_Unpark");
   Parker* p = NULL;
+  oop java_thread = NULL;
+  JavaThread* thr = NULL;
   if (jthread != NULL) {
-    oop java_thread = JNIHandles::resolve_non_null(jthread);
+    java_thread = JNIHandles::resolve_non_null(jthread);
     if (java_thread != NULL) {
+      thr = java_lang_Thread::thread(java_thread);
       jlong lp = java_lang_Thread::park_event(java_thread);
       if (lp != 0) {
         // This cast is OK even though the jlong might have been read
@@ -1266,7 +1269,7 @@
         MutexLocker mu(Threads_lock);
         java_thread = JNIHandles::resolve_non_null(jthread);
         if (java_thread != NULL) {
-          JavaThread* thr = java_lang_Thread::thread(java_thread);
+          thr = java_lang_Thread::thread(java_thread);
           if (thr != NULL) {
             p = thr->parker();
             if (p != NULL) { // Bind to Java thread for next time.
@@ -1277,14 +1280,14 @@
       }
     }
   }
-  if (p != NULL) {
+  if (p != NULL && thr != NULL) {
 #ifndef USDT2
     HS_DTRACE_PROBE1(hotspot, thread__unpark, p);
 #else /* USDT2 */
     HOTSPOT_THREAD_UNPARK(
                           (uintptr_t) p);
 #endif /* USDT2 */
-    p->unpark();
+    p->unpark(thr);
   }
 UNSAFE_END
 
diff --git a/src/share/vm/runtime/deoptimization.cpp b/src/share/vm/runtime/deoptimization.cpp
--- a/src/share/vm/runtime/deoptimization.cpp
+++ b/src/share/vm/runtime/deoptimization.cpp
@@ -484,7 +484,7 @@
   // since the frame will "magically" show the original pc before the deopt
   // and we'd undo the deopt.
 
-  frame_pcs[0] = deopt_sender.raw_pc();
+  frame_pcs[0] = deopt_sender.raw_pc(thread);
 
 #ifndef SHARK
   assert(CodeCache::find_blob_unsafe(frame_pcs[0]) != NULL, "bad pc");
diff --git a/src/share/vm/runtime/frame.cpp b/src/share/vm/runtime/frame.cpp
--- a/src/share/vm/runtime/frame.cpp
+++ b/src/share/vm/runtime/frame.cpp
@@ -144,28 +144,40 @@
 // hardware would want to see in the native frame. The only user (at this point)
 // is deoptimization. It likely no one else should ever use it.
 
-address frame::raw_pc() const {
-  if (is_deoptimized_frame()) {
-    nmethod* nm = cb()->as_nmethod_or_null();
-    if (nm->is_method_handle_return(pc()))
-      return nm->deopt_mh_handler_begin() - pc_return_offset;
-    else
-      return nm->deopt_handler_begin() - pc_return_offset;
+address frame::raw_pc(Thread* thread) const {
+  // On Intel the return_address is always the word on the stack
+  address ret_pc = *(address*)(sp() - 1);
+  if (SharedRuntime::is_memento_stack_trace_return_handler(ret_pc)) {
+    assert(thread->memento_original_return_address() != NULL, "memento original return address must be set if patched");
+    return ret_pc;
   } else {
-    return (pc() - pc_return_offset);
+    if (is_deoptimized_frame()) {
+      nmethod* nm = cb()->as_nmethod_or_null();
+      if (nm->is_method_handle_return(pc()))
+        return nm->deopt_mh_handler_begin() - pc_return_offset;
+      else
+        return nm->deopt_handler_begin() - pc_return_offset;
+    } else {
+      return (pc() - pc_return_offset);
+    }
   }
 }
 
 // Change the pc in a frame object. This does not change the actual pc in
 // actual frame. To do that use patch_pc.
 //
-void frame::set_pc(address   newpc ) {
+void frame::set_pc(Thread* thread, address newpc) {
 #ifdef ASSERT
   if (_cb != NULL && _cb->is_nmethod()) {
     assert(!((nmethod*)_cb)->is_deopt_pc(_pc), "invariant violation");
   }
 #endif // ASSERT
 
+  if (SharedRuntime::is_memento_stack_trace_return_handler(newpc)) {
+    newpc = thread->memento_original_return_address();
+    assert(newpc != NULL, "memento original return address must be set if patched");
+  }
+
   // Unsafe to use the is_deoptimzed tester after changing pc
   _deopt_state = unknown;
   _pc = newpc;
diff --git a/src/share/vm/runtime/frame.hpp b/src/share/vm/runtime/frame.hpp
--- a/src/share/vm/runtime/frame.hpp
+++ b/src/share/vm/runtime/frame.hpp
@@ -102,9 +102,9 @@
   // that happens for deoptimized frames. In addition it makes the value the
   // hardware would want to see in the native frame. The only user (at this point)
   // is deoptimization. It likely no one else should ever use it.
-  address raw_pc() const;
+  address raw_pc(Thread* thread) const;
 
-  void set_pc( address   newpc );
+  void set_pc(Thread* thread, address newpc);
 
   intptr_t* sp() const           { return _sp; }
   void set_sp( intptr_t* newsp ) { _sp = newsp; }
@@ -115,6 +115,10 @@
   // patching operations
   void   patch_pc(Thread* thread, address pc);
 
+  address* raw_sender_pc_addr();
+  void memento_mark(Thread* thread);
+  bool is_memento_marked(Thread* thread);
+
   // Every frame needs to return a unique id which distinguishes it from all other frames.
   // For sparc and ia32 use sp. ia64 can have memory frames that are empty so multiple frames
   // will have identical sp values. For ia64 the bsp (fp) value will serve. No real frame
diff --git a/src/share/vm/runtime/globals.hpp b/src/share/vm/runtime/globals.hpp
--- a/src/share/vm/runtime/globals.hpp
+++ b/src/share/vm/runtime/globals.hpp
@@ -3943,7 +3943,44 @@
           "Enable event-based tracing")                                     \
                                                                             \
   product(bool, UseLockedTracing, false,                                    \
-          "Use locked-tracing when doing event-based tracing")
+          "Use locked-tracing when doing event-based tracing")              \
+                                                                            \
+  product(bool, EnableEventTracing, false,                                  \
+          "Enable event tracing")                                           \
+                                                                            \
+  product(bool, EnableEventTracingParkEvents, true,                         \
+          "Enable tracing of park events")                                  \
+                                                                            \
+  product(bool, EnableEventTracingDiagnostics, false,                       \
+          "Enable extra diagnostic counters and timers for event tracing")  \
+                                                                            \
+  product(ccstr, EventTracingConfiguration, NULL,                           \
+          "Configuration string for event tracing")                         \
+                                                                            \
+  product(bool, EnableEventTracingBufferReuse, false,                       \
+          "Recycle allocated event tracing buffers")                        \
+                                                                            \
+  product(uintx, EventTracingPreallocatedBuffers, 0,                        \
+          "Number of preallocated event tracing buffers")                   \
+                                                                            \
+  product(uintx, EventTracingBufferCapacity, 16384,                         \
+          "Capacity (in bytes) of event tracing buffers")                   \
+                                                                            \
+  product(bool, EnableEventTracingRandomizedBufferCapacity, true,           \
+          "Enable randomization of event tracing buffer sizes (+/- 50%)")   \
+                                                                            \
+  product(uintx, EventTracingStackDepthLimit, 128,                          \
+          "Maximum number of raw (no inlining) stack frames to walk")       \
+                                                                            \
+  product(intx, EventTracingStackMementoFrame, 1,                           \
+          "Patched raw frame to detect stack changes (top = 0, off = -1)")  \
+                                                                            \
+  product(bool, EnableEventTracingStackTraces, true,                        \
+          "Enable stack traces for trace events")                           \
+                                                                            \
+  product(bool, EventTracingStrictMonitorEventOrder, false,                 \
+          "Ensure order of monitor events at the cost of extra events")
+
 
 /*
  *  Macros for factoring of globals
diff --git a/src/share/vm/runtime/java.cpp b/src/share/vm/runtime/java.cpp
--- a/src/share/vm/runtime/java.cpp
+++ b/src/share/vm/runtime/java.cpp
@@ -59,6 +59,8 @@
 #include "runtime/vm_operations.hpp"
 #include "services/memTracker.hpp"
 #include "trace/tracing.hpp"
+#include "evtrace/traceEvents.hpp"
+#include "evtrace/traceManager.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/globalDefinitions.hpp"
 #include "utilities/histogram.hpp"
@@ -486,6 +488,13 @@
     os::infinite_sleep();
   }
 
+  if (EnableEventTracing) {
+    HandleMark hm;
+    EXCEPTION_MARK;
+    TraceManager::finish_and_destroy(CHECK);
+  }
+
   // Terminate watcher thread - must before disenrolling any periodic task
   if (PeriodicTask::num_tasks() > 0)
     WatcherThread::stop();
diff --git a/src/share/vm/runtime/objectMonitor.cpp b/src/share/vm/runtime/objectMonitor.cpp
--- a/src/share/vm/runtime/objectMonitor.cpp
+++ b/src/share/vm/runtime/objectMonitor.cpp
@@ -39,6 +39,7 @@
 #include "services/threadService.hpp"
 #include "trace/tracing.hpp"
 #include "trace/traceMacros.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/preserveException.hpp"
@@ -314,7 +315,7 @@
   }
 }
 
-void ATTR ObjectMonitor::enter(TRAPS) {
+void ATTR ObjectMonitor::enter(int after_wait, TRAPS) {
   // The following code is ordered to check the most common cases first
   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
   Thread * const Self = THREAD ;
@@ -377,9 +378,14 @@
 
   EventJavaMonitorEnter event;
 
+  int trace_flags = 0;
   { // Change java thread status to indicate blocked on monitor enter.
     JavaThreadBlockedOnMonitorEnterState jtbmes(jt, this);
 
+    if (EnableEventTracing) {
+      TraceEvents::write_monitor_contended_enter(this, (TraceTypes::monitor_enter_wait) after_wait);
+    }
+
     DTRACE_MONITOR_PROBE(contended__enter, this, object(), jt);
     if (JvmtiExport::should_post_monitor_contended_enter()) {
       JvmtiExport::post_monitor_contended_enter(jt, this);
@@ -402,7 +408,7 @@
       // cleared by handle_special_suspend_equivalent_condition()
       // or java_suspend_self()
 
-      EnterI (THREAD) ;
+      trace_flags |= EnterI (THREAD) ;
 
       if (!ExitSuspendEquivalent(jt)) break ;
 
@@ -452,6 +458,10 @@
   // yet to acquire the lock.  While spinning that thread could
   // spinning we could increment JVMStat counters, etc.
 
+  if (EnableEventTracing) {
+    TraceEvents::write_monitor_contended_entered(this, (TraceTypes::monitor_entered_flags) trace_flags);
+  }
+
   DTRACE_MONITOR_PROBE(contended__entered, this, object(), jt);
   if (JvmtiExport::should_post_monitor_contended_entered()) {
     JvmtiExport::post_monitor_contended_entered(jt, this);
@@ -498,7 +508,9 @@
    }
 }
 
-void ATTR ObjectMonitor::EnterI (TRAPS) {
+int ATTR ObjectMonitor::EnterI (TRAPS) {
+    int trace_flags = 0;
+
     Thread * Self = THREAD ;
     assert (Self->is_Java_thread(), "invariant") ;
     assert (((JavaThread *) Self)->thread_state() == _thread_blocked   , "invariant") ;
@@ -508,7 +520,7 @@
         assert (_succ != Self              , "invariant") ;
         assert (_owner == Self             , "invariant") ;
         assert (_Responsible != Self       , "invariant") ;
-        return ;
+        return trace_flags;
     }
 
     DeferredInitialize () ;
@@ -524,7 +536,7 @@
         assert (_owner == Self        , "invariant") ;
         assert (_succ != Self         , "invariant") ;
         assert (_Responsible != Self  , "invariant") ;
-        return ;
+        return trace_flags;
     }
 
     // The Spin failed -- Enqueue and park the thread ...
@@ -562,10 +574,12 @@
             assert (_succ != Self         , "invariant") ;
             assert (_owner == Self        , "invariant") ;
             assert (_Responsible != Self  , "invariant") ;
-            return ;
+            return trace_flags;
         }
     }
 
+    trace_flags |= TraceTypes::entered_queued;
+
     // Check for cxq|EntryList edge transition to non-null.  This indicates
     // the onset of contention.  While contention persists exiting threads
     // will use a ST:MEMBAR:LD 1-1 exit protocol.  When contention abates exit
@@ -631,6 +645,8 @@
             Self->_ParkEvent->park() ;
         }
 
+        trace_flags |= TraceTypes::entered_parked;
+
         if (TryLock(Self) > 0) break ;
 
         // The lock is still contested.
@@ -736,7 +752,7 @@
     if (SyncFlags & 8) {
        OrderAccess::fence() ;
     }
-    return ;
+    return trace_flags;
 }
 
 // ReenterI() is a specialized inline form of the latter half of the
@@ -952,7 +968,7 @@
 // Both impinge on OS scalability.  Given that, at most one thread parked on
 // a monitor will use a timer.
 
-void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
+void ATTR ObjectMonitor::exit(intptr_t *exit_stack_id_for_wait, bool not_suspended, TRAPS) {
    Thread * Self = THREAD ;
    if (THREAD != _owner) {
      if (THREAD->is_lock_owned((address) _owner)) {
@@ -997,191 +1013,109 @@
    }
 #endif
 
+   TraceEventMonitorContendedExited event(this);
+   if (exit_stack_id_for_wait != NULL) {
+     // This is a temporary exit for Object.wait().
+     // We don't want to use the current stack trace as the lock site, so if we
+     // end up writing the event, we allocate a stack id that we resolve later
+     // when the monitor is really exited. When there are multiple waits, we
+     // reuse the first preallocated stack id.
+     event.set_use_or_preallocate_stack_id_at((TraceTypes::stack_id *) exit_stack_id_for_wait);
+     event.set_resolve_stack(false);
+   } else {
+     // true exit
+     event.set_resolve_stack(true);
+     if (_trace_exit_stack != 0) {
+       event.set_use_stack_id(_trace_exit_stack);
+       event.enable(); // always write the exit event to resolve the stack
+     }
+   }
+   if ((intptr_t(_EntryList) | intptr_t(_cxq)) != 0) {
+      // there are queued threads -- we are definitely writing a trace event
+      event.enable();
+   }
+
+   _trace_exit_stack = 0;
+
    for (;;) {
       assert (THREAD == _owner, "invariant") ;
 
+      //
+      // NOTE: we have removed all code paths for ExitPolicy != 0 and QMode != 0
+      //       knob values for simplicity of event tracing.
+      //
 
-      if (Knob_ExitPolicy == 0) {
-         // release semantics: prior loads and stores from within the critical section
-         // must not float (reorder) past the following store that drops the lock.
-         // On SPARC that requires MEMBAR #loadstore|#storestore.
-         // But of course in TSO #loadstore|#storestore is not required.
-         // I'd like to write one of the following:
-         // A.  OrderAccess::release() ; _owner = NULL
-         // B.  OrderAccess::loadstore(); OrderAccess::storestore(); _owner = NULL;
-         // Unfortunately OrderAccess::release() and OrderAccess::loadstore() both
-         // store into a _dummy variable.  That store is not needed, but can result
-         // in massive wasteful coherency traffic on classic SMP systems.
-         // Instead, I use release_store(), which is implemented as just a simple
-         // ST on x64, x86 and SPARC.
-         OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
-         OrderAccess::storeload() ;                         // See if we need to wake a successor
-         if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
-            TEVENT (Inflated exit - simple egress) ;
-            return ;
-         }
-         TEVENT (Inflated exit - complex egress) ;
+      // release semantics: prior loads and stores from within the critical section
+      // must not float (reorder) past the following store that drops the lock.
+      // On SPARC that requires MEMBAR #loadstore|#storestore.
+      // But of course in TSO #loadstore|#storestore is not required.
+      // I'd like to write one of the following:
+      // A.  OrderAccess::release() ; _owner = NULL
+      // B.  OrderAccess::loadstore(); OrderAccess::storestore(); _owner = NULL;
+      // Unfortunately OrderAccess::release() and OrderAccess::loadstore() both
+      // store into a _dummy variable.  That store is not needed, but can result
+      // in massive wasteful coherency traffic on classic SMP systems.
+      // Instead, I use release_store(), which is implemented as just a simple
+      // ST on x64, x86 and SPARC.
+      OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
+      OrderAccess::storeload() ;                         // See if we need to wake a successor
+      bool queues_empty = ((intptr_t(_EntryList) | intptr_t(_cxq)) == 0);
+      bool have_succ = (_succ != NULL);
+      if (!queues_empty) {
+          // some thread might have entered itself on _cxq in the meantime
+          event.enable();
+      }
+      if (queues_empty || have_succ) {
+          TEVENT (Inflated exit - simple egress) ;
+          return ;
+      }
+      TEVENT (Inflated exit - complex egress) ;
 
-         // Normally the exiting thread is responsible for ensuring succession,
-         // but if other successors are ready or other entering threads are spinning
-         // then this thread can simply store NULL into _owner and exit without
-         // waking a successor.  The existence of spinners or ready successors
-         // guarantees proper succession (liveness).  Responsibility passes to the
-         // ready or running successors.  The exiting thread delegates the duty.
-         // More precisely, if a successor already exists this thread is absolved
-         // of the responsibility of waking (unparking) one.
-         //
-         // The _succ variable is critical to reducing futile wakeup frequency.
-         // _succ identifies the "heir presumptive" thread that has been made
-         // ready (unparked) but that has not yet run.  We need only one such
-         // successor thread to guarantee progress.
-         // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
-         // section 3.3 "Futile Wakeup Throttling" for details.
-         //
-         // Note that spinners in Enter() also set _succ non-null.
-         // In the current implementation spinners opportunistically set
-         // _succ so that exiting threads might avoid waking a successor.
-         // Another less appealing alternative would be for the exiting thread
-         // to drop the lock and then spin briefly to see if a spinner managed
-         // to acquire the lock.  If so, the exiting thread could exit
-         // immediately without waking a successor, otherwise the exiting
-         // thread would need to dequeue and wake a successor.
-         // (Note that we'd need to make the post-drop spin short, but no
-         // shorter than the worst-case round-trip cache-line migration time.
-         // The dropped lock needs to become visible to the spinner, and then
-         // the acquisition of the lock by the spinner must become visible to
-         // the exiting thread).
-         //
+      // Normally the exiting thread is responsible for ensuring succession,
+      // but if other successors are ready or other entering threads are spinning
+      // then this thread can simply store NULL into _owner and exit without
+      // waking a successor.  The existence of spinners or ready successors
+      // guarantees proper succession (liveness).  Responsibility passes to the
+      // ready or running successors.  The exiting thread delegates the duty.
+      // More precisely, if a successor already exists this thread is absolved
+      // of the responsibility of waking (unparking) one.
+      //
+      // The _succ variable is critical to reducing futile wakeup frequency.
+      // _succ identifies the "heir presumptive" thread that has been made
+      // ready (unparked) but that has not yet run.  We need only one such
+      // successor thread to guarantee progress.
+      // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
+      // section 3.3 "Futile Wakeup Throttling" for details.
+      //
+      // Note that spinners in Enter() also set _succ non-null.
+      // In the current implementation spinners opportunistically set
+      // _succ so that exiting threads might avoid waking a successor.
+      // Another less appealing alternative would be for the exiting thread
+      // to drop the lock and then spin briefly to see if a spinner managed
+      // to acquire the lock.  If so, the exiting thread could exit
+      // immediately without waking a successor, otherwise the exiting
+      // thread would need to dequeue and wake a successor.
+      // (Note that we'd need to make the post-drop spin short, but no
+      // shorter than the worst-case round-trip cache-line migration time.
+      // The dropped lock needs to become visible to the spinner, and then
+      // the acquisition of the lock by the spinner must become visible to
+      // the exiting thread).
+      //
 
-         // It appears that an heir-presumptive (successor) must be made ready.
-         // Only the current lock owner can manipulate the EntryList or
-         // drain _cxq, so we need to reacquire the lock.  If we fail
-         // to reacquire the lock the responsibility for ensuring succession
-         // falls to the new owner.
-         //
-         if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
-            return ;
-         }
-         TEVENT (Exit - Reacquired) ;
-      } else {
-         if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
-            OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
-            OrderAccess::storeload() ;
-            // Ratify the previously observed values.
-            if (_cxq == NULL || _succ != NULL) {
-                TEVENT (Inflated exit - simple egress) ;
-                return ;
-            }
-
-            // inopportune interleaving -- the exiting thread (this thread)
-            // in the fast-exit path raced an entering thread in the slow-enter
-            // path.
-            // We have two choices:
-            // A.  Try to reacquire the lock.
-            //     If the CAS() fails return immediately, otherwise
-            //     we either restart/rerun the exit operation, or simply
-            //     fall-through into the code below which wakes a successor.
-            // B.  If the elements forming the EntryList|cxq are TSM
-            //     we could simply unpark() the lead thread and return
-            //     without having set _succ.
-            if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
-               TEVENT (Inflated exit - reacquired succeeded) ;
-               return ;
-            }
-            TEVENT (Inflated exit - reacquired failed) ;
-         } else {
-            TEVENT (Inflated exit - complex egress) ;
-         }
+      // It appears that an heir-presumptive (successor) must be made ready.
+      // Only the current lock owner can manipulate the EntryList or
+      // drain _cxq, so we need to reacquire the lock.  If we fail
+      // to reacquire the lock the responsibility for ensuring succession
+      // falls to the new owner.
+      //
+      if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
+          return ;
       }
+      TEVENT (Exit - Reacquired) ;
 
       guarantee (_owner == THREAD, "invariant") ;
 
       ObjectWaiter * w = NULL ;
-      int QMode = Knob_QMode ;
-
-      if (QMode == 2 && _cxq != NULL) {
-          // QMode == 2 : cxq has precedence over EntryList.
-          // Try to directly wake a successor from the cxq.
-          // If successful, the successor will need to unlink itself from cxq.
-          w = _cxq ;
-          assert (w != NULL, "invariant") ;
-          assert (w->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-          ExitEpilog (Self, w) ;
-          return ;
-      }
-
-      if (QMode == 3 && _cxq != NULL) {
-          // Aggressively drain cxq into EntryList at the first opportunity.
-          // This policy ensure that recently-run threads live at the head of EntryList.
-          // Drain _cxq into EntryList - bulk transfer.
-          // First, detach _cxq.
-          // The following loop is tantamount to: w = swap (&cxq, NULL)
-          w = _cxq ;
-          for (;;) {
-             assert (w != NULL, "Invariant") ;
-             ObjectWaiter * u = (ObjectWaiter *) Atomic::cmpxchg_ptr (NULL, &_cxq, w) ;
-             if (u == w) break ;
-             w = u ;
-          }
-          assert (w != NULL              , "invariant") ;
-
-          ObjectWaiter * q = NULL ;
-          ObjectWaiter * p ;
-          for (p = w ; p != NULL ; p = p->_next) {
-              guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-              p->TState = ObjectWaiter::TS_ENTER ;
-              p->_prev = q ;
-              q = p ;
-          }
-
-          // Append the RATs to the EntryList
-          // TODO: organize EntryList as a CDLL so we can locate the tail in constant-time.
-          ObjectWaiter * Tail ;
-          for (Tail = _EntryList ; Tail != NULL && Tail->_next != NULL ; Tail = Tail->_next) ;
-          if (Tail == NULL) {
-              _EntryList = w ;
-          } else {
-              Tail->_next = w ;
-              w->_prev = Tail ;
-          }
-
-          // Fall thru into code that tries to wake a successor from EntryList
-      }
-
-      if (QMode == 4 && _cxq != NULL) {
-          // Aggressively drain cxq into EntryList at the first opportunity.
-          // This policy ensure that recently-run threads live at the head of EntryList.
-
-          // Drain _cxq into EntryList - bulk transfer.
-          // First, detach _cxq.
-          // The following loop is tantamount to: w = swap (&cxq, NULL)
-          w = _cxq ;
-          for (;;) {
-             assert (w != NULL, "Invariant") ;
-             ObjectWaiter * u = (ObjectWaiter *) Atomic::cmpxchg_ptr (NULL, &_cxq, w) ;
-             if (u == w) break ;
-             w = u ;
-          }
-          assert (w != NULL              , "invariant") ;
-
-          ObjectWaiter * q = NULL ;
-          ObjectWaiter * p ;
-          for (p = w ; p != NULL ; p = p->_next) {
-              guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-              p->TState = ObjectWaiter::TS_ENTER ;
-              p->_prev = q ;
-              q = p ;
-          }
-
-          // Prepend the RATs to the EntryList
-          if (_EntryList != NULL) {
-              q->_next = _EntryList ;
-              _EntryList->_prev = q ;
-          }
-          _EntryList = w ;
-
-          // Fall thru into code that tries to wake a successor from EntryList
-      }
 
       w = _EntryList  ;
       if (w != NULL) {
@@ -1229,34 +1163,14 @@
       // TODO-FIXME: consider changing EntryList from a DLL to a CDLL so
       // we have faster access to the tail.
 
-      if (QMode == 1) {
-         // QMode == 1 : drain cxq to EntryList, reversing order
-         // We also reverse the order of the list.
-         ObjectWaiter * s = NULL ;
-         ObjectWaiter * t = w ;
-         ObjectWaiter * u = NULL ;
-         while (t != NULL) {
-             guarantee (t->TState == ObjectWaiter::TS_CXQ, "invariant") ;
-             t->TState = ObjectWaiter::TS_ENTER ;
-             u = t->_next ;
-             t->_prev = u ;
-             t->_next = s ;
-             s = t;
-             t = u ;
-         }
-         _EntryList  = s ;
-         assert (s != NULL, "invariant") ;
-      } else {
-         // QMode == 0 or QMode == 2
-         _EntryList = w ;
-         ObjectWaiter * q = NULL ;
-         ObjectWaiter * p ;
-         for (p = w ; p != NULL ; p = p->_next) {
-             guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-             p->TState = ObjectWaiter::TS_ENTER ;
-             p->_prev = q ;
-             q = p ;
-         }
+      _EntryList = w ;
+      ObjectWaiter * q = NULL ;
+      ObjectWaiter * p ;
+      for (p = w ; p != NULL ; p = p->_next) {
+          guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
+          p->TState = ObjectWaiter::TS_ENTER ;
+          p->_prev = q ;
+          q = p ;
       }
 
       // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = NULL
@@ -1367,7 +1281,7 @@
 // The _owner field is not always the Thread addr even with an
 // inflated monitor, e.g. the monitor can be inflated by a non-owning
 // thread due to contention.
-intptr_t ObjectMonitor::complete_exit(TRAPS) {
+void ObjectMonitor::complete_exit(intptr_t *saved_recursions, intptr_t *saved_trace_exit_stack, TRAPS) {
    Thread * const Self = THREAD;
    assert(Self->is_Java_thread(), "Must be Java thread!");
    JavaThread *jt = (JavaThread *)THREAD;
@@ -1384,16 +1298,17 @@
    }
 
    guarantee(Self == _owner, "complete_exit not owner");
-   intptr_t save = _recursions; // record the old recursion count
-   _recursions = 0;        // set the recursion level to be 0
-   exit (true, Self) ;           // exit the monitor
+   // record old recursion level and exit stack
+   if (saved_recursions != NULL) *saved_recursions = _recursions;
+   if (saved_trace_exit_stack != NULL) *saved_trace_exit_stack = _trace_exit_stack;
+   _recursions = 0;
+   exit(saved_trace_exit_stack, true, Self);
    guarantee (_owner != Self, "invariant");
-   return save;
 }
 
 // reenter() enters a lock and sets recursion count
 // complete_exit/reenter operate as a wait without waiting
-void ObjectMonitor::reenter(intptr_t recursions, TRAPS) {
+void ObjectMonitor::reenter(intptr_t saved_recursions, intptr_t saved_trace_exit_stack, TRAPS) {
    Thread * const Self = THREAD;
    assert(Self->is_Java_thread(), "Must be Java thread!");
    JavaThread *jt = (JavaThread *)THREAD;
@@ -1401,8 +1316,8 @@
    guarantee(_owner != Self, "reenter already owner");
    enter (THREAD);       // enter the monitor
    guarantee (_recursions == 0, "reenter recursion");
-   _recursions = recursions;
-   return;
+   _recursions = saved_recursions;
+   _trace_exit_stack = saved_trace_exit_stack;
 }
 
 
@@ -1523,10 +1438,11 @@
    if ((SyncFlags & 4) == 0) {
       _Responsible = NULL ;
    }
-   intptr_t save = _recursions; // record the old recursion count
+   intptr_t saved_recursions = _recursions; // record the old recursion count
+   intptr_t saved_trace_exit_stack = _trace_exit_stack;
    _waiters++;                  // increment the number of waiters
    _recursions = 0;             // set the recursion level to be 1
-   exit (true, Self) ;                    // exit the monitor
+   exit(&saved_trace_exit_stack, true, Self); // exit the monitor; handles the saved exit stack
    guarantee (_owner != Self, "invariant") ;
 
    // The thread is on the WaitSet list - now park() it.
@@ -1643,7 +1559,13 @@
      assert (_owner != Self, "invariant") ;
      ObjectWaiter::TStates v = node.TState ;
      if (v == ObjectWaiter::TS_RUN) {
-         enter (Self) ;
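+         // record why the wait ended so the subsequent monitor enter event
+         // can distinguish notify, timeout, and other wakeups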
+         int after_wait = TraceTypes::enter_after_wait_other;
+         if (node._notified) {
+           after_wait = TraceTypes::enter_after_wait_notify;
+         } else if (ret == OS_TIMEOUT) {
+           after_wait = TraceTypes::enter_after_wait_timeout;
+         }
+         enter (after_wait, Self) ;
      } else {
          guarantee (v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant") ;
          ReenterI (Self, &node) ;
@@ -1662,7 +1584,9 @@
    jt->set_current_waiting_monitor(NULL);
 
    guarantee (_recursions == 0, "invariant") ;
-   _recursions = save;     // restore the old recursion count
+   // restore the saved recursion count and exit stack
+   _recursions = saved_recursions;
+   _trace_exit_stack = saved_trace_exit_stack;
    _waiters--;             // decrement the number of waiters
 
    // Verify a few postconditions
@@ -2527,6 +2451,9 @@
   SETKNOB(FastHSSEC) ;
   #undef SETKNOB
 
+  guarantee(Knob_ExitPolicy == 0, "Sorry, event tracing does not support non-default ExitPolicy");
+  guarantee(Knob_QMode == 0,      "Sorry, event tracing does not support non-default QMode");
+
   if (os::is_MP()) {
      BackOffMask = (1 << Knob_SpinBackOff) - 1 ;
      if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ;
diff --git a/src/share/vm/runtime/objectMonitor.hpp b/src/share/vm/runtime/objectMonitor.hpp
--- a/src/share/vm/runtime/objectMonitor.hpp
+++ b/src/share/vm/runtime/objectMonitor.hpp
@@ -99,6 +99,7 @@
   static int WaitSet_offset_in_bytes()     { return offset_of(ObjectMonitor, _WaitSet) ;   }
   static int Responsible_offset_in_bytes() { return offset_of(ObjectMonitor, _Responsible);}
   static int Spinner_offset_in_bytes()     { return offset_of(ObjectMonitor, _Spinner);    }
+  static int trace_exit_stack_offset_in_bytes() { return offset_of(ObjectMonitor, _trace_exit_stack); }
 
  public:
   // Eventaully we'll make provisions for multiple callbacks, but
@@ -130,6 +131,8 @@
   intptr_t  contentions() const ;
   intptr_t  recursions() const                                         { return _recursions; }
 
+  intptr_t  next_trace_seq()  { return Atomic::add_ptr(1, &_trace_current_seq); }
+
   // JVM/DI GetMonitorInfo() needs this
   ObjectWaiter* first_waiter()                                         { return _WaitSet; }
   ObjectWaiter* next_waiter(ObjectWaiter* o)                           { return o->_next; }
@@ -155,6 +158,8 @@
     _SpinClock    = 0 ;
     OwnerIsThread = 0 ;
     _previous_owner_tid = 0;
+    _trace_current_seq = 0;
+    _trace_exit_stack = 0;
   }
 
   ~ObjectMonitor() {
@@ -178,6 +183,8 @@
     _SpinFreq      = 0 ;
     _SpinClock     = 0 ;
     OwnerIsThread  = 0 ;
+    // tracing: do not reset _trace_current_seq, we also use it
+    // to detect pending events before a monitor is recycled
   }
 
 public:
@@ -202,8 +209,8 @@
   void      notifyAll(TRAPS);
 
 // Use the following at your own risk
-  intptr_t  complete_exit(TRAPS);
-  void      reenter(intptr_t recursions, TRAPS);
+  void      complete_exit(intptr_t *saved_recursions, intptr_t *saved_trace_exit_stack, TRAPS);
+  void      reenter(intptr_t saved_recursions, intptr_t saved_trace_exit_stack, TRAPS);
 
  private:
   void      AddWaiter (ObjectWaiter * waiter) ;
@@ -211,7 +218,9 @@
 
   ObjectWaiter * DequeueWaiter () ;
   void      DequeueSpecificWaiter (ObjectWaiter * waiter) ;
-  void      EnterI (TRAPS) ;
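+  // tracing-aware variants; the original public enter()/exit() signatures
+  // forward to these (see objectMonitor.inline.hpp)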
+  void      enter (int after_wait, TRAPS);
+  void      exit(intptr_t *exit_stack_id_for_wait, bool not_suspended, TRAPS);
+  int       EnterI (TRAPS) ;
   void      ReenterI (Thread * Self, ObjectWaiter * SelfNode) ;
   void      UnlinkAfterAcquire (Thread * Self, ObjectWaiter * SelfNode) ;
   int       TryLock (Thread * Self) ;
@@ -275,6 +284,11 @@
   volatile intptr_t  _count;        // reference count to prevent reclaimation/deflation
                                     // at stop-the-world time.  See deflate_idle_monitors().
                                     // _count is approximately |_WaitSet| + |_EntryList|
+
+  // TODO: this class appears to be sensitive to false sharing;
+  //       there might be a better location to place these fields.
+  volatile intptr_t  _trace_current_seq;
+  volatile intptr_t  _trace_exit_stack;
  protected:
   volatile intptr_t  _waiters;      // number of waiting threads
  private:
diff --git a/src/share/vm/runtime/objectMonitor.inline.hpp b/src/share/vm/runtime/objectMonitor.inline.hpp
--- a/src/share/vm/runtime/objectMonitor.inline.hpp
+++ b/src/share/vm/runtime/objectMonitor.inline.hpp
@@ -109,5 +109,12 @@
   _recursions = 0;
 }
 
+inline void ObjectMonitor::enter(TRAPS) {
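+  // forward to the tracing-aware overload (plain entry, not after a wait)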
+  enter(0, THREAD);
+}
+
+inline void ObjectMonitor::exit(bool not_suspended, TRAPS) {
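+  // forward to the tracing-aware overload; no exit stack id is recorded here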
+  exit(NULL, not_suspended, THREAD);
+}
 
 #endif // SHARE_VM_RUNTIME_OBJECTMONITOR_INLINE_HPP
diff --git a/src/share/vm/runtime/os.hpp b/src/share/vm/runtime/os.hpp
--- a/src/share/vm/runtime/os.hpp
+++ b/src/share/vm/runtime/os.hpp
@@ -476,7 +476,7 @@
   static int pd_self_suspend_thread(Thread* thread);
 
   static ExtendedPC fetch_frame_from_context(void* ucVoid, intptr_t** sp, intptr_t** fp);
-  static frame      fetch_frame_from_context(void* ucVoid);
+  static frame      fetch_frame_from_context(Thread* thread, void* ucVoid);
 
   static ExtendedPC get_thread_pc(Thread *thread);
   static void breakpoint();
@@ -617,7 +617,7 @@
   // only walk stack if %ebp is used as frame pointer; on ia64, it's not
   // possible to walk C stack without having the unwind table.
   static bool is_first_C_frame(frame *fr);
-  static frame get_sender_for_C_frame(frame *fr);
+  static frame get_sender_for_C_frame(Thread* thread, frame *fr);
 
   // return current frame. pc() and sp() are set to NULL on failure.
   static frame      current_frame();
diff --git a/src/share/vm/runtime/park.cpp b/src/share/vm/runtime/park.cpp
--- a/src/share/vm/runtime/park.cpp
+++ b/src/share/vm/runtime/park.cpp
@@ -148,6 +148,7 @@
   }
   p->AssociatedWith = t ;          // Associate p with t
   p->FreeNext       = NULL ;
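+  // start with no pending permit so a stale unpark() from a previous
+  // association cannot cause a spurious wakeup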
+  p->_counter       = 0;
   return p ;
 }
 
@@ -156,7 +157,14 @@
   if (p == NULL) return ;
   guarantee (p->AssociatedWith != NULL, "invariant") ;
   guarantee (p->FreeNext == NULL      , "invariant") ;
-  p->AssociatedWith = NULL ;
+
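+  // clearing the association under the Parker's mutex serializes it with a
+  // concurrent unpark()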
+  int status = pthread_mutex_lock(p->_mutex);
+  assert(status == 0, "invariant");
+  {
+    p->AssociatedWith = NULL;
+  }
+  status = pthread_mutex_unlock(p->_mutex);
+  assert(status == 0, "invariant");
 
   Thread::SpinAcquire(&ListLock, "ParkerFreeListRelease");
   {
diff --git a/src/share/vm/runtime/park.hpp b/src/share/vm/runtime/park.hpp
--- a/src/share/vm/runtime/park.hpp
+++ b/src/share/vm/runtime/park.hpp
@@ -47,7 +47,7 @@
 
 class Parker : public os::PlatformParker {
 private:
-  volatile int _counter ;
+  volatile intptr_t _counter ;
   Parker * FreeNext ;
   JavaThread * AssociatedWith ; // Current association
 
@@ -63,7 +63,7 @@
   // For simplicity of interface with Java, all forms of park (indefinite,
   // relative, and absolute) are multiplexed into one call.
   void park(bool isAbsolute, jlong time);
-  void unpark();
+  void unpark(JavaThread *thread);
 
   // Lifecycle operators
   static Parker * Allocate (JavaThread * t) ;
diff --git a/src/share/vm/runtime/safepoint.cpp b/src/share/vm/runtime/safepoint.cpp
--- a/src/share/vm/runtime/safepoint.cpp
+++ b/src/share/vm/runtime/safepoint.cpp
@@ -53,6 +53,7 @@
 #include "services/runtimeService.hpp"
 #include "utilities/events.hpp"
 #include "utilities/macros.hpp"
+#include "evtrace/traceManager.hpp"
 #ifdef TARGET_ARCH_x86
 # include "nativeInst_x86.hpp"
 # include "vmreg_x86.inline.hpp"
@@ -409,6 +410,11 @@
 
   assert(Threads_lock->owned_by_self(), "must hold Threads_lock");
   assert((_safepoint_counter & 0x1) == 1, "must be odd");
+
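+  // give event tracing a chance to do work that must happen while all Java
+  // threads are still stopped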
+  if (EnableEventTracing) {
+    TraceManager::do_work_before_safepoint_end();
+  }
+
   _safepoint_counter ++;
   // memory fence isn't required here since an odd _safepoint_counter
   // value can do no harm and a fence is issued below anyway.
diff --git a/src/share/vm/runtime/sharedRuntime.cpp b/src/share/vm/runtime/sharedRuntime.cpp
--- a/src/share/vm/runtime/sharedRuntime.cpp
+++ b/src/share/vm/runtime/sharedRuntime.cpp
@@ -91,6 +91,8 @@
 RuntimeStub*        SharedRuntime::_resolve_opt_virtual_call_blob;
 RuntimeStub*        SharedRuntime::_resolve_virtual_call_blob;
 RuntimeStub*        SharedRuntime::_resolve_static_call_blob;
+address             SharedRuntime::_memento_stack_trace_return_handler;
+address             SharedRuntime::_memento_stack_trace_exception_handler;
 
 DeoptimizationBlob* SharedRuntime::_deopt_blob;
 SafepointBlob*      SharedRuntime::_polling_page_vectors_safepoint_handler_blob;
@@ -111,6 +113,8 @@
   _resolve_virtual_call_blob           = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_virtual_call_C),       "resolve_virtual_call");
   _resolve_static_call_blob            = generate_resolve_blob(CAST_FROM_FN_PTR(address, SharedRuntime::resolve_static_call_C),        "resolve_static_call");
 
+  generate_memento_stack_trace_blob();
+
 #ifdef COMPILER2
   // Vectors are generated only by C2.
   if (is_wide_vector(MaxVectorSize)) {
@@ -519,6 +523,10 @@
     return Interpreter::rethrow_exception_entry();
   }
 
+  if (SharedRuntime::is_memento_stack_trace_return_handler(return_address)) {
+    return SharedRuntime::get_memento_stack_trace_exception_handler();
+  }
+
   guarantee(blob == NULL || !blob->is_runtime_stub(), "caller should have skipped stub");
   guarantee(!VtableStubs::contains(return_address), "NULL exceptions in vtables should have been handled already!");
 
diff --git a/src/share/vm/runtime/sharedRuntime.hpp b/src/share/vm/runtime/sharedRuntime.hpp
--- a/src/share/vm/runtime/sharedRuntime.hpp
+++ b/src/share/vm/runtime/sharedRuntime.hpp
@@ -61,6 +61,8 @@
   static RuntimeStub*        _resolve_opt_virtual_call_blob;
   static RuntimeStub*        _resolve_virtual_call_blob;
   static RuntimeStub*        _resolve_static_call_blob;
+  static address             _memento_stack_trace_return_handler;
+  static address             _memento_stack_trace_exception_handler;
 
   static DeoptimizationBlob* _deopt_blob;
 
@@ -81,6 +83,7 @@
   enum { POLL_AT_RETURN,  POLL_AT_LOOP, POLL_AT_VECTOR_LOOP };
   static SafepointBlob* generate_handler_blob(address call_ptr, int poll_type);
   static RuntimeStub*   generate_resolve_blob(address destination, const char* name);
+  static void           generate_memento_stack_trace_blob();
 
  public:
   static void generate_stubs(void);
@@ -230,6 +233,11 @@
     return _resolve_static_call_blob->entry_point();
   }
 
+  static address get_memento_stack_trace_return_handler()            { return _memento_stack_trace_return_handler; }
+  static address get_memento_stack_trace_exception_handler()         { return _memento_stack_trace_exception_handler; }
+  static bool is_memento_stack_trace_return_handler(address addr)    { return addr == _memento_stack_trace_return_handler; }
+  static bool is_memento_stack_trace_exception_handler(address addr) { return addr == _memento_stack_trace_exception_handler; }
+
   static SafepointBlob* polling_page_return_handler_blob()     { return _polling_page_return_handler_blob; }
   static SafepointBlob* polling_page_safepoint_handler_blob()  { return _polling_page_safepoint_handler_blob; }
   static SafepointBlob* polling_page_vectors_safepoint_handler_blob()  { return _polling_page_vectors_safepoint_handler_blob; }
diff --git a/src/share/vm/runtime/synchronizer.cpp b/src/share/vm/runtime/synchronizer.cpp
--- a/src/share/vm/runtime/synchronizer.cpp
+++ b/src/share/vm/runtime/synchronizer.cpp
@@ -40,6 +40,7 @@
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 #include "utilities/preserveException.hpp"
+#include "evtrace/traceEvents.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "os_linux.inline.hpp"
 #endif
@@ -280,7 +281,7 @@
 //  4) reenter lock1 with original recursion count
 //  5) lock lock2
 // NOTE: must use heavy weight monitor to handle complete_exit/reenter()
-intptr_t ObjectSynchronizer::complete_exit(Handle obj, TRAPS) {
+void ObjectSynchronizer::complete_exit(Handle obj, intptr_t *saved_recursions, intptr_t *saved_trace_exit_stack, TRAPS) {
   TEVENT (complete_exit) ;
   if (UseBiasedLocking) {
     BiasedLocking::revoke_and_rebias(obj, false, THREAD);
@@ -289,11 +290,11 @@
 
   ObjectMonitor* monitor = ObjectSynchronizer::inflate(THREAD, obj());
 
-  return monitor->complete_exit(THREAD);
+  monitor->complete_exit(saved_recursions, saved_trace_exit_stack, THREAD);
 }
 
 // NOTE: must use heavy weight monitor to handle complete_exit/reenter()
-void ObjectSynchronizer::reenter(Handle obj, intptr_t recursion, TRAPS) {
+void ObjectSynchronizer::reenter(Handle obj, intptr_t saved_recursions, intptr_t saved_trace_exit_stack, TRAPS) {
   TEVENT (reenter) ;
   if (UseBiasedLocking) {
     BiasedLocking::revoke_and_rebias(obj, false, THREAD);
@@ -302,7 +303,7 @@
 
   ObjectMonitor* monitor = ObjectSynchronizer::inflate(THREAD, obj());
 
-  monitor->reenter(recursion, THREAD);
+  monitor->reenter(saved_recursions, saved_trace_exit_stack, THREAD);
 }
 // -----------------------------------------------------------------------------
 // JNI locks on java objects
@@ -1310,6 +1311,13 @@
           m->set_object(object);
           // TODO-FIXME: assert BasicLock->dhw != 0.
 
+          // must get a sequence number before the monitor is published below
+          No_Safepoint_Verifier nsv(true, false);
+          intptr_t trace_seq = 0;
+          if (EnableEventTracing) {
+            trace_seq = m->next_trace_seq();
+          }
+
           // Must preserve store ordering. The monitor state must
           // be stable at the time of publishing the monitor address.
           guarantee (object->mark() == markOopDesc::INFLATING(), "invariant") ;
@@ -1327,6 +1335,9 @@
                 object->klass()->external_name());
             }
           }
+          if (EnableEventTracing) {
+            TraceEvents::write_monitor_inflate(m, trace_seq);
+          }
           return m ;
       }
 
@@ -1352,12 +1363,24 @@
       m->_Responsible  = NULL ;
       m->_SpinDuration = ObjectMonitor::Knob_SpinLimit ;       // consider: keep metastats by type/class
 
+      // must get a sequence number before the monitor is published below
+      No_Safepoint_Verifier nsv(true, false);
+      intptr_t trace_seq = 0;
+      if (EnableEventTracing) {
+          trace_seq = m->next_trace_seq();
+      }
+
       if (Atomic::cmpxchg_ptr (markOopDesc::encode(m), object->mark_addr(), mark) != mark) {
           m->set_object (NULL) ;
           m->set_owner  (NULL) ;
           m->OwnerIsThread = 0 ;
           m->Recycle() ;
           omRelease (Self, m, true) ;
+
+          if (EnableEventTracing) { // must still consume our sequence number
+             TraceEvents::write_monitor_dummy(m, trace_seq);
+          }
+
           m = NULL ;
           continue ;
           // interference - the markword changed - just retry.
@@ -1377,6 +1400,9 @@
             object->klass()->external_name());
         }
       }
+      if (EnableEventTracing) {
+        TraceEvents::write_monitor_inflate(m, trace_seq);
+      }
       return m ;
   }
 }
@@ -1444,6 +1470,9 @@
                 (void *) obj, (intptr_t) obj->mark(), obj->klass()->external_name());
        }
      }
+     if (EnableEventTracing) {
+       TraceEvents::write_monitor_deflate(mid);
+     }
 
      // Restore the header back to obj
      obj->release_set_mark(mid->header());
@@ -1605,7 +1634,7 @@
   ReleaseJavaMonitorsClosure(Thread* thread) : THREAD(thread) {}
   void do_monitor(ObjectMonitor* mid) {
     if (mid->owner() == THREAD) {
-      (void)mid->complete_exit(CHECK);
+      mid->complete_exit(NULL, NULL, CHECK);
     }
   }
 };
diff --git a/src/share/vm/runtime/synchronizer.hpp b/src/share/vm/runtime/synchronizer.hpp
--- a/src/share/vm/runtime/synchronizer.hpp
+++ b/src/share/vm/runtime/synchronizer.hpp
@@ -80,8 +80,8 @@
   // used by classloading to free classloader object lock,
   // wait on an internal lock, and reclaim original lock
   // with original recursion count
-  static intptr_t complete_exit  (Handle obj,                TRAPS);
-  static void reenter            (Handle obj, intptr_t recursion, TRAPS);
+  static void complete_exit(Handle obj, intptr_t *saved_recursions, intptr_t *saved_trace_exit_stack, TRAPS);
+  static void reenter      (Handle obj, intptr_t saved_recursions, intptr_t saved_trace_exit_stack, TRAPS);
 
   // thread-specific and global objectMonitor free list accessors
 //  static void verifyInUse (Thread * Self) ; too slow for general assert/debug
@@ -156,10 +156,6 @@
   void wait      (TRAPS)      { ObjectSynchronizer::wait     (_obj, 0, CHECK); } // wait forever
   void notify_all(TRAPS)      { ObjectSynchronizer::notifyall(_obj,    CHECK); }
   void waitUninterruptibly (TRAPS) { ObjectSynchronizer::waitUninterruptibly (_obj, 0, CHECK);}
-  // complete_exit gives up lock completely, returning recursion count
-  // reenter reclaims lock with original recursion count
-  intptr_t complete_exit(TRAPS) { return  ObjectSynchronizer::complete_exit(_obj, CHECK_0); }
-  void reenter(intptr_t recursion, TRAPS) { ObjectSynchronizer::reenter(_obj, recursion, CHECK); }
 };
 
 #endif // SHARE_VM_RUNTIME_SYNCHRONIZER_HPP
diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp
--- a/src/share/vm/runtime/thread.cpp
+++ b/src/share/vm/runtime/thread.cpp
@@ -79,6 +79,7 @@
 #include "services/threadService.hpp"
 #include "trace/tracing.hpp"
 #include "trace/traceMacros.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/defaultStream.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
@@ -257,6 +258,13 @@
   omFreeProvision = 32 ;
   omInUseList = NULL ;
   omInUseCount = 0 ;
+  _trace_buffer = NULL;
+  debug_only(_trace_active = false;)
+  _park_last_global_seq = 0;
+  _park_priority = 0;
+  _nesting_level = 0;
+  _memento_original_return_address = NULL;
+  _memento_stack_trace = NULL;
 
 #ifdef ASSERT
   _visited_for_critical_count = false;
@@ -391,6 +399,8 @@
     ThreadLocalStorage::invalidate_all();
   }
   CHECK_UNHANDLED_OOPS_ONLY(if (CheckUnhandledOops) delete unhandled_oops();)
+
+  assert (_trace_buffer == NULL, "leaking unsubmitted trace buffer");
 }
 
 // NOTE: dummy function for assertion purpose.
@@ -804,6 +814,9 @@
 void Thread::interrupt(Thread* thread) {
   trace("interrupt", thread);
   debug_only(check_for_dangling_thread_pointer(thread);)
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_interrupt(thread);
+  }
   os::interrupt(thread);
 }
 
@@ -1657,6 +1670,10 @@
   assert(JavaThread::current() == this, "sanity check");
   assert(!Thread::current()->owns_locks(), "sanity check");
 
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_start();
+  }
+
   DTRACE_THREAD_PROBE(start, this);
 
   // This operation might block. We call that after all safepoint checks for a new thread has
@@ -1700,6 +1717,10 @@
 
   DTRACE_THREAD_PROBE(stop, this);
 
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_exit();
+  }
+
   this->exit(false);
   delete this;
 }
@@ -1924,6 +1945,14 @@
   }
 #endif // INCLUDE_ALL_GCS
 
+  { // Submit trace buffer if we still hold one
+    No_Safepoint_Verifier nsv;
+    if (trace_buffer() != NULL) {
+      TraceManager::submit_buffer(trace_buffer());
+      set_trace_buffer(NULL);
+    }
+  }
+
   // Remove from list of active threads list, and notify VM thread if we are the last non-daemon thread
   Threads::remove(this);
 }
@@ -3421,6 +3450,11 @@
     return status;
   }
 
+  // Initialize event tracing
+  if (EnableEventTracing) {
+    TraceManager::initialize();
+  }
+
   // Should be done after the heap is fully created
   main_thread->cache_global_variables();
 
@@ -3662,6 +3696,10 @@
 
   BiasedLocking::init();
 
+  if (EnableEventTracing) {
+    TraceManager::start_threads(CHECK_0); // needs a fully functional Java environment
+  }
+
 #if INCLUDE_RTM_OPT
   RTMLockingCounters::init();
 #endif
diff --git a/src/share/vm/runtime/thread.hpp b/src/share/vm/runtime/thread.hpp
--- a/src/share/vm/runtime/thread.hpp
+++ b/src/share/vm/runtime/thread.hpp
@@ -88,6 +88,9 @@
 
 DEBUG_ONLY(class ResourceMark;)
 
+class TraceBuffer;
+class CachedTraceStack;
+
 class WorkerThread;
 
 // Class hierarchy
@@ -262,6 +265,14 @@
   // Thread-local buffer used by MetadataOnStackMark.
   MetadataOnStackBuffer* _metadata_on_stack_buffer;
 
+  TraceBuffer *_trace_buffer;
+  debug_only(bool _trace_active;)
+  intptr_t     _park_last_global_seq;
+  int          _park_priority;
+  int          _nesting_level;
+  address      _memento_original_return_address;
+  const CachedTraceStack* _memento_stack_trace;
+
   TRACE_DATA _trace_data;                       // Thread-local data for tracing
 
   ThreadExt _ext;
@@ -312,6 +323,7 @@
   virtual bool is_VM_thread()       const            { return false; }
   virtual bool is_Java_thread()     const            { return false; }
   virtual bool is_Compiler_thread() const            { return false; }
+  virtual bool is_TraceReader_thread() const         { return false; }
   virtual bool is_hidden_from_external_view() const  { return false; }
   virtual bool is_jvmti_agent_thread() const         { return false; }
   // True iff the thread can perform GC operations at a safepoint.
@@ -443,6 +455,21 @@
   void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
   inline jlong cooked_allocated_bytes();
 
+  TraceBuffer *trace_buffer()            { return _trace_buffer; }
+  void set_trace_buffer(TraceBuffer *b)  { _trace_buffer = b;    }
+  DEBUG_ONLY(bool trace_active()         { return _trace_active; })
+  DEBUG_ONLY(void toggle_trace_active()  { _trace_active = !_trace_active; })
+  void set_park_last_global_seq(intptr_t seq) { _park_last_global_seq = seq; }
+  static ByteSize park_last_global_seq_offset() { return byte_offset_of(Thread, _park_last_global_seq); }
+  int park_priority()                    { return _park_priority; }
+  static ByteSize park_priority_offset() { return byte_offset_of(Thread, _park_priority); }
+  int nesting_level()                    { return _nesting_level; }
+  static ByteSize nesting_level_offset() { return byte_offset_of(Thread, _nesting_level); }
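+  // memento stack trace bookkeeping; the return address accessor returns a
+  // reference so the saved address can be read and patched in place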
+  address& memento_original_return_address()               { return _memento_original_return_address; }
+  static ByteSize memento_original_return_address_offset() { return byte_offset_of(Thread, _memento_original_return_address); }
+  const CachedTraceStack *memento_stack_trace()            { return _memento_stack_trace; }
+  void set_memento_stack_trace(const CachedTraceStack *ts) { _memento_stack_trace = ts;   }
+
   TRACE_DATA* trace_data()              { return &_trace_data; }
 
   const ThreadExt& ext() const          { return _ext; }
diff --git a/src/share/vm/runtime/vframe.hpp b/src/share/vm/runtime/vframe.hpp
--- a/src/share/vm/runtime/vframe.hpp
+++ b/src/share/vm/runtime/vframe.hpp
@@ -322,6 +322,9 @@
       return (nmethod*) cb();
   }
 
+  frame*             frame_pointer()    { return &_frame; }
+  JavaThread*        thread()     const { return _thread; }
+
   // Frame type
   bool is_interpreted_frame() const { return _frame.is_interpreted_frame(); }
   bool is_entry_frame() const       { return _frame.is_entry_frame(); }
diff --git a/src/share/vm/runtime/vmThread.cpp b/src/share/vm/runtime/vmThread.cpp
--- a/src/share/vm/runtime/vmThread.cpp
+++ b/src/share/vm/runtime/vmThread.cpp
@@ -36,6 +36,7 @@
 #include "runtime/vm_operations.hpp"
 #include "services/runtimeService.hpp"
 #include "trace/tracing.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/events.hpp"
 #include "utilities/xmlstream.hpp"
@@ -272,6 +273,10 @@
   // possible to set the VM thread priority higher than any Java thread.
   os::set_native_priority( this, prio );
 
+  if (EnableEventTracing) {
+    TraceEvents::write_thread_start();
+  }
+
   // Wait for VM_Operations until termination
   this->loop();
 
@@ -458,11 +463,20 @@
           // 'GuaranteedSafepointInterval' milliseconds.  This will run all
           // the clean-up processing that needs to be done regularly at a
           // safepoint
+
+          if (EnableEventTracing) {
+            TraceEvents::write_safepoint_begin(TraceTypes::safepoint_periodic);
+          }
+
           SafepointSynchronize::begin();
           #ifdef ASSERT
             if (GCALotAtAllSafepoints) InterfaceSupport::check_gc_alot();
           #endif
           SafepointSynchronize::end();
+
+          if (EnableEventTracing) {
+            TraceEvents::write_safepoint_end(0);
+          }
         }
         _cur_vm_operation = _vm_queue->remove_next();
 
@@ -498,6 +512,11 @@
 
         _vm_queue->set_drain_list(safepoint_ops); // ensure ops can be scanned
 
+        if (EnableEventTracing) {
+          TraceEvents::write_safepoint_begin(TraceTypes::safepoint_for_vm_op);
+        }
+
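+        // count the VM operations coalesced into this safepoint so the
+        // safepoint_end event can report them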
+        int vm_ops_evaluated = 1;
         SafepointSynchronize::begin();
         evaluate_operation(_cur_vm_operation);
         // now process all queued safepoint ops, iteratively draining
@@ -515,6 +534,7 @@
               if (PrintSafepointStatistics) {
                 SafepointSynchronize::inc_vmop_coalesced_count();
               }
+              vm_ops_evaluated++;
             } while (_cur_vm_operation != NULL);
           }
           // There is a chance that a thread enqueued a safepoint op
@@ -542,6 +562,10 @@
         // Complete safepoint synchronization
         SafepointSynchronize::end();
 
+        if (EnableEventTracing) {
+          TraceEvents::write_safepoint_end((u4) vm_ops_evaluated);
+        }
+
       } else {  // not a safepoint operation
         if (TraceLongCompiles) {
           elapsedTimer t;
@@ -578,9 +602,17 @@
       long interval          = SafepointSynchronize::last_non_safepoint_interval();
       bool max_time_exceeded = GuaranteedSafepointInterval != 0 && (interval > GuaranteedSafepointInterval);
       if (SafepointALot || max_time_exceeded) {
+        if (EnableEventTracing) {
+          TraceEvents::write_safepoint_begin(TraceTypes::safepoint_periodic);
+        }
+
         HandleMark hm(VMThread::vm_thread());
         SafepointSynchronize::begin();
         SafepointSynchronize::end();
+
+        if (EnableEventTracing) {
+          TraceEvents::write_safepoint_end(0);
+        }
       }
     }
   }
@@ -667,9 +699,17 @@
     _cur_vm_operation = op;
 
     if (op->evaluate_at_safepoint() && !SafepointSynchronize::is_at_safepoint()) {
+      if (EnableEventTracing) {
+        TraceEvents::write_safepoint_begin(TraceTypes::safepoint_for_vm_op);
+      }
+
       SafepointSynchronize::begin();
       op->evaluate();
       SafepointSynchronize::end();
+
+      if (EnableEventTracing) {
+        TraceEvents::write_safepoint_end(1);
+      }
     } else {
       op->evaluate();
     }
diff --git a/src/share/vm/runtime/vm_operations.hpp b/src/share/vm/runtime/vm_operations.hpp
--- a/src/share/vm/runtime/vm_operations.hpp
+++ b/src/share/vm/runtime/vm_operations.hpp
@@ -97,6 +97,8 @@
   template(LinuxDllLoad)                          \
   template(RotateGCLog)                           \
   template(WhiteBoxOperation)                     \
+  template(ReclaimTraceBuffers)                   \
+  template(ResetTraceMetadata)                    \
 
 class VM_Operation: public CHeapObj<mtInternal> {
  public:
diff --git a/src/share/vm/utilities/vmError.cpp b/src/share/vm/utilities/vmError.cpp
--- a/src/share/vm/utilities/vmError.cpp
+++ b/src/share/vm/utilities/vmError.cpp
@@ -476,7 +476,7 @@
      if (_context) {
        st->print_cr("# Problematic frame:");
        st->print("# ");
-       frame fr = os::fetch_frame_from_context(_context);
+       frame fr = os::fetch_frame_from_context(_thread, _context);
        fr.print_on_error(st, buf, sizeof(buf));
        st->cr();
        st->print_cr("#");
@@ -562,7 +562,7 @@
        address stack_bottom = stack_top - stack_size;
        st->print("[" PTR_FORMAT "," PTR_FORMAT "]", stack_bottom, stack_top);
 
-       frame fr = _context ? os::fetch_frame_from_context(_context)
+       frame fr = _context ? os::fetch_frame_from_context(_thread, _context)
                            : os::current_frame();
 
        if (fr.sp()) {
@@ -581,7 +581,7 @@
        // We have printed the native stack in platform-specific code
        // Windows/x64 needs special handling.
      } else {
-       frame fr = _context ? os::fetch_frame_from_context(_context)
+       frame fr = _context ? os::fetch_frame_from_context(_thread, _context)
                            : os::current_frame();
 
        // see if it's a valid frame
@@ -605,13 +605,13 @@
                  RegisterMap map((JavaThread*)_thread, false); // No update
                  fr = fr.sender(&map);
                } else {
-                 fr = os::get_sender_for_C_frame(&fr);
+                 fr = os::get_sender_for_C_frame(_thread, &fr);
                }
              } else {
                // is_first_C_frame() does only simple checks for frame pointer,
                // it will pass if java compiled code has a pointer in EBP.
                if (os::is_first_C_frame(&fr)) break;
-               fr = os::get_sender_for_C_frame(&fr);
+               fr = os::get_sender_for_C_frame(_thread, &fr);
              }
           }
 
