Java notify和notifyAll源碼分析與性能對比
一、源碼剖析
首先,看看在synchronizer.cpp中notify和notifyall的實現:
void ObjectSynchronizer::notify(Handle obj, TRAPS) {
if (UseBiasedLocking) {
BiasedLocking::revoke_and_rebias(obj, false, THREAD);
assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
}
markOop mark = obj->mark();
if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) {
return;
}
ObjectSynchronizer::inflate(THREAD, obj())->notify(THREAD);
}
// NOTE: see comment of notify()
void ObjectSynchronizer::notifyall(Handle obj, TRAPS) {
if (UseBiasedLocking) {
BiasedLocking::revoke_and_rebias(obj, false, THREAD);
assert(!obj->mark()->has_bias_pattern(), "biases should be revoked by now");
}
markOop mark = obj->mark();
if (mark->has_locker() && THREAD->is_lock_owned((address)mark->locker())) {
return;
}
ObjectSynchronizer::inflate(THREAD, obj())->notifyAll(THREAD);
}
兩者實現一樣,看看objectMonitor.cpp中的實現。
notify:
// Consider:
// If the lock is cool (cxq == null && succ == null) and we're on an MP system
// then instead of transferring a thread from the WaitSet to the EntryList
// we might just dequeue a thread from the WaitSet and directly unpark() it.
void ObjectMonitor::notify(TRAPS) {
CHECK_OWNER();
if (_WaitSet == NULL) {
TEVENT (Empty-Notify) ;
return ;
}
DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
int Policy = Knob_MoveNotifyee ;
Thread::SpinAcquire (&_WaitSetLock, "WaitSet - notify") ;
ObjectWaiter * iterator = DequeueWaiter() ;
if (iterator != NULL) {
TEVENT (Notify1 - Transfer) ;
guarantee (iterator->TState == ObjectWaiter::TS_WAIT, "invariant") ;
guarantee (iterator->_notified == 0, "invariant") ;
if (Policy != 4) {
iterator->TState = ObjectWaiter::TS_ENTER ;
}
iterator->_notified = 1 ;
Thread * Self = THREAD;
iterator->_notifier_tid = Self->osthread()->thread_id();
ObjectWaiter * List = _EntryList ;
if (List != NULL) {
assert (List->_prev == NULL, "invariant") ;
assert (List->TState == ObjectWaiter::TS_ENTER, "invariant") ;
assert (List != iterator, "invariant") ;
}
if (Policy == 0) { // prepend to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
List->_prev = iterator ;
iterator->_next = List ;
iterator->_prev = NULL ;
_EntryList = iterator ;
}
} else
if (Policy == 1) { // append to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
// CONSIDER: finding the tail currently requires a linear-time walk of
// the EntryList. We can make tail access constant-time by converting to
// a CDLL instead of using our current DLL.
ObjectWaiter * Tail ;
for (Tail = List ; Tail->_next != NULL ; Tail = Tail->_next) ;
assert (Tail != NULL && Tail->_next == NULL, "invariant") ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
}
} else
if (Policy == 2) { // prepend to cxq
// prepend to cxq
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
iterator->TState = ObjectWaiter::TS_CXQ ;
for (;;) {
ObjectWaiter * Front = _cxq ;
iterator->_next = Front ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
break ;
}
}
}
} else
if (Policy == 3) { // append to cxq
iterator->TState = ObjectWaiter::TS_CXQ ;
for (;;) {
ObjectWaiter * Tail ;
Tail = _cxq ;
if (Tail == NULL) {
iterator->_next = NULL ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
break ;
}
} else {
while (Tail->_next != NULL) Tail = Tail->_next ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
break ;
}
}
} else {
ParkEvent * ev = iterator->_event ;
iterator->TState = ObjectWaiter::TS_RUN ;
OrderAccess::fence() ;
ev->unpark() ;
}
if (Policy < 4) {
iterator->wait_reenter_begin(this);
}
// _WaitSetLock protects the wait queue, not the EntryList. We could
// move the add-to-EntryList operation, above, outside the critical section
// protected by _WaitSetLock. In practice that's not useful. With the
// exception of wait() timeouts and interrupts the monitor owner
// is the only thread that grabs _WaitSetLock. There's almost no contention
// on _WaitSetLock so it's not profitable to reduce the length of the
// critical section.
}
Thread::SpinRelease (&_WaitSetLock) ;
if (iterator != NULL && ObjectMonitor::_sync_Notifications != NULL) {
ObjectMonitor::_sync_Notifications->inc() ;
}
}
notifyAll:
void ObjectMonitor::notifyAll(TRAPS) {
CHECK_OWNER();
ObjectWaiter* iterator;
if (_WaitSet == NULL) {
TEVENT (Empty-NotifyAll) ;
return ;
}
DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
int Policy = Knob_MoveNotifyee ;
int Tally = 0 ;
Thread::SpinAcquire (&_WaitSetLock, "WaitSet - notifyall") ;
for (;;) {
iterator = DequeueWaiter () ;
if (iterator == NULL) break ;
TEVENT (NotifyAll - Transfer1) ;
++Tally ;
// Disposition - what might we do with iterator ?
// a. add it directly to the EntryList - either tail or head.
// b. push it onto the front of the _cxq.
// For now we use (a).
guarantee (iterator->TState == ObjectWaiter::TS_WAIT, "invariant") ;
guarantee (iterator->_notified == 0, "invariant") ;
iterator->_notified = 1 ;
Thread * Self = THREAD;
iterator->_notifier_tid = Self->osthread()->thread_id();
if (Policy != 4) {
iterator->TState = ObjectWaiter::TS_ENTER ;
}
ObjectWaiter * List = _EntryList ;
if (List != NULL) {
assert (List->_prev == NULL, "invariant") ;
assert (List->TState == ObjectWaiter::TS_ENTER, "invariant") ;
assert (List != iterator, "invariant") ;
}
if (Policy == 0) { // prepend to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
List->_prev = iterator ;
iterator->_next = List ;
iterator->_prev = NULL ;
_EntryList = iterator ;
}
} else
if (Policy == 1) { // append to EntryList
if (List == NULL) {
iterator->_next = iterator->_prev = NULL ;
_EntryList = iterator ;
} else {
// CONSIDER: finding the tail currently requires a linear-time walk of
// the EntryList. We can make tail access constant-time by converting to
// a CDLL instead of using our current DLL.
ObjectWaiter * Tail ;
for (Tail = List ; Tail->_next != NULL ; Tail = Tail->_next) ;
assert (Tail != NULL && Tail->_next == NULL, "invariant") ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
}
} else
if (Policy == 2) { // prepend to cxq
// prepend to cxq
iterator->TState = ObjectWaiter::TS_CXQ ;
for (;;) {
ObjectWaiter * Front = _cxq ;
iterator->_next = Front ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
break ;
}
}
} else
if (Policy == 3) { // append to cxq
iterator->TState = ObjectWaiter::TS_CXQ ;
for (;;) {
ObjectWaiter * Tail ;
Tail = _cxq ;
if (Tail == NULL) {
iterator->_next = NULL ;
if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
break ;
}
} else {
while (Tail->_next != NULL) Tail = Tail->_next ;
Tail->_next = iterator ;
iterator->_prev = Tail ;
iterator->_next = NULL ;
break ;
}
}
} else {
ParkEvent * ev = iterator->_event ;
iterator->TState = ObjectWaiter::TS_RUN ;
OrderAccess::fence() ;
ev->unpark() ;
}
if (Policy < 4) {
iterator->wait_reenter_begin(this);
}
// _WaitSetLock protects the wait queue, not the EntryList. We could
// move the add-to-EntryList operation, above, outside the critical section
// protected by _WaitSetLock. In practice that's not useful. With the
// exception of wait() timeouts and interrupts the monitor owner
// is the only thread that grabs _WaitSetLock. There's almost no contention
// on _WaitSetLock so it's not profitable to reduce the length of the
// critical section.
}
Thread::SpinRelease (&_WaitSetLock) ;
if (Tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
ObjectMonitor::_sync_Notifications->inc(Tally) ;
}
}
對比可知,在當前線程SpinAcquire到_WaitSetLock之後,notify只調用一次DequeueWaiter方法獲取到一個等候者,而notifyAll需要循環DequeueWaiter直至waitset爲空。爾後的操作大致一樣,根據Policy把等候者插入到臨界區的EntryList中,最後自旋釋放_WaitSetLock。
代碼上看來,notify和notifyAll在性能上可能還是會有一定的差別。
二、測試驗證
實際上到底有沒有性能差異,有多少差別呢?我們可以用JMH來驗證一下。
測試代碼如下:
package co.speedar.infra.test;
import java.util.concurrent.CountDownLatch;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
@BenchmarkMode(value = { Mode.AverageTime, Mode.Throughput })
@State(Scope.Thread)
public class NotifyNotifyAllTest {
private volatile boolean wait1 = true;
private final Object lock1 = new Object();
@Param({ "1", "10", "50" })
private int length;
@Benchmark
public void testNotify() throws InterruptedException {
doTestNotify(length, false);
}
@Benchmark
public void testNotifyAll() throws InterruptedException {
doTestNotify(length, true);
}
private void doTestNotify(final int count, final boolean isAll) throws InterruptedException {
CountDownLatch latch = new CountDownLatch(count);
class Waiter implements Runnable {
public void run() {
synchronized (lock1) {
latch.countDown();
while (wait1) {
try {
lock1.wait();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
}
for (int i = 0; i < count; i++) {
new Thread(new Waiter()).start();
}
latch.await();
class Notifier implements Runnable {
public void run() {
synchronized (lock1) {
if (wait1) {
wait1 = false;
}
if (isAll) {
lock1.notifyAll();
} else {
lock1.notify();
}
}
}
}
for (int i = 0; i < count; i++) {
new Thread(new Notifier()).start();
}
}
public static void main(String[] args) throws RunnerException {
Options opt = new OptionsBuilder().include(NotifyNotifyAllTest.class.getSimpleName()).forks(1).threads(8)
.warmupIterations(5).measurementIterations(5).build();
new Runner(opt).run();
}
}
線程Waiter啓動時候嘗試獲取鎖,然後進入wait狀態,全部開啓完畢後latch阻塞完成,開始啓動Notifier線程喚醒等待中的Waiter線程,執行結果如下:
Benchmark | (length) | Mode | Cnt | Score | Error | Units |
---|---|---|---|---|---|---|
NotifyNotifyAllTest.testNotify | 1 | thrpt | 5 | 14025.549 | ±150.552 | ops/s |
NotifyNotifyAllTest.testNotify | 10 | thrpt | 5 | 1449.138 | ±10.629 | ops/s |
NotifyNotifyAllTest.testNotify | 50 | thrpt | 5 | 288.984 | ±2.692 | ops/s |
NotifyNotifyAllTest.testNotifyAll | 1 | thrpt | 5 | 14235.773 | ±305.752 | ops/s |
NotifyNotifyAllTest.testNotifyAll | 10 | thrpt | 5 | 1452.429 | ±12.703 | ops/s |
NotifyNotifyAllTest.testNotifyAll | 50 | thrpt | 5 | 295.088 | ±2.200 | ops/s |
NotifyNotifyAllTest.testNotify | 1 | avgt | 5 | 0.001 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotify | 10 | avgt | 5 | 0.006 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotify | 50 | avgt | 5 | 0.027 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotifyAll | 1 | avgt | 5 | 0.001 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotifyAll | 10 | avgt | 5 | 0.006 | ±0.001 | s/op |
NotifyNotifyAllTest.testNotifyAll | 50 | avgt | 5 | 0.028 | ±0.001 | s/op |
三、結論
可以看到,吞吐量和執行時間都沒有太大的差別,從性能角度來說,沒必要用notify替換notifyAll。