Main /
RxTxCrashingVergilReproducing The IssueAfter solving the RxTx Hanging problem, now under Mac OS 10.11.1, which I run $PTII/bin/vergil $PTII/ptolemy/actor/lib/jjs/modules/serial/test/SerialHelloWorld.xml with two XBees plugged in, I get a crash when I stop the model. Here's the model: ![]() The RxTxSerialHelloWorldCrash.txt has the following interesting bits: Process: java [88698] Path: /Library/Java/JavaVirtualMachines/jdk1.8.0_65.jdk/Contents/Home/bin/java Identifier: net.java.openjdk.cmd Version: 1.0 (1.0) Code Type: X86-64 (Native) Parent Process: bash [88687] Responsible: Terminal [620] User ID: 503 Date/Time: 2015-12-07 16:15:04.343 -0800 OS Version: Mac OS X 10.11.1 (15B42) Report Version: 11 Anonymous UUID: FFB0C42D-C30C-C2CA-DBCA-5A8AE184FC69 Sleep/Wake UUID: 8BEAAF7B-52B1-4CBE-AAD4-1B27CE2FD427 Time Awake Since Boot: 470000 seconds Time Since Wake: 63000 seconds System Integrity Protection: enabled Crashed Thread: 32 Exception Type: EXC_CRASH (SIGABRT) Exception Codes: 0x0000000000000000, 0x0000000000000000 Thread 32 is the crashed thread, it looks like: Thread 32 Crashed: 0 libsystem_kernel.dylib 0x00007fff8d4b22b2 __semwait_signal + 10 1 libsystem_c.dylib 0x00007fff8f160a75 nanosleep + 199 2 libsystem_c.dylib 0x00007fff8f160968 usleep + 54 3 libNRJavaSerial.jnilib 0x000000012c0027da drain_loop + 58 4 libsystem_pthread.dylib 0x00007fff8b1e29b1 _pthread_body + 131 5 libsystem_pthread.dylib 0x00007fff8b1e292e _pthread_start + 168 6 libsystem_pthread.dylib 0x00007fff8b1e0385 thread_start + 13 Threads 30 and 31 are interesting: Thread 30:: Java: SerialHelloWorld 0 libsystem_kernel.dylib 0x00007fff8d4b22b2 __semwait_signal + 10 1 libsystem_c.dylib 0x00007fff8f160a75 nanosleep + 199 2 libsystem_c.dylib 0x00007fff8f160968 usleep + 54 3 libNRJavaSerial.jnilib 0x000000012c007b56 Java_gnu_io_RXTXPort_interruptEventLoop + 118 4 ??? 0x0000000111b9e954 0 + 4592363860 5 ??? 0x0000000111b90c4d 0 + 4592307277 6 ??? 0x0000000111b90c4d 0 + 4592307277 7 ??? 0x0000000111b90c92 0 + 4592307346 8 ??? 0x0000000111b90c4d 0 + 4592307277 9 ??? 0x0000000111b90c4d 0 + 4592307277 10 ??? 0x0000000111b90c4d 0 + 4592307277 11 ??? 0x0000000111b90c4d 0 + 4592307277 12 ??? 0x0000000111b90c4d 0 + 4592307277 13 ??? 0x0000000111b90c4d 0 + 4592307277 14 ??? 0x0000000111b897a7 0 + 4592277415 15 libjvm.dylib 0x0000000110eeab02 JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*) + 1710 16 libjvm.dylib 0x0000000110eeb290 JavaCalls::call_virtual(JavaValue*, KlassHandle, Symbol*, Symbol*, JavaCallArguments*, Thread*) + 356 17 libjvm.dylib 0x0000000110eeb43c JavaCalls::call_virtual(JavaValue*, Handle, KlassHandle, Symbol*, Symbol*, Thread*) + 74 18 libjvm.dylib 0x0000000110f45959 thread_entry(JavaThread*, Thread*) + 124 19 libjvm.dylib 0x00000001111612d3 JavaThread::thread_main_inner() + 155 20 libjvm.dylib 0x00000001111629ce JavaThread::run() + 450 21 libjvm.dylib 0x000000011108342e java_start(Thread*) + 246 22 libsystem_pthread.dylib 0x00007fff8b1e29b1 _pthread_body + 131 23 libsystem_pthread.dylib 0x00007fff8b1e292e _pthread_start + 168 24 libsystem_pthread.dylib 0x00007fff8b1e0385 thread_start + 13 Thread 31:: Java: Thread-2 0 libsystem_kernel.dylib 0x00007fff8d4b2222 __select + 10 1 libNRJavaSerial.jnilib 0x000000012c007227 Java_gnu_io_RXTXPort_eventLoop + 263 2 ??? 0x0000000111b9e954 0 + 4592363860 3 ??? 0x0000000111b90c4d 0 + 4592307277 4 ??? 0x0000000111b897a7 0 + 4592277415 5 libjvm.dylib 0x0000000110eeab02 JavaCalls::call_helper(JavaValue*, methodHandle*, JavaCallArguments*, Thread*) + 1710 6 libjvm.dylib 0x0000000110eeb290 JavaCalls::call_virtual(JavaValue*, KlassHandle, Symbol*, Symbol*, JavaCallArguments*, Thread*) + 356 7 libjvm.dylib 0x0000000110eeb43c JavaCalls::call_virtual(JavaValue*, Handle, KlassHandle, Symbol*, Symbol*, Thread*) + 74 8 libjvm.dylib 0x0000000110f45959 thread_entry(JavaThread*, Thread*) + 124 9 libjvm.dylib 0x00000001111612d3 JavaThread::thread_main_inner() + 155 10 libjvm.dylib 0x00000001111629ce JavaThread::run() + 450 11 libjvm.dylib 0x000000011108342e java_start(Thread*) + 246 12 libsystem_pthread.dylib 0x00007fff8b1e29b1 _pthread_body + 131 13 libsystem_pthread.dylib 0x00007fff8b1e292e _pthread_start + 168 14 libsystem_pthread.dylib 0x00007fff8b1e0385 thread_start + 13 Focusing on Thread 31: 3 libNRJavaSerial.jnilib 0x000000012c0027da drain_loop + 58 drain_loop is defined in SerialImpl.c, see https://github.com/NeuronRobotics/nrjavaserial/blob/master/src/main/c/src/SerialImp.c: #if !defined(TIOCSERGETLSR) && !defined(WIN32) /*---------------------------------------------------------- drain_loop() accept: perform: call tcdrain() and report an event when it succeds return: none exceptions: comments: ----------------------------------------------------------*/ void *drain_loop( void *arg ) { struct event_info_struct *eis = ( struct event_info_struct * ) arg; /* char msg[80]; */ int i; pthread_detach( pthread_self() ); for(i=0;;i++) { report_verbose("drain_loop: looping\n"); if( eis->eventloop_interrupted ) { goto end; } #if defined(__sun__) /* FIXME: No time to test on all OS's for production */ if (usleep(5000)) { report("drain_loop: received EINTR"); } #else if (usleep(1000000)) { report("drain_loop: received EINTR"); } #endif /* __sun__ */ /* system_wait(); */ if( eis->eventloop_interrupted ) { goto end; } if( tcdrain( eis->fd ) == 0 ) { if( eis && eis->writing ) { /* sprintf(msg, "drain_loop: setting OUTPUT_BUFFER_EMPTY\n" ); report( msg ); */ eis->output_buffer_empty_flag = 1; eis->writing=JNI_FALSE; } else { if( !eis ) { goto end; } report_verbose("drain_loop: writing not set\n"); } } else if (errno != EINTR) { report("drain_loop: tcdrain bad fd\n"); goto end; } else { report("drain_loop: received EINTR in tcdrain\n"); } } end: report("------------------ drain_loop exiting ---------------------\n"); eis->drain_loop_running = 0; pthread_exit( NULL ); return( NULL ); } #endif /* !defined(TIOCSERGETLSR) && !defined(WIN32) */
Patch to FreeBSDhttps://bugs.freebsd.org/bugzilla/show_bug.cgi?id=200179 is key, it discusses a number of fixes that should be folded in. SerialImp.c 12) Tidied code indentation (where needed, it is a medium length file and doesn't provide benifit on areas that work) 13) Added standard out debug support to allow for assessment of interaction with Java code that can be compiled in (DEBUG_STDOUT) 14) Added entry and exist debug statement for key functions 15) where sprintf is being used, ensure that the existing ptr refers to an empty string (uses memset to sizeof) 16) cleared up display tabulation of function headers in entire file (on second enumeration strings contained garbage) 17) Remove LEAVE and ENTRY functions as not working as expected in header. 18) Made sure all debug statements are null pointer terminated (note sprintf is not solely used for debug message formatting) 19) Added condition so that a port that is in use during testread() is still reported as selectable. (If you attempt to open it after the port scan and it is still EBUSY the behaviour is unaltered and will throw a suitable error back) 20) Disabled uucp for lpt ports, as uucp only applies to tty on FreeBSD (so says the Handbook...) 21) RXTXPort:interruptEventLoop - resolved SIGABRT call to process (intended to kill off thread not entire JRE)... 22) RXTXPort:interruptEventLoop now waits for timer in child thread to expire before attempting to closing the thread (delay between 0 and 1000000 sleeps) 23) All debug streams for FreeBSD now forward to same function for uniform formatting. This fixes two downstream issues in the arduino port, which results in missing dependancies, a core dump when uploading, or very slow port enumeration (up to 30s in some cases) when the arduino IDE is first started. Patch to SerialImpl.c is a patch to FreeBSD, but what is the basefile? https://svnweb.freebsd.org/ports/head/comms/rxtx/Makefile?view=markup indicates that rxtx-2.2pre2 on http://rxtx.qbang.org/pub/rxtx/ is the source. Here's how to apply the patch: wget http://rxtx.qbang.org/pub/rxtx/rxtx-2.2pre2.zip unzip rxtx-2.2pre2.zip cd rxtx-2.2pre2 cd src wget -O SerialImp.patch 'https://svnweb.freebsd.org/ports/head/comms/rxtx/files/patch-src-SerialImp.c?revision=390725&view=co&pathrev=390725' cp SerialImp.c /tmp patch < SerialImp.patch Diff between nrjavaserial and FreeBSDbash-3.2$ diff -crwb ~/src/nrjavaserial/src/main/c/src/SerialImp.c SerialImp.c *** /Users/cxh/src/nrjavaserial/src/main/c/src/SerialImp.c 2015-12-07 13:39:18.000000000 -0800 --- SerialImp.c 2015-12-07 17:45:01.000000000 -0800 ... ----------------------------------------------------------*/ void *drain_loop( void *arg ) { + report("Entering - drain_loop\n"); struct event_info_struct *eis = ( struct event_info_struct * ) arg; /* char msg[80]; */ int i; + + report_verbose("drain_loop: pthread detaching\n"); pthread_detach( pthread_self() ); + report_verbose("drain_loop: begining for loop\n"); for(i=0;;i++) { report_verbose("drain_loop: looping\n"); if( eis->eventloop_interrupted ) { + report_verbose("drain_loop: EventLoop Interrupted\n"); goto end; } + #if defined(__sun__) /* FIXME: No time to test on all OS's for production */ + report_verbose("drain_loop: SUN: Sleeping now\n"); if (usleep(5000)) { ! report("***drain_loop: received EINTR\n"); } #else + report_verbose("drain_loop: Sleeping now\n"); if (usleep(1000000)) { ! report("***drain_loop: received EINTR\n"); } #endif /* __sun__ */ + + report_verbose("***drain_loop: Out of sleep loop\n"); /* system_wait(); */ + if( eis->eventloop_interrupted ) { + report_verbose("*** drain_loop: Interupted\n"); + eis->closing = 1; goto end; } + if( tcdrain( eis->fd ) == 0 ) { if( eis && eis->writing ) { /* + memset(&msg[0], 0, sizeof(msg)); sprintf(msg, "drain_loop: setting OUTPUT_BUFFER_EM\ PTY\n" ); report( msg ); */ *************** *** 1327,1333 **** } end: report("------------------ drain_loop exiting ---------------------\n"); ! eis->drain_loop_running = 0; pthread_exit( NULL ); return( NULL ); } --- 1373,1379 ---- } end: report("------------------ drain_loop exiting ---------------------\n"); ! eis->closing = 1; pthread_exit( NULL ); return( NULL ); } Even with the above changes, the error would still happen. Possible WorkaroundIt seems that stopping the model and then quickly exiting Vergil causes the crash. However, this was with a version of src/main/c/resources/native/osx/libNRJavaSerial.jnilib that was from git and did not have recent changes in it. If I recompile from source, then the crash occurs under OS X when I stop the model. The recent changes in this file are: bash-3.2$ git log src/main/c/src/SerialImp.c commit 74c4f8c77caf51134d52445f6af16230ca14bd5e Author: Samuel Coleman <samuel@seenet.ca> Date: Fri Nov 20 09:22:51 2015 -0500 Mark defunct ports as closing on OS X. Without this, CPU usage goes to 100% and stays there on OS X after calling CommPort.close(). commit ea5c0965c69bac549bf2023eaff20082bf85f67e Author: Samuel Coleman <samuel@seenet.ca> Date: Fri Nov 20 09:18:40 2015 -0500 Reduce CPU usage of serial event generator. There's no reason for the select(2) call to have a 1ms timeout. At that point, it's basically a busy loop. A 100ms timeout greatly reduces CPU usage while retaining fairly quick responsiveness to port closure. Closes #36. commit Author: hephaestus <mad.hephaestus@gmail.com> Date: Wed Oct 28 21:32:34 2015 -0400 moving to gradle build If I back out the change that reduces the CPU usage, then the problem goes away. The change is: bash-3.2$ git diff src/main/c/src/SerialImp.c diff --git a/src/main/c/src/SerialImp.c b/src/main/c/src/SerialImp.c index fdd1c9b..8e1cc8a 100644 --- a/src/main/c/src/SerialImp.c +++ b/src/main/c/src/SerialImp.c @@ -4146,7 +4146,7 @@ end: if (eis->fd < FD_SETSIZE && eis->fd > 0) { FD_SET( eis->fd, &eis->rfds ); eis->tv_sleep.tv_sec = 0; - eis->tv_sleep.tv_usec = 100 * 1000; + eis->tv_sleep.tv_usec = 1000; eis->initialised = 1; return( 1 ); } else { For discussion of this, see
Indeed, libNRJavaSerial.jnilib has not been updated since before the above changes. bash-3.2$ git log src/main/c/resources/native/osx/libNRJavaSerial.jnilib commit d6692407269a607dfcd0a58612f5c2841bdf44ad Author: Samuel Coleman <samuel@seenet.ca> Date: Mon Nov 23 12:49:40 2015 -0500 Recompile natives affected by performance fixes. Linux: Built against glibc 2.5 on CentOS 5.11. ARM: Cross-compiled against glibc 2.15 on Ubuntu 14.04. PPC: Cross-compiled against glibc 2.19 on Ubuntu 14.04. OS X: Suitable for both 32- and 64-bit platforms. Cortex-A8-specific ARM builds have been removed in this release as they're not actually loaded from Java. libNRJavaSerial.so/libNRJavaSerial_HF.so target ARMv7-A and should work on Cortex-A8 in lieu of any more optimized build. Windows builds remain unchanged as the recent native code changes specifically do not affect Windows. commit f24084736c1cae1519df45138f4f3243c27db73d Author: hephaestus <mad.hephaestus@gmail.com> Date: Wed Oct 28 21:32:34 2015 -0400 moving to gradle build bash-3.2$ Another Possible Solutionhttps://bugs.freebsd.org/bugzilla/show_bug.cgi?id=200179 says: 21) RXTXPort:interruptEventLoop - resolved SIGABRT call to process (intended to kill off thread not entire JRE)... 22) RXTXPort:interruptEventLoop now waits for timer in child thread to expire before attempting to closing the thread (delay between 0 and 1000000 sleeps) So, updating /*---------------------------------------------------------- interruptEventLoop accept: nothing perform: increment eventloop_interrupted return: nothing exceptions: none comments: all eventloops in this PID will check if their thread is interrupted. When all the interrupted threads exit they will decrement the var leaving it 0. the remaining threads will continue. ----------------------------------------------------------*/ JNIEXPORT void JNICALL RXTXPort(interruptEventLoop)(JNIEnv *env, jobject jobj) { report("Entering - RXTXPort_interruptEventLoop\n"); struct event_info_struct *index = master_index; int fd = get_java_var( env, jobj, "fd", "I" ); int searching = 1; int waiting = 1; int loop_count = 0; char buff[ 60 ] = ""; report("RXTXPort_interruptEventLoop - Searching...\n"); while( searching ) { index = master_index; if( index ) { while( index->fd != fd && index->next ) index = index->next; if ( index->fd == fd ) searching = 0; } else report("x"); if( searching ) { report("@"); usleep(1000); } } report("RXTXPort_interruptEventLoop - Searched\n"); index->eventloop_interrupted = 1; report("RXTXPort_interruptEventLoop - Interrupted\n"); /* Many OS's need a thread running to determine if output buffer is empty. For Linux and Win32 it is not needed. So closing is used to shut down the thread in the write order on OS's that don't have kernel support for output buffer empty. In rxtx TIOCSERGETLSR is defined for win32 and Linux */ #ifdef TIOCSERGETLSR index->closing=1; #endif /* TIOCSERGETLSR */ #ifdef WIN32 termios_interrupt_event_loop( index->fd, 1 ); #endif /* WIN32 */ #if !defined(TIOCSERGETLSR) && !defined(WIN32) // TIOCSERGETLSR is not define under FreeBSD 10.1-p9 i386 or Mac OS X // memset(&buff[0], 0, sizeof(buff)); sprintf( buff, "RXTXPort_interruptEventLoop - Closing State is %i\n", index->closing ); report (buff); report("RXTXPort_interruptEventLoop - Going to wait for thread to say it is closing\n"); loop_count = 0; while( waiting ) { report("RXTXPort_interruptEventLoop - Waiting\n"); usleep(5001); loop_count = loop_count + 1; if ((index->closing) == 1) waiting = 0; if (loop_count > 20) waiting = 0; } memset(&buff[0], 0, sizeof(buff)); sprintf( buff, "RXTXPort_interruptEventLoop - Closing State is now %i\n", index->closing ); report (buff); /** * The command below was being sent to the relevant thread, * however under POSIX spec this abort signal applies to the * whole process, NOT the thread. * * However, if RXTX is being call by something else in Java, * this causes the parent enumaertor and the entire JRE to * SIGABRT, resulting in a Java Core Dump (as requested)! * * Not sure what this call was supposed to achive; Have * included new wait loop above to ensure that we wait for the * thread to pick up that we have asked for it to exit: * * index->eventloop_interrupted = 1; * * No way for checking if an OS is POSIX compliant with * pthread_kill, when TIOCSERGETLSR is not defined? **/ /** * pthread_kill(index->drain_tid, SIGABRT); **/ report("***RXTXPort_interruptEventLoop - Thread Killed\n"); /* TODO use wait/join/SIGCHLD/?? instead of sleep? */ report("RXTXPort_interruptEventLoop - Sleeping now...\n"); usleep(50 * 1000); report("RXTXPort_interruptEventLoop - Waking up...\n"); report("RXTXPort_interruptEventLoop - Waking up...\n"); /** * Under normal conditions, SIGABRT will unblock tcdrain. However * a non-responding USB device combined with an unclean driver * may still block. This is very ugly because it may block the call * to close indefinetly. * * UPDATE to above statement: * SIGABRT does not have the expected results in a * POSIX environment. **/ #if defined(__APPLE__) index->closing = 1; /* Continuing on OS X causes an invalid memory access. */ return; #endif if (index->closing != 1) { /* good bye tcdrain, and thanks for all the fish */ report("interruptEventLoop: canceling blocked drain thread\n"); pthread_cancel(index->drain_tid); index->closing = 1; } #endif report("Leaving - interruptEventLoop\n"); } |