diff --git a/PYME/Acquire/Hardware/pco/pco_sdk_cam.py b/PYME/Acquire/Hardware/pco/pco_sdk_cam.py index 61dc1fbea..1c31ba570 100644 --- a/PYME/Acquire/Hardware/pco/pco_sdk_cam.py +++ b/PYME/Acquire/Hardware/pco/pco_sdk_cam.py @@ -20,10 +20,13 @@ import queue import threading import time +import copy k32_dll = ctypes.windll.kernel32 # lets us use the recommended WaitForSingleObject call (see pco.sdk) # instead of the not-recommended-for-polling pco_sdk.get_buffer_status() + + # Define event handle type (needed for pco_sdk.add_buffer_extern()) # Generally we will want to use k32_dll.CreateEventA(None, 1, 0, None) # See https://docs.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-createeventa @@ -68,6 +71,14 @@ def unregCamera(cls): MAX_QUEUED_BUFFERS = 16 # pco. has a hard limit on attaching no # more than 16 buffers at a time to the camera +# Windows WaitForSingleObject return codes +WAIT_OBJECT_0 = 0x00000000 +WAIT_TIMEOUT = 0x00000102 +WAIT_ABANDONED = 0x00000080 +WAIT_FAILED = 0xFFFFFFFF + +MAX_CONSECUTIVE_ERRORS = 10 # max consecutive wait/buffer errors before we abort recording + class PcoSdkCam(Camera): def __init__(self, camNum, debuglevel='off'): Camera.__init__(self) @@ -107,11 +118,14 @@ def Init(self): self._binning_x = 1 self._binning_y = 1 self._n_timeouts = 0 + self.hardware_overflowed = False # used by frameWrangler to know camera is unhappy self._i = 0 self._buffers_to_queue = queue.Queue() self._queued_buffers = queue.Queue() self._full_buffers = queue.Queue() self.SetROI(1, 1, self.GetCCDWidth(), self.GetCCDHeight()) + self._consecutive_errors = 0 + self._max_consecutive_errors = MAX_CONSECUTIVE_ERRORS self.SetIntegTime(0.025) self.SetAcquisitionMode(self.MODE_CONTINUOUS) self._cam_type = pco_sdk.get_camera_type(self._handle) @@ -155,24 +169,96 @@ def _poll_loop(self): # wait for the buffer wait_status = k32_dll.WaitForSingleObject(self._buf_event[_curr_buf], self._timeout) - if wait_status: - logger.warning(f"Waited too long for buffer ({self._timeout} ms).") + if wait_status == WAIT_TIMEOUT: + # Timeout: do NOT deliver a frame; recycle the buffer + self._n_timeouts += 1 + self._consecutive_errors += 1 + + logger.warning( + f"PCO: WaitForSingleObject timeout after {self._timeout} ms " + f"(timeouts={self._n_timeouts}, consecutive_errors={self._consecutive_errors}) " + f"on buffer {_curr_buf}. Recycling." + ) + + k32_dll.ResetEvent(self._buf_event[_curr_buf]) - #TODO: we currently continue as if we got the buffer - is this the right thing to do? - # Presumably the status will be non-zero and we will drop the buffer?, but then what - # happens to those buffers? do they just dissapear? + if (self._n_timeouts < MAX_TIMEOUTS) and (self.contMode == self.MODE_CONTINUOUS): + # In continuous mode, we can try and re-queue the buffer and carry on + # In single-shot mode, requeuing won't work as we will also need to retrigger - just flag overflow + # and let the FrameWrangler handle a restart + self._buffers_to_queue.put(_curr_buf) + else: + logger.error("PCO: repeated buffer timeouts -> hardware_overflowed=True") + self.hardware_overflowed = True - k32_dll.ResetEvent(self._buf_event[_curr_buf]) - # make sure this buffer is safe to use - status = self._buffer_status[_curr_buf] - if status: - logger.warning(f"Error {status} during check of buffer {_curr_buf}.") - #DB: Do you see a lot of these warnings? IE - do we get one every time we have a timeout? - # drop this buffer + elif wait_status == WAIT_OBJECT_0: + # Normal case + self._n_timeouts = 0 + self._consecutive_errors = 0 + + k32_dll.ResetEvent(self._buf_event[_curr_buf]) + + status = self._buffer_status[_curr_buf] + if status != 0: + # Bad buffer -> recycle it, don't deliver + logger.warning(f"PCO: error {status} during check of buffer {_curr_buf}. Recycling.") + self._consecutive_errors += 1 + if (self.contMode == self.MODE_CONTINUOUS): + self._buffers_to_queue.put(_curr_buf) + else: + # single shot mode - just flag overflow + logger.error("PCO: buffer error in single-shot mode -> hardware_overflowed=True") + self.hardware_overflowed = True + else: + # use it + self._full_buffers.put(_curr_buf) + self._n_buffered += 1 + else: - # use it - self._full_buffers.put(_curr_buf) - self._n_buffered += 1 + # Other wait error: recycle buffer, flag error + self._consecutive_errors += 1 + + # use GetLastError and FormatMessage to get more info + err_code = k32_dll.GetLastError() + msg_buffer = ctypes.create_string_buffer(256) + k32_dll.FormatMessageA( + 0x00000000, + None, + err_code, + 0, + msg_buffer, + len(msg_buffer), + None + ) + + logger.error( + "PCO: WaitForSingleObject returned unexpected status 0x%08X for buffer %d.\n GetLastError: %d\n Message: %s\n Recycling buffer.", + wait_status, _curr_buf, err_code, msg_buffer.value.decode() + ) + + k32_dll.ResetEvent(self._buf_event[_curr_buf]) + self._buffers_to_queue.put(_curr_buf) + + # If things look persistently bad, stop recording to avoid hard SDK crashes. + if self._consecutive_errors >= self._max_consecutive_errors: + logger.error( + "PCO: too many consecutive buffer errors (%d) -> stopping recording to avoid crash", + self._consecutive_errors + ) + try: + pco_sdk.set_recording_state(self._handle, pco_sdk.PCO_CAMERA_STOPPED) + pco_sdk.cancel_images(self._handle) + except Exception: + logger.exception("PCO: error while stopping camera after repeated buffer errors") + + # Mark stopped on our side; FrameWrangler/UI can restart cleanly. + self._recording = False + self.hardware_overflowed = True + self._n_buffered = 0 + self._n_queued = 0 + self._n_timeouts = 0 + self._consecutive_errors = 0 + else: # sleep for a bit longer if there were no buffers queued sleep_time = 0.01 @@ -433,7 +519,12 @@ def _init_buffers(self): @property def _timeout(self): - return int(max(2*100*self.GetCycleTime(), 100)) + # set the timeout (ms)based on cycle time + # timeout should be longer than cycle time + readout time + some margin for windows timing uncertainty (default to 2x cycle time) + # timeout should not be too long, otherwise one bad buffer can block things and cause overflows. + # TODO: reduce the minimum timeout to something more reasonable (e.g. 100 ms)?? Current hardcoded value of 1s is very likely excessive + # and may cause issues, especially if the camera is running at a high frame rate. + return int(max(2*1000*self.GetCycleTime(), 1000)) def StartExposure(self): #logger.debug(f'PcoSdkCam: StartExposure called from thread {threading.current_thread().name} at {time.time()}') @@ -448,6 +539,8 @@ def StartExposure(self): if self._recording == False: self._init_buffers() self._recording = True + self.hardware_overflowed = False + self._n_timeouts = 0 self._log_exposure_start() @@ -472,7 +565,14 @@ def StopAq(self): self._n_buffered = 0 self._n_queued = 0 self._n_timeouts = 0 + self.hardware_overflowed = False + + # close and clear buffer events + # (Prevents us from leaking handles when running for a long time with many start/stops) + for ev in self._buf_event: + k32_dll.CloseHandle(ev) self._buf_event = [] + self._buf_addr = [] self._buf_status_addr = [] self._buffer = None @@ -481,10 +581,11 @@ def StopAq(self): def TriggerAq(self): if (self._mode == self.MODE_SINGLE_SHOT) or (self._mode == self.MODE_SOFTWARE_TRIGGER): - res = pco_sdk.force_trigger(self._handle) + # res = pco_sdk.force_trigger(self._handle) # FIFO queue the queable buffers so we don't # grab images before a trigger in _poll_loop - self._buffers_to_queue.put(self._i) + self._buffers_to_queue.put(copy.deepcopy(self._i)) # ensure we put a copy of the index, not a reference + res = pco_sdk.force_trigger(self._handle) self._i += 1 if self._i >= self._n_buffers: self._i = 0 diff --git a/PYME/Acquire/frameWrangler.py b/PYME/Acquire/frameWrangler.py index 5be027328..f1c4fee6c 100644 --- a/PYME/Acquire/frameWrangler.py +++ b/PYME/Acquire/frameWrangler.py @@ -419,9 +419,18 @@ def checkHardware(self): NB: This is largely legacy code, as the camera is usually used in free-running mode.""" for callback in self.HardwareChecks: - if not callback(): - logger.debug('Waiting for hardware') + try: + ready = callback() + except Exception: + logger.exception('Hardware check %r raised an exception', callback) + return False + + if not ready: + logger.debug('Waiting for hardware: %r reported not-ready', callback) return False + # if not callback(): + # logger.debug('Waiting for hardware') + # return False return True