python-microscopy · David-Baddeley · Oct 2, 2022 · Sep 27, 2022 · Sep 27, 2022 · Sep 28, 2022
diff --git a/PYME/Acquire/Hardware/Camera.py b/PYME/Acquire/Hardware/Camera.py
@@ -138,6 +138,8 @@ def __init__(self, *args, **kwargs):
 
         self.active = True  # Should the camera write its metadata?
 
+        self._saturation_threshold = (2**16) - 1 # default saturation threshold, if not provided in noise_properties
+
         # Register as a provider of metadata (record camera settings)
         # this is important so that the camera settings get recorded
         MetaDataHandler.provideStartMetadata.append(self.GenStartMetadata)
@@ -827,7 +829,10 @@ def SaturationThreshold(self):
         int
             the full well capacity (in ADU), typically 2^bitdepth - 1
         """
-        return self.noise_properties['SaturationThreshold']
+        try: 
+            return self.noise_properties['SaturationThreshold']
+        except (KeyError, RuntimeError):
+            return self._saturation_threshold
 
 
     def Shutdown(self):

diff --git a/PYME/Acquire/Hardware/Simulator/fakeCam.py b/PYME/Acquire/Hardware/Simulator/fakeCam.py
@@ -383,7 +383,7 @@ def __init__(self, XVals, YVals, noiseMaker, zPiezo, zOffset=50.0, fluors=None,
         self._objects=None
         self.noiseMaker=noiseMaker
 
-        self.SaturationThreshold = (2**16) - 1
+        self._saturation_threshold = (2**16) - 1
         self.DefaultEMGain = 150
         self.preampGain = 1
 

diff --git a/PYME/Analysis/keep_pairs.py b/PYME/Analysis/keep_pairs.py
@@ -0,0 +1,69 @@
+"""
+A simple utility function to look for (and keep) pairs in 2 sets of localisations.
+
+Useful for comparing different analysis and/or detection modes on the same data. 
+Could also be used on, e.g. beads in different colour channels.
+
+David Baddeley 2022
+"""
+
+import numpy as np
+
+def _idx(t):
+    idx = np.concatenate([[0,], np.argwhere(np.diff(t) > 0).squeeze() + 1])
+    idr_r = {t[i]:i for i in idx}
+    idx_e = {t[i]:i+1 for i in idx[1:]-1}
+    idx_e [t[-1]] = len(t) - 1
+
+    return idx, idr_r, idx_e
+
+def keep_pairs(x0, y0, t0, x1, y1, t1):
+    """
+    Find all localisations in (x0,y0,t0) which have a corresponding localisation in (x1,y1,t1) and return
+    the matching pairs. Uses a simple distance metric to find pairs within a frame.
+
+    TODO:
+    - Return indices so that additional columns etc ... can be examined
+    - Potentially change to finding nearest match **without** replacement. As coded it is theoretically possible
+      for 2 localisations in set 0 to map to a single localisation in set 1 (and vice versa), although this is
+      expected to be rare in practive.
+    """
+    idx0, f0, e0, = _idx(t0)
+    idx1, f1, e1 = _idx(t1)
+
+    o0 = []
+    o1 = []
+    do = []
+
+    idxs = np.array(list(f0.keys()), dtype='i')
+    for i in idxs:
+        if i in f1:
+            #skip frames which are completely missing from t1
+
+            x0_i, y0_i, t0_i = x0[f0[i]:e0[i]], y0[f0[i]:e0[i]], t0[f0[i]:e0[i]]
+            x1_i, y1_i, t1_i = x1[f1[i]:e1[i]], y1[f1[i]:e1[i]], t1[f1[i]:e1[i]]
+
+            d = (x0_i[:,None] - x1_i[None,:])**2 + (y0_i[:,None] - y1_i[None,:])**2
+
+            if len(x0_i) > len(x1_i):
+                mx = np.atleast_1d(d.argmin(0).squeeze())
+                dm = d[mx, np.arange(len(mx), dtype='i')].squeeze()
+                o1.append(np.vstack([x1_i, y1_i, t1_i]))
+                o0.append(np.vstack([x0_i[mx], y0_i[mx], t0_i[mx]]))
+                do.append(dm)
+            else:
+                mx = np.atleast_1d(d.argmin(1).squeeze())
+                dm = d[np.arange(len(mx), dtype='i'),mx].squeeze()
+                o1.append(np.vstack([x1_i[mx], y1_i[mx], t1_i[mx]]))
+                o0.append(np.vstack([x0_i, y0_i, t0_i]))
+                do.append(dm)
+
+            #if i < 50:
+            #    print(d.shape, dm.shape, mx.shape)
+            #    print(dm)
+
+    #print(o0[0].shape, len(o0), o0[1].shape, do[0].shape)
+
+    do = np.hstack(do)
+    return np.concatenate(o0, 1), np.concatenate(o1, 1), do
+
diff --git a/docs/cluster/PYMECluster.rst b/docs/cluster/PYMECluster.rst
@@ -0,0 +1,11 @@
+The PYME Storage and compute cluster architecture
+*************************************************
+
+
+.. toctree::
+   :maxdepth: 1
+
+   cluster_install
+   clusterFilesystem
+   cluster_compute
+   cluster_ui
diff --git a/docs/cluster/clusterFilesystem.rst b/docs/cluster/clusterFilesystem.rst
@@ -44,7 +44,7 @@ Startup / installation
 
 - install PYME as a python package
 - run :py:mod:`PYMEDataServer <PYME.cluster.HTTPDataServer>` (either using :program:`PYMEClusterOfOne`, or launching individually on each
-  cluster node after following the `configuration instructions <_clusterinstall>`_ :ref:`clusterinstall_` )
+  cluster node after following the :ref:`configuration instructions <clusterinstall>` )
 
 Data streaming
 ==============

diff --git a/docs/cluster/cluster_compute.rst b/docs/cluster/cluster_compute.rst
@@ -0,0 +1,151 @@
+Distributed computation on the PYME Cluster
+*******************************************
+
+The PYME cluster architecture contains tools for distributed analysis of 
+imaging data in a "data-local" manner where computation is performed on
+the nodes housing the data, avoiding IO overhead which would otherwise limit
+performance.
+
+Compute tasks
+=============
+
+We currently support two types of computational tasks (below), although the
+framework is designed to allow additional task types to easily be added in 
+the future. 
+
+Localisation tasks
+------------------
+
+These are a localisation-microscopy specific task type optimised for
+performing single-molecule localisation on a single frame of an image series.
+A wide range of different localisation algorithms are supported (for more details see :ref:`localisationanalysis`)
+
+"Recipe" tasks
+--------------
+
+These are a more generic task type which supports a range of different image
+and data-processing functions specified by combining analysis building blocks
+into :ref:`recipes <recipes>`. Recipes are specified using a .yaml text format,
+and can either be created manually or using the recipe editor.
+
+
+Rule-based task scheduler
+=========================
+
+PYME takes a different approach to task scheduling than many 
+common cluster schedulers. Rather than centrally definining individual tasks 
+and distribututing these across cluster nodes from a central server, 
+we define so-called "**Rules**" which decribe how to generate tasks.
+Each cluster node contacts the central server for a copy of the currently
+active rules, generates candidate tasks based on the rules, scores these
+based on node capability and whether the required data is available
+locally on the that node, and then submits a bid to the central server 
+specifying which tasks it want's to perform and at what cost. The central 
+server then compares the bids and replies with a set of tasks indices for
+which the node has won the bid.
+
+This architecture solves a number of problems faced when trying to distribute
+a very large number (thousands per second) of small tasks. It has 3 main 
+advantages: 
+
+- It dramatically reduces the network traffic required for task distribution, 
+  with only the rule and integer task IDs transmitted between nodes.
+- It distributes the work of assigning the tasks across the entire cluster. 
+  Most notably, it allows data locality checking to be performed locally. 
+  *"Do I have this file?"* turns out out to be a **much, much** cheaper question than 
+  *"Which node has this file?"*. 
+- It reduces memory usage in storing task descriptions, with task-descriptions
+  only generated for enough tasks to full each nodes input queue. This is 
+  critically important when, e.g. re-analysing streamed data, where a 
+  conventional task allocation strategy would lead to the generation of 200,000,000 
+  tasks for a day's worth of streamed image frames. 
+
+
+High-level rule API
+-------------------
+
+Recipe Rules
+''''''''''''
+
+Recipe rules can be used apply a single :ref:`recipe <recipes>` to multiple 
+files in parallel. A ``RecipeRule`` expects recipe text, an output directory
+path that is relative to the PYME cluster's root data directory and at least
+one input. The recipe text can alternatively be passed via a cluster URI that
+points to recipe text.
+
+For example, we can create a recipe that will simulate random points and save
+them in an output directory of our choice.
+
+.. code-block:: python
+
+    recipe_text= '''
+    - simulation.RandomPoints:
+        output: points
+    - output.HDFOutput:
+        filePattern: '{output_dir}/test.hdf'
+        inputVariables:
+        points: points
+        scheme: pyme-cluster://
+    '''
+
+.. note::
+
+    ``scheme: pyme-cluster://`` puts the output into the cluster file system so 
+    that it is accessible to subsequent recipe runs etc ... This is typically 
+    ``~/PYMEData`` in a :ref:`cluster-of-one <localisationanalysis>` scenario 
+    unless otherwise specified through `PYME.config <api/PYME.config>` It also 
+    routes all IO through the dataserver, avoiding IO conflicts.
+
+    To concatenate the results from multiple operations into a single table, 
+    use ``scheme: pyme-cluster:// - aggregate``.
+
+Recipe tasks are generated for each input in the rule - for simulation we use 
+the ``__sim`` proxy input. For ``__sim`` "inputs" the filename can be an 
+arbitrary string, and is propagated to the ``sim_tag`` context variable which 
+can be used in the output `filePattern`.
+
+.. code-block:: python
+
+    from PYME.cluster import rules
+
+    r = rules.RecipeRule(recipe=recipe_text, output_dir='test', inputs={'__sim':['1']})
+    r.push()
+
+Inputs are specified as a dictionary, where the key is passed in as the name of 
+the input datasource, for direct use within the recipe, and the value must be a 
+*list* of data sources.  ``__sim`` is a special case where no input is used in 
+the recipe. If, instead, we wanted to create a recipe that read in a data set, 
+created a surface, and then saved this surface to file, we would execute the 
+following.
+
+.. code-block:: python
+
+    recipe_text = '''
+    - pointcloud.Octree:
+        input_localizations: shape_shape
+        output_octree: octree
+    - surface_fitting.DualMarchingCubes:
+        input: octree
+        output: mesh
+        remesh: true
+    - output.STLOutput:
+        filePattern: '{{output_dir}}/my_surface.stl'
+        inputName: membrane
+        scheme: pyme-cluster://
+    '''
+
+    rule = RecipeRule(recipe=recipe_text, output_dir=output_dir, 
+                      inputs={'shape': [f'pyme-cluster:///{output_dir}/shape.hdf']})
+
+    rule.push()
+
+Note that in this case, an HDF file is passed as input. This is opened in the
+pipeline as a data source with name ``<input_name>_<table_name>``. In this 
+case, the table name is also named ``shape`` and so ``input_localizations`` in 
+the recipe is named ``shape_shape``.
+
+
+
+Low level API
+-------------
+
diff --git a/docs/cluster/rules.rst b/docs/cluster/rules.rst
diff --git a/docs/index.rst b/docs/index.rst
@@ -30,6 +30,7 @@ particular could use a lot of work. Hopefully it's enough to get you started.
    PYMEVis
    component_programs
    LocalisationAnalysis
+   cluster/PYMECluster
-Original file line number
+Diff line change
@@ Expand Up @@
        PYMEVis
        component_programs
        LocalisationAnalysis
+       cluster/PYMECluster
@@ Expand Down @@