This tutorial explains the steps to follow to apply a series of image processing operations to a dataset.

We will:

* Create a dataset.
* Find the different motor positions and reshape the data along these dimensions.
* Apply image processing techniques such as background subtraction and hot pixel removal.
* Apply a region of interest to the images to reduce CPU time and memory usage.
* Find and apply a linear shift between the images.
* Find the components that make up the dataset by means of blind source separation algorithms.
* Link the components of different datasets with feature matching algorithms.

The operations can be applied in any order, but it is important to apply the region of interest as early as possible to reduce the computational time, especially for large datasets.

[1]:
from matplotlib import pyplot as plt
[2]:
def plot_comparison(original, filtered, filter_name, original_name="original"):
    """Show two images side by side, e.g. before and after a processing step."""
    fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(15, 15), sharex=True, sharey=True)
    ax1.imshow(original, cmap=plt.cm.viridis)
    ax1.set_title(original_name)
    ax2.imshow(filtered, cmap=plt.cm.viridis)
    ax2.set_title(filter_name)

Dataset#

[3]:
import glob
import os
import numpy
from darfix.core.dataset import Dataset

# Load the scan series starting from its first .edf file, keeping the data in memory
dataset = Dataset(
    _dir="/tmp/darfix",
    first_filename="/users/garrigaf/Documents/datasets/reduced_strain/strain_0000.edf",
    in_memory=True,
)
[4]:
from darfix.core.dimension import POSITIONER_METADATA

dataset.find_dimensions(POSITIONER_METADATA)
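find_dimensions scans the positioner metadata and proposes the dimensions of the scan. Before editing them in the next cell, the detected dimensions can be inspected with a small loop like the following sketch (it only relies on the dims.ndim, dims.get(), name and unique_values attributes that are also used later in this tutorial):

for i in range(dataset.dims.ndim):
    dim = dataset.dims.get(i)
    print(i, dim.name, len(dim.unique_values))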
[5]:
# Keep only the relevant dimensions: drop dimensions 1 and 2 and move dimension 3 into position 1
dataset.remove_dim(1)
dataset.remove_dim(2)
dataset.add_dim(1, dataset.dims.get(3))
dataset.remove_dim(3)
[6]:
# Re-set the unique motor values of the first dimension
dataset.dims.get(0).set_unique_values(dataset.dims.get(0).unique_values)
[7]:
plot_comparison(dataset.get_data(13), dataset.get_data(16), "")
../_images/tutorials_darfix_tutorial_8_0.png
[8]:
dataset = dataset.reshape_data()
[9]:
# Check the shape of the metadata after swapping its first two axes
metadata = numpy.swapaxes(dataset.data.metadata, 0, 1)[:, 0].shape
[10]:
dataset2 = dataset.project_data(1)
[11]:
dataset2.data.metadata.shape
[11]:
(2,)

ROI#

[12]:
from darfix.core import roi

roi_dataset = dataset.apply_roi(origin=[730, 825], size=[726, 346])
# If a separate background dataset were used, the same kind of ROI could be applied to it:
# bg_dataset = bg_dataset.apply_roi(origin=[900, 750], size=[500, 500])
[13]:
plot_comparison(roi_dataset.get_data(13), roi_dataset.get_data(16), "")
../_images/tutorials_darfix_tutorial_15_0.png
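Applying a region of interest simply crops every frame, so all later operations work on smaller arrays. As a rough numpy illustration of what a crop does (the ROI values below are hypothetical and the exact origin/size convention of apply_roi is not asserted here):

stack = numpy.asarray(dataset.get_data())
row0, col0, height, width = 100, 200, 300, 400  # hypothetical ROI, for illustration only
cropped = stack[:, row0:row0 + height, col0:col0 + width]
print(stack.shape, "->", cropped.shape)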
[14]:
# 2D rocking curve: intensity of the pixel (row=515, col=181) as a function of the two motor positions
ydata = roi_dataset.data[:, :, 515, 181]
plt.imshow(ydata)
[14]:
<matplotlib.image.AxesImage at 0x7fd3e48d88d0>
../_images/tutorials_darfix_tutorial_16_1.png
[32]:
from darfix.core.mapping import fit_2d_rocking_curve

xdim = dataset.dims.get(1)
ydim = dataset.dims.get(0)
y_gauss = fit_2d_rocking_curve([ydata, None], [xdim.unique_values, ydim.unique_values])
[33]:
def multi_gaussian(M, x0, y0, xalpha, yalpha, A, C):
    """2D Gaussian of amplitude A with widths xalpha/yalpha and a cross term C coupling the two axes."""
    x, y = M
    return A * numpy.exp(
        -(((x - x0) / xalpha) ** 2) - ((y - y0) / yalpha) ** 2 + C * (x - x0) * (y - y0)
    )
[34]:
# Build the (x, y) evaluation grid from the unique motor positions of both dimensions
xdim = dataset.dims.get(1)
ydim = dataset.dims.get(0)
X, Y = numpy.meshgrid(xdim.unique_values, ydim.unique_values)
xdata = numpy.vstack((X.ravel(), Y.ravel()))
print(xdata.shape)
(2, 961)
[38]:
from scipy.optimize import curve_fit

A = ydata.ptp()
x0 = numpy.mean(xdim.unique_values)
y0 = numpy.mean(ydim.unique_values)
xalpha = numpy.std(xdim.unique_values)
yalpha = numpy.std(ydim.unique_values)
C = 0
pars, cov = curve_fit(
    f=multi_gaussian,
    xdata=xdata,
    ydata=ydata.ravel(),
    p0=[x0, y0, xalpha, yalpha, A, C],
)
print(pars)
y_gauss = multi_gaussian(xdata, *pars)
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-38-d2479a827da7> in <module>
      6 yalpha = numpy.std(ydim.unique_values)
      7 C = 0
----> 8 pars, cov = curve_fit(f=multi_gaussian, xdata=xdata, ydata=ydata.ravel(), p0=[x0, y0, xalpha, yalpha, A, C])
      9 print(pars)
     10 y_gauss = multi_gaussian(xdata, *pars)

~/.local/share/virtualenvs/darfix/lib/python3.7/site-packages/scipy/optimize/minpack.py in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, jac, **kwargs)
    792         cost = np.sum(infodict['fvec'] ** 2)
    793         if ier not in [1, 2, 3, 4]:
--> 794             raise RuntimeError("Optimal parameters not found: " + errmsg)
    795     else:
    796         # Rename maxfev (leastsq) to max_nfev (least_squares), if specified.

RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 1400.
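The fit stops because the default maximum number of function evaluations (maxfev = 1400) is reached before convergence. One thing to try is simply to allow more evaluations, as in the sketch below; depending on the data and the starting values the fit may still not converge:

pars, cov = curve_fit(
    f=multi_gaussian,
    xdata=xdata,
    ydata=ydata.ravel(),
    p0=[x0, y0, xalpha, yalpha, A, C],
    maxfev=10000,  # raise the evaluation budget
)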
[37]:
plt.imshow(ydata)
plt.contour(y_gauss.reshape(ydata.shape))
plt.show()
../_images/tutorials_darfix_tutorial_21_0.png

Filter data#

[11]:
# Split the frame indices into high- and low-intensity sets
hi_indices, li_indices = dataset.partition_by_intensity()
Computing intensity |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0%
[12]:
li_indices.shape
[12]:
(397,)
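Conceptually, the partition separates the frame indices into a high-intensity and a low-intensity set, so that frames containing mostly noise can be left out of later processing. A minimal numpy sketch of the idea (not darfix's actual implementation, and the mean-based cut is only for illustration):

stack = numpy.asarray(dataset.get_data(), dtype=float)
frame_intensity = stack.sum(axis=(1, 2))      # integrated intensity of every frame
threshold = frame_intensity.mean()            # hypothetical cut
hi = numpy.flatnonzero(frame_intensity >= threshold)
li = numpy.flatnonzero(frame_intensity < threshold)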

Background subtraction#

[19]:
# Subtract a background computed with the median method
dataset = roi_dataset.apply_background_subtraction(method="median")
[20]:
plot_comparison(dataset.get_data(13), dataset.get_data(16), "")
../_images/tutorials_darfix_tutorial_27_0.png
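With the "median" method the background is, conceptually, the pixel-wise median over the stack, which is then subtracted from every frame. A rough numpy sketch of that idea (not the actual darfix code; clipping negatives to zero is an assumption of this sketch):

stack = numpy.asarray(roi_dataset.get_data(), dtype=float)
background = numpy.median(stack, axis=0)             # pixel-wise median background
corrected = numpy.clip(stack - background, 0, None)  # assumed clipping of negative values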

Hot Pixel Removal#

[21]:
# Remove hot pixels from every frame
dataset = dataset.apply_hot_pixel_removal()
[22]:
plot_comparison(dataset.get_data(13), dataset.get_data(16), "")
../_images/tutorials_darfix_tutorial_30_0.png
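Hot pixel removal typically replaces isolated outlier pixels with a local median. The sketch below shows one common way to do this with scipy; darfix's internal implementation may differ and the outlier criterion here is only illustrative:

from scipy.ndimage import median_filter

frame = numpy.asarray(dataset.get_data(13), dtype=float)
smoothed = median_filter(frame, size=3)          # local 3x3 median
outliers = frame > 5 * smoothed                  # hypothetical hot-pixel criterion
cleaned = numpy.where(outliers, smoothed, frame)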

Threshold removal#

[23]:
# Remove the pixel values below the bottom threshold
dataset = dataset.apply_threshold_removal(bottom=10)
[24]:
plot_comparison(dataset.get_data(13), dataset.get_data(16), "")
../_images/tutorials_darfix_tutorial_33_0.png
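Threshold removal with bottom=10 is understood here as discarding the pixel values below the given bottom threshold. In plain numpy, that interpretation amounts to:

frame = numpy.asarray(dataset.get_data(13), dtype=float).copy()
frame[frame < 10] = 0  # zero out the values below the bottom threshold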

Shift Detection#

[19]:
# Find the linear shift between the images and apply it
shift_dataset = dataset.find_and_apply_shift()
Finding shift |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0%
Applying shift |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0%
[20]:
plot_comparison(shift_dataset.get_data(13), shift_dataset.get_data(16), "")
../_images/tutorials_darfix_tutorial_36_0.png
[21]:
plot_comparison(dataset.zsum(), shift_dataset.zsum(), "After shift", "Before shift")
../_images/tutorials_darfix_tutorial_37_0.png
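A common way to estimate a linear shift between frames is phase cross-correlation. The sketch below uses scikit-image and scipy directly on two frames; it only illustrates the idea and does not reproduce what find_and_apply_shift does internally:

from scipy.ndimage import shift as nd_shift
from skimage.registration import phase_cross_correlation

ref = numpy.asarray(dataset.get_data(0), dtype=float)
moving = numpy.asarray(dataset.get_data(1), dtype=float)
estimated_shift, error, diffphase = phase_cross_correlation(ref, moving)
aligned = nd_shift(moving, estimated_shift)  # apply the estimated shift to the moving frame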

BSS#

[92]:
# Estimate the number of components: keep those that each explain more than 1% of the variance
vals = shift_dataset.pca(return_vals=True)
vals = vals / numpy.sum(vals)
n_components = len(vals[vals > 0.01])
n_components
[92]:
3
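The criterion above keeps the components that each explain more than 1% of the total variance. A variant that instead counts how many components are needed to reach 99% of the cumulative variance would be:

cum = numpy.cumsum(vals)
n_components_99 = int(numpy.searchsorted(cum, 0.99)) + 1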
[93]:
# Compute NNICA+NMF to find the components
comp, W = shift_dataset.nica_nmf(n_components, num_iter=500)
Updating decomposition matrices |████████████████████████████████████████████████████████████████████████████████████████████████████| 100.0%
/users/garrigaf/.local/share/virtualenvs/darfix/lib/python3.7/site-packages/sklearn/decomposition/_nmf.py:1077: ConvergenceWarning: Maximum number of iterations 500 reached. Increase it to improve convergence.
  " improve convergence." % max_iter, ConvergenceWarning)
[ ]:
fig, ax = plt.subplots(ncols=n_components, figsize=(50, 5), sharex=True, sharey=True)

plt.suptitle("NNICA components of dataset")
for i in range(n_components):
    ax[i].imshow(comp[i].reshape(shift_dataset.get_data(0).shape))
    ax[i].axis("off")
[ ]:
# Save the components to the treated data folder

from darfix.io.utils import write_components

h, w = shift_dataset.get_data(0).shape
write_components(
    "components",
    "entry",
    shift_dataset.get_dimensions_values(),
    W,
    comp.reshape((n_components, h, w)),
    "1",
)

Feature Matching#

[ ]:
from darfix.core.componentsMatching import ComponentsMatching, Method
from darfix.io.utils import read_components

db1 = "components"
db2 = "components"


dimensions1, components1, W1 = read_components(db1)
dimensions2, components2, W2 = read_components(db2)

componentsMatching = ComponentsMatching(components=[components1, components2])

final_matches, matches = componentsMatching.match_components(
    method=Method.orb_feature_matching
)

draws = numpy.array(
    componentsMatching.draw_matches(final_matches, matches, displayMatches=False)
)
[ ]:
rows = 1
cols = int(len(draws) / rows)
fig, ax = plt.subplots(ncols=cols, nrows=rows, figsize=(30, 5))

plt.suptitle("Linked components from two different layers of HEAT 1")
for i in range(cols):
    ax[i].imshow(draws[i], cmap="jet")
    ax[i].axis("off")
    if i in final_matches:
        ax[i].set_title("{}, {}".format(i, final_matches[i]))
    else:
        ax[i].set_title("{}, -".format(i))

Grain plot#

[25]:
# Compute moments
moments = dataset.apply_moments()
plot_comparison(moments[0][0], moments[1][0], "COM obpitch", "COM diffry")
/users/garrigaf/Documents/git/darfix/darfix/core/mapping.py:231: RuntimeWarning: invalid value encountered in true_divide
  com = numpy.sum(stack, axis=0) / zsum
/users/garrigaf/Documents/git/darfix/darfix/core/mapping.py:234: RuntimeWarning: invalid value encountered in true_divide
  std = numpy.sqrt(sum((data[i] * ((repeat_values[i] - com)**2) for i in range(len(data))))) / zsum
../_images/tutorials_darfix_tutorial_47_1.png
[49]:
# Show FWHM
plot_comparison(moments[0][1], moments[1][1], "FWHM obpitch", "FWHM diffry")
../_images/tutorials_darfix_tutorial_48_0.png
[50]:
# Show skewness
plot_comparison(moments[0][2], moments[1][2], "skewness obpitch", "skewness diffry")
../_images/tutorials_darfix_tutorial_49_0.png
[51]:
# Show kurtosis
plot_comparison(moments[0][3], moments[1][3], "kurtosis obpitch", "kurtosis diffry")
../_images/tutorials_darfix_tutorial_50_0.png
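Each moment map is computed per pixel along the image stack, with the motor positions as the weighting variable (see darfix.core.mapping for the actual implementation). For instance, the centre of mass (first moment) in plain numpy, with placeholder motor positions:

stack = numpy.asarray(dataset.get_data(), dtype=float)     # (N, h, w) image stack
values = numpy.linspace(0.0, 1.0, len(stack))              # placeholder motor positions, one per frame
zsum = stack.sum(axis=0)                                   # total intensity per pixel
com = (stack * values[:, None, None]).sum(axis=0) / zsum   # per-pixel centre of mass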
[45]:
# Compute mosaicity
from matplotlib.colors import hsv_to_rgb

com0 = moments[0][0]
no_nans_indices = ~numpy.isnan(com0)
min_com0 = numpy.min(com0[no_nans_indices] if len(com0[no_nans_indices]) > 0 else com0)
com0[numpy.isnan(com0)] = min_com0
norms0 = (com0 - min_com0) / numpy.ptp(com0)
com1 = moments[1][0]
no_nans_indices = ~numpy.isnan(com1)
min_com1 = numpy.min(com1[no_nans_indices] if len(com1[no_nans_indices]) > 0 else com1)
com1[numpy.isnan(com1)] = min_com1
norms1 = (com1 - min_com1) / numpy.ptp(com1)

mosaicity = numpy.stack((norms0, norms1, numpy.ones(moments[0].shape[1:])), axis=2)

plt.imshow(hsv_to_rgb(mosaicity))
[45]:
<matplotlib.image.AxesImage at 0x7fd3b44e19e8>
../_images/tutorials_darfix_tutorial_51_1.png
[91]:
from matplotlib.colors import hsv_to_rgb
from silx.image.marchingsquares import find_contours

ori_dist, hsv_key = shift_dataset.compute_mosaicity_colorkey(scale=1)
plt.imshow(hsv_to_rgb(hsv_key))
levels = numpy.linspace(numpy.min(ori_dist), numpy.max(ori_dist), 15)
for i in levels:
    polygons = find_contours(ori_dist, i)
    for contour in polygons:
        x = contour[:, 1]
        y = contour[:, 0]
        centroid = (sum(x) / len(contour), sum(y) / len(contour))
        plt.plot(x, y)
        if i == levels[-2]:
            plt.plot(centroid[0], centroid[1], "ro")
plt.show()
../_images/tutorials_darfix_tutorial_52_0.png

Fit data#

[50]:
# Show the rocking curve of one pixel along the chosen dimension
from darfix.core.mapping import fit_rocking_curve

px, py = 300, 374
dimension = [[1], [1]]
data = shift_dataset.get_data(dimension=dimension)

# Intensity of the pixel (py, px) along the stack
if shift_dataset.in_memory:
    y = data[:, int(py), int(px)]
else:
    y = [image[int(py), int(px)] for image in data]

# Motor positions corresponding to each frame of the selected dimension
axis = shift_dataset.dims.ndim - dimension[0][0] - 1
dim = shift_dataset.dims.get(axis)
x = numpy.array(
    [data.metadata[i].get_value(kind=dim.kind, name=dim.name) for i in range(len(data))]
).reshape(-1)
item = [numpy.array(y), None]

plt.plot(x, y)

try:
    y_gauss = fit_rocking_curve(item, values=x, num_points=1000)
except TypeError:
    y_gauss = y
    print("Cannot fit")

x_gauss = numpy.linspace(x[0], x[-1], len(y_gauss))
plt.plot(x_gauss, y_gauss)
[50]:
[<matplotlib.lines.Line2D at 0x7f6a027fb3c8>]
../_images/tutorials_darfix_tutorial_54_1.png
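fit_rocking_curve fits a 1D curve to the intensity of one pixel as a function of the motor position. The same kind of fit can also be written directly with scipy; the sketch below fits a Gaussian on a constant background and is only an illustration of the idea, not darfix's exact model:

from scipy.optimize import curve_fit

def gaussian(x, amplitude, center, sigma, background):
    # Gaussian peak on a constant background
    return amplitude * numpy.exp(-((x - center) ** 2) / (2 * sigma**2)) + background

y_arr = numpy.asarray(y, dtype=float)
p0 = [y_arr.max() - y_arr.min(), x[numpy.argmax(y_arr)], numpy.std(x), y_arr.min()]
try:
    popt, _ = curve_fit(gaussian, x, y_arr, p0=p0)
    x_fit = numpy.linspace(x[0], x[-1], 1000)
    plt.plot(x_fit, gaussian(x_fit, *popt))
except RuntimeError:
    print("Gaussian fit did not converge")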
[27]:
%%capture --no-display
fit_dataset = shift_dataset.apply_fit(dimension=dimension)
[46]:
numpy.max(fit_dataset.zsum()[1])
[46]:
20.70828701296387
[47]:
numpy.max(shift_dataset.zsum()[1])
[47]:
19
[29]:
# Absolute difference between the fitted and the original zsum images
img = numpy.abs(numpy.subtract(fit_dataset.zsum(), shift_dataset.zsum()))
[43]:
numpy.argmax(numpy.subtract(fit_dataset.zsum(), shift_dataset.zsum())[1])
[43]:
312
[31]:
plot_comparison(
    fit_dataset.zsum(), shift_dataset.zsum(), "Fitted zsum", "Original zsum"
)
../_images/tutorials_darfix_tutorial_60_0.png
[62]:
# Plot the FWHM: FWHM = 2*sqrt(2*ln 2) * sigma ≈ 2.35482 * sigma

std = numpy.std(shift_dataset.get_data(), axis=0)
std[numpy.isnan(std)] = 0
plt.imshow(2.35482 * std)
[62]:
<matplotlib.image.AxesImage at 0x7fd94dd8f710>
../_images/tutorials_darfix_tutorial_61_1.png
[63]:
# Plot peak values depending on the dimension (motor chosen for the fit)

data = shift_dataset.get_data()
axis = dimension[0][0]
dim = shift_dataset.dims.get(0)
# Motor value of each frame along the chosen dimension
values = numpy.array(
    [metadata.get_value(kind=dim.kind, name=dim.name)[0] for metadata in data.metadata]
)
# For every pixel, take the motor value at which its intensity peaks
x = numpy.array(numpy.argmax(data, axis=0))
image = [values[i] for i in x.flatten()]
image = numpy.reshape(image, x.shape)

plt.imshow(image)
[63]:
<matplotlib.image.AxesImage at 0x7fd9a34f7390>
../_images/tutorials_darfix_tutorial_62_1.png