\documentclass[10pt,a4paper]{article}
\usepackage[pdftex]{graphicx}
\usepackage{fullpage}
\usepackage{url}
\usepackage{amssymb,amsmath}

\begin{document}

\begin{center}
Local image structures and morphology \\
Computer Vision, Lab Exercise 2 \\
Andreas van Cranenburgh (0440949), Willem Bressers (5687063) \\
{\em \today}
\end{center}

\abstract{This report discusses the use of the first and second order derivates
for edge detection, and erosions and dilations for shape detection}

\tableofcontents
\listoffigures

\section{Problem specification}
Suppose a cookie factory checks it's cookies on a high speed conveyor belt by 
taking pictures of each cookie. A controller can't keep up with the speed so
the images needs to be checked automatically. But how?

\section{Theory}
To solve the problem there are a few steps to take. First the edge of each
cookie has to be detected. Next it's nessecary to thin the edges. And third
the shape of the cookies need to be checked (for broken or missing parts).

\subsection{Convolutions}
A convolution is an operator that can be applied to an image using a
convolution matrix. It is defined as integrating the product of two functions
(f and a mirrored version of g). In discrete terms, for every pixel its
neighborhood is multiplied with the convolution matrix, and the results are
summed.

\subsubsection{Gaussian kernel}
The Gaussian kernel is an equation representing the canonical convolution
matrix.  Effectively it blurs an image to a certain degree, depending on its
scale. The scale is determined by the variable $\sigma$. For small values of
$\sigma$, the Gausse curve (see figure \ref{gauss}) is taller and the curve is
less broad.

\begin{equation}\begin{gathered}
G_(x, y, \sigma) =
\frac{1}{(\sqrt{2\pi}\sigma )^2} e^{-\frac{x^2+y^2}{2\sigma^2}}
\end{gathered}\end{equation}

\begin{figure}[h]\begin{center}
\includegraphics[scale=0.8]{gaussmesh}
\caption{3D plot of Gaussian kernel with $\sigma = 3$}
\label{gauss}
\end{center}\end{figure}

This function returns a matrix of values scaled by $\sigma$.  The dimensions of
the grid are a multiplication of $2.5$ times the scale
($\sigma$)\footnote{$2.5$ appears to be a common value, at least according to
the lecture notes}. The grid should always be at least the size of $\sigma$,
but not too big either. The summation of this matrix should approach one, since
the summation is a discrete approximation of integrating the Gauss function.
With bigger $\sigma$, and thus bigger grids, the Gauss curve becomes very broad
and the cutoff caused by approximating the integration becomes bigger, thus
making the approximation to integration less accurate.

So far, the Gaussian kernel has been calculated for two dimensions. By using
the separability property of the convolution the same effect can be obtained by
applying a one dimensional Gaussian kernel and then its transpose
consecutively. The equation is very similar to the previous one:

\begin{equation}\begin{gathered}
G (x, \sigma) =
\frac{1}{\sqrt{2\pi}\sigma} e^{-\frac{x^2}{2\sigma^2}}
\end{gathered}\end{equation}

The resulting vector makes convolutions much faster, since there are much less
values to be multiplied and summed. Figure \ref{gausstiming} shows a graph of
the execution times of applying one and two dimensional Gaussian kernels on the
same image, with a sigma ranging from 1 to 15.

\begin{figure}[h]\begin{center}
\includegraphics[scale=0.8]{gausstiming}
\caption{Difference in performance between applying one and two dimensonial
Gaussian kernels. The complexity thus appears to be $O(\sigma)$ and
$O(\sigma^2)$, respectively.}
\label{gausstiming}
\end{center}\end{figure}

\subsubsection{Gaussian derivatives}
It is also possible to approximate the first and second order derivative of an
image by convoluting with the respective derivative of the Gaussian kernel. The
first order derivative of an image $f$ multiplied by the Gaussian kernel
(with respect to $x$): 

\begin{equation}\begin{gathered}
\frac{\delta G_\sigma}{\delta x} = - \frac{x}{\sigma^2} G_\sigma
\end{gathered}\end{equation}

This equation leads to a new image that looks rather like an embossed version
of the original. Because it is the derivative, the actual colors have been
replaced by gradients representing the change in color in the $x$ direction.
The derivative can also be taken with respect to $y$, which will show vertical
gradients. It is also possible to calculate the second derivative of the image:

\begin{equation}\begin{gathered}
\frac{\delta^2 G_\sigma}{\delta x^2} = (\frac{x^2}{\sigma^4} -
\frac{1}{\sigma^2})G_\sigma
\end{gathered}\end{equation}

By calculating the second derivative, the edges of the image become more
salient. There are three kinds of second order derivatives, namely $f_{xx}$,
$f_{xy}$ and $f_{yy}$. Figure \ref{2jet} shows the first and second order
derivative in both directions.

\begin{figure}[h]\begin{center}
\includegraphics[scale=0.75]{2-jet}
\caption{1-jet (left), 2-jet (right). Ie.\ first and second derivative in
both x and y direction.}
\label{2jet}
\end{center}\end{figure}

\subsubsection{Implementation of the Gaussion derivative}
Here is the Matlab code of our implementation of the Gaussian derivative:

\begin{verbatim}
function g  = gD(f, sigma, xorder, yorder)
X = [-2.5*sigma:2.5*sigma];
% Zeroth, first and second gaussian derivatives:
gD = [ Gauss1(sigma); ...
       (-X ./ (sigma^2)) .* Gauss1(sigma); ...
       (-1 / (sigma^2)) .* ((1 - (X.^2) / (sigma^2))) .* Gauss1(sigma) ];
%Apply both x and y derivatives to image 
%(note that gD of y has to be transposed)
g = imfilter(f, gD(xorder+1,:), 'conv', 'replicate');
g = imfilter(g, gD(yorder+1,:)', 'conv', 'replicate');
\end{verbatim}

First the function calculates the grid size by multiplying $\sigma * 2.5$.
Next the implementation samples the Gauss curve, as well as its first and
second derivative. The result is a matrix which is indexed with order of
derivation requested in the appropriate parameter. The resulting vector is
applied as convolution matrix to the original image, in two steps for $x$ and
$y$.

\subsection{Analytic and Gaussian derivative}
The second order $f_{xy}$ Gaussian derivative is a powerful tool to detect 
edges in an image. The following example demonstrates the analytic and Gaussian
(numerical) derivative of a function.

Asume an image described by the following function:

\begin{equation}\begin{gathered}
f(x,y) = A \sin{Vx} + B \cos{Wy}
\end{gathered}\end{equation}

Before the seconde order derivative can be calculated, it's necessary to 
calculate the first order derivatives $f_x$, $f_y$ of the function $f$. %insert derivation here...
Because we want to differentiate to $x$, we assume that $y$ remains the same. 
This means that $y$ has no influence in the result.
\begin{equation}\begin{gathered}
f_x = (V)*A \cos{Vx}
\end{gathered}\end{equation}
\\
Because we want to differentiate to $y$, we assume that $x$ remains the same.
This means that $x$ has no influence in the result.
\begin{equation}\begin{gathered}
f_y = (W)*B * -\sin{Wy}
\end{gathered}\end{equation}
\\
Now it's possible to calculate the second order derivatives $f_{xx}$,
$f_{yy}$, $f_{xy}$ of the function $f$. All the second order derivatives of
the function $f$ are:

\begin{equation}\begin{gathered}
(f_x)_x = f_{xx} =\frac{d}{dx} ( \frac{df}{dx} ) = \frac{d^2 f}{dx^2}\\
(f_y)_y = f_{yy} =\frac{d}{dy} ( \frac{df}{dy} ) = \frac{d^2 f}{dy^2}\\
(f_x)_y = f_{xy} =\frac{d}{dy} ( \frac{df}{dx} ) = \frac{d^2 f}{dx dy}\\
(f_y)_x = f_{yx} =\frac{d}{dx} ( \frac{df}{dy} ) = \frac{d^2 f}{dy dx}
\end{gathered}\end{equation}

Now it is possible to find the higher order derivatives:

\begin{equation}\begin{gathered}
f_{xx} = (V^2)(A * -\sin (Vx))
\end{gathered}\end{equation}

\begin{equation}\begin{gathered}
f_{yy} = (W^2)*(B \cos (Wy))
\end{gathered}\end{equation}

The second order derivative with respect to $xy$ has to be determinded by higher
order partial derivative. This means that the derivative with respect $xy$ is
the same as the derivative with respect to $yx$. The difference lays in the
fact that te derivative needs to be readed from left to right. So the second
order derivative with respect to $xy$, means that first the first order with
respect to $x$ needs to be calculated. Then the result need to be derivatived with
respect to $y$. We have already calculated the first order derivative with respect to
$x$. Now the result needs to be calculated with respect to $y$.

\begin{equation}\begin{gathered}
f_x = (V)*A * \cos{Vx}\\
f_y = (W)*B * -\sin{Wy}\\
f_{xy} = V^2 A * (-\sin{Vx} + W^2(B \cos{Wy})
\end{gathered}\end{equation}

After differentiating this function analytically, it is possible to compare
that with the Gaussian derivative of a discretized version of $f(x,y)$. Figure
\ref{gradientvectors} shows that they are very similar. Except that for the
numerical approximation, the vectors had to be translated by 105 pixels in both
the x and y direction. The reason for this remains elusive to us. The figure
has been superimposed with a grid of vectors showing the magnitude and
direction of change on the sampled locations (every 10 pixels for both x and y)

\begin{figure}[h]\begin{center}
\includegraphics[scale=0.75]{gradientvectors2}
\caption{Gradient vectors: analytic (green) and numerical (red)}
\label{gradientvectors}
\end{center}\end{figure}

\subsection{Canny edge detector}
The Canny edge detector uses the first order Gaussian derivative to find the
edges of an image. First it takes the gradient norm of the image, which
represents the magnitude of biggest change for every pixel, indepent of the
coordinate system, using this formula:

\begin{equation}
f_w = \sqrt{f_x^2 + f_y^2}
\end{equation}

But this is still only the {\em amount} of change, we also need the direction
(angle):

\begin{equation}
f_\theta = \arctan{\frac{f_x}{f_y}}
\end{equation}

Because the algorithm depends on the Gaussian derivative, $\sigma$ can be
specified. This parameter depends the scale or level of detail to be
considered. With $\sigma=1$ many edges will be detected (likely too many),
increasing it will gradually do away with less pronounced edges.

The next step is filtering out non-maxima in the gradient norm (also called
`thinning'). Armed with the angles of greatest change, we can search for pixels
without smaller neighbors in the appropriate direction. Pixels that fail the
test will be black, edges will be white. Since we are dealing with discrete
images, the angles have to be rounded to corespond to one of the four possible
directions (given eight neighbor pixels). Figure \ref{canny} shows the result
of the Canny edge detection on an example image. It should be noted that our
implementation does not use {\em hysterisis}. This amounts to using two
additional parameters specifying a minimum value for an edge to begin, and for
an edge to stop respectively.

\begin{figure}[h]\begin{center}
\includegraphics[scale=1]{cannyman} 
\caption{Canny edge detection with $\sigma=1$ using thinning.}
\label{canny}
\end{center}\end{figure}

\subsection{Morphology}
``Mathematical Morphology'' is a tool to detect objects in an image. For example
assume an image of a plane that that flies in clear air. With Morphology it is
possible to detect the blue sky and filter it out. It is harder to detect the
plane because it would have shadows and different colors. 

\subsubsection{Isodata}
Before anything can be detected it is necessary to find the colors of the
object. Given an appropriate background, it is possible to partition the interval of pixel
values of an image in background and foreground, respectively. This can be done
by looking at the histogram of the image and finding an appropriate treshold.
This can be done non-linearily (iteratively), by using the Newton-Raphson method. An initial guess might be the
mean of all pixel values. Then at each step the mean of all pixels below this
threshold and those above is taken, until the resulting threshold reaches a
plateau, or until a fixed number of iterations have been performed. See figure
\ref{isodata}.

Consequently, each pixel value in the image is compared with the value of the
treshold. This results in a binary version (``mask''). See figure \ref{coins}

\begin{figure}[h]\begin{center}
\includegraphics[scale=0.8]{isodata}
\caption{Histogram of an image (see figure \ref{coins}). The blue line
represents the isodata threshold}
\label{isodata}
\end{center}\end{figure}

\input{erosion}

\subsubsection{Openings and closings}
Before the mask can be use to detect objects it is expedient to remove any
binary noise. This can be done with a combination of dilation and erosion:

Closing:
\begin{equation}\begin{gathered}
(A \oplus S) \ominus S
\end{gathered}\end{equation}
Opening:
\begin{equation}\begin{gathered}
(A \ominus S) \oplus S
\end{gathered}\end{equation}

Where A is the mask and S is a ``marker.'' 
%The function Dilation first add`s the marker at every binairy one. Next it substracts the marker from the mask. 
%The function Erosion first substracts the marker followed by adding the same marker.
%By using first the Dilate function and next the Erosion function most of the noise is removed without loosing border edges.
The difference between opening and closing is: Opening removes noise frome the
mask. Closing connects the noise (noise close to a bigger object) to the object.
The closing operator first dilates (adds noise to objects) followed by an
erosion (remove borders from object with size of the marker).
The opening operator first erodes (removes noise) followed by a dilation
(add the marker size to the objects).

\section{Implementation notes}
We managed to make the Canny edge detector work without ``thinning'' the edges,
but this means that the edges of the resulting image are wider than in the
original image. With some research on the web we found a ``thinning'' filter
that thins the edges, although without using $f_{ww}$ or a Hessian matrix, but
using gradient angles instead.

\section{Usage}
The function \texttt{testGauss1} demonstrates the Gauss function with
$\sigma=1$ to $\sigma=15$ in the $x$ direction. The function \texttt{testGauss}
demostrates a two dimensional Gauss function.  The function \texttt{testgD}
demostrates the first and second order Gaussian derivatives with respect to $x$
and $xy$. The function \texttt{sampleImage} demostrates the gradients of a
function using analytical and numerical methods. The function
\texttt{testCanny} demonstrates the Canny edge detector. The function
\texttt{testIsodata} demonstrates the histogram, treshold and ``mask''. The
function \texttt{testDilate} demonstrates erosion and dilation with a
structuring element of size $N = 15$. The function \texttt{ClearBorders}
demostrates the removal of objects that touch the border. The function
\texttt{dilateCookies} demostrates Erosion and Dilation that removes broken
objects.

%\section{Conclusion}
%A problem whas to find the "marker"-size of the Erosion and Dilation to
%remove the broken cookie. By testing it many times we've managed to find the
%best marker size.

%\section{references}

\end{document}