|

(Very) crude .NET wrapper for Tesseract 3

I was searching for a quick way to recognize license plates and stumbled upon EmguCV’s sample code. But this code requires Tesseract OCR (An open source OCR engine currently sponsored by Google) and its .NET wrapper. Unfortunately, this code was written when Tesseract was still at version 2, so is the .NET wrapper so none of them will work for me (I need Tesseract 3 to use a new language supported by this version), worse still, the original wrapper developer has stated that he won’t touch Tesseract code again, so the only way to proceed would be to write my own wrapper.

As you may have know, writing and debugging a wrapper for another language is a tedious and painstaking process, so I opted for the easy way out: use Tesseract command line version to recognize text without digging into the source code.

And here’s the result: A wrapper class for Tesseract on .NET – it will write your bitmap to a temporary file, execute Tesseract and extract the result back into the code. Note that this will run very slow, and that’s why I say it’s crude, a better way would be to analyze Tesseract code and write an interop DLL, but i just don’t have time for that 🙂

using System;
using System.Collections.Generic;
using System.Text;
using System.Threading;
using System.IO;

namespace PlateDetection
{
    class Tesseract
    {
        string Language = "eng";
        const string Program = @"C:Program Files (x86)Tesseract-OCRtesseract";

        /// <summary>
        /// Changes language
        /// </summary>
        /// <param name="language">Language code</param>
        /// <param name="NotUsed">Not used but kept for compatibitily with Tesseract.net old wrapper</param>
        internal void Init(string language, bool NotUsed)
        {
            Language = language;
        }

        /// <summary>
        /// Main function, recognize any text in the bitmap
        /// </summary>
        /// <param name="bmp"></param>
        /// <param name="rectangle">Not used, you may change that easily by cutting the bitmap</param>
        /// <returns></returns>
        internal List<string> DoOCR(System.Drawing.Bitmap bmp, System.Drawing.Rectangle rectangle)
        {
            bmp.Save("temp.png", System.Drawing.Imaging.ImageFormat.Png);
            System.Diagnostics.Process.Start(Program, "temp.png temp -l " + Language);
            
            string [] Words = new string[0];
            // The application may need some time to process, file is not ready yet,
            // or the application may still writing
            while (true)
            {
                try
                {
                    Words = File.ReadAllText("temp.txt").Split(new char[] { ' ', 'r', 'n', 't' }, StringSplitOptions.RemoveEmptyEntries);
                    break;
                }
                catch
                {

                }
            }
            File.Delete("temp.png");
            File.Delete("temp.txt");
            List<string> Result = new List<string>(Words);
            return Result;
        }

        /// <summary>
        /// Currently not used as there is no allocation done
        /// </summary>
        internal void Dispose()
        {
            
        }
    }
}

And here’s the full plate detection class

using System;
using System.Collections.Generic;
using System.Text;
using System.Drawing;
using Emgu.CV;
using Emgu.CV.CvEnum;
using Emgu.Util;
using System.Diagnostics;
using Emgu.CV.Structure;

namespace PlateDetection
{
    /// <summary>
    /// A license plate detector
    /// </summary>
    public class LicensePlateDetector : DisposableObject
    {

        private Tesseract _ocr;

        /// <summary>
        /// Create a license plate detector
        /// </summary>
        public LicensePlateDetector()
        {
            //create OCR
            _ocr = new Tesseract();

            //You can download more language definition data from
            //http://code.google.com/p/tesseract-ocr/downloads/list
            //Languages supported includes:
            //Dutch, Spanish, German, Italian, French and English
            _ocr.Init("eng", false);
        }

        /// <summary>
        /// Create a license plate detector
        /// </summary>
        public LicensePlateDetector(string language)
        {
            //create OCR
            _ocr = new Tesseract();

            //You can download more language definition data from
            //http://code.google.com/p/tesseract-ocr/downloads/list
            //Languages supported includes:
            //Dutch, Spanish, German, Italian, French and English
            _ocr.Init(language, false);
        }

        /// <summary>
        /// Detect license plate from the given image
        /// </summary>
        /// <param name="img">The image to search license plate from</param>
        /// <param name="licensePlateList">A list of images where the detected license plate region is stored</param>
        /// <param name="filteredLicensePlateList">A list of images where the detected license plate region with noise removed is stored</param>
        /// <param name="boxList">A list where the region of license plate, defined by an MCvBox2D is stored</param>
        /// <returns>The list of words for each license plate</returns>
        public List<List<string>> DetectLicensePlate(Image<Bgr, byte> img, List<Image<Gray, Byte>> licensePlateList, List<Image<Gray, Byte>> filteredLicensePlateList, List<MCvBox2D> boxList)
        {
            //Stopwatch w = Stopwatch.StartNew();
            List<List<string>> licenses = new List<List<string>>();
            using (Image<Gray, byte> gray = img.Convert<Gray, Byte>())
            using (Image<Gray, Byte> canny = new Image<Gray, byte>(gray.Size))
            using (MemStorage stor = new MemStorage())
            {
                CvInvoke.cvCanny(gray, canny, 100, 50, 3);

                Contour<Point> contours = canny.FindContours(
                     Emgu.CV.CvEnum.CHAIN_APPROX_METHOD.CV_CHAIN_APPROX_SIMPLE,
                     Emgu.CV.CvEnum.RETR_TYPE.CV_RETR_TREE,
                     stor);
                FindLicensePlate(contours, gray, canny, licensePlateList, filteredLicensePlateList, boxList, licenses);
            }
            //w.Stop();
            return licenses;
        }

        private void FindLicensePlate(
           Contour<Point> contours, Image<Gray, Byte> gray, Image<Gray, Byte> canny,
           List<Image<Gray, Byte>> licensePlateList, List<Image<Gray, Byte>> filteredLicensePlateList, List<MCvBox2D> boxList,
           List<List<string>> licenses)
        {
            for (; contours != null; contours = contours.HNext)
            {
                Contour<Point> approxContour = contours.ApproxPoly(contours.Perimeter * 0.05, contours.Storage);

                if (approxContour.Area > 100 && approxContour.Total == 4)
                {
                    //img.Draw(contours, new Bgr(Color.Red), 1);
                    if (!IsParallelogram(approxContour.ToArray()))
                    {
                        Contour<Point> child = contours.VNext;
                        if (child != null)
                            FindLicensePlate(child, gray, canny, licensePlateList, filteredLicensePlateList, boxList, licenses);
                        continue;
                    }

                    MCvBox2D box = approxContour.GetMinAreaRect();

                    double whRatio = (double)box.size.Width / box.size.Height;
                    if (!(3.0 < whRatio && whRatio < 8.0))
                    {
                        Contour<Point> child = contours.VNext;
                        if (child != null)
                            FindLicensePlate(child, gray, canny, licensePlateList, filteredLicensePlateList, boxList, licenses);
                        continue;
                    }

                    Image<Gray, Byte> plate = gray.Copy(box);
                    Image<Gray, Byte> filteredPlate = FilterPlate(plate);

                    List<string> words;
                    using (Bitmap bmp = filteredPlate.Bitmap)
                        words = _ocr.DoOCR(bmp, filteredPlate.ROI);

                    licenses.Add(words);
                    licensePlateList.Add(plate);
                    filteredLicensePlateList.Add(filteredPlate);
                    boxList.Add(box);
                }
            }
        }

        /// <summary>
        /// Check if the four points forms a parallelogram
        /// </summary>
        /// <param name="pts">The four points that defines a polygon</param>
        /// <returns>True if the four points defines a parallelogram</returns>
        private static bool IsParallelogram(Point[] pts)
        {
            LineSegment2D[] edges = PointCollection.PolyLine(pts, true);

            double diff1 = Math.Abs(edges[0].Length - edges[2].Length);
            double diff2 = Math.Abs(edges[1].Length - edges[3].Length);
            if (diff1 / edges[0].Length <= 0.05 && diff1 / edges[2].Length <= 0.05
               && diff2 / edges[1].Length <= 0.05 && diff2 / edges[3].Length <= 0.05)
            {
                return true;
            }
            return false;
        }

        /// <summary>
        /// Filter the license plate to remove noise
        /// </summary>
        /// <param name="plate">The license plate image</param>
        /// <returns>License plate image without the noise</returns>
        private static Image<Gray, Byte> FilterPlate(Image<Gray, Byte> plate)
        {
            Image<Gray, Byte> thresh = plate.ThresholdBinaryInv(new Gray(120), new Gray(255));

            using (Image<Gray, Byte> plateMask = new Image<Gray, byte>(plate.Size))
            using (Image<Gray, Byte> plateCanny = plate.Canny(new Gray(100), new Gray(50)))
            using (MemStorage stor = new MemStorage())
            {
                plateMask.SetValue(255.0);
                for (
                   Contour<Point> contours = plateCanny.FindContours(
                      Emgu.CV.CvEnum.CHAIN_APPROX_METHOD.CV_CHAIN_APPROX_SIMPLE,
                      Emgu.CV.CvEnum.RETR_TYPE.CV_RETR_EXTERNAL,
                      stor);
                   contours != null; contours = contours.HNext)
                {
                    Rectangle rect = contours.BoundingRectangle;
                    if (rect.Height > (plate.Height >> 1))
                    {
                        rect.X -= 1; rect.Y -= 1; rect.Width += 2; rect.Height += 2;
                        rect.Intersect(plate.ROI);

                        plateMask.Draw(rect, new Gray(0.0), -1);
                    }
                }

                thresh.SetValue(0, plateMask);
            }

            thresh._Erode(1);
            thresh._Dilate(1);

            return thresh;
        }

        protected override void DisposeObject()
        {
            _ocr.Dispose();
        }
    }
}

Seeing is believing, here’s a sample of the algorithm run on a live feed off the camera:

And even though I say it’s slow, it’s still fast enough to process my movements and only goes into recognition when the plate is detected, so the performance is acceptable anyways :). The code above is for anyone who just want to test the algorithm to decide whether or not they will use it, I don’t think it’s a good idea to put these into production code 🙂

Leave a Reply

Your email address will not be published. Required fields are marked *