OCR a Selected Area of an Image in JavaScript

Since version 8.2, Dynamsoft’s ImageCapture Suite, an image acquisition SDK optimized for web applications, supports seamless integration with the 1-D/2-D Barcode Reader and OCR add-ons.

The following source code shows how to use ImageCapture Suite to capture images from webcams and scanners, select an area of the scanned image with the mouse, and convert the selected area to a searchable text or PDF file.

If you’d like to test out the source code, you can first download and install the suite from Dynamsoft’s website: ImageCapture Suite 30-Day Free Trial Download

1. Capture images from scanners, webcams, and other TWAIN/WIA/UVC-compatible devices.

/**
 * Selects and opens a capture source, copies the acquisition settings from
 * the page's form controls onto DWObject, and starts the acquisition.
 *
 * The source comes from the dropdown whose id is DW_DWTSourceContainerID, or
 * from DWObject.SelectSource() when that id is empty. A source type of 0 is
 * configured from the scanner controls (ShowUI, PixelType, Resolution, ADF,
 * Duplex); any other type is configured from the webcam controls
 * (ShowUIForWebcam, MediaType, ResolutionWebcam).
 */
function AcquireImageInner() {
    if (DW_DWTSourceContainerID != "") {
        var chosenSource = document.getElementById(DW_DWTSourceContainerID).selectedIndex;
        DWObject.SelectSourceByIndex(chosenSource);
    } else {
        DWObject.SelectSource();
    }

    // Re-open the source so the settings below apply to a fresh session.
    DWObject.CloseSource();
    DWObject.OpenSource();

    // NOTE(review): the dropdown is read again here even when
    // DW_DWTSourceContainerID is "" (the SelectSource() path above); in that
    // case getElementById would return null and this line throws — confirm
    // whether that configuration is ever used.
    var sourceType = DWObject.GetSourceType(
        document.getElementById(DW_DWTSourceContainerID).selectedIndex);

    if (sourceType != 0) {
        // Non-zero source type: use the webcam controls.
        DWObject.IfShowUI = document.getElementById("ShowUIForWebcam").checked;

        var mediaTypeList = document.getElementById("MediaType");
        DWObject.SelectMediaTypeByIndex(mediaTypeList.selectedIndex);
        var camResolutionList = document.getElementById("ResolutionWebcam");
        DWObject.SelectResolutionForCamByIndex(camResolutionList.selectedIndex);

        var webcamSummary = "MediaType: " + DWObject.MediaType + "<br />Resolution: " + DWObject.ResolutionForCam + "<br />";
        AppendMessage(webcamSummary);
    } else {
        // Source type 0: use the scanner controls.
        DWObject.IfShowUI = document.getElementById("ShowUI").checked;

        // The position of the checked "PixelType" control (0..2) is used
        // directly as the PixelType value.
        var pixelTypeOptions = document.getElementsByName("PixelType");
        for (var optionIndex = 0; optionIndex < 3; optionIndex += 1) {
            if (pixelTypeOptions.item(optionIndex).checked == true) {
                DWObject.PixelType = optionIndex;
            }
        }

        DWObject.Resolution = Resolution.value;
        DWObject.IfFeederEnabled = document.getElementById("ADF").checked;
        DWObject.IfDuplexEnabled = document.getElementById("Duplex").checked;

        var scannerSummary = "Pixel Type: " + DWObject.PixelType + "<br />Resolution: " + DWObject.Resolution + "<br />";
        AppendMessage(scannerSummary);
    }

    DWObject.IfDisableSourceAfterAcquire = true;
    DWObject.AcquireImage();
}

2. Enable your customers to select an area of the scanned image.

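Use the OnImageAreaSelected event to capture the coordinates of the target area. The event is triggered when a user drags the mouse to select an area; the handler below stores the left, top, right, and bottom values it receives so they can be used later.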

/**
 * Handler for the OnImageAreaSelected event: remembers the rectangle the
 * user selected so it can be used later.
 *
 * @param index  value passed by the event; not used here
 * @param left   left coordinate of the selected area
 * @param top    top coordinate of the selected area
 * @param right  right coordinate of the selected area
 * @param bottom bottom coordinate of the selected area
 */
function DynamicWebTwain_OnImageAreaSelected(index, left, top, right, bottom) {
    // J_OCRing reads the selection from iLeft/iTop/iRight/iBottom. JavaScript
    // identifiers are case-sensitive, so writing only the all-lowercase names
    // never reached the OCR call.
    iLeft = left;
    iTop = top;
    iRight = right;
    iBottom = bottom;

    // Keep the original all-lowercase names in sync as well, in case other
    // code on the page reads them.
    ileft = left;
    itop = top;
    iright = right;
    ibottom = bottom;
}

3. Convert the selected area to a searchable file.

/**
 * Runs OCR on the current image and saves the result to a file.
 *
 * Steps, in order:
 *  1. If DWObject.OCRVersion is empty or differs from DW_OCRVersion, and the
 *     page has a hostname, requests "Resources/OCR.zip" from the page's own
 *     directory through HTTPDownloadResource.
 *  2. Reads the language code from #ddl_language and the result format from
 *     #ddl_fileType, then shows a save dialog (TXT for format 0, PDF otherwise).
 *  3. If the dialog returns true, requests the language data archive from
 *     www.dynamsoft.com and calls OCR1 on the rectangle held in
 *     iLeft/iTop/iRight/iBottom when any of them is non-zero, or
 *     OCRSelectedImages otherwise. Failures are reported through
 *     ShowErrorInMessageBox, successes through AppendMessage.
 *  4. Always resets the "btnOCRClient" button via J_SetBtnProcessingAndText.
 */
function J_OCRing() {

    var OCRVerStr = DWObject.OCRVersion;
    if (!OCRVerStr || OCRVerStr != DW_OCRVersion) {
        if (location.hostname != "") {
            var CurrentPathName = unescape(location.pathname); // get current PathName in plain ASCII
            var CurrentPath = CurrentPathName.substring(0, CurrentPathName.lastIndexOf("/") + 1);
            var strOCRfilepath = CurrentPath + "Resources/OCR.zip";
            var strHostIP = location.hostname;
            DWObject.HTTPPort = location.port == "" ? 80 : location.port;
            DWObject.HTTPDownloadResource(strHostIP, strOCRfilepath, "OCR.zip");
        }
    }

    // Flat list of pairs: language code, then the name used for its data
    // archive. No trailing comma: old IE counts one as an extra element.
    var aryLanguage = [
    "eng", "English",
    "ara", "Arabic",
    "bul", "Bulgarian",
    "cat", "Catalan",
    "ces", "Czech",
    "chi_sim", "Chinese (Simplified)",
    "chi_tra", "Chinese (Traditional)",
    "chr", "Cherokee",
    "dan-frak", "Danish (Fraktur)",
    "dan", "Danish",
    "nld", "Dutch",
    "deu-frak", "German (Fraktur)",
    "deu", "German",
    "ell", "Greek",
    "fin", "Finnish",
    "fra", "French",
    "heb-ras", "Hebrew",
    "heb-seg", "Hebrew",
    "heb", "Hebrew",
    "hin", "Hindi",
    "hun", "Hungarian",
    "ind", "Indonesian",
    "ita", "Italian",
    "jpn", "Japanese",
    "kor", "Korean",
    "lav", "Latvian",
    "lit", "Lithuanian",
    "nor", "Norwegian",
    "pol", "Polish",
    "por", "Portuguese",
    "ron", "Romanian",
    "rus", "Russian",
    "slk-frak", "Slovakian (Fraktur)",
    "slk", "Slovakian",
    "slv", "Slovenian",
    "spa", "Spanish",
    "srp", "Serbian (Latin)",
    "swe-frak", "Swedish (Fraktur)",
    "swe", "Swedish",
    "tgl", "Tagalog",
    "tha", "Thai",
    "tur", "Turkish",
    "ukr", "Ukrainian",
    "vie", "Vietnamese"
    ];
    var tmpObj = document.getElementById("ddl_language");
    var strlanguage = tmpObj.value;
    var zipName = "";
    // Step through the list one pair at a time so only language codes are
    // compared; stepping one element at a time would also match display
    // names and then pick the following pair's code as the archive name.
    for (var i = 0; i + 1 < aryLanguage.length; i += 2) {
        if (aryLanguage[i] == strlanguage) {
            zipName = aryLanguage[i + 1];
            break;
        }
    }
    tmpObj = document.getElementById("ddl_fileType");
    var result;
    var fileType = tmpObj.value;
    // Loose comparison on purpose: the dropdown value is a string.
    if (fileType == 0)
        result = DWObject.ShowFileDialog(true, "*.TXT", 0, "TXT", "OCRResult.txt", true, true, 0);
    else if (fileType == 1)
        result = DWObject.ShowFileDialog(true, "*.PDF", 0, "PDF", "OCRPTResult.pdf", true, true, 0);
    else
        result = DWObject.ShowFileDialog(true, "*.PDF", 0, "PDF", "OCRIOTResult.pdf", true, true, 0);

    if (result == true) {
        // NOTE(review): fileName is not defined in this function; presumably
        // it is a page-level variable holding the path chosen in the dialog —
        // verify where it is set.
        var strFileName = fileName;
        DWObject.OCRLanguage = strlanguage;
        DWObject.OCRResultFormat = fileType;
        DWObject.HTTPPort = 80;
        // Only request language data when the selected code is known;
        // otherwise there is no archive name to ask for.
        if (zipName != "") {
            DWObject.HTTPDownloadOCRLangData("www.dynamsoft.com", "/download/OCR Language/" + zipName + ".zip", true);
        }
        DWObject.IfShowProgressBar = true;
        if (iLeft != 0 || iTop != 0 || iRight != 0 || iBottom != 0) {
            // An area has been selected: OCR just that rectangle of the current image.
            if (!DWObject.OCR1(DWObject.CurrentImageIndexInBuffer, iLeft, iTop, iRight, iBottom, strFileName)) {
                ShowErrorInMessageBox(DWObject.ErrorString);
            }
            else {
                AppendMessage("OCR result file has been saved to " + strFileName + ".<br />");
            }
        }
        else {
            // No area selected: OCR the selected images as a whole.
            if (!DWObject.OCRSelectedImages(strFileName)) {
                ShowErrorInMessageBox(DWObject.ErrorString);
            } else {
                AppendMessage("OCR result file has been saved to " + strFileName + ".<br />");
            }
        }
    }

    // Reset the OCR button whether or not OCR ran.
    J_SetBtnProcessingAndText("btnOCRClient", false, "OCR");
}

This entry was posted in CodeProject and Document Imaging. Bookmark the permalink.