I have a web service that listens for incoming requests using the .NET HttpListener. When it receives a request, it performs any number of actions. One of those actions is to OCR a document using PDF-XChange (code below).
However, when I get a web request to perform this action, it hangs until the timeout I've set up (the Task wait in the code below) elapses, and then fails. I wasn't sure exactly what was going on, so I wrote a simple two-line program to OCR the same file, and it worked. I can confirm it hangs on the Op.Do() line.
OCR Code:
Init Function:
Code: Select all
// Lazily prepares the shared viewer instance: creates it (or adopts
// existingViewer), loads the OCR and conversion plugins, reads the PDF tools
// timeout from settings, and finally caches the "AUX" extension.
if (viewerInstance == null)
{
    if (existingViewer == null)
    {
        try
        {
            viewerInstance = new PXV_Inst();
            viewerInstance.Init(null, DolphinCorePDF.devKey);
            Logger.Log("PDF Converter Object Initialized", 5);
        }
        catch (Exception ex)
        {
            Logger.Log("Error During Init for PDFTools", ex, 1);
            int hr = Marshal.GetHRForException(ex);
            PDFTools.LogErrMsg(hr);
            // Discard a missing or half-initialised instance so the code below
            // does not dereference it and so a later call can retry from scratch.
            viewerInstance = null;
        }
    }
    else
    {
        viewerInstance = existingViewer;
    }

    // Only continue when a usable instance exists; previously a failed
    // initialisation fell straight through to StartLoadingPlugins().
    if (viewerInstance != null)
    {
        // Path.Combine inserts the directory separator when the environment
        // variable does not end with one. A missing variable is treated as an
        // empty base path, which matches the old concatenation result.
        string pluginLoadPath = Environment.GetEnvironmentVariable("DolphinPath") ?? string.Empty;
        viewerInstance.StartLoadingPlugins();
        viewerInstance.AddPluginFromFile(System.IO.Path.Combine(pluginLoadPath, "OCRPlugin.pvp"));
        viewerInstance.AddPluginFromFile(System.IO.Path.Combine(pluginLoadPath, "ConvertPDF.pvp"));
        viewerInstance.FinishLoadingPlugins();
        Logger.Log("Conversion and OCR Plugin Loaded", 5);

        try
        {
            PDFTools.pdfToolsTimeoutInMin = Convert.ToInt32(ApplicationSettings.ReadSettingFromProfile(ConfigurationSettings.PDFToolsTimeoutInMin.ToString(), "Default"));
        }
        catch (Exception ex)
        {
            // Unreadable or non-numeric setting: fall back to 10 minutes and
            // write that default back to the settings store.
            Logger.Log("Failed to read PDFTool Timeout. Defaulting to 10 min", ex, 1);
            PDFTools.pdfToolsTimeoutInMin = 10;
            ApplicationSettings.WriteSetting(ConfigurationSettings.PDFToolsTimeoutInMin.ToString(), Convert.ToString(PDFTools.pdfToolsTimeoutInMin), "Default");
        }
    }
}

// Cache the auxiliary extension once, and only when there is an instance to ask.
if (auxInst == null && viewerInstance != null)
{
    auxInst = (IAUX_Inst)viewerInstance.GetExtension("AUX");
}
Code: Select all
// Runs the "op.document.OCRPages" operation over inputPDF on a worker task,
// saves the result back to the same path, and waits for the task up to
// PDFTools.pdfToolsTimeoutInMin minutes. An empty `pages` array, or the single
// value -1, selects every page; otherwise the listed pages are processed.
Logger.Log("Attempting to execute OCR on document: " + inputPDF, 5);
var myTask = Task.Run(() =>
{
    // NOTE(review): this lambda runs on a thread-pool thread, while
    // viewerInstance may have been created on a different thread. If the SDK's
    // COM objects are apartment-threaded, cross-thread calls such as Op.Do()
    // could block waiting on the creating thread — confirm against the SDK
    // threading requirements.
    try
    {
        if (!File.Exists(inputPDF))
        {
            Logger.Log("PDF File: " + inputPDF + ", does not exist. Cannot execute OCR", 1);
            return;
        }

        Init();
        IPXC_Inst pxcInst = (IPXC_Inst)viewerInstance.GetExtension("PXC");
        IPXC_Document doc = pxcInst.OpenDocumentFromFile(inputPDF, clbk);
        PDFXEdit.ICabNode input = null;
        PDFXEdit.ICabNode options = null;
        try
        {
            int nID = viewerInstance.Str2ID("op.document.OCRPages", false);
            PDFXEdit.IOperation Op = viewerInstance.CreateOp(nID);
            input = Op.Params.Root["Input"];
            input.v = doc;
            options = Op.Params.Root["Options"];

            bool allPages = pages.Length == 0 || (pages.Length == 1 && pages[0] == -1);
            if (allPages)
            {
                options["PagesRange.Type"].v = "All";
            }
            else
            {
                options["PagesRange.Type"].v = "Exactly";
                // Comma-separated page list, e.g. "1,2,5".
                options["PagesRange.Text"].v = string.Join(",", pages);
            }

            options["OutputType"].v = 0;
            options["OutputDPI"].v = 300;
            Op.Do();
            doc.WriteToFile(inputPDF);
        }
        finally
        {
            // Always release the document, even when the operation or the
            // save throws; previously a failure left it open.
            doc.Close();
            if (options != null)
            {
                options.Clear();
            }
            if (input != null)
            {
                input.Clear();
            }
        }

        Logger.Log("PDF File: " + inputPDF + " had OCR completed", 5);
    }
    catch (Exception ex)
    {
        Logger.Log("Error runnong OCR on PDF: " + inputPDF, ex, 1);
        int hr = Marshal.GetHRForException(ex);
        PDFTools.LogErrMsg(hr);
    }
});

// TimeSpan avoids the int overflow that 1000 * 60 * minutes hits for very
// large timeout settings.
bool completed = myTask.Wait(TimeSpan.FromMinutes(PDFTools.pdfToolsTimeoutInMin));
if (!completed)
{
    // Only the wait is abandoned here: no cancellation is passed to the task,
    // so the worker keeps running in the background.
    Logger.Log("Timeout for PDF Tools reached. OCR Process cancelled: " + PDFTools.pdfToolsTimeoutInMin, 1);
}
Code: Select all
/// <summary>
/// Minimal harness: initialises <c>PDFTools</c> and then runs OCR on
/// <paramref name="filePath"/> with an empty page array.
/// </summary>
/// <param name="filePath">Path of the PDF handed to <c>RunOCRAndAddText</c>.</param>
public void QuickOCRTest(string filePath)
{
    PDFTools.Init();

    // No explicit page numbers are supplied.
    int[] noExplicitPages = new int[0];
    PDFTools.RunOCRAndAddText(filePath, noExplicitPages);
}
The Init call is made when the Windows Service is started, but each request comes in separately and fires off its own chain of events for processing.