How can we reliably detect, via your API, that printed content has been cut off? (Using OCR is not a preferred option.)
Environment Info:
Operating Systems:
• Windows 7 Enterprise SP1
• Windows Server 2012 R2 Standard
PDF-XChange Printer:
• PDF-XChange 5.0
In the case of page 1 for the attached PDF, the following code detects that:
1. The PDF only contains a Media Box and does not contain a Crop Box or Trim Box
2. The pageRect has dimensions of Bottom: 0, Left: 0, Right: 595.2, Top: 841.919
3. The results below show the rightmost coordinate of each detected text element. Notice that none is greater than the rightmost coordinate of the page (adjusted for the margin).
4. Since no coordinates go off the page, we can’t say that any cut-off has occurred.
element 0: Matrix: a:1, b:0, c:0, d:1, e:26.879, f:801.12 | Matrix.e + HighestOffset = 55.3330397803783 | pageRect.Right - rightMatginInPoints = 577.2
element 1: Matrix: a:1, b:0, c:0, d:1, e:26.879, f:788.64 | Matrix.e + HighestOffset = 52.0789998092651 | pageRect.Right - rightMatginInPoints = 577.2
element 2: Matrix: a:1, b:0, c:0, d:1, e:26.879, f:776.160000457764 | Matrix.e + HighestOffset = 66.4732397003174 | pageRect.Right - rightMatginInPoints = 577.2
element 3: Matrix: a:1, b:0, c:0, d:1, e:139.439, f:776.16 | Matrix.e + HighestOffset = 451.389557667136 | pageRect.Right - rightMatginInPoints = 577.2
element 4: Matrix: a:1, b:0, c:0, d:1, e:24, f:732.96 | Matrix.e + HighestOffset = 53.5847197766285 | pageRect.Right - rightMatginInPoints = 577.2
element 5: Matrix: a:1, b:0, c:0, d:1, e:53.519, f:732.96 | Matrix.e + HighestOffset = 103.432679632157 | pageRect.Right - rightMatginInPoints = 577.2
element 6: Matrix: a:1, b:0, c:0, d:1, e:24, f:720 | Matrix.e + HighestOffset = 50.3385198062062 | pageRect.Right - rightMatginInPoints = 577.2
element 7: Matrix: a:1, b:0, c:0, d:1, e:50.399, f:720 | Matrix.e + HighestOffset = 245.35147852432 | pageRect.Right - rightMatginInPoints = 577.2
element 8: Matrix: a:1, b:0, c:0, d:1, e:24, f:707.04 | Matrix.e + HighestOffset = 39.6725998860597 | pageRect.Right - rightMatginInPoints = 577.2
element 9: Matrix: a:1, b:0, c:0, d:1, e:39.839, f:707.04 | Matrix.e + HighestOffset = 89.9490796190128 | pageRect.Right - rightMatginInPoints = 577.2
element 10: Matrix: a:1, b:0, c:0, d:1, e:24, f:693.841 | Matrix.e + HighestOffset = 65.3715196957588 | pageRect.Right - rightMatginInPoints = 577.2
element 11: Matrix: a:1, b:0, c:0, d:1, e:65.759, f:693.841 | Matrix.e + HighestOffset = 363.907117758945 | pageRect.Right - rightMatginInPoints = 577.2
element 12: Matrix: a:1, b:0, c:0, d:1, e:30.239, f:667.442 | Matrix.e + HighestOffset = 127.722642072678 | pageRect.Right - rightMatginInPoints = 577.2
element 13: Matrix: a:1, b:0, c:0, d:1, e:182.158, f:667.442 | Matrix.e + HighestOffset = 217.129550598144 | pageRect.Right - rightMatginInPoints = 577.2
element 14: Matrix: a:1, b:0, c:0, d:1, e:235.437998779297, f:667.442 | Matrix.e + HighestOffset = 258.056908872604 | pageRect.Right - rightMatginInPoints = 577.2
element 15: Matrix: a:1, b:0, c:0, d:1, e:329.997996337891, f:667.442 | Matrix.e + HighestOffset = 350.850666501999 | pageRect.Right - rightMatginInPoints = 577.2
element 16: Matrix: a:1, b:0, c:0, d:1, e:383.037, f:667.442 | Matrix.e + HighestOffset = 548.580021342754 | pageRect.Right - rightMatginInPoints = 577.2
element 17: Matrix: a:1, b:0, c:0, d:1, e:30.2369999999999, f:639.842 | Matrix.e + HighestOffset = 89.8198625887632 | pageRect.Right - rightMatginInPoints = 577.2
element 18: Matrix: a:1, b:0, c:0, d:1, e:111.356997497559, f:639.84199961853 | Matrix.e + HighestOffset = 170.66954211998 | pageRect.Right - rightMatginInPoints = 577.2
element 19: Matrix: a:1, b:0, c:0, d:1, e:235.436, f:639.842 | Matrix.e + HighestOffset = 318.106867652893 | pageRect.Right - rightMatginInPoints = 577.2
element 20: Matrix: a:1, b:0, c:0, d:1, e:383.034998901367, f:639.84199961853 | Matrix.e + HighestOffset = 433.17413489151 | pageRect.Right - rightMatginInPoints = 577.2
element 21: Matrix: a:1, b:0, c:0, d:1, e:30.2349999999998, f:626.163 | Matrix.e + HighestOffset = 67.1017235374449 | pageRect.Right - rightMatginInPoints = 577.2
element 22: Matrix: a:1, b:0, c:0, d:1, e:329.993998657226, f:626.163000099182 | Matrix.e + HighestOffset = 371.158327990532 | pageRect.Right - rightMatginInPoints = 577.2
element 23: Matrix: a:1, b:0, c:0, d:1, e:494.87301171875, f:626.163000099182 | Matrix.e + HighestOffset = 524.656157512426 | pageRect.Right - rightMatginInPoints = 577.2
element 24: Matrix: a:1, b:0, c:0, d:1, e:182.152999267578, f:612.484000099182 | Matrix.e + HighestOffset = 224.067194604873 | pageRect.Right - rightMatginInPoints = 577.2
element 25: Matrix: a:1, b:0, c:0, d:1, e:329.993, f:612.484 | Matrix.e + HighestOffset = 451.753599164963 | pageRect.Right - rightMatginInPoints = 577.2
element 26: Matrix: a:1, b:0, c:0, d:1, e:494.871997802734, f:612.484 | Matrix.e + HighestOffset = 527.364628513336 | pageRect.Right - rightMatginInPoints = 577.2
element 27: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:558.725 | Matrix.e + HighestOffset = 70.7866396543233 | pageRect.Right - rightMatginInPoints = 577.2
element 28: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:543.606 | Matrix.e + HighestOffset = 189.605000077128 | pageRect.Right - rightMatginInPoints = 577.2
element 29: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:522.006 | Matrix.e + HighestOffset = 118.61392074582 | pageRect.Right - rightMatginInPoints = 577.2
element 30: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:512.407 | Matrix.e + HighestOffset = 538.6857240085 | pageRect.Right - rightMatginInPoints = 577.2
element 31: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:503.048 | Matrix.e + HighestOffset = 515.003643845044 | pageRect.Right - rightMatginInPoints = 577.2
element 32: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:493.449 | Matrix.e + HighestOffset = 544.959204099506 | pageRect.Right - rightMatginInPoints = 577.2
element 33: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:483.85 | Matrix.e + HighestOffset = 546.720324099928 | pageRect.Right - rightMatginInPoints = 577.2
element 34: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:474.491 | Matrix.e + HighestOffset = 541.265044062718 | pageRect.Right - rightMatginInPoints = 577.2
element 35: Matrix: a:1, b:0, c:0, d:1, e:23.9929999999998, f:464.892 | Matrix.e + HighestOffset = 430.209323174625 | pageRect.Right - rightMatginInPoints = 577.2
Code Snippet:
Code: Select all
/* -- Initialization calls that happen prior to calling DetectTextOffPage
int pdfHandle = 0;
//once per doc
PxcvErrors.CheckDSReturnCode(PdfExchangeApi.PXCp_Init(out pdfHandle, PdfExchangeApi.REG_KEY, PdfExchangeApi.DEV_CODE));
PxcvErrors.CheckDSReturnCode(PdfExchangeApi.PXCp_ReadDocumentW(pdfHandle, pdfFileName, 0));
// once per page
PxcvErrors.CheckDSReturnCode(PdfExchangeApi.PXCp_ET_Prepare(pdfHandle));
*/
/// <summary>
/// Scans the text elements of one PDF page and flags text whose last character offset lies
/// beyond the right margin, or whose origin lies far to the left of the page box.
/// </summary>
/// <remarks>
/// Only the translation component (<c>Matrix.e</c>) of each element's matrix is used; scale,
/// rotation and skew (<c>a</c>, <c>b</c>, <c>c</c>, <c>d</c>) are ignored, and the width of the
/// last glyph is not added. NOTE(review): content that was clipped before the PDF was produced
/// will not appear as a text element at all, so it cannot be detected here.
/// </remarks>
/// <param name="pdfHandle">Initialized handle to the PDF document.</param>
/// <param name="pageNumber">1-based page number.</param>
/// <param name="etPrepared">
/// True when the caller has already called PXCp_ET_Prepare(). When false, this method calls
/// PXCp_ET_Prepare() itself and PXCp_ET_Finish() before returning.
/// </param>
/// <param name="dRightMargin">Right margin; it is multiplied by 72 to obtain points, i.e. it is expected in inches.</param>
/// <param name="textOffLeft">Set to true when an element's Matrix.e is below the page box's left edge minus the left tolerance. Never reset to false.</param>
/// <param name="textOffRight">Set to true when Matrix.e plus the element's last offset exceeds the page box's right edge minus the right margin. Never reset to false.</param>
private void DetectTextOffPage(int pdfHandle, int pageNumber, bool etPrepared, double dRightMargin, ref bool textOffLeft, ref bool textOffRight)
{
    // NOTE(review): the original comment called this "25 pixels, converted to points", but
    // 72 * 25 is 1800 points (25 inches). The value is kept as-is; confirm the intended tolerance.
    const int LeftToleranceInPoints = 72 * 25;

    // Extra slots added when the buffers have to grow.
    const int BufferSlack = 16;

    double rightMarginInPoints = dRightMargin * 72; // 72 points per inch.
    int pageIndex = pageNumber - 1;                 // The SDK calls below take a 0-based page index.

    if (!etPrepared)
    {
        // Same return-code handling as the per-page initialization code uses for this call.
        PxcvErrors.CheckDSReturnCode(PdfExchangeApi.PXCp_ET_Prepare(pdfHandle));
    }

    try
    {
        PxcRectF pageRect = GetEffectivePageBox(pdfHandle, pageIndex);

        PxcvErrors.CheckDSReturnCode(PdfExchangeApi.PXCp_ET_AnalyzePageContent(pdfHandle, pageIndex));

        int textElementCount = 0;
        PxcvErrors.CheckDSReturnCode(PdfExchangeApi.PXCp_ET_GetElementCount(pdfHandle, out textElementCount));

        // Start with room for 1024 characters to minimize reallocation.
        int bufferCharacters = 1024;

        // The unmanaged buffers are tracked in locals so that exactly what was allocated is freed,
        // regardless of what the native call writes back into the 'out' structure.
        System.IntPtr characterBuffer = System.IntPtr.Zero;
        System.IntPtr offsetBuffer = System.IntPtr.Zero;

        try
        {
            characterBuffer = Marshal.AllocHGlobal(bufferCharacters * 4); // 4 bytes reserved per character.
            offsetBuffer = Marshal.AllocHGlobal(bufferCharacters * 8);    // One 8-byte double per character.

            PxcTextElement textElement;
            textElement.cbSize = 132; // Size of the structure. NOTE(review): hard-coded; verify for the target bitness.
            textElement.Characters = characterBuffer;
            textElement.Offsets = offsetBuffer;
            _logger.DebugFormat("textElementCount = {0}; textElement.Characters = {1}; textElement.Offsets = {2}", textElementCount, textElement.Characters.ToString(), textElement.Offsets.ToString());

            for (int i = 0; i < textElementCount; i++)
            {
                // First pass: empty mask, used only to learn the element's character count.
                textElement.Count = 0;
                textElement.Mask = 0;
                int res = PdfExchangeApi.PXCp_ET_GetElement(pdfHandle, i, out textElement, 0);
                _logger.DebugFormat("PXCp_ET_GetElement - i = {0}; textElement.Count = {1}", i.ToString(), textElement.Count);

                // Elements with two or fewer characters are skipped.
                // NOTE(review): the original comment said "more than one character"; threshold kept at > 2.
                if (res < 0 || textElement.Count <= 2)
                {
                    continue;
                }

                if (bufferCharacters < textElement.Count)
                {
                    int requiredCharacters = textElement.Count + BufferSlack;

                    // Zero each pointer right after freeing it so the finally block cannot
                    // free it a second time if one of the allocations below throws.
                    Marshal.FreeHGlobal(characterBuffer);
                    characterBuffer = System.IntPtr.Zero;
                    Marshal.FreeHGlobal(offsetBuffer);
                    offsetBuffer = System.IntPtr.Zero;

                    characterBuffer = Marshal.AllocHGlobal(requiredCharacters * 4);
                    offsetBuffer = Marshal.AllocHGlobal(requiredCharacters * 8);
                    bufferCharacters = requiredCharacters;

                    textElement.Characters = characterBuffer;
                    textElement.Offsets = offsetBuffer;
                    _logger.DebugFormat("Relocated global memory - textElement.Characters = {0}; textElement.Offsets = {1}",
                        textElement.Characters.ToString(), textElement.Offsets.ToString());
                }

                // Second pass: re-seat the owned buffers and request text, offsets and matrix.
                textElement.Characters = characterBuffer;
                textElement.Offsets = offsetBuffer;
                textElement.Mask = (int)PxcTextElementMaskFlags.Text | (int)PxcTextElementMaskFlags.Offsets | (int)PxcTextElementMaskFlags.Matrix;
                res = PdfExchangeApi.PXCp_ET_GetElement(pdfHandle, i, out textElement, 0);
                if (res < 0)
                {
                    continue;
                }

                // Never read past the end of the offsets buffer.
                int characterCount = textElement.Count;
                if (characterCount <= 0 || characterCount > bufferCharacters)
                {
                    continue;
                }

                double[] offsets = new double[characterCount];
                Marshal.Copy(offsetBuffer, offsets, 0, characterCount);
                double lastOffset = offsets[characterCount - 1];

                if (textElement.Matrix.e + lastOffset > pageRect.Right - rightMarginInPoints)
                {
                    textOffRight = true;
                }

                if (textElement.Matrix.e < pageRect.Left - LeftToleranceInPoints)
                {
                    textOffLeft = true;
                }

                // Nothing more to learn once both conditions have been seen.
                if (textOffLeft && textOffRight)
                {
                    return;
                }
            }
        }
        finally
        {
            // FreeHGlobal does nothing for IntPtr.Zero, so partially completed allocations are safe.
            Marshal.FreeHGlobal(characterBuffer);
            Marshal.FreeHGlobal(offsetBuffer);
        }
    }
    finally
    {
        // Only tear down the extraction state if this method set it up.
        if (!etPrepared)
        {
            PdfExchangeApi.PXCp_ET_Finish(pdfHandle);
        }
    }
}

/// <summary>
/// Returns the first page box that is set for the page, trying TrimBox, then CropBox, then MediaBox.
/// Any result other than "property not set" ends the search and is passed to CheckDSReturnCode.
/// </summary>
/// <param name="pdfHandle">Initialized handle to the PDF document.</param>
/// <param name="pageIndex">0-based page index.</param>
private static PxcRectF GetEffectivePageBox(int pdfHandle, int pageIndex)
{
    PxcRectF pageRect = new PxcRectF();

    int res = PdfExchangeApi.PXCp_PageGetBox(pdfHandle, pageIndex, PxcPageBox.TrimBox, out pageRect);
    if (res == PxcvErrors.PS_ERR_REQUIRED_PROP_NOT_SET_EX)
    {
        // TrimBox not set, try CropBox.
        res = PdfExchangeApi.PXCp_PageGetBox(pdfHandle, pageIndex, PxcPageBox.CropBox, out pageRect);
    }

    if (res == PxcvErrors.PS_ERR_REQUIRED_PROP_NOT_SET_EX)
    {
        // CropBox not set either, fall back to MediaBox (the paper size).
        res = PdfExchangeApi.PXCp_PageGetBox(pdfHandle, pageIndex, PxcPageBox.MediaBox, out pageRect);
    }

    // A zeroed rectangle would make every element look cut off, so do not continue on failure.
    PxcvErrors.CheckDSReturnCode(res);
    return pageRect;
}