Link Search Menu Expand Document

Extract line items from tables on multiple pages - TEMPLATES-SAMPLES

Document Parser sample from TEMPLATES-SAMPLES demonstrating how to extract line items from tables that span multiple pages.

MultiPageTable-template1.json
{
  "templateName": "Multipage Table Test",
  "templateVersion": 4,
  "templatePriority": 0,
  "detectionRules": {
    "keywords": [
      "Sample document with multi-page table"
    ]
  },
  "objects": [
    {
      "name": "total",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "TOTAL{{Spaces}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132995054
    },
    {
      "name": "table1",
      "objectType": "table",
      "tableProperties": {
        "start": {
          "expression": "Item{{Spaces}}Description{{Spaces}}Price",
          "regex": true
        },
        "end": {
          "expression": "TOTAL{{Spaces}}{{Number}}",
          "regex": true
        },
        "row": {
          "expression": "{{LineStart}}{{Spaces}}(?<itemNo>{{Digits}}){{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<price>{{Number}}){{Spaces}}(?<qty>{{Digits}}){{Spaces}}(?<extPrice>{{Number}})",
          "regex": true
        },
        "columns": [
          {
            "name": "itemNo",
            "dataType": "integer"
          },
          {
            "name": "description",
            "dataType": "string"
          },
          {
            "name": "price",
            "dataType": "decimal"
          },
          {
            "name": "qty",
            "dataType": "integer"
          },
          {
            "name": "extPrice",
            "dataType": "decimal"
          }
        ],
        "multipage": true
      },
      "id": 1634132995055
    }
  ]
}
MultiPageTable-template2.json
{
  "templateName": "Multipage Table Test",
  "templateVersion": 4,
  "templatePriority": 0,
  "detectionRules": {
    "keywords": [
      "Sample document with multi-page table"
    ]
  },
  "objects": [
    {
      "name": "total",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "regex",
        "expression": "TOTAL{{Spaces}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132986558
    },
    {
      "name": "table1",
      "objectType": "table",
      "tableProperties": {
        "start": {
          "expression": "Item{{Spaces}}Description{{Spaces}}Price",
          "regex": true
        },
        "end": {
          "expression": "(Page {{Digits}} of {{Digits}})|(TOTAL{{Spaces}}{{Number}})",
          "regex": true
        },
        "left": 51,
        "right": 528,
        "columns": [
          {
            "x": 51,
            "name": "itemNo",
            "dataType": "integer"
          },
          {
            "x": 102,
            "name": "description",
            "dataType": "string"
          },
          {
            "x": 324,
            "name": "price",
            "dataType": "decimal"
          },
          {
            "x": 396,
            "name": "qty",
            "dataType": "integer"
          },
          {
            "x": 441,
            "name": "extPrice",
            "dataType": "decimal"
          }
        ],
        "multipage": true
      },
      "id": 1634132986559
    }
  ]
}

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDF files.

Get your PDF.co API key here!

Download Source Code (.zip)

Return to the previous page | Explore the Document Parser endpoint