Link Search Menu Expand Document

Parse with OCR - TEMPLATES-SAMPLES

This Document Parser sample from the TEMPLATES-SAMPLES collection demonstrates ‘Parse with OCR’.

DigitalOcean.json
{
  "templateName": "DigitalOcean Invoice",
  "templateVersion": 4,
  "templatePriority": 0,
  "detectionRules": {
    "keywords": ["DigitalOcean", "101 Avenue of the Americas", "Invoice Number"]
  },
  "objects": [
    {
      "name": "companyName",
      "objectType": "field",
      "fieldProperties": { "fieldType": "static", "expression": "DigitalOcean", "regex": true },
      "id": 1634132917751
    },
    {
      "name": "invoiceId",
      "objectType": "field",
      "fieldProperties": { "fieldType": "macros", "expression": "Invoice Number: ({{Digits}})", "regex": true },
      "id": 1634132917752
    },
    {
      "name": "dateIssued",
      "objectType": "field",
      "fieldProperties": { "fieldType": "macros", "expression": "Date Issued: ({{SmartDate}})", "regex": true, "dataType": "date", "dateFormat": "auto-mdy" },
      "id": 1634132917753
    },
    {
      "name": "total",
      "objectType": "field",
      "fieldProperties": { "fieldType": "macros", "expression": "Total: ({{Money}})", "regex": true, "dataType": "decimal" },
      "id": 1634132917754
    },
    {
      "name": "currency",
      "objectType": "field",
      "fieldProperties": { "fieldType": "static", "expression": "USD", "regex": true },
      "id": 1634132917755
    },
    {
      "name": "table1",
      "objectType": "table",
      "tableProperties": {
        "start": { "expression": "Description{{Spaces}}Hours", "regex": true },
        "end": { "expression": "Total:", "regex": true },
        "row": {
          "expression": "{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<hours>{{Digits}}){{Spaces}}(?<start>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}(?<end>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}})",
          "regex": true
        },
        "columns": [
          { "name": "hours", "dataType": "integer" },
          { "name": "unitPrice", "dataType": "decimal" }
        ]
      },
      "id": 1634132917756
    }
  ]
}

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDFs.

Get your PDF.co API key here!

Download Source Code (.zip)

Return to the previous page | Explore the Document Parser endpoint