Parse Simple Document - TEMPLATES-SAMPLES
Document Parser sample in TEMPLATES-SAMPLES demonstrating ‘Parse Simple Document’
AmazonAWS.json
{
  "templateName": "Amazon Web Services Invoice",
  "templateVersion": 4,
  "templatePriority": 0,
  "detectionRules": {
    "keywords": ["Amazon Web Services", "ATTN", "Invoice"]
  },
  "objects": [
    {
      "name": "total",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "TOTAL AMOUNT DUE ON{{Anything}}{{Dollar}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132948923
    },
    {
      "name": "subTotal",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "{{LineStart}}{{Spaces}}Charges{{Spaces}}{{Dollar}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132948924
    },
    {
      "name": "dateIssued",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Invoice Date:{{Spaces}}({{Anything}}){{LineEnd}}",
        "regex": true,
        "dataType": "date",
        "dateFormat": "MMMM d , yyyy"
      },
      "id": 1634132948925
    },
    {
      "name": "invoiceId",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Invoice Number:{{Spaces}}({{Digits}})",
        "regex": true
      },
      "id": 1634132948926
    },
    {
      "name": "companyName",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "Amazon Web Services, Inc.",
        "regex": true
      },
      "id": 1634132948927
    },
    {
      "name": "companyWebsite",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "aws.amazon.com",
        "regex": true
      },
      "id": 1634132948928
    },
    {
      "name": "billTo",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "rectangle",
        "expression": "Bill to Address:{{ToggleSingleLineMode}}({{AnythingGreedy}})",
        "regex": true,
        "rectangle": [33, 115.5, 213.75, 72.75],
        "pageIndex": 0
      },
      "id": 1634132948929
    },
    {
      "name": "currency",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "USD",
        "regex": true
      },
      "id": 1634132948930
    },
    {
      "name": "table1",
      "objectType": "table",
      "tableProperties": {
        "start": {
          "expression": "{{LineStart}}{{Spaces}}Detail{{LineEnd}}",
          "regex": true
        },
        "end": {
          "expression": "{{EndOfPage}}",
          "regex": true
        },
        "row": {
          "expression": "{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}}){{LineEnd}}",
          "regex": true
        },
        "columns": [
          { "name": "unitPrice", "dataType": "decimal" }
        ]
      },
      "id": 1634132948931
    }
  ]
}
DigitalOcean.json
{
  "templateName": "DigitalOcean Invoice",
  "templateVersion": 4,
  "templatePriority": 0,
  "detectionRules": {
    "keywords": ["DigitalOcean", "101 Avenue of the Americas", "Invoice Number"]
  },
  "objects": [
    {
      "name": "companyName",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "DigitalOcean",
        "regex": true
      },
      "id": 1634132940020
    },
    {
      "name": "invoiceId",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Invoice Number: ({{Digits}})",
        "regex": true
      },
      "id": 1634132940021
    },
    {
      "name": "dateIssued",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Date Issued: ({{SmartDate}})",
        "regex": true,
        "dataType": "date",
        "dateFormat": "auto-mdy"
      },
      "id": 1634132940022
    },
    {
      "name": "total",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Total: {{Dollar}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132940023
    },
    {
      "name": "currency",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "USD",
        "regex": true
      },
      "id": 1634132940024
    },
    {
      "name": "table1",
      "objectType": "table",
      "tableProperties": {
        "start": {
          "expression": "Description{{Spaces}}Hours",
          "regex": true
        },
        "end": {
          "expression": "Total:",
          "regex": true
        },
        "row": {
          "expression": "{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<hours>{{Digits}}){{Spaces}}(?<start>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}(?<end>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}})",
          "regex": true
        },
        "columns": [
          { "name": "hours", "dataType": "integer" },
          { "name": "unitPrice", "dataType": "decimal" }
        ]
      },
      "id": 1634132940025
    }
  ]
}
Google.yml
{
  "templateName": "Google Invoice",
  "templateVersion": 4,
  "templatePriority": 0,
  "detectionRules": {
    "keywords": ["Google", "77-0493581", "Invoice"]
  },
  "objects": [
    {
      "name": "invoiceId",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Invoice number:{{Spaces}}({{Digits}})",
        "regex": true
      },
      "id": 1634132930520
    },
    {
      "name": "dateIssued",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Issue date:{{Spaces}}({{SmartDate}})",
        "regex": true,
        "dataType": "date",
        "dateFormat": "MMM d, yyyy"
      },
      "id": 1634132930521
    },
    {
      "name": "total",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Amount due in USD:{{Spaces}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132930522
    },
    {
      "name": "subTotal",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Subtotal in USD:{{Spaces}}({{Number}})",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132930523
    },
    {
      "name": "taxRate",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "State sales tax {{OpeningParenthesis}}({{Digits}}){{Percent}}{{ClosingParenthesis}}",
        "regex": true,
        "dataType": "integer"
      },
      "id": 1634132930524
    },
    {
      "name": "tax",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "State sales tax{{Anything}}({{Number}}){{LineEnd}}",
        "regex": true,
        "dataType": "decimal"
      },
      "id": 1634132930525
    },
    {
      "name": "companyName",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "Google LLC",
        "regex": true
      },
      "id": 1634132930526
    },
    {
      "name": "billTo",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "rectangle",
        "regex": true,
        "rectangle": [0, 152, 280, 72],
        "pageIndex": 0
      },
      "id": 1634132930527
    },
    {
      "name": "billingId",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "macros",
        "expression": "Billing ID:{{Spaces}}({{DigitsOrSymbols}})",
        "regex": true
      },
      "id": 1634132930528
    },
    {
      "name": "currency",
      "objectType": "field",
      "fieldProperties": {
        "fieldType": "static",
        "expression": "USD",
        "regex": true
      },
      "id": 1634132930529
    },
    {
      "name": "table1",
      "objectType": "table",
      "tableProperties": {
        "start": {
          "expression": "Description{{Spaces}}Interval{{Spaces}}Quantity{{Spaces}}Amount",
          "regex": true
        },
        "end": {
          "expression": "Subtotal in USD",
          "regex": true
        },
        "row": {
          "expression": "{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<interval>{{3Letters}}{{Space}}{{Digits}}{{Space}}{{Minus}}{{Space}}{{3Letters}}{{Space}}{{Digits}}){{Spaces}}(?<quantity>{{Digits}}){{Spaces}}(?<amount>{{Number}})",
          "regex": true
        },
        "columns": [
          { "name": "quantity", "dataType": "integer" },
          { "name": "amount", "dataType": "decimal" }
        ]
      },
      "id": 1634132930530
    }
  ]
}
PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDF files.
Download Source Code (.zip)
Return to the previous page | Explore the Document Parser endpoint
Copyright © 2016 - 2023 PDF.co