Parse Invoice Information - SharePoint
Document Parser sample in SharePoint demonstrating ‘Parse Invoice Information’
AmazonAWS.yml
# Document Parser template for Amazon Web Services invoices.
# Each entry under `objects` is either a single field or a table to extract.
templateName: Amazon Web Services Invoice
templateVersion: 4
templatePriority: 0
# Keywords used to recognise a document as matching this template
# (presumably all of them must be present - confirm against the Document Parser docs).
detectionRules:
  keywords:
    - Amazon Web Services
    - ATTN
    - Invoice
objects:
  # Number captured after "TOTAL AMOUNT DUE ON ... $".
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: TOTAL AMOUNT DUE ON{{Anything}}{{Dollar}}({{Number}})
      regex: true
      dataType: decimal
  # Number captured from the line that starts with "Charges".
  - name: subTotal
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: '{{LineStart}}{{Spaces}}Charges{{Spaces}}{{Dollar}}({{Number}})'
      regex: true
      dataType: decimal
  # Rest of the "Invoice Date:" line, parsed as a date with the format below.
  - name: dateIssued
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: Invoice Date:{{Spaces}}({{Anything}}){{LineEnd}}
      regex: true
      dataType: date
      dateFormat: MMMM d , yyyy
  # Digits captured after "Invoice Number:".
  - name: invoiceId
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: Invoice Number:{{Spaces}}({{Digits}})
      regex: true
  # Static fields carry a fixed value instead of a search expression.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: Amazon Web Services, Inc.
      regex: true
  - name: companyWebsite
    objectType: field
    fieldProperties:
      fieldType: static
      expression: aws.amazon.com
      regex: true
  # Text after "Bill to Address:" searched inside a page region on the first page.
  # NOTE(review): the four rectangle numbers are presumably x, y, width, height - confirm.
  - name: billTo
    objectType: field
    fieldProperties:
      fieldType: rectangle
      expression: Bill to Address:{{ToggleSingleLineMode}}({{AnythingGreedy}})
      regex: true
      rectangle:
        - 33
        - 115.5
        - 213.75
        - 72.75
      pageIndex: 0
  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true
  # Line items: rows between the "Detail" line and the end of the page.
  # Named groups in the row expression (description, unitPrice) become the columns.
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: '{{LineStart}}{{Spaces}}Detail{{LineEnd}}'
        regex: true
      end:
        expression: '{{EndOfPage}}'
        regex: true
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}}){{LineEnd}}'
        regex: true
      columns:
        - name: unitPrice
          dataType: decimal
DigitalOcean.yml
# Document Parser template for DigitalOcean invoices.
# Each entry under `objects` is either a single field or a table to extract.
templateName: DigitalOcean Invoice
templateVersion: 4
templatePriority: 0
# Keywords used to recognise a document as matching this template
# (presumably all of them must be present - confirm against the Document Parser docs).
detectionRules:
  keywords:
    - DigitalOcean
    - 101 Avenue of the Americas
    - Invoice Number
objects:
  # Static fields carry a fixed value instead of a search expression.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: DigitalOcean
      regex: true
  # Digits captured after "Invoice Number: ".
  - name: invoiceId
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Invoice Number: ({{Digits}})'
      regex: true
  # Date captured after "Date Issued: ".
  - name: dateIssued
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Date Issued: ({{SmartDate}})'
      regex: true
      dataType: date
      dateFormat: auto-mdy
  # Number captured after "Total: $".
  - name: total
    objectType: field
    fieldProperties:
      fieldType: macros
      expression: 'Total: {{Dollar}}({{Number}})'
      regex: true
      dataType: decimal
  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true
  # Line items: rows between the "Description ... Hours" header and "Total:".
  # Named groups in the row expression (description, hours, start, end, unitPrice)
  # become the columns; only hours and unitPrice are given an explicit data type.
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: Description{{Spaces}}Hours
        regex: true
      end:
        expression: 'Total:'
        regex: true
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<hours>{{Digits}}){{Spaces}}(?<start>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}(?<end>{{2Digits}}{{Minus}}{{2Digits}}{{Space}}{{2Digits}}{{Colon}}{{2Digits}}){{Spaces}}{{Dollar}}(?<unitPrice>{{Number}})'
        regex: true
      columns:
        - name: hours
          dataType: integer
        - name: unitPrice
          dataType: decimal
Google.yml
# Document Parser template for Google invoices.
# Each entry under `objects` is either a single field or a table to extract.
# Fields without an explicit fieldType presumably use the parser's default type - confirm.
templateName: Google Invoice
templateVersion: 4
templatePriority: 0
# Keywords used to recognise a document as matching this template
# (presumably all of them must be present - confirm against the Document Parser docs).
detectionRules:
  keywords:
    - Google
    - 77-0493581
    - Invoice
objects:
  # Digits captured after "Invoice number:".
  - name: invoiceId
    objectType: field
    fieldProperties:
      expression: Invoice number:{{Spaces}}({{Digits}})
      regex: true
  # Date captured after "Issue date:".
  - name: dateIssued
    objectType: field
    fieldProperties:
      expression: Issue date:{{Spaces}}({{SmartDate}})
      regex: true
      dataType: date
      dateFormat: MMM d, yyyy
  # NOTE(review): unlike invoiceId/dateIssued, the expressions for total, subTotal,
  # taxRate and tax have no capture group; confirm how the parser derives the value.
  - name: total
    objectType: field
    fieldProperties:
      expression: Amount due in USD:{{Spaces}}{{Number}}
      regex: true
      dataType: decimal
  - name: subTotal
    objectType: field
    fieldProperties:
      expression: Subtotal in USD:{{Spaces}}{{Number}}
      regex: true
      dataType: decimal
  - name: taxRate
    objectType: field
    fieldProperties:
      expression: State sales tax {{OpeningParenthesis}}{{Digits}}{{Percent}}{{ClosingParenthesis}}
      regex: true
      dataType: integer
  - name: tax
    objectType: field
    fieldProperties:
      expression: State sales tax{{Anything}}{{Number}}{{LineEnd}}
      regex: true
      dataType: decimal
  # Static fields carry a fixed value instead of a search expression.
  - name: companyName
    objectType: field
    fieldProperties:
      fieldType: static
      expression: Google LLC
      regex: true
  # Text taken from a page region on the first page; no expression is given.
  # NOTE(review): the four rectangle numbers are presumably x, y, width, height - confirm.
  - name: billTo
    objectType: field
    fieldProperties:
      fieldType: rectangle
      regex: true
      rectangle:
        - 0
        - 152
        - 280
        - 72
      pageIndex: 0
  # Value captured after "Billing ID:".
  - name: billingId
    objectType: field
    fieldProperties:
      expression: Billing ID:{{Spaces}}({{DigitsOrSymbols}})
      regex: true
  - name: currency
    objectType: field
    fieldProperties:
      fieldType: static
      expression: USD
      regex: true
  # Line items: rows between the column header line and "Subtotal in USD".
  # Named groups in the row expression (description, interval, quantity, amount)
  # become the columns; only quantity and amount are given an explicit data type.
  - name: table1
    objectType: table
    tableProperties:
      start:
        expression: Description{{Spaces}}Interval{{Spaces}}Quantity{{Spaces}}Amount
        regex: true
      end:
        expression: Subtotal in USD
        regex: true
      row:
        expression: '{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<interval>{{3Letters}}{{Space}}{{Digits}}{{Space}}{{Minus}}{{Space}}{{3Letters}}{{Space}}{{Digits}}){{Spaces}}(?<quantity>{{Digits}}){{Spaces}}(?<amount>{{Number}})'
        regex: true
      columns:
        - name: quantity
          dataType: integer
        - name: amount
          dataType: decimal
Utils.cs
namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    /// <summary>
    /// Shared settings for the web part.
    /// </summary>
    public class Utils
    {
        /// <summary>
        /// PDF.co API key sent in the <c>x-api-key</c> request header.
        /// Replace the placeholder with your own key before deploying.
        /// </summary>
        /// <remarks>
        /// Declared <c>readonly</c> so the value cannot be overwritten at runtime;
        /// a static field is shared by every request served from the same application domain.
        /// </remarks>
        public static readonly string API_KEY = "--ADD-YOUR-PDF-CO-API-KEY-HERE--";
    }
}
VisualWebPart1.cs
using Microsoft.SharePoint;
using Microsoft.SharePoint.WebControls;
using System;
using System.ComponentModel;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    /// <summary>
    /// Web part shell whose only job is to host the user control stored at <c>_ascxPath</c>.
    /// </summary>
    [ToolboxItemAttribute(false)]
    public class VisualWebPart1 : WebPart
    {
        // Visual Studio might automatically update this path when you change the Visual Web Part project item.
        private const string _ascxPath = @"~/_CONTROLTEMPLATES/15/ParseSimpleDocumentWebPart/VisualWebPart1/VisualWebPart1UserControl.ascx";

        /// <summary>
        /// Loads the user control from <c>_ascxPath</c> and appends it to this web part's child controls.
        /// </summary>
        protected override void CreateChildControls()
        {
            Controls.Add(Page.LoadControl(_ascxPath));
        }
    }
}
VisualWebPart1UserControl.ascx.cs
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Net;
using System.Threading;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    /// <summary>
    /// Code-behind of the web part's user control. Uploads the selected file to PDF.co,
    /// starts a Document Parser job with the template text entered by the user, polls the
    /// job until it finishes and shows the resulting JSON.
    /// </summary>
    public partial class VisualWebPart1UserControl : UserControl
    {
        // The authentication key (API Key).
        // Get your own by registering at https://app.pdf.co
        private readonly string API_KEY = Utils.API_KEY;

        // PDF document password. Leave empty for unprotected documents.
        // NOTE(review): not referenced anywhere in this class; kept for parity with the sample.
        const string Password = "";

        // (!) Make asynchronous job
        const bool Async = true;

        /// <summary>Page load handler; intentionally empty.</summary>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler: validates the inputs, runs the upload/parse workflow and appends
        /// progress and errors to <c>LogTextBox</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event arguments (unused).</param>
        protected void StartButton_Click(object sender, EventArgs e)
        {
            // Template text. Use Document Parser (https://pdf.co/document-parser, https://app.pdf.co/document-parser)
            // to create templates.

            ServicePointManager.Expect100Continue = true;
            // Enable TLS 1.2 without switching off the protocols other code in this process relies on;
            // ServicePointManager settings are process-wide.
            ServicePointManager.SecurityProtocol |= SecurityProtocolType.Tls12;

            // Both the file and the template are required, so reject the request when EITHER is missing.
            if (!FileUpload1.HasFile || String.IsNullOrWhiteSpace(TemplateTextBox.Text))
            {
                LogTextBox.Text += "Select file and template \n";
                return;
            }

            try
            {
                UploadAndParse();
            }
            catch (Exception ex)
            {
                // Any network or JSON failure is reported in the log box instead of breaking the page.
                LogTextBox.Text += ex.ToString() + " \n";
            }

            LogTextBox.Text += "\n";
            LogTextBox.Text += "Done...\n";
        }

        /// <summary>
        /// Runs the three API steps: get a presigned upload URL, upload the file,
        /// start the Document Parser job and wait for its result.
        /// API-level errors (<c>error == true</c> in a response) are written to the log.
        /// </summary>
        private void UploadAndParse()
        {
            // `using` guarantees the client is disposed on every exit path, including exceptions.
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
                // * If you already have a direct file URL, skip to the step 3.
                // Only the file name is escaped (as a query value) so that characters such as
                // '&', '#', '+' or '?' in the name cannot corrupt the query string.
                string presignQuery =
                    "https://api.pdf.co/v1/file/upload/get-presigned-url?contenttype=application/octet-stream&name="
                    + Uri.EscapeDataString(FileUpload1.FileName);

                JObject json = JObject.Parse(webClient.DownloadString(presignQuery));
                if (json["error"].ToObject<bool>())
                {
                    LogTextBox.Text += json["message"].ToString() + " \n";
                    return;
                }

                // URL to PUT the bytes to, and the URL under which the uploaded file is then available.
                string uploadUrl = json["presignedUrl"].ToString();
                string uploadedFileUrl = json["url"].ToString();

                // 2. UPLOAD THE FILE TO CLOUD.
                webClient.Headers.Add("content-type", "application/octet-stream");
                webClient.UploadData(uploadUrl, "PUT", FileUpload1.FileBytes);
                // The header applies to the upload only; drop it before the next request.
                webClient.Headers.Remove("content-type");

                // 3. PARSE UPLOADED PDF DOCUMENT
                Dictionary<string, object> requestBody = new Dictionary<string, object>();
                requestBody.Add("template", TemplateTextBox.Text);
                requestBody.Add("name", FileUpload1.FileName);
                requestBody.Add("url", uploadedFileUrl);
                requestBody.Add("async", Async);

                // Convert dictionary of params to JSON and call the `Document Parser` endpoint
                string jsonPayload = JsonConvert.SerializeObject(requestBody);
                string response = webClient.UploadString("https://api.pdf.co/v1/pdf/documentparser", "POST", jsonPayload);

                json = JObject.Parse(response);
                if (json["error"].ToObject<bool>())
                {
                    LogTextBox.Text += json["message"].ToString() + " \n";
                    return;
                }

                // Asynchronous job ID and URL of the JSON file the job will generate
                string jobId = json["jobId"].ToString();
                string resultFileUrl = json["url"].ToString();

                WaitForJobAndShowResult(webClient, jobId, resultFileUrl);
            }
        }

        /// <summary>
        /// Polls the job status every three seconds until it is no longer "working".
        /// On "success" the result JSON is downloaded and appended to <c>ResultTextBox</c>;
        /// any other final status is written to the log.
        /// </summary>
        /// <param name="webClient">Client (with the API key header set) used to download the result.</param>
        /// <param name="jobId">Identifier of the asynchronous job.</param>
        /// <param name="resultFileUrl">URL of the generated JSON file.</param>
        private void WaitForJobAndShowResult(WebClient webClient, string jobId, string resultFileUrl)
        {
            // NOTE: this loop has no upper bound and blocks the current thread while it sleeps.
            // If you don't want to pause the main thread you can rework the code
            // to use a separate thread for the status checking and completion.
            while (true)
            {
                string status = CheckJobStatus(jobId); // Possible statuses: "working", "failed", "aborted", "success".

                // Display timestamp and status (for demo purposes)
                LogTextBox.Text += DateTime.Now.ToLongTimeString() + ": " + status + "\n";

                if (status == "success")
                {
                    // Download JSON result
                    string result = webClient.DownloadString(resultFileUrl);
                    LogTextBox.Text += "Generated JSON.\n";
                    ResultTextBox.Text += result;
                    return;
                }

                if (status != "working")
                {
                    // Any other status ends the polling and is reported as is.
                    LogTextBox.Text += status + " \n";
                    return;
                }

                // Pause for a few seconds
                Thread.Sleep(3000);
            }
        }

        /// <summary>
        /// Queries the job-check endpoint for the current status of a job.
        /// </summary>
        /// <param name="jobId">Identifier of the asynchronous job.</param>
        /// <returns>The value of the <c>status</c> property of the JSON response, converted to a string.</returns>
        protected string CheckJobStatus(string jobId)
        {
            using (WebClient webClient = new WebClient())
            {
                // Set API Key
                webClient.Headers.Add("x-api-key", API_KEY);

                // Escape the id so it is always sent as a single query value;
                // a null id is sent as an empty value, as before.
                string url = "https://api.pdf.co/v1/job/check?jobid=" + Uri.EscapeDataString(jobId ?? String.Empty);
                string response = webClient.DownloadString(url);
                JObject json = JObject.Parse(response);
                return Convert.ToString(json["status"]);
            }
        }
    }
}
VisualWebPart1UserControl.ascx.designer.cs
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace ParseSimpleDocumentWebPart.VisualWebPart1
{
    // Designer half of the partial class: declares the server controls that the
    // code-behind (VisualWebPart1UserControl.ascx.cs) reads and writes.
    public partial class VisualWebPart1UserControl
    {
        /// <summary>
        /// FileUpload1 control.
        /// The code-behind reads its HasFile, FileName and FileBytes to upload the selected file.
        /// </summary>
        /// <remarks>
        /// Auto-generated field.
        /// To modify move field declaration from designer file to code-behind file.
        /// </remarks>
        protected global::System.Web.UI.WebControls.FileUpload FileUpload1;
        /// <summary>
        /// TemplateTextBox control.
        /// Its Text is sent as the "template" value of the Document Parser request.
        /// </summary>
        /// <remarks>
        /// Auto-generated field.
        /// To modify move field declaration from designer file to code-behind file.
        /// </remarks>
        protected global::System.Web.UI.WebControls.TextBox TemplateTextBox;
        /// <summary>
        /// StartButton control.
        /// Presumably wired to StartButton_Click in the .ascx markup (not visible here) - confirm.
        /// </summary>
        /// <remarks>
        /// Auto-generated field.
        /// To modify move field declaration from designer file to code-behind file.
        /// </remarks>
        protected global::System.Web.UI.WebControls.Button StartButton;
        /// <summary>
        /// LogTextBox control.
        /// The code-behind appends status lines and error messages to its Text.
        /// </summary>
        /// <remarks>
        /// Auto-generated field.
        /// To modify move field declaration from designer file to code-behind file.
        /// </remarks>
        protected global::System.Web.UI.WebControls.TextBox LogTextBox;
        /// <summary>
        /// ResultTextBox control.
        /// The code-behind appends the downloaded result JSON to its Text.
        /// </summary>
        /// <remarks>
        /// Auto-generated field.
        /// To modify move field declaration from designer file to code-behind file.
        /// </remarks>
        protected global::System.Web.UI.WebControls.TextBox ResultTextBox;
    }
}
PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDFs.
Download Source Code (.zip)
Return to the previous page | Explore the Document Parser endpoint
Copyright © 2016 - 2023 PDF.co