Parse Invoice and Save Table Data to mySql Database - PHP
Document Parser sample in PHP demonstrating ‘Parse Invoice and Save Table Data to mySql Database’
Google Invoice.json
{
"templateName": "Google Invoice",
"templateVersion": 4,
"templatePriority": 0,
"detectionRules": {
"keywords": [
"Google",
"77-0493581",
"Invoice"
]
},
"objects": [
{
"name": "invoiceId",
"objectType": "field",
"fieldProperties": {
"expression": "Invoice number:{{Spaces}}({{Digits}})",
"regex": true
},
"id": 0
},
{
"name": "dateIssued",
"objectType": "field",
"fieldProperties": {
"expression": "Issue date:{{Spaces}}({{SmartDate}})",
"regex": true,
"dataType": "date",
"dateFormat": "MMM d, yyyy"
},
"id": 1
},
{
"name": "total",
"objectType": "field",
"fieldProperties": {
"expression": "Amount due in USD:{{Spaces}}{{Number}}",
"regex": true,
"dataType": "decimal"
},
"id": 2
},
{
"name": "subTotal",
"objectType": "field",
"fieldProperties": {
"expression": "Subtotal in USD:{{Spaces}}{{Number}}",
"regex": true,
"dataType": "decimal"
},
"id": 3
},
{
"name": "taxRate",
"objectType": "field",
"fieldProperties": {
"expression": "State sales tax {{OpeningParenthesis}}{{Digits}}{{Percent}}{{ClosingParenthesis}}",
"regex": true,
"dataType": "integer"
},
"id": 4
},
{
"name": "tax",
"objectType": "field",
"fieldProperties": {
"expression": "State sales tax{{Anything}}{{Number}}{{LineEnd}}",
"regex": true,
"dataType": "decimal"
},
"id": 5
},
{
"name": "companyName",
"objectType": "field",
"fieldProperties": {
"fieldType": "static",
"expression": "Google LLC",
"regex": true
},
"id": 6
},
{
"name": "billTo",
"objectType": "field",
"fieldProperties": {
"fieldType": "rectangle",
"regex": true,
"rectangle": [
0,
152,
280,
72
],
"pageIndex": 0
},
"id": 7
},
{
"name": "billingId",
"objectType": "field",
"fieldProperties": {
"expression": "Billing ID:{{Spaces}}({{DigitsOrSymbols}})",
"regex": true
},
"id": 8
},
{
"name": "currency",
"objectType": "field",
"fieldProperties": {
"fieldType": "static",
"expression": "USD",
"regex": true
},
"id": 9
},
{
"name": "table1",
"objectType": "table",
"tableProperties": {
"start": {
"expression": "Description{{Spaces}}Interval{{Spaces}}Quantity{{Spaces}}Amount",
"regex": true
},
"end": {
"expression": "Subtotal in USD",
"regex": true
},
"row": {
"expression": "{{LineStart}}{{Spaces}}(?<description>{{SentenceWithSingleSpaces}}){{Spaces}}(?<interval>{{3Letters}}{{Space}}{{Digits}}{{Space}}{{Minus}}{{Space}}{{3Letters}}{{Space}}{{Digits}}){{Spaces}}(?<quantity>{{Digits}}){{Spaces}}(?<amount>{{Number}})",
"regex": true
},
"columns": [
{
"name": "quantity",
"dataType": "integer"
},
{
"name": "amount",
"dataType": "decimal"
}
]
},
"id": 10
}
],
"culture": "en-US",
"description": "",
"options": {
"ocrMode": "auto",
"ocrLanguage": "eng",
"ocrResolution": 300,
"ocrImageFilters": "",
"ocrWhiteList": "",
"ocrBlackList": ""
}
}
program.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Document Parser API Example</title>
</head>
<body>
<form name="form1" enctype="multipart/form-data" method="post" action="program.php">
<p>
<label>Authentication key (API Key). Get your own by registering at <a href="https://app.pdf.co/documentation/api">https://app.pdf.co/documentation/api</a>.</label>
<br/>
<input type="text" name="apiKey" placeholder="API Key"/>
</p>
<p>
<label>Input File (*.pdf, *.jpg, *.png, *.tif, *.gif, *.bmp)</label>
</br>
<input type="hidden" name="MAX_FILE_SIZE" value="8000000"/>
<input type="file" name="fileInput"/>
</p>
<p>
<label>Input Template (*.yml)</label>
</br>
<input type="file" name="fileTemplate"/>
</p>
<input type="submit" name="submit" value="Proceed" />
</form>
</body>
</html>
program.php
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Document Parse Results</title>
</head>
<body>
<?php
// Get submitted form data
$apiKey = $_POST["apiKey"]; // The authentication key (API Key). Get your own by registering at https://app.pdf.co/documentation/api
// 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
// * If you already have the direct PDF file link, go to the step 3.
// Create URL
$url = "https://api.pdf.co/v1/file/upload/get-presigned-url" .
"?name=" . $_FILES["fileInput"]["name"] .
"&contenttype=application/octet-stream";
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// Execute request
$result = curl_exec($curl);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
// Get URL to use for the file upload
$uploadFileUrl = $json["presignedUrl"];
// Get URL of uploaded file to use with later API calls
$uploadedFileUrl = $json["url"];
// 2. UPLOAD THE FILE TO CLOUD.
$localFile = $_FILES["fileInput"]["tmp_name"];
$fileHandle = fopen($localFile, "r");
curl_setopt($curl, CURLOPT_URL, $uploadFileUrl);
curl_setopt($curl, CURLOPT_HTTPHEADER, array("content-type: application/octet-stream"));
curl_setopt($curl, CURLOPT_PUT, true);
curl_setopt($curl, CURLOPT_INFILE, $fileHandle);
curl_setopt($curl, CURLOPT_INFILESIZE, filesize($localFile));
// Execute request
curl_exec($curl);
fclose($fileHandle);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
// Read all template texts
$templateText = file_get_contents($_FILES["fileTemplate"]["tmp_name"]);
// 3. PARSE UPLOADED PDF DOCUMENT
ParseDocument($apiKey, $uploadedFileUrl, $templateText);
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
}
else
{
// Display service reported error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
curl_close($curl);
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
// Insert parsed output to db
function InsertToDb($resultFileUrl)
{
$servername = "localhost";
$username = "root";
$password = "";
$dbname = "sample_db";
// Create connection
$conn = new mysqli($servername, $username, $password, $dbname);
// Check connection
if ($conn->connect_error) {
die("Connection failed: " . $conn->connect_error);
}
//read the json file contents
$jsondata = file_get_contents($resultFileUrl);
$data = json_decode(preg_replace('/[\x00-\x1F\x80-\xFF]/', '', $jsondata), true);
$data_parsed = GetFormattedInvoiceData($data);
// var_dump($data_parsed);
echo '<hr/><p><strong><u>Saving to Database</u></strong></p>';
// echo $data_parsed->invId;
$sql = "INSERT INTO invoice (invId, dateIssued, total, subTotal, taxRate, tax, companyName, billTo, billingId, currency)
VALUES ('$data_parsed->invId','$data_parsed->dateIssued','$data_parsed->total','$data_parsed->subTotal','$data_parsed->taxRate','$data_parsed->tax','$data_parsed->companyName','$data_parsed->billTo','$data_parsed->billingId','$data_parsed->currency')";
if ($conn->query($sql) === TRUE) {
echo "<p>New Invoice record added successfully!</p>";
} else {
echo "<p>Error: " . $sql . "<br>" . $conn->error . "</p>";
}
foreach ($data_parsed->invItems as $itmInv) {
$sql_itm = "INSERT INTO invoice_items (invId, description, intervl, quantity, amount)
VALUES ('$data_parsed->invId','$itmInv->description','$itmInv->interval','$itmInv->quantity','$itmInv->amount')";
if ($conn->query($sql_itm) === TRUE) {
echo "<p>New Invoice Item record added successfully!</p>";
} else {
echo "<p>Error: " . $sql_itm . "<br>" . $conn->error . "</p>";
}
}
$conn->close();
}
function ParseDocument($apiKey, $uploadedFileUrl, $templateText)
{
// (!) Make asynchronous job
$async = TRUE;
// Prepare URL for Document parser API call.
// See documentation: https://apidocs.pdf.co/?#1-pdfdocumentparser
$url = "https://api.pdf.co/v1/pdf/documentparser";
// Prepare requests params
$parameters = array();
$parameters["url"] = $uploadedFileUrl;
$parameters["template"] = $templateText;
$parameters["async"] = $async;
// Create Json payload
$data = json_encode($parameters);
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
// Execute request
$result = curl_exec($curl);
echo $result . "<br/>";
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
if (!isset($json["error"]) || $json["error"] == false)
{
// URL of generated JSON file that will available after the job completion
$resultFileUrl = $json["url"];
// Asynchronous job ID
$jobId = $json["jobId"];
// Check the job status in a loop
do
{
$status = CheckJobStatus($jobId, $apiKey); // Possible statuses: "working", "failed", "aborted", "success".
// Display timestamp and status (for demo purposes)
echo "<p>" . date(DATE_RFC2822) . ": " . $status . "</p>";
if ($status == "success")
{
// Display link to JSON file with information about parsed fields
echo "<div><a href='" . $resultFileUrl . "' target='_blank'>" . $resultFileUrl . "</a></div>";
//Calls InsertToDb function to get key value pair from JSON file
InsertToDb($resultFileUrl);
break;
}
else if ($status == "working")
{
// Pause for a few seconds
sleep(3);
}
else
{
echo $status . "<br/>";
break;
}
}
while (true);
}
else
{
// Display service reported error
echo "<p>Error: " . $json["message"] . "</p>";
}
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
}
function CheckJobStatus($jobId, $apiKey)
{
$status = null;
// Create URL
$url = "https://api.pdf.co/v1/job/check";
// Prepare requests params
$parameters = array();
$parameters["jobid"] = $jobId;
// Create Json payload
$data = json_encode($parameters);
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
// Execute request
$result = curl_exec($curl);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
$status = $json["status"];
if ($json["status"] == "failed")
{
// Display service reported error
echo "<p>Error: " . $json["message"] . "</p>";
}
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
// Cleanup
curl_close($curl);
return $status;
}
class InvoiceData{
public $invId;
public $dateIssued;
public $total;
public $subTotal;
public $taxRate;
public $tax;
public $companyName;
public $billTo;
public $billingId;
public $currency;
public $invItems = array();
}
class InvoiceItem{
public $description;
public $quantity;
public $amount;
public $interval;
}
function GetFormattedInvoiceData($inpJson){
$oRet = new InvoiceData();
$oRet->invId = FindJsonNodeValue($inpJson, 'invoiceId');
$oRet->dateIssued = FindJsonNodeValue($inpJson, 'dateIssued');
$oRet->total = FindJsonNodeValue($inpJson, 'total');
$oRet->subTotal = FindJsonNodeValue($inpJson, 'subTotal');
$oRet->taxRate = FindJsonNodeValue($inpJson, 'taxRate');
$oRet->tax = FindJsonNodeValue($inpJson, 'tax');
$oRet->companyName = FindJsonNodeValue($inpJson, 'companyName');
$oRet->billingId = FindJsonNodeValue($inpJson, 'billingId');
$oRet->billTo = FindJsonNodeValue($inpJson, 'billTo');
$oRet->currency = FindJsonNodeValue($inpJson, 'currency');
$invoiceItems = FindJsonNodeValue($inpJson, 'table1');
if($invoiceItems != NULL){
foreach ($invoiceItems as $keyRow => $itmRow) {
$invItm = new InvoiceItem();
foreach ($itmRow as $key => $value) {
if($key == "description"){
$invItm->description = $value["value"];
}
if($key == "interval"){
$invItm->interval = $value["value"];
}
if($key == "quantity"){
$invItm->quantity = $value["value"];
}
if($key == "amount"){
$invItm->amount = $value["value"];
}
}
$oRet->invItems[] = $invItm;
}
}
// echo '<p>----</p>';
//echo var_dump($oRet);
return $oRet;
}
function FindJsonNodeValue($inpJson, $propName){
$elmRet = array_values(array_filter($inpJson['objects'], function($obj) use ($propName){
return ($obj['name'] == $propName);
}));
if(count($elmRet) > 0){
if($elmRet[0]["objectType"] == "field"){
return $elmRet[0]["value"];
}
else if($elmRet[0]["objectType"] == "table"){
return $elmRet[0]["rows"];
}
}
return NULL;
}
?>
</body>
</html>
res.json
{
"objects": [
{
"name": "invoiceId",
"objectType": "field",
"value": "12340801944",
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{
"name": "dateIssued",
"objectType": "field",
"value": "2018-01-31T00:00:00",
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{
"name": "total",
"objectType": "field",
"value": 25.0,
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{
"name": "subTotal",
"objectType": "field",
"value": 25.0,
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{
"name": "taxRate",
"objectType": "field",
"value": 0,
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{
"name": "tax",
"objectType": "field",
"value": 0.0,
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{ "name": "companyName", "objectType": "field", "value": "Google LLC" },
{
"name": "billTo",
"objectType": "field",
"value": "John Doe\n Acme, Inc.\n 1811 Silverside Road\n Wilmington, DE 19810\n United States",
"pageIndex": 0,
"rectangle": [0.0, 152.0, 280.0, 72.0]
},
{
"name": "billingId",
"objectType": "field",
"value": "8282-4611-4229",
"pageIndex": 0,
"rectangle": [0.0, 0.0, 0.0, 0.0]
},
{ "name": "currency", "objectType": "field", "value": "USD" },
{
"objectType": "table",
"name": "table1",
"rows": [
{
"description": { "pageIndex": 0, "value": "G Suite Basic: Usage" },
"interval": { "pageIndex": 0, "value": "Jan 1 - Jan 31" },
"quantity": { "pageIndex": 0, "value": 5 },
"amount": { "pageIndex": 0, "value": 25.0 }
}
]
}
],
"elapsed": 0.011421299999999999,
"templateName": "Google Invoice",
"templateVersion": "4",
"timestamp": "2022-06-08T06:22:07"
}
PDF.co Web API: the Web API with a set of tools for documents manipulation, data conversion, data extraction, splitting and merging of documents. Includes image recognition, built-in OCR, barcode generation and barcode decoders to decode bar codes from scans, pictures and pdf.
Download Source Code (.zip)
return to the previous page explore Document Parser endpoint
Copyright © 2016 - 2023 PDF.co