PDF Get Search Table Data - Python
PDF Find Table sample in Python demonstrating ‘PDF Get Search Table Data’
program.py
import requests
import os
# PDF.co API key used to authenticate requests.
# Register at https://app.pdf.co to obtain your own.
API_KEY = "***************************************"

# Direct URL of the PDF document to search for tables.
SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf"

# Page selection: comma-separated indices or ranges, e.g. '0,2-5,7-'.
# An empty string selects all pages.
Pages = ""

# Password of the PDF; keep empty for unprotected documents.
Password = ""

# Endpoint of the PDF Table Search API.
query = "https://api.pdf.co/v1/pdf/find/table"

# Form fields posted to the table search endpoint.
reqOptions = dict(password=Password, pages=Pages, url=SourceFileUrl)

# Authentication header attached to the API requests made by this script.
headers = {'x-api-key': API_KEY}
def getJSONFromCoordinates(fileUrl, pageIndex, rect, outputFileName):
    """Convert one region of a PDF to JSON and save the result locally.

    Posts a request to the `PDF To JSON` endpoint, then downloads the
    generated JSON file from the URL returned by the API and writes it to
    ``outputFileName``. Failures are reported with ``print`` and the
    function returns without writing a file.

    Args:
        fileUrl: URL of the source PDF, sent as the ``url`` field.
        pageIndex: Page selection, sent as the ``pages`` field.
        rect: Region to extract, sent as the ``rect`` field.
        outputFileName: Path of the local file to write the JSON to.

    Returns:
        None.
    """
    # Prepare request to `PDF To JSON` API endpoint
    jsonQueryPath = "https://api.pdf.co/v1/pdf/convert/to/json"

    # Json Request
    jsonReqOptions = {
        'pages': pageIndex,
        'url': fileUrl,
        'rect': rect
    }

    # Send request
    response = requests.post(jsonQueryPath, headers=headers, data=jsonReqOptions)
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    # An HTTP 200 reply may still carry no result URL (e.g. an API-level
    # error payload); check before indexing so we do not raise KeyError.
    jsonBody = response.json()
    outputJsonUrl = jsonBody.get('url')
    if jsonBody.get('error') or not outputJsonUrl:
        print(f"API error: {jsonBody.get('message', 'no result URL returned')}")
        return

    # Download JSON file; do not save an error page as if it were the result.
    res = requests.get(outputJsonUrl)
    if res.status_code != 200:
        print(f"Download error: {res.status_code} {res.reason}")
        return

    with open(outputFileName, 'wb') as outfile:
        outfile.write(res.content)
    print(f'Generated JSON file saved as "{outputFileName}" file.')
# Send the table search request
response = requests.post(query, headers=headers, data=reqOptions)
if response.status_code == 200:
    jsonBody = response.json()
    if jsonBody.get('error'):
        # HTTP 200 with an API-level error flag: report it instead of
        # failing on a missing 'body' key below.
        print(f"API error: {jsonBody.get('message')}")
    else:
        # Look up the table list defensively; 'body' or 'tables' may be absent.
        body = jsonBody.get('body')
        tables = body.get('tables') if isinstance(body, dict) else None
        if not tables:
            print("No tables found in the document.")
        else:
            # Loop through all found tables, and get json data
            for i, table in enumerate(tables):
                getJSONFromCoordinates(SourceFileUrl, table['PageIndex'], table['rect'], f"table_{i + 1}.json")
else:
    print(f"Request error: {response.status_code} {response.reason}")
PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders to decode barcodes from scans, pictures, and PDFs.
Download Source Code (.zip)
Return to the previous page | Explore the PDF Find Table endpoint
Copyright © 2016 - 2023 PDF.co