Link Search Menu Expand Document

PDF Get Search Table Data - Python

PDF Find Table sample in Python demonstrating ‘PDF Get Search Table Data’

program.py
import requests
import os

# PDF.co API key used to authenticate every request.
# Register at https://app.pdf.co to obtain your own.
API_KEY = "***************************************"

# Direct URL of the PDF document to search for tables.
SourceFileUrl = "https://bytescout-com.s3.amazonaws.com/files/demo-files/cloud-api/pdf-to-text/sample.pdf"

# Pages to process: comma-separated indices and/or ranges, e.g. '0,2-5,7-'.
# An empty string selects all pages.
Pages = ""

# Password of the PDF document; keep empty for unprotected documents.
Password = ""

# Endpoint of the PDF Table Search API call.
query = "https://api.pdf.co/v1/pdf/find/table"

# Form fields sent with the table-search request.
reqOptions = dict(password=Password, pages=Pages, url=SourceFileUrl)

# HTTP headers shared by all API calls in this script.
headers = {'x-api-key': API_KEY}


def getJSONFromCoordinates(fileUrl, pageIndex, rect, outputFileName):
    """Convert one region of a PDF page to JSON and save it to a local file.

    Posts to the PDF.co `PDF To JSON` endpoint using the module-level
    ``headers``, then downloads the generated file from the URL returned in
    the response body.

    Args:
        fileUrl: URL of the source PDF file.
        pageIndex: Value sent as the ``pages`` request field.
        rect: Value sent as the ``rect`` request field.
        outputFileName: Local path the downloaded JSON is written to.

    Returns:
        None. Success and failures are reported with ``print``.
    """
    # Prepare request to `PDF To JSON` API endpoint
    jsonQueryPath = "https://api.pdf.co/v1/pdf/convert/to/json"

    # Json Request
    jsonReqOptions = {
        'pages': pageIndex,
        'url': fileUrl,
        'rect': rect
    }

    # Send request
    response = requests.post(jsonQueryPath, headers=headers, data=jsonReqOptions)
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    # A 200 response without a result link cannot be downloaded; report it
    # instead of failing with a KeyError.
    outputJsonUrl = response.json().get('url')
    if not outputJsonUrl:
        print(f"Request error: no output URL in response: {response.text}")
        return

    # Download JSON file
    res = requests.get(outputJsonUrl)
    if res.status_code != 200:
        # Do not save an error page under the JSON file name.
        print(f"Download error: {res.status_code} {res.reason}")
        return

    with open(outputFileName, 'wb') as outfile:
        outfile.write(res.content)
    print(f'Generated JSON file saved as "{outputFileName}" file.')


# Send the table-search request
response = requests.post(query, headers=headers, data=reqOptions)
if response.status_code == 200:
    jsonBody = response.json()

    # Look the table list up defensively: a 200 payload without a 'body'
    # object would otherwise raise KeyError.
    body = jsonBody.get('body')
    tables = body.get('tables') if isinstance(body, dict) else None

    if tables:
        # Loop through all found tables, and get json data
        for i, table in enumerate(tables):
            getJSONFromCoordinates(SourceFileUrl, table['PageIndex'], table['rect'], f"table_{i + 1}.json")
    else:
        # Report the empty case instead of exiting silently.
        print("No tables found in the API response.")
else:
    print(f"Request error: {response.status_code} {response.reason}")


PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders for reading barcodes from scans, pictures, and PDF files.

Get your PDF.co API key here!

Download Source Code (.zip)

return to the previous page explore PDF Find Table endpoint