Return to the previous page | Explore the PDF To JSON endpoint

Extract Images from PDF Asynchronously - PHP

PDF To JSON sample in PHP demonstrating ‘Extract Images from PDF Asynchronously’

View on GitHub Download Source Code (.zip)

extract-images-async.php

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Extract Images from PDF - Results</title>
</head>
<body>

<?php 
// Validate inputs: every field must be present and a plain string. An
// array-valued field (e.g. "apiKey[]") cannot serve as a header value or path.
if (
    !isset($_POST["apiKey"], $_POST["pages"], $_FILES["file"]["tmp_name"], $_FILES["file"]["name"])
    || !is_string($_POST["apiKey"])
    || !is_string($_POST["pages"])
    || !is_string($_FILES["file"]["tmp_name"])
    || !is_string($_FILES["file"]["name"])
) {
    die("Error: Missing required form parameters.");
}

// These values are sent to the API, not rendered as HTML, so they stay raw:
// HTML-escaping them would corrupt a key, page list or file name containing
// &, <, > or quotes. Escaping is applied where text is echoed into the page.
// CR/LF are removed from the key because it is placed in an HTTP header line.
$apiKey = trim(str_replace(array("\r", "\n"), "", $_POST["apiKey"]));
$pages = trim($_POST["pages"]);
$fileName = $_FILES["file"]["name"];
$localFile = $_FILES["file"]["tmp_name"];

// Reject failed or forged uploads before touching the temporary file.
$uploadStatus = isset($_FILES["file"]["error"]) ? (int) $_FILES["file"]["error"] : UPLOAD_ERR_OK;
if ($uploadStatus !== UPLOAD_ERR_OK || !is_uploaded_file($localFile)) {
    die("Error: File upload failed (upload error code " . $uploadStatus . ").");
}

// 1. Retrieve the presigned URL for file upload
$url = "https://api.pdf.co/v1/file/upload/get-presigned-url" .
    "?name=" . urlencode($fileName) .
    "&contenttype=application/octet-stream";

$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);

$result = curl_exec($curl);
if (curl_errno($curl) != 0) {
    // Transport-level failure (DNS, TLS, timeout, ...): report it instead of staying silent.
    echo "<p>Error: " . htmlspecialchars(curl_error($curl), ENT_QUOTES, "UTF-8") . "</p>";
} else {
    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    $json = json_decode($result, true);

    if ($status_code !== 200) {
        echo "<p>Status code: " . $status_code . "</p>";
        echo "<p>Error: " . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
    } elseif (!is_array($json) || empty($json["presignedUrl"]) || empty($json["url"])) {
        // A 200 response without both URLs cannot be used for the upload step.
        echo "<p>Error: " . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
    } else {
        $uploadFileUrl = $json["presignedUrl"];
        $uploadedFileUrl = $json["url"];

        // 2. Upload the file to cloud (binary-safe read of the temporary file)
        $fileHandle = fopen($localFile, "rb");
        if ($fileHandle === false) {
            echo "<p>Error: Unable to read the uploaded file.</p>";
        } else {
            curl_setopt($curl, CURLOPT_URL, $uploadFileUrl);
            curl_setopt($curl, CURLOPT_HTTPHEADER, array("content-type: application/octet-stream"));
            curl_setopt($curl, CURLOPT_PUT, true);
            curl_setopt($curl, CURLOPT_INFILE, $fileHandle);
            curl_setopt($curl, CURLOPT_INFILESIZE, filesize($localFile));

            // Keep the upload response so a failure reports this request,
            // not the earlier presigned-URL response.
            $uploadResult = curl_exec($curl);
            $uploadErrno = curl_errno($curl);
            $uploadError = curl_error($curl);
            $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
            fclose($fileHandle);

            if ($uploadErrno != 0) {
                echo "<p>Error: " . htmlspecialchars($uploadError, ENT_QUOTES, "UTF-8") . "</p>";
            } elseif ($status_code !== 200) {
                echo "<p>Status code: " . $status_code . "</p>";
                echo "<p>Error: " . htmlspecialchars((string) $uploadResult, ENT_QUOTES, "UTF-8") . "</p>";
            } else {
                // 3. Extract images from the uploaded PDF
                ExtractJSON($apiKey, $uploadedFileUrl, $pages);
            }
        }
    }
}

curl_close($curl);

/**
 * Starts an asynchronous PDF-to-JSON conversion (with embedded images) for an
 * already uploaded file and hands the returned job over to the status poller.
 *
 * Progress and errors are echoed as HTML; nothing is returned.
 *
 * @param string $apiKey          API key sent in the "x-api-key" header.
 * @param string $uploadedFileUrl URL of the uploaded PDF to convert.
 * @param string $pages           Page indices/ranges to process ("" for all pages).
 */
function ExtractJSON($apiKey, $uploadedFileUrl, $pages)
{
    $url = "https://api.pdf.co/v1/pdf/convert/to/json2";

    $parameters = array();
    $parameters["url"] = $uploadedFileUrl;
    $parameters["inline"] = true;
    $parameters["pages"] = $pages;
    $parameters["profiles"] = '{ "SaveImages": "Embed" }';
    $parameters["async"] = true; // Ensure async mode is explicitly set

    $data = json_encode($parameters);

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);

    // Collect everything needed from the handle, then release it before
    // the (potentially long-running) polling starts.
    $result = curl_exec($curl);
    $curlErrno = curl_errno($curl);
    $curlError = curl_error($curl);
    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    curl_close($curl);

    if ($curlErrno != 0) {
        echo "<p>Error: " . htmlspecialchars($curlError, ENT_QUOTES, "UTF-8") . "</p>";
        return;
    }

    if ($status_code !== 200) {
        echo "<p>Status code: " . $status_code . "</p>";
        echo "<p>Error: " . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
        return;
    }

    $json = json_decode($result, true);
    if (!is_array($json)) {
        // The body was not valid JSON; show it (escaped) so the problem is visible.
        echo "<p>Error: " . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
        return;
    }

    if (!empty($json["error"]) && isset($json["message"]) && is_string($json["message"])) {
        // Surface the API's own explanation rather than a generic message.
        echo "<p>Error: " . htmlspecialchars($json["message"], ENT_QUOTES, "UTF-8") . "</p>";
        return;
    }

    if (!isset($json["jobId"])) {
        echo "<p>Error: No job ID returned in the response.</p>";
        return;
    }

    // The result URL may be absent; pass null so the poller can report that
    // instead of triggering an undefined-index notice here.
    $jsonUrl = isset($json["url"]) ? $json["url"] : null;
    CheckJobStatusAndFetchResult($json["jobId"], $apiKey, $jsonUrl);
}

/**
 * Polls the job-status endpoint until the job leaves the "working" state or
 * the attempt limit is reached, then downloads and prints the resulting JSON.
 *
 * All output (job ID, result link, JSON content, errors) is echoed as HTML;
 * nothing is returned.
 *
 * @param string      $jobId   Identifier of the asynchronous job to poll.
 * @param string      $apiKey  API key sent in the "x-api-key" header.
 * @param string|null $jsonUrl URL the finished JSON result is fetched from.
 */
function CheckJobStatusAndFetchResult($jobId, $apiKey, $jsonUrl)
{
    $url = "https://api.pdf.co/v1/job/check";
    $maxRetries = 30;
    $pollIntervalSeconds = 3;

    // The request is identical on every poll, so one handle is configured once
    // and closed once after the loop, whichever way the loop ends.
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, json_encode(array("jobid" => $jobId)));

    $finished = false; // becomes true once a final outcome has been reported

    for ($retryCount = 0; $retryCount < $maxRetries; $retryCount++) {
        $result = curl_exec($curl);

        if (curl_errno($curl) != 0) {
            echo "<p>Error: " . htmlspecialchars(curl_error($curl), ENT_QUOTES, "UTF-8") . "</p>";
            $finished = true;
            break;
        }

        $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        if ($status_code !== 200) {
            echo "<p>Status code: " . $status_code . "</p>";
            echo "<p>Error: " . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
            $finished = true;
            break;
        }

        $json = json_decode($result, true);
        if (!is_array($json)) {
            // The body was not valid JSON; show it (escaped) so the problem is visible.
            echo "<p>Error: " . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
            $finished = true;
            break;
        }

        if (isset($json["error"]) && $json["error"] != false) {
            $message = isset($json["message"]) ? (string) $json["message"] : (string) $result;
            echo "<p>Error: " . htmlspecialchars($message, ENT_QUOTES, "UTF-8") . "</p>";
            $finished = true;
            break;
        }

        $status = isset($json["status"]) ? (string) $json["status"] : "";

        if ($status === "working") {
            // Still running: wait before the next poll, but not after the last attempt.
            if ($retryCount + 1 < $maxRetries) {
                sleep($pollIntervalSeconds);
            }
            continue;
        }

        $finished = true;

        if ($status !== "success") {
            echo "<p>Job failed with status: " . htmlspecialchars($status, ENT_QUOTES, "UTF-8") . "</p>";
            break;
        }

        echo "<p>Job ID: " . htmlspecialchars((string) $jobId, ENT_QUOTES, "UTF-8") . "</p>";

        if (is_string($jsonUrl) && $jsonUrl !== "") {
            // Escape once for both the attribute and the link text.
            $safeUrl = htmlspecialchars($jsonUrl, ENT_QUOTES, "UTF-8");
            echo "<p>JSON URL: <a href='" . $safeUrl . "' target='_blank' rel='noopener'>" . $safeUrl . "</a></p>";

            $jsonContent = file_get_contents($jsonUrl);
            if ($jsonContent === false) {
                echo "<p>Error: Unable to download the result JSON.</p>";
            } else {
                echo "<pre>Extracted JSON Content: " . htmlspecialchars($jsonContent, ENT_QUOTES, "UTF-8") . "</pre>";
            }
        } else {
            echo "<p>Error: No URL found in the job result.</p>";
        }
        break;
    }

    curl_close($curl);

    if (!$finished) {
        // Every attempt reported "working": say so instead of ending silently.
        echo "<p>Error: Job did not finish after " . $maxRetries . " status checks.</p>";
    }
}

?>

</body>
</html>

sample.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Extract Images from PDF</title>
</head>
<body>
    <!-- Posts the fields apiKey, file and pages to extract-images-async.php as multipart/form-data. -->
    <form name="form1" enctype="multipart/form-data" method="post" action="extract-images-async.php">
        <p>
            <label for="apiKey">Authentication key (API Key). Get your own by registering at <a href="https://apidocs.pdf.co">https://apidocs.pdf.co</a>.</label>
            <br/>
            <input type="text" id="apiKey" name="apiKey" placeholder="API Key" required/>
        </p>
        <p>
            <label for="file">Input File (*.pdf)</label>
            <!-- MAX_FILE_SIZE has to precede the file input for PHP to take it into account. -->
            <input type="hidden" name="MAX_FILE_SIZE" value="8000000"/>
            <input type="file" id="file" name="file" accept=".pdf,application/pdf" required/>
        </p>
        <p>
            <label for="pages">Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.</label>
            <input type="text" id="pages" name="pages"/>
        </p>
        <input type="submit" name="submit" value="Proceed" />
    </form>
</body>
</html>

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDF files.

Get your PDF.co API key here!

Extract Images from PDF Asynchronously - PHP

extract-images-async.php

sample.html

Download Source Code (.zip)

return to the previous page explore PDF To JSON endpoint