return to previous page explore PDF To JSON endpoint

Extract Images from PDF - PHP

PDF To JSON sample in PHP demonstrating ‘Extract Images from PDF’

View on GitHub Download Source Code (.zip)

extract-images.php

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Extract Images from PDF - Results</title>
</head>
<body>

<?php 
// Note: If you have input files larger than 200 KB, we highly recommend checking the "async" mode example.

// Get submitted form data. Missing or non-string fields fall back to an empty string
// instead of raising an "undefined index" notice.
// The authentication key (API Key). Get your own by registering at https://app.pdf.co
$apiKey = (isset($_POST["apiKey"]) && is_string($_POST["apiKey"])) ? trim($_POST["apiKey"]) : "";
$pages = (isset($_POST["pages"]) && is_string($_POST["pages"])) ? trim($_POST["pages"]) : "";

// The upload is usable only if PHP reports no error and the temp file really belongs to this request.
$upload = isset($_FILES["file"]) ? $_FILES["file"] : null;
$uploadIsValid = is_array($upload)
    && isset($upload["error"], $upload["tmp_name"], $upload["name"])
    && $upload["error"] === UPLOAD_ERR_OK
    && is_uploaded_file($upload["tmp_name"]);

if ($apiKey === "")
{
    echo "<p>Error: the API key is required.</p>";
}
elseif (!$uploadIsValid)
{
    echo "<p>Error: no file was uploaded, or the upload failed.</p>";
}
else
{
    // 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
    // * If you already have the direct PDF file link, go to the step 3.

    // Create URL
    $url = "https://api.pdf.co/v1/file/upload/get-presigned-url" .
        "?name=" . urlencode($upload["name"]) .
        "&contenttype=application/octet-stream";

    // Create request
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // Execute request
    $result = curl_exec($curl);

    if (curl_errno($curl) == 0)
    {
        $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
        $json = ($status_code == 200) ? json_decode($result, true) : null;

        if ($status_code != 200)
        {
            // Display service reported error (escaped: the body comes from a remote server)
            echo "<p>Status code: " . $status_code . "</p>";
            echo "<p>" . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
        }
        elseif (!is_array($json) || !isset($json["presignedUrl"], $json["url"]))
        {
            // The body was not JSON or lacks the two URLs the next steps depend on
            echo "<p>Error: unexpected response from the upload service.</p>";
        }
        else
        {
            // Get URL to use for the file upload
            $uploadFileUrl = $json["presignedUrl"];
            // Get URL of uploaded file to use with later API calls
            $uploadedFileUrl = $json["url"];

            // 2. UPLOAD THE FILE TO CLOUD.

            $localFile = $upload["tmp_name"];
            // Binary mode so the PDF bytes are never translated on any platform
            $fileHandle = fopen($localFile, "rb");

            if ($fileHandle === false)
            {
                echo "<p>Error: unable to read the uploaded file.</p>";
            }
            else
            {
                // The same handle is reused; these options replace the ones set for step 1
                curl_setopt($curl, CURLOPT_URL, $uploadFileUrl);
                curl_setopt($curl, CURLOPT_HTTPHEADER, array("content-type: application/octet-stream"));
                curl_setopt($curl, CURLOPT_PUT, true);
                curl_setopt($curl, CURLOPT_INFILE, $fileHandle);
                curl_setopt($curl, CURLOPT_INFILESIZE, filesize($localFile));

                // Execute request and keep its response, so that the error branch below
                // reports this upload's body rather than the presigned-URL response
                $result = curl_exec($curl);

                fclose($fileHandle);

                if (curl_errno($curl) == 0)
                {
                    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);

                    if ($status_code == 200)
                    {
                        // 3. CONVERT UPLOADED PDF FILE TO JSON

                        ExtractJSON($apiKey, $uploadedFileUrl, $pages);
                    }
                    else
                    {
                        // Display request error
                        echo "<p>Status code: " . $status_code . "</p>";
                        echo "<p>" . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
                    }
                }
                else
                {
                    // Display CURL error
                    echo "Error: " . htmlspecialchars(curl_error($curl), ENT_QUOTES, "UTF-8");
                }
            }
        }
    }
    else
    {
        // Display CURL error
        echo "Error: " . htmlspecialchars(curl_error($curl), ENT_QUOTES, "UTF-8");
    }

    // Cleanup on every path, including the CURL-error one
    curl_close($curl);
}

/**
 * Sends a PDF to the PDF.co "pdf/convert/to/json2" endpoint and prints the images
 * found in the response as an HTML list.
 *
 * Images delivered as base64 are decoded and written to a uniquely named file in the
 * current working directory; images delivered as URLs are linked directly. All output
 * is echoed and every value taken from the response is HTML-escaped first.
 *
 * @param string $apiKey          API key sent in the "x-api-key" header.
 * @param string $uploadedFileUrl URL of the PDF to convert.
 * @param string $pages           Page indices/ranges forwarded as the "pages" parameter.
 * @return void
 */
function ExtractJSON($apiKey, $uploadedFileUrl, $pages) 
{
    // Create URL
    $url = "https://api.pdf.co/v1/pdf/convert/to/json2";

    // Prepare requests params
    $parameters = array();
    $parameters["url"] = $uploadedFileUrl;
    $parameters["inline"] = true;
    $parameters["pages"] = $pages;
    $parameters["profiles"] = "{ 'SaveImages': 'Embed' }";

    // Create Json payload
    $data = json_encode($parameters);

    // Create request
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_POST, true);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $data);

    // Execute request
    $result = curl_exec($curl);

    if (curl_errno($curl) != 0)
    {
        // Display CURL error
        echo "Error: " . htmlspecialchars(curl_error($curl), ENT_QUOTES, "UTF-8");
        curl_close($curl);
        return;
    }

    $status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);

    // Cleanup: everything needed from the handle has been read
    curl_close($curl);

    if ($status_code != 200)
    {
        // Display request error
        echo "<p>Status code: " . $status_code . "</p>";
        echo "<p>" . htmlspecialchars((string) $result, ENT_QUOTES, "UTF-8") . "</p>";
        return;
    }

    $json = json_decode($result, true);

    if (!is_array($json))
    {
        echo "<p>Error: the service returned a response that is not valid JSON.</p>";
        return;
    }

    if (isset($json["error"]) && $json["error"] != false)
    {
        // Display service reported error
        $message = (isset($json["message"]) && is_string($json["message"])) ? $json["message"] : "Unknown error";
        echo "<p>Error: " . htmlspecialchars($message, ENT_QUOTES, "UTF-8") . "</p>";
        return;
    }

    // Display images
    echo "<h2>Extracted Images:</h2>";

    $images = ExtractJSON_CollectImages($json);

    if (count($images) === 0)
    {
        echo "<p>No images found</p>";
        return;
    }

    echo "<ul>";
    foreach ($images as $image)
    {
        $type = isset($image["type"]) ? $image["type"] : "";

        if ($type === 'base64encoded' && isset($image["data"]) && is_string($image["data"]))
        {
            // Strict mode: malformed base64 yields false instead of silently corrupt bytes
            $image_data = base64_decode($image["data"], true);

            // Generate a unique filename; the extension never comes verbatim from the response
            $filename = uniqid() . '.' . ExtractJSON_SafeExtension(isset($image["format"]) ? $image["format"] : "");

            // Save the image to disk
            if ($image_data === false || file_put_contents($filename, $image_data) === false)
            {
                echo "<li>Error: an embedded image could not be decoded or saved.</li>";
                continue;
            }

            // Display the image as a link
            $safeName = htmlspecialchars($filename, ENT_QUOTES, "UTF-8");
            echo '<li><a target="_blank" rel="noopener noreferrer" href="' . $safeName . '">Image ' . $safeName . '</a></li>';
        }
        elseif ($type === 'url' && isset($image["url"]) && is_string($image["url"]) && preg_match('#^https?://#i', $image["url"]))
        {
            // Only http(s) links are rendered, so a "javascript:" URL can never become clickable
            $safeUrl = htmlspecialchars($image["url"], ENT_QUOTES, "UTF-8");
            echo '<li><a target="_blank" rel="noopener noreferrer" href="' . $safeUrl . '">Image ' . $safeUrl . '</a></li>';
        }
    }
    echo "</ul>";
}

/**
 * Gathers the image entries found under body.document.page.image of the decoded response.
 *
 * NOTE(review): the exact response schema is not visible here. Besides the single-page,
 * list-of-images shape the sample was written for, this also tolerates a list of pages
 * and a lone image object - confirm against the API documentation.
 *
 * @param array $json Decoded response.
 * @return array List of image entries (each an associative array).
 */
function ExtractJSON_CollectImages($json)
{
    if (!isset($json["body"]["document"]["page"]) || !is_array($json["body"]["document"]["page"]))
    {
        return array();
    }

    $pageNode = $json["body"]["document"]["page"];
    // An element at index 0 means a list of pages; otherwise treat the node as one page
    $pageList = isset($pageNode[0]) ? $pageNode : array($pageNode);

    $images = array();
    foreach ($pageList as $page)
    {
        if (!is_array($page) || !isset($page["image"]) || !is_array($page["image"]))
        {
            continue;
        }

        $imageNode = $page["image"];
        // Same normalisation for the images of one page
        $imageList = isset($imageNode[0]) ? $imageNode : array($imageNode);

        foreach ($imageList as $image)
        {
            if (is_array($image))
            {
                $images[] = $image;
            }
        }
    }

    return $images;
}

/**
 * Maps a response-supplied image format to a file extension that is safe to write to disk.
 *
 * @param mixed $format Format value taken from the response.
 * @return string A known image extension, or "bin" for anything unrecognised.
 */
function ExtractJSON_SafeExtension($format)
{
    $allowed = array("png", "jpg", "jpeg", "gif", "bmp", "tif", "tiff", "webp");
    $candidate = is_string($format) ? strtolower($format) : "";

    return in_array($candidate, $allowed, true) ? $candidate : "bin";
}

?>

</body>
</html>

sample.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Extract Images from PDF</title>
</head>
<body>
    <!-- Upload form: posts the API key, the PDF file and an optional page list to extract-images.php -->
    <form name="form1" enctype="multipart/form-data" method="post" action="extract-images.php">
        <p>
            <label for="apiKey">Authentication key (API Key).</label>
            Get your own by registering at <a href="https://apidocs.pdf.co">https://apidocs.pdf.co</a>.
            <br/>
            <input type="text" id="apiKey" name="apiKey" placeholder="API Key" required/>
        </p>
        <p>
            <label for="file">Input File (*.pdf)</label>
            <!-- MAX_FILE_SIZE must come before the file input for PHP to take it into account -->
            <input type="hidden" name="MAX_FILE_SIZE" value="8000000"/>
            <input type="file" id="file" name="file" accept=".pdf,application/pdf" required/>
        </p>
        <p>
            <label for="pages">Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.</label>
            <input type="text" id="pages" name="pages"/>
        </p>
        <input type="submit" name="submit" value="Proceed" />
    </form>
</body>
</html>

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDFs.

Get your PDF.co API key here!

Extract Images from PDF - PHP

extract-images.php

sample.html

Download Source Code (.zip)

return to the previous page explore PDF To JSON endpoint