Extract Images from PDF - PHP
PDF To JSON sample in PHP demonstrating ‘Extract Images from PDF’
extract-images.php
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Extract Images from PDF - Results</title>
</head>
<body>
<?php
// Note: If you have input files large than 200kb we highly recommend to check "async" mode example.
// Get submitted form data
$apiKey = $_POST["apiKey"]; // The authentication key (API Key). Get your own by registering at https://app.pdf.co
$pages = $_POST["pages"];
// 1. RETRIEVE THE PRESIGNED URL TO UPLOAD THE FILE.
// * If you already have the direct PDF file link, go to the step 3.
// Create URL
$url = "https://api.pdf.co/v1/file/upload/get-presigned-url" .
"?name=" . urlencode($_FILES["file"]["name"]) .
"&contenttype=application/octet-stream";
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
// Execute request
$result = curl_exec($curl);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
// Get URL to use for the file upload
$uploadFileUrl = $json["presignedUrl"];
// Get URL of uploaded file to use with later API calls
$uploadedFileUrl = $json["url"];
// 2. UPLOAD THE FILE TO CLOUD.
$localFile = $_FILES["file"]["tmp_name"];
$fileHandle = fopen($localFile, "r");
curl_setopt($curl, CURLOPT_URL, $uploadFileUrl);
curl_setopt($curl, CURLOPT_HTTPHEADER, array("content-type: application/octet-stream"));
curl_setopt($curl, CURLOPT_PUT, true);
curl_setopt($curl, CURLOPT_INFILE, $fileHandle);
curl_setopt($curl, CURLOPT_INFILESIZE, filesize($localFile));
// Execute request
curl_exec($curl);
fclose($fileHandle);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
// 3. CONVERT UPLOADED PDF FILE TO JSON
ExtractJSON($apiKey, $uploadedFileUrl, $pages);
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
}
else
{
// Display service reported error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
curl_close($curl);
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
function ExtractJSON($apiKey, $uploadedFileUrl, $pages)
{
// Create URL
$url = "https://api.pdf.co/v1/pdf/convert/to/json2";
// Prepare requests params
$parameters = array();
$parameters["url"] = $uploadedFileUrl;
$parameters["inline"] = true;
$parameters["pages"] = $pages;
$parameters["profiles"] = "{ 'SaveImages': 'Embed' }";
// Create Json payload
$data = json_encode($parameters);
// Create request
$curl = curl_init();
curl_setopt($curl, CURLOPT_HTTPHEADER, array("x-api-key: " . $apiKey, "Content-type: application/json"));
curl_setopt($curl, CURLOPT_URL, $url);
curl_setopt($curl, CURLOPT_POST, true);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($curl, CURLOPT_POSTFIELDS, $data);
// Execute request
$result = curl_exec($curl);
if (curl_errno($curl) == 0)
{
$status_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
if ($status_code == 200)
{
$json = json_decode($result, true);
if (!isset($json["error"]) || $json["error"] == false)
{
// Display images
echo "<h2>Extracted Images:</h2>";
if (isset($json["body"]["document"]["page"]["image"])) {
echo "<ul>";
foreach ($json["body"]["document"]["page"]["image"] as $image){
if ($image['type'] === 'base64encoded') {
// If the image is in base64 format, decode it
$image_data = base64_decode($image['data']);
// Generate a unique filename for the image
$filename = uniqid() . '.' . $image['format'];
// Save the image to disk
file_put_contents($filename, $image_data);
// Display the image as a link
echo '<li><a target="_blank" href="' . $filename . '">Image ' . $filename . '</a></li>';
} else if ($image['type'] === 'url') {
// If the image is a URL, display it as a link
echo '<li><a target="_blank" href="' . $image['url'] . '">Image ' . $image['url'] . '</a></li>';
}
}
echo "</ul>";
}
else{
echo "<p>No images found</p>";
}
}
else
{
// Display service reported error
echo "<p>Error: " . $json["message"] . "</p>";
}
}
else
{
// Display request error
echo "<p>Status code: " . $status_code . "</p>";
echo "<p>" . $result . "</p>";
}
}
else
{
// Display CURL error
echo "Error: " . curl_error($curl);
}
// Cleanup
curl_close($curl);
}
?>
</body>
</html>
sample.html
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Extract Images from PDF</title>
</head>
<body>
<form name="form1" enctype="multipart/form-data" method="post" action="extract-images.php">
<p>
<label>Authentication key (API Key). Get your own by registering at <a href="https://apidocs.pdf.co">https://apidocs.pdf.co</a>.</label>
<br/>
<input type="text" name="apiKey" placeholder="API Key"/>
</p>
<p>
<label>Input File (*.pdf)</label>
<input type="hidden" name="MAX_FILE_SIZE" value="8000000"/>
<input type="file" name="file"/>
</p>
<p>
<label>Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.</label>
<input type="text" name="pages">
</p>
<input type="submit" name="submit" value="Proceed" />
</form>
</body>
</html>
PDF.co Web API: the Web API with a set of tools for documents manipulation, data conversion, data extraction, splitting and merging of documents. Includes image recognition, built-in OCR, barcode generation and barcode decoders to decode bar codes from scans, pictures and pdf.
Download Source Code (.zip)
return to the previous page explore PDF To JSON endpoint
Copyright © 2016 - 2023 PDF.co