Link Search Menu Expand Document

Convert PDF to HTML From Uploaded File Asynchronously - Java

PDF to HTML sample in Java demonstrating ‘Convert PDF to HTML From Uploaded File Asynchronously’

Main.java
package com.company;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import okhttp3.*;

import java.io.*;
import java.net.*;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * PDF.co sample: converts a local PDF to HTML asynchronously.
 *
 * <p>Flow: request a presigned upload URL, PUT the source file to it, POST the
 * conversion request with the async flag set, poll the job-check endpoint until
 * the job leaves the "working" state, then download the result file.
 *
 * <p>Every OkHttp {@code Response} is closed with try-with-resources so that
 * connections and response bodies are not leaked.
 */
public class Main {
    // The authentication key (API Key).
    final static String API_KEY = "*******************************";

    // Source PDF file. A plain relative name is used (instead of ".\\sample.pdf")
    // so the path also resolves on non-Windows systems, where a backslash is an
    // ordinary file-name character rather than a separator.
    final static Path SourceFile = Paths.get("sample.pdf");
    // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
    final static String Pages = "";
    // PDF document password. Leave empty for unprotected documents.
    final static String Password = "";
    // Destination HTML file name (plain relative name for the same portability reason as SourceFile).
    final static Path DestinationFile = Paths.get("result.html");
    // Set to `true` to get simplified HTML without CSS. Default is the rich HTML keeping the document design.
    final static boolean PlainHtml = false;
    // Set to `true` if your document has the column layout like a newspaper.
    final static boolean ColumnLayout = false;
    // Enable async processing
    final static boolean Async = true;

    // Media types reused by the request bodies below.
    private static final MediaType JSON_MEDIA_TYPE = MediaType.parse("application/json");
    private static final MediaType BINARY_MEDIA_TYPE = MediaType.parse("application/octet-stream");

    // Delay between two job status checks, in milliseconds.
    private static final long POLL_INTERVAL_MS = 3000;

    /**
     * Runs the whole sample: obtains a presigned URL, uploads {@link #SourceFile}
     * and starts the conversion.
     *
     * @param args command-line arguments; not used
     * @throws IOException if a request cannot be executed or a file cannot be read or written
     */
    public static void main(String[] args) throws IOException {
        OkHttpClient webClient = new OkHttpClient();

        // Step 1: Retrieve presigned URL to upload the file.
        // The file name is URL-encoded so that spaces or '&' in it cannot break the query string.
        String query = "https://api.pdf.co/v1/file/upload/get-presigned-url"
                + "?contenttype=application/octet-stream&name="
                + URLEncoder.encode(SourceFile.getFileName().toString(), "UTF-8");

        Request request = new Request.Builder()
                .url(query)
                .addHeader("x-api-key", API_KEY)
                .build();

        String uploadUrl;
        String uploadedFileUrl;
        try (Response response = webClient.newCall(request).execute()) {
            if (response.code() != 200) {
                System.out.println(response.code() + " " + response.message());
                return;
            }

            JsonObject json = JsonParser.parseString(response.body().string()).getAsJsonObject();
            if (isError(json)) {
                System.out.println("Error: " + stringOf(json, "message", json.toString()));
                return;
            }

            uploadUrl = json.get("presignedUrl").getAsString();
            uploadedFileUrl = json.get("url").getAsString();
        }

        if (!uploadFile(webClient, uploadUrl, SourceFile)) {
            // Report the failure instead of exiting silently.
            System.out.println("Error: failed to upload " + SourceFile);
            return;
        }

        // Step 2: Start asynchronous PDF to HTML conversion
        PdfToHtml(webClient, uploadedFileUrl);
    }

    /**
     * Posts the conversion request for an already uploaded file and, when the
     * service accepts it, polls the created job until it finishes.
     *
     * @param webClient       HTTP client used for all requests
     * @param uploadedFileUrl URL of the uploaded source PDF, as returned with the presigned URL
     * @throws IOException if a request cannot be executed or the result cannot be written
     */
    public static void PdfToHtml(OkHttpClient webClient, String uploadedFileUrl) throws IOException {
        String query = "https://api.pdf.co/v1/pdf/convert/to/html";

        // Build the payload with Gson so that quotes, backslashes and control
        // characters in the password, pages or URL are escaped correctly.
        // The flags keep their string form ("true"/"false") on the wire.
        JsonObject payload = new JsonObject();
        payload.addProperty("name", DestinationFile.getFileName().toString());
        payload.addProperty("password", Password);
        payload.addProperty("pages", Pages);
        payload.addProperty("simple", String.valueOf(PlainHtml));
        payload.addProperty("columns", String.valueOf(ColumnLayout));
        payload.addProperty("url", uploadedFileUrl);
        payload.addProperty("async", String.valueOf(Async));

        RequestBody body = RequestBody.create(JSON_MEDIA_TYPE, payload.toString());

        Request request = new Request.Builder()
                .url(query)
                .addHeader("x-api-key", API_KEY)
                .addHeader("Content-Type", "application/json")
                .post(body)
                .build();

        String jobId;
        try (Response response = webClient.newCall(request).execute()) {
            if (response.code() != 200) {
                System.out.println(response.code() + " " + response.message());
                return;
            }

            JsonObject json = JsonParser.parseString(response.body().string()).getAsJsonObject();
            if (isError(json)) {
                System.out.println("Error: " + stringOf(json, "message", json.toString()));
                return;
            }

            jobId = stringOf(json, "jobId", null);
            if (jobId == null) {
                System.out.println("Error: response contains no jobId: " + json);
                return;
            }
        }

        pollJobStatus(webClient, jobId);
    }

    /**
     * Checks the job status every {@link #POLL_INTERVAL_MS} milliseconds until it is
     * no longer "working". On "success" the result is downloaded to
     * {@link #DestinationFile}; any other final status is printed.
     *
     * @param webClient HTTP client used for all requests
     * @param jobId     identifier of the job to watch
     * @throws IOException if a request cannot be executed or the result cannot be written
     */
    public static void pollJobStatus(OkHttpClient webClient, String jobId) throws IOException {
        String url = "https://api.pdf.co/v1/job/check?jobid=" + URLEncoder.encode(jobId, "UTF-8");

        // The request never changes between iterations, so build it once.
        Request request = new Request.Builder()
                .url(url)
                .addHeader("x-api-key", API_KEY)
                .build();

        while (true) {
            JsonObject json;
            try (Response response = webClient.newCall(request).execute()) {
                if (!response.isSuccessful()) {
                    // Do not try to parse an error page as a job status document.
                    System.out.println(response.code() + " " + response.message());
                    return;
                }
                json = JsonParser.parseString(response.body().string()).getAsJsonObject();
            }

            String status = stringOf(json, "status", "unknown");
            System.out.println(java.time.LocalDateTime.now() + ": " + status);

            if ("success".equals(status)) {
                String resultFileUrl = json.get("url").getAsString();
                downloadFile(webClient, resultFileUrl, DestinationFile.toFile());
                System.out.println("File downloaded to: " + DestinationFile.toString());
                return;
            } else if ("working".equals(status)) {
                try {
                    Thread.sleep(POLL_INTERVAL_MS); // Wait before polling again
                } catch (InterruptedException e) {
                    // Restore the interrupt flag and stop polling instead of looping on.
                    Thread.currentThread().interrupt();
                    System.out.println("Polling interrupted");
                    return;
                }
            } else {
                System.out.println("Job finished with status: " + status);
                return;
            }
        }
    }

    /**
     * Uploads a file with an HTTP PUT to the given (presigned) URL.
     *
     * @param webClient  HTTP client used for the request
     * @param url        upload URL
     * @param sourceFile file whose bytes are sent as the request body
     * @return {@code true} if the server answered with HTTP 200
     * @throws IOException if the file cannot be read or the request cannot be executed
     */
    public static boolean uploadFile(OkHttpClient webClient, String url, Path sourceFile) throws IOException {
        RequestBody body = RequestBody.create(BINARY_MEDIA_TYPE, sourceFile.toFile());

        Request request = new Request.Builder()
                .url(url)
                .addHeader("content-type", "application/octet-stream")
                .put(body)
                .build();

        try (Response response = webClient.newCall(request).execute()) {
            return response.code() == 200;
        }
    }

    /**
     * Downloads the resource at {@code url} into {@code destinationFile},
     * streaming the body instead of holding the whole file in memory.
     *
     * @param webClient       HTTP client used for the request
     * @param url             URL of the file to download
     * @param destinationFile file to create or overwrite
     * @throws IOException if the server does not answer with a 2xx status,
     *                     or if reading the body or writing the file fails
     */
    public static void downloadFile(OkHttpClient webClient, String url, File destinationFile) throws IOException {
        Request request = new Request.Builder().url(url).build();

        try (Response response = webClient.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                // Avoid saving an error document as if it were the conversion result.
                throw new IOException("Download failed: " + response.code() + " " + response.message());
            }

            try (InputStream input = response.body().byteStream();
                 OutputStream output = new FileOutputStream(destinationFile)) {
                byte[] buffer = new byte[8192];
                int read;
                while ((read = input.read(buffer)) != -1) {
                    output.write(buffer, 0, read);
                }
            }
        }
    }

    /**
     * Tells whether a response document reports a failure, either through a
     * true {@code error} flag or through a {@code status} equal to "error".
     */
    private static boolean isError(JsonObject json) {
        boolean errorFlag = json.has("error")
                && json.get("error").isJsonPrimitive()
                && json.get("error").getAsBoolean();
        return errorFlag || "error".equals(stringOf(json, "status", null));
    }

    /**
     * Returns the primitive member {@code key} as a string, or {@code fallback}
     * when the member is absent or is not a primitive value.
     */
    private static String stringOf(JsonObject json, String key, String fallback) {
        if (json.has(key) && json.get(key).isJsonPrimitive()) {
            return json.get(key).getAsString();
        }
        return fallback;
    }
}
App.java
package $org.example;

/**
 * Minimal entry point that writes a greeting to standard output.
 */
public class App {
    /** Text printed by {@link #main(String[])}. */
    private static final String GREETING = "Hello World!";

    /**
     * Prints the greeting followed by a line break.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        System.out.println(GREETING);
    }
}

AppTest.java
package $org.example;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Placeholder test case for {@code App}, written against the
 * {@code junit.framework} API.
 */
public class AppTest extends TestCase {
    /**
     * Builds a test case with the given name.
     *
     * @param testName name of the test case
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * @return a suite built from this test class
     */
    public static Test suite() {
        final Test allTests = new TestSuite(AppTest.class);
        return allTests;
    }

    /**
     * Trivial test that always passes.
     */
    public void testApp() {
        final boolean alwaysTrue = true;
        assertTrue(alwaysTrue);
    }
}

App.java
package $org.example;

/**
 * Minimal entry point that writes a greeting to standard output.
 */
public class App {
    /** Text printed by {@link #main(String[])}. */
    private static final String GREETING = "Hello World!";

    /**
     * Prints the greeting followed by a line break.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        System.out.println(GREETING);
    }
}

AppTest.java
package $org.example;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Placeholder test case for {@code App}, written against the
 * {@code junit.framework} API.
 */
public class AppTest extends TestCase {
    /**
     * Builds a test case with the given name.
     *
     * @param testName name of the test case
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * @return a suite built from this test class
     */
    public static Test suite() {
        final Test allTests = new TestSuite(AppTest.class);
        return allTests;
    }

    /**
     * Trivial test that always passes.
     */
    public void testApp() {
        final boolean alwaysTrue = true;
        assertTrue(alwaysTrue);
    }
}

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDF files.

Get your PDF.co API key here!

Download Source Code (.zip)

return to the previous page explore PDF to HTML endpoint