Return to the previous page | Explore the PDF to HTML endpoint

Convert PDF to HTML From Uploaded File Asynchronously - Java

PDF to HTML sample in Java demonstrating ‘Convert PDF to HTML From Uploaded File Asynchronously’

View on GitHub Download Source Code (.zip)

Main.java

package com.company;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import okhttp3.*;

import java.io.*;
import java.net.*;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Sample client for the PDF.co Web API that converts a local PDF file to HTML
 * using an asynchronous job.
 *
 * <p>The flow is: request a presigned upload URL, upload the source file to it,
 * start the conversion for the uploaded file, poll the job status until it is
 * no longer {@code working}, then download the result to {@link #DestinationFile}.
 *
 * <p>Every HTTP {@code Response} is closed via try-with-resources so that the
 * underlying connection is released back to the client.
 */
public class Main {
    // The authentication key (API Key).
    final static String API_KEY = "*******************************";

    // Source PDF file, relative to the working directory. A plain relative name is used
    // instead of ".\\sample.pdf" because the backslash is only a separator on Windows.
    final static Path SourceFile = Paths.get("sample.pdf");
    // Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
    final static String Pages = "";
    // PDF document password. Leave empty for unprotected documents.
    final static String Password = "";
    // Destination HTML file name, relative to the working directory (see note on SourceFile).
    final static Path DestinationFile = Paths.get("result.html");
    // Set to `true` to get simplified HTML without CSS. Default is the rich HTML keeping the document design.
    final static boolean PlainHtml = false;
    // Set to `true` if your document has the column layout like a newspaper.
    final static boolean ColumnLayout = false;
    // Enable async processing
    final static boolean Async = true;

    // Delay between two job status checks, in milliseconds.
    private static final long POLL_INTERVAL_MS = 3000;

    /**
     * Entry point: obtains a presigned upload URL, uploads {@link #SourceFile} and,
     * if the upload succeeded, starts the conversion.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if any HTTP call or file operation fails
     */
    public static void main(String[] args) throws IOException {
        OkHttpClient webClient = new OkHttpClient();

        // Step 1: Retrieve presigned URL to upload the file.
        // The file name is URL-encoded so names with spaces or '&' do not corrupt the query string.
        String query = "https://api.pdf.co/v1/file/upload/get-presigned-url"
                + "?contenttype=application/octet-stream&name="
                + URLEncoder.encode(SourceFile.getFileName().toString(), "UTF-8");

        Request request = new Request.Builder()
                .url(query)
                .addHeader("x-api-key", API_KEY)
                .build();

        String uploadUrl;
        String uploadedFileUrl;
        try (Response response = webClient.newCall(request).execute()) {
            if (response.code() != 200) {
                System.out.println(response.code() + " " + response.message());
                return;
            }

            JsonObject json = JsonParser.parseString(response.body().string()).getAsJsonObject();
            if (isErrorResponse(json)) {
                System.out.println("Error: " + errorMessage(json));
                return;
            }

            uploadUrl = json.get("presignedUrl").getAsString();
            uploadedFileUrl = json.get("url").getAsString();
        }

        if (uploadFile(webClient, uploadUrl, SourceFile)) {
            // Step 2: Start asynchronous PDF to HTML conversion
            PdfToHtml(webClient, uploadedFileUrl);
        }
    }

    /**
     * Starts the PDF to HTML conversion for an already uploaded file and, when the
     * service returns a job id, polls that job until it finishes.
     *
     * @param webClient       HTTP client used for all requests
     * @param uploadedFileUrl URL of the uploaded source PDF, as returned by the presigned-URL call
     * @throws IOException if an HTTP call fails or the result cannot be written
     */
    public static void PdfToHtml(OkHttpClient webClient, String uploadedFileUrl) throws IOException {
        String query = "https://api.pdf.co/v1/pdf/convert/to/html";

        // Build the payload with Gson instead of String.format so that quotes and
        // backslashes in the password, page list or URL are escaped correctly,
        // and so that the flags are sent as JSON booleans.
        JsonObject payload = new JsonObject();
        payload.addProperty("name", DestinationFile.getFileName().toString());
        payload.addProperty("password", Password);
        payload.addProperty("pages", Pages);
        payload.addProperty("simple", PlainHtml);
        payload.addProperty("columns", ColumnLayout);
        payload.addProperty("url", uploadedFileUrl);
        payload.addProperty("async", Async);

        RequestBody body = RequestBody.create(MediaType.parse("application/json"), payload.toString());

        Request request = new Request.Builder()
                .url(query)
                .addHeader("x-api-key", API_KEY)
                .addHeader("Content-Type", "application/json")
                .post(body)
                .build();

        String jobId;
        try (Response response = webClient.newCall(request).execute()) {
            if (response.code() != 200) {
                System.out.println(response.code() + " " + response.message());
                return;
            }

            JsonObject json = JsonParser.parseString(response.body().string()).getAsJsonObject();
            if (isErrorResponse(json)) {
                System.out.println("Error: " + errorMessage(json));
                return;
            }
            if (!hasValue(json, "jobId")) {
                // Without a job id there is nothing to poll.
                System.out.println("Error: response did not contain a jobId");
                return;
            }
            jobId = json.get("jobId").getAsString();
        }

        pollJobStatus(webClient, jobId);
    }

    /**
     * Polls the job status every {@value #POLL_INTERVAL_MS} ms while it is {@code working}.
     * On {@code success} the result is downloaded to {@link #DestinationFile}; any other
     * status ends the polling with a message.
     *
     * @param webClient HTTP client used for all requests
     * @param jobId     identifier of the job to check
     * @throws IOException            if an HTTP call or the download fails
     * @throws InterruptedIOException if the thread is interrupted while waiting between checks
     */
    public static void pollJobStatus(OkHttpClient webClient, String jobId) throws IOException {
        String url = "https://api.pdf.co/v1/job/check?jobid=" + URLEncoder.encode(jobId, "UTF-8");

        // The request never changes between iterations, so it is built once.
        Request request = new Request.Builder()
                .url(url)
                .addHeader("x-api-key", API_KEY)
                .build();

        while (true) {
            String status;
            String resultFileUrl = null;

            try (Response response = webClient.newCall(request).execute()) {
                if (response.code() != 200) {
                    // A non-200 body is not guaranteed to be the expected JSON; report and stop.
                    System.out.println(response.code() + " " + response.message());
                    return;
                }

                JsonObject json = JsonParser.parseString(response.body().string()).getAsJsonObject();
                if (!hasValue(json, "status")) {
                    System.out.println("Error: " + errorMessage(json));
                    return;
                }

                status = json.get("status").getAsString();
                System.out.println(java.time.LocalDateTime.now() + ": " + status);

                if ("success".equals(status)) {
                    resultFileUrl = json.get("url").getAsString();
                }
            }

            if ("success".equals(status)) {
                downloadFile(webClient, resultFileUrl, DestinationFile.toFile());
                System.out.println("File downloaded to: " + DestinationFile.toString());
                return;
            }
            if (!"working".equals(status)) {
                System.out.println("Job finished with status: " + status);
                return;
            }

            try {
                Thread.sleep(POLL_INTERVAL_MS); // Wait before polling again
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop instead of polling forever.
                Thread.currentThread().interrupt();
                InterruptedIOException interrupted =
                        new InterruptedIOException("Interrupted while waiting for job " + jobId);
                interrupted.initCause(e);
                throw interrupted;
            }
        }
    }

    /**
     * Uploads a file with an HTTP PUT to the given (presigned) URL.
     *
     * @param webClient  HTTP client used for the request
     * @param url        URL to upload to
     * @param sourceFile file whose content is sent as the request body
     * @return {@code true} if the server answered with HTTP 200, {@code false} otherwise
     * @throws IOException if the request could not be executed
     */
    public static boolean uploadFile(OkHttpClient webClient, String url, Path sourceFile) throws IOException {
        RequestBody body = RequestBody.create(MediaType.parse("application/octet-stream"), sourceFile.toFile());

        Request request = new Request.Builder()
                .url(url)
                .addHeader("content-type", "application/octet-stream")
                .put(body)
                .build();

        try (Response response = webClient.newCall(request).execute()) {
            if (response.code() == 200) {
                return true;
            }
            // Report the failure so the caller does not stop silently.
            System.out.println("Upload failed: " + response.code() + " " + response.message());
            return false;
        }
    }

    /**
     * Downloads the content at {@code url} and writes it to {@code destinationFile},
     * replacing any existing content.
     *
     * @param webClient       HTTP client used for the request
     * @param url             URL to download from
     * @param destinationFile file to write the downloaded bytes to
     * @throws IOException if the request fails, the server answers with a non-success
     *                     status, or the file cannot be written
     */
    public static void downloadFile(OkHttpClient webClient, String url, File destinationFile) throws IOException {
        Request request = new Request.Builder().url(url).build();

        try (Response response = webClient.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                // Do not save an error page as if it were the conversion result.
                throw new IOException("Download failed: " + response.code() + " " + response.message());
            }

            byte[] fileBytes = response.body().bytes();

            try (OutputStream output = new FileOutputStream(destinationFile)) {
                output.write(fileBytes);
            }
        }
    }

    /** Returns whether {@code json} has a non-null member called {@code key}. */
    private static boolean hasValue(JsonObject json, String key) {
        return json.has(key) && !json.get(key).isJsonNull();
    }

    /**
     * Returns whether the response signals an error, either through a true
     * {@code error} member or through a {@code status} member equal to {@code "error"}.
     */
    private static boolean isErrorResponse(JsonObject json) {
        boolean errorFlag = hasValue(json, "error") && json.get("error").getAsBoolean();
        boolean errorStatus = hasValue(json, "status") && "error".equals(json.get("status").getAsString());
        return errorFlag || errorStatus;
    }

    /** Returns the {@code message} member if present, otherwise the whole response as text. */
    private static String errorMessage(JsonObject json) {
        return hasValue(json, "message") ? json.get("message").getAsString() : json.toString();
    }
}

App.java

package $org.example;

/**
 * Minimal application that prints a greeting to standard output.
 */
public class App {

    /**
     * Prints the greeting followed by a line break.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";
        System.out.println(greeting);
    }
}

AppTest.java

package $org.example;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Unit test for the simple App, built on the {@code junit.framework} API.
 */
public class AppTest extends TestCase {

    /**
     * Creates the test case.
     *
     * @param testName name of the test case, forwarded to {@code TestCase}
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * @return a suite constructed from this test class
     */
    public static Test suite() {
        final Class<AppTest> testClass = AppTest.class;
        return new TestSuite(testClass);
    }

    /**
     * Placeholder test: asserts a constant {@code true}.
     */
    public void testApp() {
        final boolean expected = true;
        assertTrue(expected);
    }
}

App.java

package $org.example;

/**
 * Minimal application that prints a greeting to standard output.
 */
public class App {

    /**
     * Prints the greeting followed by a line break.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";
        System.out.println(greeting);
    }
}

AppTest.java

package $org.example;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Unit test for the simple App, built on the {@code junit.framework} API.
 */
public class AppTest extends TestCase {

    /**
     * Creates the test case.
     *
     * @param testName name of the test case, forwarded to {@code TestCase}
     */
    public AppTest(String testName) {
        super(testName);
    }

    /**
     * @return a suite constructed from this test class
     */
    public static Test suite() {
        final Class<AppTest> testClass = AppTest.class;
        return new TestSuite(testClass);
    }

    /**
     * Placeholder test: asserts a constant {@code true}.
     */
    public void testApp() {
        final boolean expected = true;
        assertTrue(expected);
    }
}

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures and PDF files.

Get your PDF.co API key here!

Convert PDF to HTML From Uploaded File Asynchronously - Java

Main.java

App.java

AppTest.java

App.java

AppTest.java

Download Source Code (.zip)

return to the previous page explore PDF to HTML endpoint