Return to the previous page | Explore the PDF Split endpoint

Split PDF By Text Search - Salesforce

PDF Split sample in Salesforce demonstrating ‘Split PDF By Text Search’

View on GitHub Download Source Code (.zip)

SplitPDFByTextSearch.cls

/**
 * Sample that splits a PDF by text search through the PDF.co Web API and
 * stores every resulting part as a Salesforce File (ContentVersion).
 *
 * Flow: POST the split request to the "split2" endpoint, read the "urls"
 * array from the JSON answer, download each URL with a GET callout and insert
 * the downloaded files in a single DML statement.
 */
public class SplitPDFByTextSearch {

    // Placeholder API key sent in the 'x-api-key' header.
    // NOTE(review): for real use, consider keeping the key out of source
    // (e.g. a Named Credential or protected custom metadata).
    String API_KEY = '*****************';
    // Title prefix of the generated files; '_<n>' is appended per split part.
    string DestinationFile = 'SplitPDFByTextSearch_Result';
    // PDF.co endpoint that receives the split request.
    string endPointUrl = 'https://api.pdf.co/v1/pdf/split2';
    
    /**
     * Sends the split request, downloads every returned file and inserts the
     * downloaded files as ContentVersion records.
     *
     * Failures are written to the debug log only: a non-200 status, a response
     * whose "error" flag is not false, and any exception raised on the way.
     * Nothing is rethrown to the caller.
     */
    public void splitPdfByText()
    {
        List<ContentVersion> toBeInserted = new List<ContentVersion>();
        try
        {
            String jsonBody = '{ "url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-split/multiple-invoices.pdf", "searchString": "invoice number", "excludeKeyPages": false, "regexSearch": false, "caseSensitive": false, "inline": true, "name": "invoice-extracted", "async": false }';
            HttpRequest request = new HttpRequest();
            request.setHeader('x-api-key', API_KEY);
            request.setEndpoint(endPointUrl);
            request.setHeader('Content-Type', 'application/json');
            request.setMethod('POST');
            request.setBody(jsonBody);
            HttpResponse response = new Http().send(request);

            // Check the status before parsing: an error body is not
            // guaranteed to be JSON, and parsing it first would hide the
            // status details behind a JSON exception.
            if (response.getStatusCode() != 200)
            {
                logErrorResponse(response);
                return;
            }

            // The local is deliberately not called 'json': Apex identifiers
            // are case-insensitive, so that name would shadow the JSON class.
            Map<String, Object> parsedBody = (Map<String, Object>) JSON.deserializeUntyped(response.getBody());
            Boolean errorFlag = (Boolean) parsedBody.get('error');
            if (errorFlag == false)
            {
                System.debug('response.getBody() :: ' + response.getBody());
                List<Object> resultFileUrls = (List<Object>) parsedBody.get('urls');
                if (resultFileUrls == null || resultFileUrls.isEmpty())
                {
                    System.debug(LoggingLevel.WARN, 'Response did not contain any file urls.');
                }
                else
                {
                    Integer count = 1;
                    for (Object fileUrl : resultFileUrls)
                    {
                        downloadFile(String.valueOf(fileUrl), DestinationFile + '_' + count, toBeInserted);
                        count++;
                    }
                    System.debug(toBeInserted.size() + ' of ' + resultFileUrls.size()
                        + ' generated PDF file(s) downloaded with title prefix \'' + DestinationFile + '\'.');
                }
            }
            else
            {
                // Status 200 but the "error" flag is true or missing.
                System.debug(LoggingLevel.ERROR, 'Split request reported an error: ' + response.getBody());
            }

            // DML is done once, after all callouts have finished.
            if (!toBeInserted.isEmpty())
            {
                insert toBeInserted;
            }
        }
        catch(Exception ex)
        {
            String errorBody = 'Message: ' + ex.getMessage() + ' -- Cause: ' + ex.getCause() + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.debug(LoggingLevel.ERROR, errorBody);
        }
    }
    
    /**
     * Downloads one file with a GET callout and, on status 200, appends a
     * ContentVersion holding its content to the given list. The record is not
     * inserted here. Any other status is only written to the debug log.
     *
     * @param extFileUrl      URL of the file to download; spaces are replaced by %20.
     * @param DestinationFile Title of the file; '.pdf' is appended for PathOnClient.
     * @param toBeInserted    List that receives the new ContentVersion.
     */
    @TestVisible
    private static void downloadFile(String extFileUrl, String DestinationFile, List<ContentVersion> toBeInserted)
    {
        HttpRequest req = new HttpRequest();
        req.setEndpoint(extFileUrl.replace(' ', '%20'));
        req.setMethod('GET');
        req.setHeader('Content-Type', 'application/pdf');
        req.setCompressed(true);
        req.setTimeout(60000);
        HttpResponse res = new Http().send(req);

        if (res.getStatusCode() != 200)
        {
            logErrorResponse(res);
            return;
        }

        ContentVersion conVer = new ContentVersion();
        conVer.ContentLocation = 'S';
        // The '.pdf' extension matters here: it lets the file be previewed.
        conVer.PathOnClient = DestinationFile + '.pdf';
        // Display name of the file.
        conVer.Title = DestinationFile;
        conVer.VersionData = res.getBodyAsBlob();
        toBeInserted.add(conVer);
        System.debug('Success');
    }

    /**
     * Writes body, status, status code and string form of a failed response
     * to the debug log.
     */
    private static void logErrorResponse(HttpResponse res)
    {
        System.debug(LoggingLevel.ERROR, 'Error Response ' + res.getBody());
        System.debug(LoggingLevel.ERROR, ' Status ' + res.getStatus());
        System.debug(LoggingLevel.ERROR, ' Status Code' + res.getStatusCode());
        System.debug(LoggingLevel.ERROR, ' Response String' + res.toString());
    }
}

SplitPDFByTextSearchTest.cls

/**
 * Tests for SplitPDFByTextSearch. The same mock answers every callout, so
 * both the split request and the per-file downloads receive its JSON body
 * with status 200.
 */
@isTest
private class SplitPDFByTextSearchTest {

    // Title prefix used by SplitPDFByTextSearch for the files it creates.
    private static final String TITLE_FILTER = 'SplitPDFByTextSearch_Result_%';

    /**
     * With the mock in place the response lists two urls, so two files are
     * expected to be inserted, titled with the suffixes _1 and _2.
     */
    @isTest
    static void testSplitPdfUsingPage()
    {
        Test.setMock(HttpCalloutMock.class, new SplitPDFByTextSearchTest.SplitPDFByTextMock());
        Test.startTest();
        SplitPDFByTextSearch splitPdf = new SplitPDFByTextSearch();
        splitPdf.splitPdfByText();
        Test.stopTest();

        List<ContentVersion> saved = [SELECT Title FROM ContentVersion WHERE Title LIKE :TITLE_FILTER ORDER BY Title];
        System.assertEquals(2, saved.size(), 'One file per returned url should be inserted');
        System.assertEquals('SplitPDFByTextSearch_Result_1', saved[0].Title);
        System.assertEquals('SplitPDFByTextSearch_Result_2', saved[1].Title);
    }
    
    /**
     * No mock is registered here.
     * NOTE(review): the callout is expected to fail in test context and be
     * handled by the catch block of splitPdfByText - confirm in the target org.
     */
    @isTest
    static void testSplitPdfUsingPageForCatch()
    {
        Test.startTest();
        SplitPDFByTextSearch splitPdf = new SplitPDFByTextSearch();
        splitPdf.splitPdfByText();
        Test.stopTest();

        Integer savedCount = [SELECT COUNT() FROM ContentVersion WHERE Title LIKE :TITLE_FILTER];
        System.assertEquals(0, savedCount, 'No file should be inserted when the callout fails');
    }
    
    /**
     * Callout mock returning a successful split response (status 200,
     * "error": false) that contains two file urls.
     */
    public class SplitPDFByTextMock implements HttpCalloutMock {
        public HTTPResponse respond(HTTPRequest req) {
            HttpResponse res = new HttpResponse();
            String testBody = '{ "urls": [ "https://pdf-temp-files.s3.us-west-2.amazonaws.com/BV5R88QIPOZPKVO0PE7GYKVBETULHN0Y/invoice-extracted_page1.pdf?X-Amz-Expires=3600&X-Amz-Security-Token=FwoGZXIvYXdzEG8aDKuBm3gDES1tmO7uSSKCATaP8OHW3vpxMhhGOh4utp7hCQzkW5aPbntexlu9TwfIMzceDB6zBv%2F5L4kWTEf8xwOqTckcDrAeDd0oL%2FbWpbPgP68r01OjXwSBbB0TMu3KftSf7VN%2B7NhOl4MXX1HIOSCrvcCUvLOql2DcpVhjFwPJ%2B1JMx6yorVx1zNMLwj98Kuco%2FI6nnwYyKBbvDvsM%2FQpG1PaeKq7jq7kfl4MIt7KkwPs8LyuEMIkaLznfkdpSMFw%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHFPQ3GI5S/20230213/us-west-2/s3/aws4_request&X-Amz-Date=20230213T054546Z&X-Amz-SignedHeaders=host&X-Amz-Signature=f3d62c0952b8bc772a77c3d146b7fb55a8e86129b5cb351500f73aead8c1dbfa", "https://pdf-temp-files.s3.us-west-2.amazonaws.com/98TZ8L6PIAX3QWHF4POX4Q9TN6BHPIIH/invoice-extracted_page2.pdf?X-Amz-Expires=3600&X-Amz-Security-Token=FwoGZXIvYXdzEG8aDKuBm3gDES1tmO7uSSKCATaP8OHW3vpxMhhGOh4utp7hCQzkW5aPbntexlu9TwfIMzceDB6zBv%2F5L4kWTEf8xwOqTckcDrAeDd0oL%2FbWpbPgP68r01OjXwSBbB0TMu3KftSf7VN%2B7NhOl4MXX1HIOSCrvcCUvLOql2DcpVhjFwPJ%2B1JMx6yorVx1zNMLwj98Kuco%2FI6nnwYyKBbvDvsM%2FQpG1PaeKq7jq7kfl4MIt7KkwPs8LyuEMIkaLznfkdpSMFw%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHFPQ3GI5S/20230213/us-west-2/s3/aws4_request&X-Amz-Date=20230213T054546Z&X-Amz-SignedHeaders=host&X-Amz-Signature=98d5bf380c9176f410178b1554dae798faad7158f52153bbe1dbe1b86cb91fe7" ], "pageCount": 3, "error": false, "status": 200, "name": "invoice-extracted.pdf", "credits": 105, "duration": 465, "remainingCredits": 1166270 }';
            res.setHeader('Content-Type', 'application/json');
            res.setBody(testBody);
            res.setStatusCode(200);
            return res;
        }
    }
}

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures, and PDF files.

Get your PDF.co API key here!

Split PDF By Text Search - Salesforce

SplitPDFByTextSearch.cls

SplitPDFByTextSearchTest.cls

Download Source Code (.zip)

Return to the previous page | Explore the PDF Split endpoint