Link Search Menu Expand Document

Split PDF By Text Search - Salesforce

PDF Split sample in Salesforce demonstrating ‘Split PDF By Text Search’

SplitPDFByTextSearch.cls
/**
 * Sample that asks the PDF.co 'pdf/split2' endpoint to split a PDF at pages
 * matching a search string, downloads every resulting part and stores each
 * one as a ContentVersion record.
 */
public class SplitPDFByTextSearch {

    // Value sent in the 'x-api-key' request header; replace the placeholder with a real key.
    String API_KEY = '*****************';
    // Base title for the stored files; '_<n>' is appended for each split part.
    string DestinationFile = 'SplitPDFByTextSearch_Result';
    // Endpoint that receives the split request.
    string endPointUrl = 'https://api.pdf.co/v1/pdf/split2';
    
    /**
     * Sends the split request, downloads every URL listed under 'urls' in the
     * response and inserts the collected ContentVersion records in one DML
     * statement. Failures are written to the debug log; no exception is
     * propagated to the caller.
     */
    public void splitPdfByText()
    {
        List<ContentVersion> toBeInserted = new List<ContentVersion>();
        try
        {
            String jsonBody = '{ "url": "https://pdfco-test-files.s3.us-west-2.amazonaws.com/pdf-split/multiple-invoices.pdf", "searchString": "invoice number", "excludeKeyPages": false, "regexSearch": false, "caseSensitive": false, "inline": true, "name": "invoice-extracted", "async": false }';
            Http httpClient = new Http();
            HttpRequest request = new HttpRequest();
            request.setHeader('x-api-key', API_KEY);
            request.setEndpoint(endPointUrl);
            request.setHeader('Content-Type', 'application/json');
            request.setMethod('POST');
            request.setBody(jsonBody);
            HttpResponse response = httpClient.send(request);
            if(response.getStatusCode() == 200)
            {
                // Parse only after the status check so that a non-JSON error body
                // is logged by the else branch below instead of raising a parse exception.
                // The local is not called 'json' because Apex is case-insensitive and
                // that name would shadow the JSON system class.
                Map<String, Object> parsedBody = (Map<String, Object>)JSON.deserializeUntyped(response.getBody());
                if ((Boolean)parsedBody.get('error') == false)
                {
                    System.debug('response.getBody() :: '+response.getBody());
                    List<Object> resultFileUrl = (List<Object>)parsedBody.get('urls');
                    if (resultFileUrl == null)
                    {
                        // A response without 'urls' yields no files rather than a null dereference.
                        resultFileUrl = new List<Object>();
                    }
                    Integer count = 1;
                    for(Object fileUrl : resultFileUrl)
                    {
                        downloadFile(String.valueOf(fileUrl), DestinationFile+'_'+count, toBeInserted);
                        count++;
                    }
                    System.debug('Generated PDF file saved as \'' + DestinationFile + '\' file.');
                }
                else
                {
                    // HTTP 200 but the payload does not report error == false: surface it in the log.
                    System.debug('Error Response ' + response.getBody());
                }
            }
            else
            {
                System.debug('Error Response ' + response.getBody());
                System.Debug(' Status ' + response.getStatus());
                System.Debug(' Status Code' + response.getStatusCode());
                System.Debug(' Response String' + response.toString());
            }
            // The insert happens after all callouts have been made.
            if(toBeInserted.size() > 0)
            {
                insert toBeInserted;
            }
        }
        catch(Exception ex)
        {
            String errorBody = 'Message: ' + ex.getMessage() + ' -- Cause: ' + ex.getCause() + ' -- Stacktrace: ' + ex.getStackTraceString();
            System.Debug(errorBody);
        }
    }
    
    /**
     * Downloads one file with an HTTP GET and, on a 200 response, appends an
     * unsaved ContentVersion holding its bytes to the supplied list.
     *
     * @param extFileUrl      URL of the file to download; spaces are replaced with '%20'.
     * @param DestinationFile Title for the new ContentVersion; '.pdf' is appended for PathOnClient.
     * @param toBeInserted    List that receives the new record; the caller performs the insert.
     */
    @TestVisible
    private static void downloadFile(String extFileUrl, String DestinationFile, List<ContentVersion> toBeInserted)
    {
        Http h = new Http(); 
        HttpRequest req = new HttpRequest(); 
        extFileUrl = extFileUrl.replace(' ', '%20'); 
        req.setEndpoint(extFileUrl); 
        req.setMethod('GET'); 
        req.setHeader('Content-Type', 'application/pdf');
        req.setCompressed(true); 
        req.setTimeout(60000); 
        HttpResponse res  = h.send(req); 
        if(res.getStatusCode() == 200) 
        {
            blob fileContent = res.getBodyAsBlob();
            ContentVersion conVer = new ContentVersion();
            conVer.ContentLocation = 'S'; 
            conVer.PathOnClient = DestinationFile + '.pdf'; // File name; the extension is what lets the file be previewed.
            conVer.Title = DestinationFile; // Display name of the file
            conVer.VersionData = fileContent;
            toBeInserted.add(conVer);
            System.Debug('Success');
        }
        else
        {
            System.debug('Error Response ' + res.getBody());
            System.Debug(' Status ' + res.getStatus());
            System.Debug(' Status Code' + res.getStatusCode());
            System.Debug(' Response String' + res.toString());
        }
    }
}
SplitPDFByTextSearchTest.cls
/**
 * Unit tests for SplitPDFByTextSearch. Callouts are answered by the nested
 * SplitPDFByTextMock, which returns the same canned response for every request.
 */
@isTest
private class SplitPDFByTextSearchTest {

    // Happy path: the mocked split response lists two URLs, so two files are stored.
    @isTest
    static void testSplitPdfUsingPage()
    {
        Test.startTest();
        Test.setMock(HttpCalloutMock.class, new SplitPDFByTextSearchTest.SplitPDFByTextMock());
        SplitPDFByTextSearch splitPdf = new SplitPDFByTextSearch();
        splitPdf.splitPdfByText();
        Test.stopTest();

        Integer savedFiles = [SELECT COUNT() FROM ContentVersion WHERE Title LIKE 'SplitPDFByTextSearch_Result_%'];
        System.assertEquals(2, savedFiles, 'One ContentVersion should be stored per URL in the mocked response.');
    }
    
    // No mock is registered, so the callout fails inside splitPdfByText; the
    // method is expected to handle the failure itself and store nothing.
    @isTest
    static void testSplitPdfUsingPageForCatch()
    {
        Test.startTest();
        SplitPDFByTextSearch splitPdf = new SplitPDFByTextSearch();
        splitPdf.splitPdfByText();
        Test.stopTest();

        Integer savedFiles = [SELECT COUNT() FROM ContentVersion WHERE Title LIKE 'SplitPDFByTextSearch_Result_%'];
        System.assertEquals(0, savedFiles, 'No ContentVersion should be stored when the callout fails.');
    }
    
    /**
     * Callout mock returning a fixed 200 JSON response that lists two result URLs.
     * The same response is returned for the split POST and for each download GET.
     */
    public class SplitPDFByTextMock implements HttpCalloutMock {
        public HTTPResponse respond(HTTPRequest req) {
            HttpResponse res = new HttpResponse();
            String testBody = '{ "urls": [ "https://pdf-temp-files.s3.us-west-2.amazonaws.com/BV5R88QIPOZPKVO0PE7GYKVBETULHN0Y/invoice-extracted_page1.pdf?X-Amz-Expires=3600&X-Amz-Security-Token=FwoGZXIvYXdzEG8aDKuBm3gDES1tmO7uSSKCATaP8OHW3vpxMhhGOh4utp7hCQzkW5aPbntexlu9TwfIMzceDB6zBv%2F5L4kWTEf8xwOqTckcDrAeDd0oL%2FbWpbPgP68r01OjXwSBbB0TMu3KftSf7VN%2B7NhOl4MXX1HIOSCrvcCUvLOql2DcpVhjFwPJ%2B1JMx6yorVx1zNMLwj98Kuco%2FI6nnwYyKBbvDvsM%2FQpG1PaeKq7jq7kfl4MIt7KkwPs8LyuEMIkaLznfkdpSMFw%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHFPQ3GI5S/20230213/us-west-2/s3/aws4_request&X-Amz-Date=20230213T054546Z&X-Amz-SignedHeaders=host&X-Amz-Signature=f3d62c0952b8bc772a77c3d146b7fb55a8e86129b5cb351500f73aead8c1dbfa", "https://pdf-temp-files.s3.us-west-2.amazonaws.com/98TZ8L6PIAX3QWHF4POX4Q9TN6BHPIIH/invoice-extracted_page2.pdf?X-Amz-Expires=3600&X-Amz-Security-Token=FwoGZXIvYXdzEG8aDKuBm3gDES1tmO7uSSKCATaP8OHW3vpxMhhGOh4utp7hCQzkW5aPbntexlu9TwfIMzceDB6zBv%2F5L4kWTEf8xwOqTckcDrAeDd0oL%2FbWpbPgP68r01OjXwSBbB0TMu3KftSf7VN%2B7NhOl4MXX1HIOSCrvcCUvLOql2DcpVhjFwPJ%2B1JMx6yorVx1zNMLwj98Kuco%2FI6nnwYyKBbvDvsM%2FQpG1PaeKq7jq7kfl4MIt7KkwPs8LyuEMIkaLznfkdpSMFw%3D&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIA4NRRSZPHFPQ3GI5S/20230213/us-west-2/s3/aws4_request&X-Amz-Date=20230213T054546Z&X-Amz-SignedHeaders=host&X-Amz-Signature=98d5bf380c9176f410178b1554dae798faad7158f52153bbe1dbe1b86cb91fe7" ], "pageCount": 3, "error": false, "status": 200, "name": "invoice-extracted.pdf", "credits": 105, "duration": 465, "remainingCredits": 1166270 }';
            res.setHeader('Content-Type', 'application/json');
            res.setBody(testBody);
            res.setStatusCode(200);
            return res;
        }
    }
}

PDF.co Web API: a Web API with a set of tools for document manipulation, data conversion, data extraction, and splitting and merging of documents. It includes image recognition, built-in OCR, barcode generation, and barcode decoders that read barcodes from scans, pictures and PDF files.

Get your PDF.co API key here!

Download Source Code (.zip)

return to the previous page explore PDF Split endpoint