Skip to main content

Prerequisites

Before you begin, make sure you have:

API Key

Get your API key from the Console

SDK or HTTP Client

Install the official SDK or use curl/fetch

Step 1: Install the SDK

pip install pulse-python-sdk

Step 2: Basic Document Extraction

Extract content from a document URL:
from pulse import Pulse

# Create the client once; swap the placeholder for your real API key.
client = Pulse(api_key="YOUR_API_KEY")

# Options for a synchronous extraction of a document reachable by URL.
url_extract_options = {
    "file_url": "https://www.impact-bank.com/user/file/dummy_statement.pdf",
    "extract_figure": True,
    "return_html": True,
}
response = client.extract(**url_extract_options)

# Show the job identifier and only the first 200 characters of the content.
print(f"Job ID: {response.job_id}")
print(f"Content: {response.content[:200]}...")

Step 3: Uploading Files Directly

Both /extract and /extract_async accept file uploads directly via multipart/form-data:
from pulse import Pulse

client = Pulse(api_key="YOUR_API_KEY")

# Settings sent alongside the uploaded file.
upload_options = {
    "pages": "1-5",  # 1-indexed page range
    "extract_figure": True,
}

# Open the local file in binary mode and pass the handle as `file`;
# the `with` block closes it once the extract call has returned.
with open("invoice.pdf", "rb") as pdf_file:
    response = client.extract(file=pdf_file, **upload_options)

print(f"Job ID: {response.job_id}")
print(f"Content: {response.content}")
Use the `file` parameter for direct uploads, or `file_url` when you have a public/presigned URL. Both work with `/extract` and `/extract_async`.

Step 4: Asynchronous Processing for Large Documents

For documents over 50 pages or when processing multiple files, use async extraction:
import time
from pulse import Pulse

client = Pulse(api_key="YOUR_API_KEY")

# Submit async extraction; the returned object carries the job ID that is
# used below to poll for the outcome.
submission = client.extract_async(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    extract_figure=True
)

print(f"Job submitted: {submission.job_id}")

# Polling settings. The deadline guarantees the loop ends even if the job
# never reaches one of the terminal states checked below; tune both values
# for your workload.
POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 600

# Poll for completion
job_id = submission.job_id
# monotonic() is unaffected by system clock adjustments, so the deadline
# cannot jump forwards or backwards while we wait.
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    job_status = client.jobs.get_job(job_id=job_id)
    print(f"Status: {job_status.status}")

    if job_status.status == "completed":
        print("Job completed!")
        print(f"Result: {job_status.result}")
        break
    elif job_status.status in ("failed", "canceled"):
        print(f"Job ended: {job_status.status}")
        break

    # Give up rather than polling forever on a job that is stuck.
    if time.monotonic() >= deadline:
        print(f"Timed out after {MAX_WAIT_SECONDS}s waiting for job {job_id}")
        break

    time.sleep(POLL_INTERVAL_SECONDS)

Common Use Cases

Extract structured data from invoices:
# Shape of a single entry in the invoice's line-item list.
invoice_line_item = {
    "type": "object",
    "properties": {
        "description": {"type": "string"},
        "amount": {"type": "number"},
    },
}

# Top-level invoice fields to extract.
schema = {
    "type": "object",
    "properties": {
        "invoice_number": {"type": "string"},
        "vendor_name": {"type": "string"},
        "total": {"type": "number"},
        "line_items": {"type": "array", "items": invoice_line_item},
    },
}

# `client` is not created here; this assumes the Pulse client from the
# earlier steps is still in scope. The URL is the same sample document used
# in those steps; point it at your own invoice.
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    structured_output={"schema": schema},
)
Extract key terms from contracts:
# Shape of a single entry in the contract's list of parties.
contract_party = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "role": {"type": "string"},
    },
}

# Top-level contract terms to extract.
schema = {
    "type": "object",
    "properties": {
        "parties": {"type": "array", "items": contract_party},
        "effective_date": {"type": "string"},
        "payment_terms": {"type": "string"},
    },
}

# `client` is not created here; this assumes the Pulse client from the
# earlier steps is still in scope. The URL is the same sample document used
# in those steps; point it at your own contract.
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    structured_output={"schema": schema},
)
Extract structured content from academic papers:
def _string_array():
    """Return a fresh schema fragment describing an array of strings."""
    return {"type": "array", "items": {"type": "string"}}


# Fields to extract from an academic paper.
schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "authors": _string_array(),
        "abstract": {"type": "string"},
        "keywords": _string_array(),
    },
}

# `client` is not created here; this assumes the Pulse client from the
# earlier steps is still in scope. The URL is the same sample document used
# in those steps; point it at your own paper.
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    structured_output={"schema": schema},
)

Next Steps