Skip to main content

Prerequisites

Before you begin, make sure you have:

API Key

Get your API key from the Console

SDK or HTTP Client

Install the official SDK or use curl/fetch

Step 1: Install the SDK

pip install pulse-python-sdk

Step 2: Basic Document Extraction

Extract content from a document URL:
from pulse import Pulse

# Document to extract; swap in any URL the API can reach.
DOCUMENT_URL = "https://www.impact-bank.com/user/file/dummy_statement.pdf"

client = Pulse(api_key="YOUR_API_KEY")

# Run the extraction against the URL with the extract_figure and
# return_html options switched on.
response = client.extract(file_url=DOCUMENT_URL, extract_figure=True, return_html=True)

# Show the Markdown output first, then the ID that identifies this extraction.
print(f"Markdown: {response.markdown}")
print(f"Extraction ID: {response.extraction_id}")

Step 3: Uploading Files Directly

/extract accepts file uploads directly via multipart/form-data:
from pulse import Pulse

# Local file to send with the request.
LOCAL_PDF = "invoice.pdf"

client = Pulse(api_key="YOUR_API_KEY")

# Open the file in binary mode and pass the handle to extract(); the `with`
# block closes the handle once the call returns.
with open(LOCAL_PDF, "rb") as pdf_file:
    # `pages` is a 1-indexed page range.
    response = client.extract(file=pdf_file, pages="1-5", extract_figure=True)

print(f"Extraction ID: {response.extraction_id}")
print(f"Markdown: {response.markdown}")
Use file for direct uploads or file_url when you have a public/presigned URL.

Step 4: Asynchronous Processing for Large Documents

For documents over 50 pages or when processing multiple files, set async: true on the /extract endpoint (async_=True in the Python SDK, because async is a reserved word in Python):
import time
from pulse import Pulse

client = Pulse(api_key="YOUR_API_KEY")

# Polling settings: how often to check the job, and how long to keep checking
# before giving up. Tune MAX_WAIT_SECONDS to the size of your documents.
POLL_INTERVAL_SECONDS = 2
MAX_WAIT_SECONDS = 600

# Submit async extraction
submission = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    extract_figure=True,
    async_=True,  # Note: async_ in Python (async is reserved)
)

job_id = submission.job_id
print(f"Job submitted: {job_id}")

# Poll until the job reaches a terminal status or the deadline passes.
# time.monotonic() is used so wall-clock adjustments cannot stretch or
# shorten the wait.
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    job_status = client.jobs.get_job(job_id=job_id)
    print(f"Status: {job_status.status}")

    if job_status.status == "completed":
        print("Job completed!")
        print(f"Result: {job_status.result}")
        break
    if job_status.status in ("failed", "canceled"):
        print(f"Job ended: {job_status.status}")
        break
    if time.monotonic() >= deadline:
        # Stop instead of spinning forever on a job that never finishes.
        print(f"Timed out after {MAX_WAIT_SECONDS}s waiting for job {job_id}")
        break

    time.sleep(POLL_INTERVAL_SECONDS)
POST /extract_async is deprecated. Use POST /extract with async: true instead. See Async Processing for details.

Common Use Cases

Extract structured data from invoices:
from pulse import Pulse

# Create the client here so this snippet runs on its own.
client = Pulse(api_key="YOUR_API_KEY")

# Fields to pull out of the invoice: three top-level values plus a list of
# line items, each with a description and an amount.
schema = {
    "type": "object",
    "properties": {
        "invoice_number": {"type": "string"},
        "vendor_name": {"type": "string"},
        "total": {"type": "number"},
        "line_items": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "description": {"type": "string"},
                    "amount": {"type": "number"},
                },
            },
        },
    },
}

# Step 1: Extract the document
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf"
)

# Step 2: Apply schema via /schema endpoint, referencing the extraction
# from step 1 by its ID
schema_result = client.schema(
    extraction_id=response.extraction_id,
    schema_config={"schema": schema},
)
Extract key terms from contracts:
from pulse import Pulse

# Create the client here so this snippet runs on its own.
client = Pulse(api_key="YOUR_API_KEY")

# Fields to pull out of the contract: the list of parties (name and role),
# the effective date, and the payment terms.
schema = {
    "type": "object",
    "properties": {
        "parties": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "role": {"type": "string"},
                },
            },
        },
        "effective_date": {"type": "string"},
        "payment_terms": {"type": "string"},
    },
}

# Step 1: Extract (sample URL; point this at your own document)
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf"
)

# Step 2: Apply schema, referencing the extraction from step 1 by its ID
schema_result = client.schema(
    extraction_id=response.extraction_id,
    schema_config={"schema": schema},
)
Extract structured content from academic papers:
from pulse import Pulse

# Create the client here so this snippet runs on its own.
client = Pulse(api_key="YOUR_API_KEY")

# Fields to pull out of the paper: title and abstract as strings, authors
# and keywords as lists of strings.
schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "authors": {"type": "array", "items": {"type": "string"}},
        "abstract": {"type": "string"},
        "keywords": {"type": "array", "items": {"type": "string"}},
    },
}

# Step 1: Extract (sample URL; point this at your own document)
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf"
)

# Step 2: Apply schema, referencing the extraction from step 1 by its ID
schema_result = client.schema(
    extraction_id=response.extraction_id,
    schema_config={"schema": schema},
)

Next Steps