Prerequisites
Before you begin, make sure you have:
API Key: Get your API key from the Console.
SDK or HTTP Client: Install the official SDK, or use curl/fetch.
Step 1: Install the SDK
pip install pulse-python-sdk
Step 2: Extract Content from a URL
Extract content from a document URL:
from pulse import Pulse

# Replace the placeholder with your own API key.
client = Pulse(api_key="YOUR_API_KEY")

# Extract from a URL
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    extract_figure=True,
    return_html=True,
)

# Show the job ID and a preview (first 200 characters) of the content.
print(f"Job ID: {response.job_id}")
print(f"Content: {response.content[:200]}...")
Step 3: Uploading Files Directly
Both /extract and /extract_async accept file uploads directly via multipart/form-data:
from pulse import Pulse

# Replace the placeholder with your own API key.
client = Pulse(api_key="YOUR_API_KEY")

# Upload and extract a local file
# Open in binary mode; the context manager closes the file once the
# extract call has returned.
with open("invoice.pdf", "rb") as f:
    response = client.extract(
        file=f,
        pages="1-5",  # 1-indexed page range
        extract_figure=True,
    )

print(f"Job ID: {response.job_id}")
print(f"Content: {response.content}")
Use `file` for direct uploads, or `file_url` when you have a public or presigned URL. Both parameters work with `/extract` and `/extract_async`.
Step 4: Asynchronous Processing for Large Documents
For documents over 50 pages or when processing multiple files, use async extraction:
import time

from pulse import Pulse

# Replace the placeholder with your own API key.
client = Pulse(api_key="YOUR_API_KEY")

# Submit async extraction
submission = client.extract_async(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    extract_figure=True,
)
print(f"Job submitted: {submission.job_id}")

# Poll for completion
job_id = submission.job_id
while True:
    job_status = client.jobs.get_job(job_id=job_id)
    print(f"Status: {job_status.status}")

    if job_status.status == "completed":
        print("Job completed!")
        print(f"Result: {job_status.result}")
        break
    elif job_status.status in ["failed", "canceled"]:
        # Stop polling on these statuses; no result is printed for them.
        print(f"Job ended: {job_status.status}")
        break

    # Any other status: wait 2 seconds before checking again.
    time.sleep(2)
Common Use Cases
# Extract structured data from invoices:
# JSON-Schema-style description of the fields to pull out of the document.
schema = {
    "type": "object",
    "properties": {
        "invoice_number": {"type": "string"},
        "vendor_name": {"type": "string"},
        "total": {"type": "number"},
        "line_items": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "description": {"type": "string"},
                    "amount": {"type": "number"},
                },
            },
        },
    },
}

# `client` is the Pulse client created in the earlier steps.
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    structured_output={"schema": schema},
)
# Extract key terms from contracts:
# JSON-Schema-style description of the fields to pull out of the document.
schema = {
    "type": "object",
    "properties": {
        "parties": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "role": {"type": "string"},
                },
            },
        },
        "effective_date": {"type": "string"},
        "payment_terms": {"type": "string"},
    },
}

# `client` is the Pulse client created in the earlier steps.
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    structured_output={"schema": schema},
)
Research Paper Processing
# Extract structured content from academic papers:
# JSON-Schema-style description of the fields to pull out of the document.
schema = {
    "type": "object",
    "properties": {
        "title": {"type": "string"},
        "authors": {"type": "array", "items": {"type": "string"}},
        "abstract": {"type": "string"},
        "keywords": {"type": "array", "items": {"type": "string"}},
    },
}

# `client` is the Pulse client created in the earlier steps.
response = client.extract(
    file_url="https://www.impact-bank.com/user/file/dummy_statement.pdf",
    structured_output={"schema": schema},
)
Next Steps