Tables

Extract tables from a saved extraction

curl --request POST \
  --url https://api.runpulse.com/tables \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "extraction_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "tables_config": {
    "merge": false,
    "table_format": "html",
    "charts_to_tables": false
  },
  "async": false
}
'

import requests

url = "https://api.runpulse.com/tables"

payload = {
    "extraction_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
    "tables_config": {
        "merge": False,
        "table_format": "html",
        "charts_to_tables": False
    },
    "async": False
}
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    extraction_id: '3c90c3cc-0d44-4b50-8888-8dd25736052a',
    tables_config: {merge: false, table_format: 'html', charts_to_tables: false},
    async: false
  })
};

fetch('https://api.runpulse.com/tables', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.runpulse.com/tables",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'extraction_id' => '3c90c3cc-0d44-4b50-8888-8dd25736052a',
    'tables_config' => [
        'merge' => false,
        'table_format' => 'html',
        'charts_to_tables' => false
    ],
    'async' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "x-api-key: <api-key>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

func main() {

	url := "https://api.runpulse.com/tables"

	payload := strings.NewReader("{\n  \"extraction_id\": \"3c90c3cc-0d44-4b50-8888-8dd25736052a\",\n  \"tables_config\": {\n    \"merge\": false,\n    \"table_format\": \"html\",\n    \"charts_to_tables\": false\n  },\n  \"async\": false\n}")

	req, _ := http.NewRequest("POST", url, payload)

	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := io.ReadAll(res.Body)

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.runpulse.com/tables")
  .header("x-api-key", "<api-key>")
  .header("Content-Type", "application/json")
  .body("{\n  \"extraction_id\": \"3c90c3cc-0d44-4b50-8888-8dd25736052a\",\n  \"tables_config\": {\n    \"merge\": false,\n    \"table_format\": \"html\",\n    \"charts_to_tables\": false\n  },\n  \"async\": false\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.runpulse.com/tables")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["x-api-key"] = '<api-key>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"extraction_id\": \"3c90c3cc-0d44-4b50-8888-8dd25736052a\",\n  \"tables_config\": {\n    \"merge\": false,\n    \"table_format\": \"html\",\n    \"charts_to_tables\": false\n  },\n  \"async\": false\n}"

response = http.request(request)
puts response.read_body

{
  "tables_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "tables_output": {
    "tables": [
      {
        "table_content": "<unknown>",
        "citations": [
          "<string>"
        ],
        "from_chart": true
      }
    ]
  },
  "credits_used": 123,
  "plan_info": {
    "tier": "<string>",
    "total_credits_used": 123,
    "pages_used": 1,
    "note": "<string>"
  }
}

{
  "job_id": "<string>",
  "message": "<string>",
  "queuedAt": "2023-11-07T05:31:56Z",
  "credits_used": 123
}

POST

tables

Extract tables from a saved extraction

curl --request POST \
  --url https://api.runpulse.com/tables \
  --header 'Content-Type: application/json' \
  --header 'x-api-key: <api-key>' \
  --data '
{
  "extraction_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "tables_config": {
    "merge": false,
    "table_format": "html",
    "charts_to_tables": false
  },
  "async": false
}
'

import requests

url = "https://api.runpulse.com/tables"

payload = {
    "extraction_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
    "tables_config": {
        "merge": False,
        "table_format": "html",
        "charts_to_tables": False
    },
    "async": False
}
headers = {
    "x-api-key": "<api-key>",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

print(response.text)

const options = {
  method: 'POST',
  headers: {'x-api-key': '<api-key>', 'Content-Type': 'application/json'},
  body: JSON.stringify({
    extraction_id: '3c90c3cc-0d44-4b50-8888-8dd25736052a',
    tables_config: {merge: false, table_format: 'html', charts_to_tables: false},
    async: false
  })
};

fetch('https://api.runpulse.com/tables', options)
  .then(res => res.json())
  .then(res => console.log(res))
  .catch(err => console.error(err));

<?php

$curl = curl_init();

curl_setopt_array($curl, [
  CURLOPT_URL => "https://api.runpulse.com/tables",
  CURLOPT_RETURNTRANSFER => true,
  CURLOPT_ENCODING => "",
  CURLOPT_MAXREDIRS => 10,
  CURLOPT_TIMEOUT => 30,
  CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
  CURLOPT_CUSTOMREQUEST => "POST",
  CURLOPT_POSTFIELDS => json_encode([
    'extraction_id' => '3c90c3cc-0d44-4b50-8888-8dd25736052a',
    'tables_config' => [
        'merge' => false,
        'table_format' => 'html',
        'charts_to_tables' => false
    ],
    'async' => false
  ]),
  CURLOPT_HTTPHEADER => [
    "Content-Type: application/json",
    "x-api-key: <api-key>"
  ],
]);

$response = curl_exec($curl);
$err = curl_error($curl);

curl_close($curl);

if ($err) {
  echo "cURL Error #:" . $err;
} else {
  echo $response;
}

package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

func main() {

	url := "https://api.runpulse.com/tables"

	payload := strings.NewReader("{\n  \"extraction_id\": \"3c90c3cc-0d44-4b50-8888-8dd25736052a\",\n  \"tables_config\": {\n    \"merge\": false,\n    \"table_format\": \"html\",\n    \"charts_to_tables\": false\n  },\n  \"async\": false\n}")

	req, _ := http.NewRequest("POST", url, payload)

	req.Header.Add("x-api-key", "<api-key>")
	req.Header.Add("Content-Type", "application/json")

	res, _ := http.DefaultClient.Do(req)

	defer res.Body.Close()
	body, _ := io.ReadAll(res.Body)

	fmt.Println(string(body))

}

HttpResponse<String> response = Unirest.post("https://api.runpulse.com/tables")
  .header("x-api-key", "<api-key>")
  .header("Content-Type", "application/json")
  .body("{\n  \"extraction_id\": \"3c90c3cc-0d44-4b50-8888-8dd25736052a\",\n  \"tables_config\": {\n    \"merge\": false,\n    \"table_format\": \"html\",\n    \"charts_to_tables\": false\n  },\n  \"async\": false\n}")
  .asString();

require 'uri'
require 'net/http'

url = URI("https://api.runpulse.com/tables")

http = Net::HTTP.new(url.host, url.port)
http.use_ssl = true

request = Net::HTTP::Post.new(url)
request["x-api-key"] = '<api-key>'
request["Content-Type"] = 'application/json'
request.body = "{\n  \"extraction_id\": \"3c90c3cc-0d44-4b50-8888-8dd25736052a\",\n  \"tables_config\": {\n    \"merge\": false,\n    \"table_format\": \"html\",\n    \"charts_to_tables\": false\n  },\n  \"async\": false\n}"

response = http.request(request)
puts response.read_body

{
  "tables_id": "3c90c3cc-0d44-4b50-8888-8dd25736052a",
  "tables_output": {
    "tables": [
      {
        "table_content": "<unknown>",
        "citations": [
          "<string>"
        ],
        "from_chart": true
      }
    ]
  },
  "credits_used": 123,
  "plan_info": {
    "tier": "<string>",
    "total_credits_used": 123,
    "pages_used": 1,
    "note": "<string>"
  }
}

{
  "job_id": "<string>",
  "message": "<string>",
  "queuedAt": "2023-11-07T05:31:56Z",
  "credits_used": 123
}

Overview

Pipeline Step 2 (terminal) — Tables requires a prior extraction. This is a terminal step — no further pipeline steps can be chained after it.

Extract structured tables from a saved extraction using Pulse’s semantic and table-structure algorithms. The /tables endpoint detects and reconstructs tables from your document, handling:

Span tables — cells that merge across rows or columns (e.g., “Year Ended December 31” spanning three columns)
Multi-level header hierarchies — nested spans like period → segment → line item
Cross-page tables — tables that continue across page breaks, automatically merged with row-continuity tracking

This is particularly valuable for financial documents (10-Ks, 10-Qs, proxy statements) where span tables encode hierarchy visually rather than explicitly, causing most extraction tools to silently misalign values with the wrong columns.

This endpoint operates on saved extractions (created via /extract with storage enabled, which is the default).

To extract tables from many extractions at once, use Batch Tables.

Async Mode

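Set async: true to return immediately instead of waiting for table processing to finish. The response contains a tables_id that serves as the job ID; poll GET /job/{tables_id} until the job completes. See Polling for Results for details.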

{
  "extraction_id": "abc123-def456",
  "async": true
}

Request

Request Body

Field	Type	Required	Description
`extraction_id`	string (uuid)	Yes	ID of the saved extraction to process
`tables_config`	object	No	Configuration options for table processing
`async`	boolean	No	If `true`, returns immediately with a `tables_id` for polling. Default: `false`.

Tables Config (`tables_config`)

Field	Type	Default	Description
`merge`	boolean	`false`	Merge tables that continue across pages into a single table
`table_format`	string	`"html"`	Output format for table content. Use `"html"` for an HTML `<table>` string or `"json"` for structured headers and rows.
`charts_to_tables`	boolean	`false`	Convert figures and charts into tables using LLM processing. Resulting tables have `from_chart: true` in the response.

Response

Synchronous Response (200)

Field	Type	Description
`tables_id`	string (uuid)	Unique identifier for this tables result
`tables_output`	object	Contains the extracted tables
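`tables_output.tables`	array	List of extracted table objects
`credits_used`	number or null	Credits consumed by this request. Only present when the organization has the credit billing system enabled.
`plan_info`	object	Billing tier and cumulative usage information for the calling organization, including this tables run.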

Each table object:

Field	Type	Description
`citations`	array of strings	Bounding box table IDs for the table (e.g., `["tbl-1"]` or `["tbl-1", "tbl-2"]` for merged tables)
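`table_content`	string	The table content in the format selected by `table_format`: an HTML `<table>` string for `"html"` (the default), or structured headers and rows for `"json"`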
`from_chart`	boolean	Whether the table was derived from a chart/figure rather than a native table

{
  "tables_id": "uuid-123",
  "tables_output": {
    "tables": [
      {
        "citations": ["tbl-1", "tbl-2"],
        "table_content": "<table data-bb-table-id=\"tbl-1\" data-merged-from=\"tbl-1,tbl-2\">...</table>",
        "from_chart": false
      }
    ]
  }
}

Async Response (200)

Field	Type	Description
`tables_id`	string (uuid)	Job ID for polling
`status`	string	`"pending"`
`message`	string	Human-readable status message

{
  "tables_id": "uuid-123",
  "status": "pending",
  "message": "Table processing started. Poll GET /job/{tables_id} for results."
}

Example Usage

Basic Table Extraction

from pulse import Pulse

client = Pulse(api_key="YOUR_API_KEY")

# Step 1: Extract the document
extract_result = client.extract(
    file=open("10k-filing.pdf", "rb")
)

# Step 2: Extract tables
tables_result = client.tables(
    extraction_id=extract_result.extraction_id
)

for table in tables_result.tables_output.tables:
    print(f"Citations: {table.citations}")
    print(f"From chart: {table.from_chart}")
    print(table.table_content)

import { PulseClient } from "pulse-ts-sdk";
import * as fs from "fs";

const client = new PulseClient({
  apiKey: "YOUR_API_KEY",
});

// Step 1: Extract the document
const extractResult = await client.extract({
  file: fs.createReadStream("10k-filing.pdf"),
});

// Step 2: Extract tables
const tablesResult = await client.tables({
  extraction_id: extractResult.extraction_id,
});

for (const table of tablesResult.tables_output.tables) {
  console.log("Citations:", table.citations);
  console.log(table.table_content);
}

# Step 1: Extract the document
curl -X POST https://api.runpulse.com/extract \
  -H "x-api-key: YOUR_API_KEY" \
  -F "file=@10k-filing.pdf"

# Response includes extraction_id: "abc123-..."

# Step 2: Extract tables
curl -X POST https://api.runpulse.com/tables \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "extraction_id": "abc123-..."
  }'

With Cross-Page Table Merging

tables_result = client.tables(
    extraction_id=extract_result.extraction_id,
    tables_config={
        "merge": True,
        "table_format": "html"
    }
)

curl -X POST https://api.runpulse.com/tables \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "extraction_id": "abc123-...",
    "tables_config": {
      "merge": true,
      "table_format": "html"
    }
  }'

With Chart-to-Table Conversion

Convert figures and charts into structured tables using LLM processing. Chart-derived tables are marked with from_chart: true in the response.

tables_result = client.tables(
    extraction_id=extract_result.extraction_id,
    tables_config={
        "merge": True,
        "charts_to_tables": True
    }
)

for table in tables_result.tables_output.tables:
    if table.from_chart:
        print("Chart-derived table:")
    print(table.table_content)

curl -X POST https://api.runpulse.com/tables \
  -H "x-api-key: YOUR_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "extraction_id": "abc123-...",
    "tables_config": {
      "merge": true,
      "charts_to_tables": true
    }
  }'

Async Processing

# Start async table extraction
job = client.tables(
    extraction_id=extract_result.extraction_id,
    tables_config={"merge": True},
    async_=True
)

# Poll for results
result = client.jobs.get_job(job.tables_id)  # Repeat until status is "completed"

Error Responses

Status	Error	Description
400	Invalid request	Missing required fields or invalid configuration
401	Unauthorized	Invalid or missing API key
404	Extraction not found	The `extraction_id` doesn’t exist or you don’t have access to it
429	Rate limit exceeded	Too many requests
500	Processing error	Table processing failed

When to Use Tables vs. Basic Extraction

Basic extraction via /extract already returns tables in the markdown output. Use the /tables endpoint when you need:

Span-aware table parsing — correct handling of merged cells, multi-level headers, and column/row spans
Cross-page table merging — tables that continue across page breaks reconstructed into a single table
Financial document accuracy — SEC filings, annual reports, and other documents where misaligned columns mean wrong data
Dedicated table output — clean HTML tables with citation tracking, separated from the rest of the document content

Authorizations

x-api-key

string

header

required

Body

application/json

Input for the /tables endpoint.

extraction_id

string<uuid>

required

ID of a completed extraction to extract tables from.

tables_config

object

Table extraction configuration. If omitted, defaults are used (merge: false, table_format: "html", charts_to_tables: false).

Show child attributes

async

boolean

default:false

When true, returns immediately with a job ID. Poll GET /job/{tables_id} for the result.

Response

Table extraction result (when async=false or omitted).

Result of table extraction.

tables_id

string<uuid>

required

Persisted tables version ID. Can be used to retrieve the tables result later.

tables_output

object

required

The extracted tables data.

Show child attributes

credits_used

number<float> | null

Number of credits consumed by this request. Only present when the organization has the credit billing system enabled.

plan_info

object

Billing tier and cumulative usage information for the calling organization, including this tables run.

Show child attributes

Schema Extraction Split Document

⌘I

API Reference

Pipeline Steps

Batch & Pipelines

Forms

Jobs, Results & Webhooks

Account & Usage

Legacy Support

Overview

Async Mode

Request

Request Body

Tables Config (`tables_config`)

Response

Synchronous Response (200)

Async Response (200)

Example Usage

Basic Table Extraction

With Cross-Page Table Merging

With Chart-to-Table Conversion

Async Processing

Error Responses

When to Use Tables vs. Basic Extraction

Authorizations

Body

Response

Overview

Async Mode

Request

Request Body

Tables Config (tables_config)

Response

Synchronous Response (200)

Async Response (200)

Example Usage

Basic Table Extraction

With Cross-Page Table Merging

With Chart-to-Table Conversion

Async Processing

Error Responses

When to Use Tables vs. Basic Extraction

Authorizations

Body

Response

Overview

Async Mode

Request

Request Body

Tables Config (`tables_config`)

Response

Synchronous Response (200)

Async Response (200)

Example Usage

Basic Table Extraction

With Cross-Page Table Merging

With Chart-to-Table Conversion

Async Processing

Error Responses

When to Use Tables vs. Basic Extraction