Module veryfi.documents
View Source
import os
import base64
from typing import Dict, List, Optional
from veryfi._documents.line_items import LineItems
from veryfi._documents.tags import Tags
from veryfi._documents.pdf_split import PDFSplit
from veryfi.client_base import Client
class Documents(Tags, LineItems, PDFSplit):
    """
    Document-level operations of the Veryfi receipts/invoices API.

    Every method forwards to ``self.client._request``; the ``Tags``,
    ``LineItems`` and ``PDFSplit`` bases are initialised with the same client.
    """

    # Categories submitted by ``process_document`` when the caller supplies none.
    DEFAULT_CATEGORIES = [
        "Advertising & Marketing",
        "Automotive",
        "Bank Charges & Fees",
        "Legal & Professional Services",
        "Insurance",
        "Meals & Entertainment",
        "Office Supplies & Software",
        "Taxes & Licenses",
        "Travel",
        "Rent & Lease",
        "Repairs & Maintenance",
        "Payroll",
        "Utilities",
        "Job Supplies",
        "Grocery",
    ]

    def __init__(self, client: Client):
        """
        Store the client and initialise each base class with it.

        :param client: Client object used to issue the requests
        """
        self.client = client
        # Same initialisation order as the explicit calls: LineItems, Tags, PDFSplit.
        for base in (LineItems, Tags, PDFSplit):
            base.__init__(self, self.client)

    def get_documents(
        self,
        q: Optional[str] = None,
        external_id: Optional[str] = None,
        tag: Optional[str] = None,
        created_gt: Optional[str] = None,
        created_gte: Optional[str] = None,
        created_lt: Optional[str] = None,
        created_lte: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Search previously processed documents.
        https://docs.veryfi.com/api/receipts-invoices/search-documents/

        :param q: Search query
        :param external_id: Search by external ID
        :param tag: Search by tag
        :param created_gt: Search by created date greater than
        :param created_gte: Search by created date greater than or equal to
        :param created_lt: Search by created date less than
        :param created_lte: Search by created date less than or equal to
        :param kwargs: Additional query parameters; applied last, so they win on a key clash
        :return: List of previously processed documents
        """
        named_filters = {
            "q": q,
            "external_id": external_id,
            "tag": tag,
            "created__gt": created_gt,
            "created__gte": created_gte,
            "created__lt": created_lt,
            "created__lte": created_lte,
        }
        # Falsy filters (None, "") are omitted from the query entirely.
        query_params = {name: value for name, value in named_filters.items() if value}
        query_params.update(kwargs)
        return self.client._request("GET", "/documents/", {}, query_params)

    def get_document(self, document_id: int, **kwargs) -> Dict:
        """
        Fetch a single document by its ID.
        https://docs.veryfi.com/api/receipts-invoices/get-a-document/

        :param document_id: ID of the document you'd like to retrieve
        :param kwargs: Additional query parameters
        :return: Data extracted from the Document
        """
        return self.client._request("GET", f"/documents/{document_id}/", {}, kwargs)

    def process_document(
        self,
        file_path: str,
        categories: Optional[List] = None,
        delete_after_processing: bool = False,
        **kwargs,
    ) -> Dict:
        """
        Upload a local file and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document;
            ``DEFAULT_CATEGORIES`` is used when this is empty or None
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param kwargs: Additional body parameters; they override the generated ones on a key clash
        :return: Data extracted from the document
        """
        categories = categories or self.DEFAULT_CATEGORIES
        file_name = os.path.basename(file_path)
        with open(file_path, "rb") as source:
            raw_bytes = source.read()
        # The file content travels inside the request body as base64 text.
        payload = {
            "file_name": file_name,
            "file_data": base64.b64encode(raw_bytes).decode("utf-8"),
            "categories": categories,
            "auto_delete": delete_after_processing,
        }
        return self.client._request("POST", "/documents/", {**payload, **kwargs})

    def process_document_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        delete_after_processing: bool = False,
        boost_mode: bool = False,
        external_id: Optional[str] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """
        Process a document fetched from a URL and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param boost_mode: Flag that tells Veryfi whether boost mode should be enabled. When set to 1, Veryfi will skip data enrichment steps, but will process the document faster. Default value for this flag is 0
        :param external_id: Optional custom document identifier. Use this if you would like to assign your own ID to documents
        :param kwargs: Additional body parameters; they override the named ones on a key clash
        :return: Data extracted from the document.
        """
        # Unset options are sent as None rather than being dropped.
        payload = dict(
            auto_delete=delete_after_processing,
            boost_mode=boost_mode,
            categories=categories,
            external_id=external_id,
            file_url=file_url,
            file_urls=file_urls,
            max_pages_to_process=max_pages_to_process,
        )
        return self.client._request("POST", "/documents/", {**payload, **kwargs})

    def process_documents_bulk(self, file_urls: List[str]) -> List[int]:
        """
        Submit several documents by URL for processing in one call.
        If you want to use this endpoint, please contact support@veryfi.com first. Veryfi's Bulk upload allows you to process multiple Documents.
        https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

        :param file_urls: List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :return: List of document IDs being processed
        """
        return self.client._request("POST", "/documents/bulk/", {"file_urls": file_urls})

    def delete_document(self, document_id: int):
        """
        Delete a document from Veryfi.
        https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

        :param document_id: ID of the document you'd like to delete
        """
        # The ID is sent both in the URL and in the request body; nothing is returned.
        target = f"/documents/{document_id}/"
        self.client._request("DELETE", target, {"id": document_id})

    def update_document(self, document_id: int, **kwargs) -> Dict:
        """
        Update fields of a previously processed document, such as `vendor`, `date` or `notes`.
        https://docs.veryfi.com/api/receipts-invoices/update-a-document/
        ```veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")```

        :param document_id: ID of the document you'd like to update
        :param kwargs: fields to update
        :return: A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
        """
        target = f"/documents/{document_id}/"
        return self.client._request("PUT", target, kwargs)
Classes
Documents
class Documents(
client: veryfi.client_base.Client
)
View Source
class Documents(Tags, LineItems, PDFSplit):
    """
    Document-level operations of the Veryfi receipts/invoices API.

    Every method forwards to ``self.client._request``; the ``Tags``,
    ``LineItems`` and ``PDFSplit`` bases are initialised with the same client.
    """

    # Categories submitted by ``process_document`` when the caller supplies none.
    DEFAULT_CATEGORIES = [
        "Advertising & Marketing",
        "Automotive",
        "Bank Charges & Fees",
        "Legal & Professional Services",
        "Insurance",
        "Meals & Entertainment",
        "Office Supplies & Software",
        "Taxes & Licenses",
        "Travel",
        "Rent & Lease",
        "Repairs & Maintenance",
        "Payroll",
        "Utilities",
        "Job Supplies",
        "Grocery",
    ]

    def __init__(self, client: Client):
        """
        Store the client and initialise each base class with it.

        :param client: Client object used to issue the requests
        """
        self.client = client
        # Same initialisation order as the explicit calls: LineItems, Tags, PDFSplit.
        for base in (LineItems, Tags, PDFSplit):
            base.__init__(self, self.client)

    def get_documents(
        self,
        q: Optional[str] = None,
        external_id: Optional[str] = None,
        tag: Optional[str] = None,
        created_gt: Optional[str] = None,
        created_gte: Optional[str] = None,
        created_lt: Optional[str] = None,
        created_lte: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Search previously processed documents.
        https://docs.veryfi.com/api/receipts-invoices/search-documents/

        :param q: Search query
        :param external_id: Search by external ID
        :param tag: Search by tag
        :param created_gt: Search by created date greater than
        :param created_gte: Search by created date greater than or equal to
        :param created_lt: Search by created date less than
        :param created_lte: Search by created date less than or equal to
        :param kwargs: Additional query parameters; applied last, so they win on a key clash
        :return: List of previously processed documents
        """
        named_filters = {
            "q": q,
            "external_id": external_id,
            "tag": tag,
            "created__gt": created_gt,
            "created__gte": created_gte,
            "created__lt": created_lt,
            "created__lte": created_lte,
        }
        # Falsy filters (None, "") are omitted from the query entirely.
        query_params = {name: value for name, value in named_filters.items() if value}
        query_params.update(kwargs)
        return self.client._request("GET", "/documents/", {}, query_params)

    def get_document(self, document_id: int, **kwargs) -> Dict:
        """
        Fetch a single document by its ID.
        https://docs.veryfi.com/api/receipts-invoices/get-a-document/

        :param document_id: ID of the document you'd like to retrieve
        :param kwargs: Additional query parameters
        :return: Data extracted from the Document
        """
        return self.client._request("GET", f"/documents/{document_id}/", {}, kwargs)

    def process_document(
        self,
        file_path: str,
        categories: Optional[List] = None,
        delete_after_processing: bool = False,
        **kwargs,
    ) -> Dict:
        """
        Upload a local file and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document;
            ``DEFAULT_CATEGORIES`` is used when this is empty or None
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param kwargs: Additional body parameters; they override the generated ones on a key clash
        :return: Data extracted from the document
        """
        categories = categories or self.DEFAULT_CATEGORIES
        file_name = os.path.basename(file_path)
        with open(file_path, "rb") as source:
            raw_bytes = source.read()
        # The file content travels inside the request body as base64 text.
        payload = {
            "file_name": file_name,
            "file_data": base64.b64encode(raw_bytes).decode("utf-8"),
            "categories": categories,
            "auto_delete": delete_after_processing,
        }
        return self.client._request("POST", "/documents/", {**payload, **kwargs})

    def process_document_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        delete_after_processing: bool = False,
        boost_mode: bool = False,
        external_id: Optional[str] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """
        Process a document fetched from a URL and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param boost_mode: Flag that tells Veryfi whether boost mode should be enabled. When set to 1, Veryfi will skip data enrichment steps, but will process the document faster. Default value for this flag is 0
        :param external_id: Optional custom document identifier. Use this if you would like to assign your own ID to documents
        :param kwargs: Additional body parameters; they override the named ones on a key clash
        :return: Data extracted from the document.
        """
        # Unset options are sent as None rather than being dropped.
        payload = dict(
            auto_delete=delete_after_processing,
            boost_mode=boost_mode,
            categories=categories,
            external_id=external_id,
            file_url=file_url,
            file_urls=file_urls,
            max_pages_to_process=max_pages_to_process,
        )
        return self.client._request("POST", "/documents/", {**payload, **kwargs})

    def process_documents_bulk(self, file_urls: List[str]) -> List[int]:
        """
        Submit several documents by URL for processing in one call.
        If you want to use this endpoint, please contact support@veryfi.com first. Veryfi's Bulk upload allows you to process multiple Documents.
        https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

        :param file_urls: List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :return: List of document IDs being processed
        """
        return self.client._request("POST", "/documents/bulk/", {"file_urls": file_urls})

    def delete_document(self, document_id: int):
        """
        Delete a document from Veryfi.
        https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

        :param document_id: ID of the document you'd like to delete
        """
        # The ID is sent both in the URL and in the request body; nothing is returned.
        target = f"/documents/{document_id}/"
        self.client._request("DELETE", target, {"id": document_id})

    def update_document(self, document_id: int, **kwargs) -> Dict:
        """
        Update fields of a previously processed document, such as `vendor`, `date` or `notes`.
        https://docs.veryfi.com/api/receipts-invoices/update-a-document/
        ```veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")```

        :param document_id: ID of the document you'd like to update
        :param kwargs: fields to update
        :return: A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
        """
        target = f"/documents/{document_id}/"
        return self.client._request("PUT", target, kwargs)
Ancestors (in MRO)
- veryfi._documents.tags.Tags
- veryfi._documents.line_items.LineItems
- veryfi._documents.pdf_split.PDFSplit
Descendants
- veryfi.client.Client
Class variables
DEFAULT_CATEGORIES
Methods
add_line_item
def add_line_item(
self,
document_id: int,
payload: Dict
) -> Dict
Add a new line item on an existing document.
https://docs.veryfi.com/api/receipts-invoices/create-a-line-item/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to update | None |
payload | None | line item object to add | None |
Returns:
Type | Description |
---|---|
None | Added line item data |
View Source
def add_line_item(self, document_id: int, payload: Dict) -> Dict:
    """
    Create a line item on an existing document.
    https://docs.veryfi.com/api/receipts-invoices/create-a-line-item/

    :param document_id: ID of the document you'd like to update
    :param payload: line item object to add
    :return: Added line item data
    """
    target = f"/documents/{document_id}/line-items/"
    return self.client._request("POST", target, payload)
add_tag
def add_tag(
self,
document_id,
tag_name
)
Add a new tag on an existing document.
https://docs.veryfi.com/api/receipts-invoices/add-a-tag-to-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to update | None |
tag_name | None | name of the new tag | None |
Returns:
Type | Description |
---|---|
None | Added tag data |
View Source
def add_tag(self, document_id, tag_name):
    """
    Attach a single new tag to an existing document.
    https://docs.veryfi.com/api/receipts-invoices/add-a-tag-to-a-document/

    :param document_id: ID of the document you'd like to update
    :param tag_name: name of the new tag
    :return: Added tag data
    """
    return self.client._request("PUT", f"/documents/{document_id}/tags/", {"name": tag_name})
add_tags
def add_tags(
self,
document_id,
tags
)
Add multiple tags on an existing document.
https://docs.veryfi.com/api/receipts-invoices/add-tags-to-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to update | None |
tags | None | array of strings | None |
Returns:
Type | Description |
---|---|
None | Added tags data |
View Source
def add_tags(self, document_id, tags):
    """
    Attach several tags to an existing document in one request.
    https://docs.veryfi.com/api/receipts-invoices/add-tags-to-a-document/

    :param document_id: ID of the document you'd like to update
    :param tags: array of strings
    :return: Added tags data
    """
    return self.client._request("POST", f"/documents/{document_id}/tags/", {"tags": tags})
delete_document
def delete_document(
self,
document_id: int
)
Delete Document from Veryfi
https://docs.veryfi.com/api/receipts-invoices/delete-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to delete | None |
View Source
def delete_document(self, document_id: int):
    """
    Delete a document from Veryfi.
    https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

    :param document_id: ID of the document you'd like to delete
    """
    # The ID is sent both in the URL and in the request body; nothing is returned.
    target = f"/documents/{document_id}/"
    self.client._request("DELETE", target, {"id": document_id})
delete_line_item
def delete_line_item(
self,
document_id: int,
line_item_id: int
)
Delete an existing line item on an existing document.
https://docs.veryfi.com/api/receipts-invoices/delete-a-line-item/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to delete | None |
line_item_id | None | ID of the line item you'd like to delete | None |
View Source
def delete_line_item(self, document_id: int, line_item_id: int):
    """
    Remove one line item from an existing document.
    https://docs.veryfi.com/api/receipts-invoices/delete-a-line-item/

    :param document_id: ID of the document the line item belongs to
    :param line_item_id: ID of the line item you'd like to delete
    """
    target = f"/documents/{document_id}/line-items/{line_item_id}"
    self.client._request("DELETE", target)
delete_line_items
def delete_line_items(
self,
document_id: int
)
Delete all line items on an existing document.
https://docs.veryfi.com/api/receipts-invoices/delete-all-document-line-items/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to delete | None |
View Source
def delete_line_items(self, document_id: int):
    """
    Remove every line item from an existing document.
    https://docs.veryfi.com/api/receipts-invoices/delete-all-document-line-items/

    :param document_id: ID of the document whose line items you'd like to delete
    """
    target = f"/documents/{document_id}/line-items/"
    self.client._request("DELETE", target)
delete_tag
def delete_tag(
self,
document_id,
tag_id
)
Unlink a tag from the list of tags assigned to a specific Document.
https://docs.veryfi.com/api/receipts-invoices/unlink-a-tag-from-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document | None |
tag_id | None | ID of the tag you'd like to unlink | None |
View Source
def delete_tag(self, document_id, tag_id):
    """
    Unlink one tag from the tags assigned to a specific document.
    https://docs.veryfi.com/api/receipts-invoices/unlink-a-tag-from-a-document/

    :param document_id: ID of the document
    :param tag_id: ID of the tag you'd like to unlink
    """
    self.client._request("DELETE", f"/documents/{document_id}/tags/{tag_id}", {})
delete_tags
def delete_tags(
self,
document_id
)
Unlink all tags assigned to a specific Document.
https://docs.veryfi.com/api/receipts-invoices/unlink-all-tags-from-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document | None |
View Source
def delete_tags(self, document_id):
    """
    Unlink every tag assigned to a specific document.
    https://docs.veryfi.com/api/receipts-invoices/unlink-all-tags-from-a-document/

    :param document_id: ID of the document
    """
    self.client._request("DELETE", f"/documents/{document_id}/tags", {})
get_document
def get_document(
self,
document_id: int,
**kwargs
) -> Dict
Retrieve document by ID
https://docs.veryfi.com/api/receipts-invoices/get-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to retrieve | None |
kwargs | None | Additional query parameters | None |
Returns:
Type | Description |
---|---|
None | Data extracted from the Document |
View Source
def get_document(self, document_id: int, **kwargs) -> Dict:
    """
    Fetch a single document by its ID.
    https://docs.veryfi.com/api/receipts-invoices/get-a-document/

    :param document_id: ID of the document you'd like to retrieve
    :param kwargs: Additional query parameters
    :return: Data extracted from the Document
    """
    return self.client._request("GET", f"/documents/{document_id}/", {}, kwargs)
get_documents
def get_documents(
self,
q: Optional[str] = None,
external_id: Optional[str] = None,
tag: Optional[str] = None,
created_gt: Optional[str] = None,
created_gte: Optional[str] = None,
created_lt: Optional[str] = None,
created_lte: Optional[str] = None,
**kwargs
) -> Dict
Get list of documents.
https://docs.veryfi.com/api/receipts-invoices/search-documents/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
q | None | Search query | None |
external_id | None | Search by external ID | None |
tag | None | Search by tag | None |
created_gt | None | Search by created date greater than | None |
created_gte | None | Search by created date greater than or equal to | None |
created_lt | None | Search by created date less than | None |
created_lte | None | Search by created date less than or equal to | None |
kwargs | None | Additional query parameters | None |
Returns:
Type | Description |
---|---|
None | List of previously processed documents |
View Source
def get_documents(
    self,
    q: Optional[str] = None,
    external_id: Optional[str] = None,
    tag: Optional[str] = None,
    created_gt: Optional[str] = None,
    created_gte: Optional[str] = None,
    created_lt: Optional[str] = None,
    created_lte: Optional[str] = None,
    **kwargs,
) -> Dict:
    """
    Search previously processed documents.
    https://docs.veryfi.com/api/receipts-invoices/search-documents/

    :param q: Search query
    :param external_id: Search by external ID
    :param tag: Search by tag
    :param created_gt: Search by created date greater than
    :param created_gte: Search by created date greater than or equal to
    :param created_lt: Search by created date less than
    :param created_lte: Search by created date less than or equal to
    :param kwargs: Additional query parameters; applied last, so they win on a key clash
    :return: List of previously processed documents
    """
    named_filters = {
        "q": q,
        "external_id": external_id,
        "tag": tag,
        "created__gt": created_gt,
        "created__gte": created_gte,
        "created__lt": created_lt,
        "created__lte": created_lte,
    }
    # Falsy filters (None, "") are omitted from the query entirely.
    query_params = {name: value for name, value in named_filters.items() if value}
    query_params.update(kwargs)
    return self.client._request("GET", "/documents/", {}, query_params)
get_documents_from_pdf
def get_documents_from_pdf(
self,
document_id: int
)
Get Documents from PDF endpoint allows you to retrieve a collection of previously processed documents.
https://docs.veryfi.com/api/receipts-invoices/get-documents-from-pdf/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to retrieve | None |
Returns:
Type | Description |
---|---|
None | The processed Document response. |
View Source
def get_documents_from_pdf(self, document_id: int):
    """
    Retrieve the collection of previously processed documents for a submitted PDF.
    https://docs.veryfi.com/api/receipts-invoices/get-documents-from-pdf/

    :param document_id: ID of the document you'd like to retrieve
    :return: The processed Document response.
    """
    return self.client._request("GET", f"/documents-set/{document_id}", {})
get_line_item
def get_line_item(
self,
document_id: int,
line_item_id: int
)
Retrieve a line item for existing document by ID.
https://docs.veryfi.com/api/receipts-invoices/get-a-line-item/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to retrieve | None |
line_item_id | None | ID of the line item you'd like to retrieve | None |
Returns:
Type | Description |
---|---|
None | Line item extracted from the document |
View Source
def get_line_item(self, document_id: int, line_item_id: int):
    """
    Fetch one line item of an existing document by ID.
    https://docs.veryfi.com/api/receipts-invoices/get-a-line-item/

    :param document_id: ID of the document you'd like to retrieve
    :param line_item_id: ID of the line item you'd like to retrieve
    :return: Line item extracted from the document
    """
    target = f"/documents/{document_id}/line-items/{line_item_id}"
    return self.client._request("GET", target)
get_line_items
def get_line_items(
self,
document_id: int
)
Retrieve all line items for a document.
https://docs.veryfi.com/api/receipts-invoices/get-document-line-items/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to retrieve | None |
Returns:
Type | Description |
---|---|
None | List of line items extracted from the document |
View Source
def get_line_items(self, document_id: int):
    """
    Fetch every line item of a document.
    https://docs.veryfi.com/api/receipts-invoices/get-document-line-items/

    :param document_id: ID of the document you'd like to retrieve
    :return: List of line items extracted from the document
    """
    target = f"/documents/{document_id}/line-items/"
    return self.client._request("GET", target)
get_pdf
def get_pdf(
self,
**kwargs
)
The Get a Submitted PDF endpoint allows you to retrieve a collection of previously processed PDFs.
https://docs.veryfi.com/api/receipts-invoices/get-submitted-pdf/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
kwargs | None | Additional query parameters. | None |
Returns:
Type | Description |
---|---|
None | The processed Document response. |
View Source
def get_pdf(self, **kwargs):
    """
    Query the ``/documents-set/`` endpoint for previously submitted PDFs.
    https://docs.veryfi.com/api/receipts-invoices/get-submitted-pdf/

    :param kwargs: Additional query parameters.
    :return: The processed Document response.
    """
    return self.client._request("GET", "/documents-set/", {}, kwargs)
get_tags
def get_tags(
self,
document_id
)
Return all tags assigned to a specific Document.
https://docs.veryfi.com/api/receipts-invoices/get-document-tags/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to get | None |
Returns:
Type | Description |
---|---|
None | Tags assigned to the document |
View Source
def get_tags(self, document_id):
    """
    Return all tags assigned to a specific document.
    https://docs.veryfi.com/api/receipts-invoices/get-document-tags/

    :param document_id: ID of the document whose tags you'd like to get
    :return: Tags assigned to the document
    """
    return self.client._request("GET", f"/documents/{document_id}/tags", {})
process_document
def process_document(
self,
file_path: str,
categories: Optional[List] = None,
delete_after_processing: bool = False,
**kwargs
) -> Dict
Process a document and extract all the fields from it.
https://docs.veryfi.com/api/receipts-invoices/process-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_path | None | Path on disk to a file to submit for data extraction | None |
categories | None | List of categories Veryfi can use to categorize the document | None |
delete_after_processing | None | Delete this document from Veryfi after data has been extracted | None |
kwargs | None | Additional body parameters | None |
Returns:
Type | Description |
---|---|
None | Data extracted from the document |
View Source
def process_document(
    self,
    file_path: str,
    categories: Optional[List] = None,
    delete_after_processing: bool = False,
    **kwargs,
) -> Dict:
    """
    Upload a local file and extract all the fields from it.
    https://docs.veryfi.com/api/receipts-invoices/process-a-document/

    :param file_path: Path on disk to a file to submit for data extraction
    :param categories: List of categories Veryfi can use to categorize the document;
        ``DEFAULT_CATEGORIES`` is used when this is empty or None
    :param delete_after_processing: Delete this document from Veryfi after data has been extracted
    :param kwargs: Additional body parameters; they override the generated ones on a key clash
    :return: Data extracted from the document
    """
    categories = categories or self.DEFAULT_CATEGORIES
    file_name = os.path.basename(file_path)
    with open(file_path, "rb") as source:
        raw_bytes = source.read()
    # The file content travels inside the request body as base64 text.
    payload = {
        "file_name": file_name,
        "file_data": base64.b64encode(raw_bytes).decode("utf-8"),
        "categories": categories,
        "auto_delete": delete_after_processing,
    }
    return self.client._request("POST", "/documents/", {**payload, **kwargs})
process_document_url
def process_document_url(
self,
file_url: Optional[str] = None,
categories: Optional[List[str]] = None,
delete_after_processing: bool = False,
boost_mode: bool = False,
external_id: Optional[str] = None,
max_pages_to_process: Optional[int] = None,
file_urls: Optional[List[str]] = None,
**kwargs
) -> Dict
Process Document from url and extract all the fields from it.
https://docs.veryfi.com/api/receipts-invoices/process-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_url | None | Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg". | None |
file_urls | None | Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"] | None |
categories | None | List of categories to use when categorizing the document | None |
delete_after_processing | None | Delete this document from Veryfi after data has been extracted | None |
max_pages_to_process | None | When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1. | None |
boost_mode | None | Flag that tells Veryfi whether boost mode should be enabled. When set to 1, Veryfi will skip data enrichment steps, but will process the document faster. Default value for this flag is 0 | None |
external_id | None | Optional custom document identifier. Use this if you would like to assign your own ID to documents | None |
kwargs | None | Additional body parameters | None |
Returns:
Type | Description |
---|---|
None | Data extracted from the document. |
View Source
def process_document_url(
    self,
    file_url: Optional[str] = None,
    categories: Optional[List[str]] = None,
    delete_after_processing: bool = False,
    boost_mode: bool = False,
    external_id: Optional[str] = None,
    max_pages_to_process: Optional[int] = None,
    file_urls: Optional[List[str]] = None,
    **kwargs,
) -> Dict:
    """
    Submit one or more remotely hosted files for processing and field extraction.
    https://docs.veryfi.com/api/receipts-invoices/process-a-document/

    :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
    :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
    :param categories: List of categories to use when categorizing the document
    :param delete_after_processing: Delete this document from Veryfi after data has been extracted; sent in the request body as "auto_delete"
    :param max_pages_to_process: For long documents, how many pages will be read and processed, starting from page 1.
    :param boost_mode: Whether boost mode should be enabled. When enabled, Veryfi skips data enrichment steps but processes the document faster. Disabled by default.
    :param external_id: Optional custom document identifier. Use this if you would like to assign your own ID to documents
    :param kwargs: Additional body parameters; on a key clash they replace the named fields above
    :return: Data extracted from the document.
    """
    named_fields = {
        "auto_delete": delete_after_processing,
        "boost_mode": boost_mode,
        "categories": categories,
        "external_id": external_id,
        "file_url": file_url,
        "file_urls": file_urls,
        "max_pages_to_process": max_pages_to_process,
    }
    # Unpacking kwargs last lets caller-supplied extras override the named fields.
    return self.client._request("POST", "/documents/", {**named_fields, **kwargs})
process_documents_bulk
def process_documents_bulk(
self,
file_urls: List[str]
) -> List[int]
Process multiple documents from URLs and extract all the fields from them.
If you want to use this endpoint, please contact support@veryfi.com first. Veryfi's Bulk upload allows you to process multiple Documents. https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_urls | None | List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"] | None |
Returns:
Type | Description |
---|---|
None | List of document IDs being processed |
View Source
def process_documents_bulk(self, file_urls: List[str]) -> List[int]:
    """
    Submit several remotely hosted documents for processing in a single request.
    If you want to use this endpoint, please contact support@veryfi.com first. Veryfi's Bulk upload allows you to process multiple Documents.
    https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

    :param file_urls: List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
    :return: List of document IDs being processed
    """
    # The URL list is the only field in the request body.
    return self.client._request("POST", "/documents/bulk/", {"file_urls": file_urls})
replace_tags
def replace_tags(
self,
document_id,
tags
)
Replace multiple tags on an existing document.
https://docs.veryfi.com/api/receipts-invoices/update-a-document/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to update | None |
tags | None | array of strings | None |
Returns:
Type | Description |
---|---|
None | Added tags data |
View Source
def replace_tags(self, document_id: int, tags: List[str]) -> Dict:
    """
    Replace multiple tags on an existing document.
    https://docs.veryfi.com/api/receipts-invoices/update-a-document/

    :param document_id: ID of the document you'd like to update
    :param tags: array of strings
    :return: Added tags data
    """
    # Tags are sent with a PUT to the document's own endpoint, not a dedicated tags URL.
    endpoint_name = f"/documents/{document_id}/"
    request_arguments = {"tags": tags}
    return self.client._request("PUT", endpoint_name, request_arguments)
split_and_process_pdf
def split_and_process_pdf(
self,
file_path: str,
categories: Optional[List] = None,
**kwargs
) -> Dict
Process a document and extract all the fields from it
https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_path | None | Path on disk to a file to submit for data extraction | None |
categories | None | List of categories Veryfi can use to categorize the document | None |
kwargs | None | Additional body parameters | None |
Returns:
Type | Description |
---|---|
None | Data extracted from the document |
View Source
def split_and_process_pdf(
    self,
    file_path: str,
    categories: Optional[List] = None,
    **kwargs,
) -> Dict:
    """
    Upload a local file to the split-and-process endpoint and return the extracted data.
    https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

    :param file_path: Path on disk to a file to submit for data extraction
    :param categories: List of categories Veryfi can use to categorize the document; a missing or empty value is sent as an empty list
    :param kwargs: Additional body parameters; on a key clash they replace the named fields
    :return: Data extracted from the document
    """
    # Only the base name is sent; the directory part of the path stays local.
    uploaded_name = os.path.basename(file_path)
    with open(file_path, "rb") as pdf_handle:
        raw_bytes = pdf_handle.read()
    # The file content travels in the request body as base64 text.
    payload = {
        "file_name": uploaded_name,
        "file_data": base64.b64encode(raw_bytes).decode("utf-8"),
        "categories": categories if categories else [],
    }
    payload.update(kwargs)
    return self.client._request("POST", "/documents-set/", payload)
split_and_process_pdf_url
def split_and_process_pdf_url(
self,
file_url: Optional[str] = None,
categories: Optional[List[str]] = None,
max_pages_to_process: Optional[int] = None,
file_urls: Optional[List[str]] = None,
**kwargs
) -> Dict
Process Document from url and extract all the fields from it.
https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
file_url | None | Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg". | None |
file_urls | None | Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"] | None |
categories | None | List of categories to use when categorizing the document | None |
max_pages_to_process | None | When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1. | None |
kwargs | None | Additional body parameters | None |
Returns:
Type | Description |
---|---|
None | Data extracted from the document. |
View Source
def split_and_process_pdf_url(
self,
file_url: Optional[str] = None,
categories: Optional[List[str]] = None,
max_pages_to_process: Optional[int] = None,
file_urls: Optional[List[str]] = None,
**kwargs,
) -> Dict:
"""Process Document from url and extract all the fields from it.
https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/
:param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
:param file_urls: Required if file_url isn't specifies. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
:param categories: List of categories to use when categorizing the document
:param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
:param kwargs: Additional body parameters
:return: Data extracted from the document.
"""
endpoint_name = "/documents-set/"
categories = categories or []
request_arguments = {
"categories": categories,
"file_url": file_url,
"file_urls": file_urls,
"max_pages_to_process": max_pages_to_process,
}
request_arguments.update(kwargs)
return self.client._request("POST", endpoint_name, request_arguments)
update_document
def update_document(
self,
document_id: int,
**kwargs
) -> Dict
Update data for a previously processed document, including almost any field such as `vendor`, `date`, `notes`, etc.
https://docs.veryfi.com/api/receipts-invoices/update-a-document/
veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to update | None |
kwargs | None | fields to update | None |
Returns:
Type | Description |
---|---|
None | A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields. |
View Source
def update_document(self, document_id: int, **kwargs) -> Dict:
    """
    Change fields of a previously processed document, such as `vendor`, `date`, `notes`, etc.
    https://docs.veryfi.com/api/receipts-invoices/update-a-document/

    ```veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")```

    :param document_id: ID of the document you'd like to update
    :param kwargs: fields to update
    :return: A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
    """
    # The keyword arguments are passed through untouched as the request body.
    endpoint_name = f"/documents/{document_id}/"
    return self.client._request("PUT", endpoint_name, kwargs)
update_line_item
def update_line_item(
self,
document_id: int,
line_item_id: int,
payload: Dict
) -> Dict
Update an existing line item on an existing document.
https://docs.veryfi.com/api/receipts-invoices/update-a-line-item/
Parameters:
Name | Type | Description | Default |
---|---|---|---|
document_id | None | ID of the document you'd like to update | None |
line_item_id | None | ID of the line item you'd like to update | None |
payload | None | line item object to update | None |
Returns:
Type | Description |
---|---|
None | Line item data with updated fields, if fields are writable. Otherwise line item data with unchanged fields. |
View Source
def update_line_item(self, document_id: int, line_item_id: int, payload: Dict) -> Dict:
    """
    Modify one line item that belongs to an existing document.
    https://docs.veryfi.com/api/receipts-invoices/update-a-line-item/

    :param document_id: ID of the document you'd like to update
    :param line_item_id: ID of the line item you'd like to update
    :param payload: line item object to update
    :return: Line item data with updated fields, if fields are writable. Otherwise line item data with unchanged fields.
    """
    # Unlike the document endpoints, this path is sent without a trailing slash.
    endpoint_name = f"/documents/{document_id}/line-items/{line_item_id}"
    return self.client._request("PUT", endpoint_name, payload)