Skip to content

Module veryfi.documents

View Source
import os

import base64

from typing import Dict, List, Optional

from veryfi._documents.line_items import LineItems

from veryfi._documents.tags import Tags

from veryfi._documents.pdf_split import PDFSplit

from veryfi.client_base import Client

class Documents(Tags, LineItems, PDFSplit):
    """
    Receipt/invoice document endpoints, combined with the tag, line-item and
    PDF-split mixins.

    Every method delegates to ``self.client._request``.
    """

    # Categories sent by ``process_document`` when the caller supplies none.
    DEFAULT_CATEGORIES = [
        "Advertising & Marketing",
        "Automotive",
        "Bank Charges & Fees",
        "Legal & Professional Services",
        "Insurance",
        "Meals & Entertainment",
        "Office Supplies & Software",
        "Taxes & Licenses",
        "Travel",
        "Rent & Lease",
        "Repairs & Maintenance",
        "Payroll",
        "Utilities",
        "Job Supplies",
        "Grocery",
    ]

    def __init__(self, client: Client):
        """
        Keep a reference to the client and initialise every mixin with it.

        :param client: Client whose ``_request`` method performs the API calls
        """
        self.client = client
        # Each mixin is initialised explicitly with the shared client.
        for mixin in (LineItems, Tags, PDFSplit):
            mixin.__init__(self, self.client)

    def get_documents(
        self,
        q: Optional[str] = None,
        external_id: Optional[str] = None,
        tag: Optional[str] = None,
        created_gt: Optional[str] = None,
        created_gte: Optional[str] = None,
        created_lt: Optional[str] = None,
        created_lte: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Search previously processed documents.
        https://docs.veryfi.com/api/receipts-invoices/search-documents/

        Only filters with a truthy value are sent. ``kwargs`` are merged last,
        so they override a named filter that maps to the same query key.

        :param q: Search query
        :param external_id: Search by external ID
        :param tag: Search by tag
        :param created_gt: Search by created date greater than
        :param created_gte: Search by created date greater than or equal to
        :param created_lt: Search by created date less than
        :param created_lte: Search by created date less than or equal to
        :param kwargs: Additional query parameters
        :return: List of previously processed documents
        """
        # (query key, value) pairs in the order they are added to the query.
        named_filters = (
            ("q", q),
            ("external_id", external_id),
            ("tag", tag),
            ("created__gt", created_gt),
            ("created__gte", created_gte),
            ("created__lt", created_lt),
            ("created__lte", created_lte),
        )
        query_params = {key: value for key, value in named_filters if value}
        query_params.update(kwargs)
        return self.client._request("GET", "/documents/", {}, query_params)

    def get_document(self, document_id: int, **kwargs) -> Dict:
        """
        Fetch a single document by its ID.
        https://docs.veryfi.com/api/receipts-invoices/get-a-document/

        :param document_id: ID of the document you'd like to retrieve
        :param kwargs: Additional query parameters
        :return: Data extracted from the Document
        """
        return self.client._request("GET", f"/documents/{document_id}/", {}, kwargs)

    def process_document(
        self,
        file_path: str,
        categories: Optional[List] = None,
        delete_after_processing: bool = False,
        **kwargs,
    ) -> Dict:
        """
        Upload a local file and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        The file is read in full and sent base64-encoded together with its
        base name.

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document;
            ``DEFAULT_CATEGORIES`` is used when this is empty or None
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param kwargs: Additional body parameters; they override the fields built here
        :return: Data extracted from the document
        """
        chosen_categories = categories or self.DEFAULT_CATEGORIES
        file_name = os.path.basename(file_path)
        with open(file_path, "rb") as source:
            raw_bytes = source.read()
        payload = {
            "file_name": file_name,
            "file_data": base64.b64encode(raw_bytes).decode("utf-8"),
            "categories": chosen_categories,
            "auto_delete": delete_after_processing,
            **kwargs,
        }
        return self.client._request("POST", "/documents/", payload)

    def process_document_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        delete_after_processing: bool = False,
        boost_mode: bool = False,
        external_id: Optional[str] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """
        Process a document from a URL and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        Arguments left as None are still included in the request body.

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param boost_mode: Whether boost mode should be enabled. When enabled, Veryfi skips data enrichment steps but processes the document faster. Defaults to False.
        :param external_id: Optional custom document identifier. Use this if you would like to assign your own ID to documents
        :param kwargs: Additional body parameters; they override the fields built here
        :return: Data extracted from the document.
        """
        payload = {
            "auto_delete": delete_after_processing,
            "boost_mode": boost_mode,
            "categories": categories,
            "external_id": external_id,
            "file_url": file_url,
            "file_urls": file_urls,
            "max_pages_to_process": max_pages_to_process,
            **kwargs,
        }
        return self.client._request("POST", "/documents/", payload)

    def process_documents_bulk(self, file_urls: List[str]) -> List[int]:
        """
        Submit several document URLs for processing in one request.
        If you want to use this endpoint, please contact support@veryfi.com first.
        https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

        :param file_urls: List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :return: List of document IDs being processed
        """
        return self.client._request("POST", "/documents/bulk/", {"file_urls": file_urls})

    def delete_document(self, document_id: int):
        """
        Delete a document from Veryfi.
        https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

        The document ID is sent both in the URL and in the request body (as ``id``).
        Nothing is returned.

        :param document_id: ID of the document you'd like to delete
        """
        endpoint_name = f"/documents/{document_id}/"
        self.client._request("DELETE", endpoint_name, {"id": document_id})

    def update_document(self, document_id: int, **kwargs) -> Dict:
        """
        Update fields of a previously processed document, such as `vendor`, `date` or `notes`.
        https://docs.veryfi.com/api/receipts-invoices/update-a-document/

        ```veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")```

        :param document_id: ID of the document you'd like to update
        :param kwargs: fields to update
        :return: A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
        """
        endpoint_name = f"/documents/{document_id}/"
        return self.client._request("PUT", endpoint_name, kwargs)

Classes

Documents

class Documents(
    client: veryfi.client_base.Client
)
View Source
class Documents(Tags, LineItems, PDFSplit):
    """
    Receipt/invoice document endpoints, combined with the tag, line-item and
    PDF-split mixins.

    Every method delegates to ``self.client._request``.
    """

    # Categories sent by ``process_document`` when the caller supplies none.
    DEFAULT_CATEGORIES = [
        "Advertising & Marketing",
        "Automotive",
        "Bank Charges & Fees",
        "Legal & Professional Services",
        "Insurance",
        "Meals & Entertainment",
        "Office Supplies & Software",
        "Taxes & Licenses",
        "Travel",
        "Rent & Lease",
        "Repairs & Maintenance",
        "Payroll",
        "Utilities",
        "Job Supplies",
        "Grocery",
    ]

    def __init__(self, client: Client):
        """
        Keep a reference to the client and initialise every mixin with it.

        :param client: Client whose ``_request`` method performs the API calls
        """
        self.client = client
        # Each mixin is initialised explicitly with the shared client.
        for mixin in (LineItems, Tags, PDFSplit):
            mixin.__init__(self, self.client)

    def get_documents(
        self,
        q: Optional[str] = None,
        external_id: Optional[str] = None,
        tag: Optional[str] = None,
        created_gt: Optional[str] = None,
        created_gte: Optional[str] = None,
        created_lt: Optional[str] = None,
        created_lte: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Search previously processed documents.
        https://docs.veryfi.com/api/receipts-invoices/search-documents/

        Only filters with a truthy value are sent. ``kwargs`` are merged last,
        so they override a named filter that maps to the same query key.

        :param q: Search query
        :param external_id: Search by external ID
        :param tag: Search by tag
        :param created_gt: Search by created date greater than
        :param created_gte: Search by created date greater than or equal to
        :param created_lt: Search by created date less than
        :param created_lte: Search by created date less than or equal to
        :param kwargs: Additional query parameters
        :return: List of previously processed documents
        """
        # (query key, value) pairs in the order they are added to the query.
        named_filters = (
            ("q", q),
            ("external_id", external_id),
            ("tag", tag),
            ("created__gt", created_gt),
            ("created__gte", created_gte),
            ("created__lt", created_lt),
            ("created__lte", created_lte),
        )
        query_params = {key: value for key, value in named_filters if value}
        query_params.update(kwargs)
        return self.client._request("GET", "/documents/", {}, query_params)

    def get_document(self, document_id: int, **kwargs) -> Dict:
        """
        Fetch a single document by its ID.
        https://docs.veryfi.com/api/receipts-invoices/get-a-document/

        :param document_id: ID of the document you'd like to retrieve
        :param kwargs: Additional query parameters
        :return: Data extracted from the Document
        """
        return self.client._request("GET", f"/documents/{document_id}/", {}, kwargs)

    def process_document(
        self,
        file_path: str,
        categories: Optional[List] = None,
        delete_after_processing: bool = False,
        **kwargs,
    ) -> Dict:
        """
        Upload a local file and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        The file is read in full and sent base64-encoded together with its
        base name.

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document;
            ``DEFAULT_CATEGORIES`` is used when this is empty or None
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param kwargs: Additional body parameters; they override the fields built here
        :return: Data extracted from the document
        """
        chosen_categories = categories or self.DEFAULT_CATEGORIES
        file_name = os.path.basename(file_path)
        with open(file_path, "rb") as source:
            raw_bytes = source.read()
        payload = {
            "file_name": file_name,
            "file_data": base64.b64encode(raw_bytes).decode("utf-8"),
            "categories": chosen_categories,
            "auto_delete": delete_after_processing,
            **kwargs,
        }
        return self.client._request("POST", "/documents/", payload)

    def process_document_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        delete_after_processing: bool = False,
        boost_mode: bool = False,
        external_id: Optional[str] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """
        Process a document from a URL and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        Arguments left as None are still included in the request body.

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param boost_mode: Whether boost mode should be enabled. When enabled, Veryfi skips data enrichment steps but processes the document faster. Defaults to False.
        :param external_id: Optional custom document identifier. Use this if you would like to assign your own ID to documents
        :param kwargs: Additional body parameters; they override the fields built here
        :return: Data extracted from the document.
        """
        payload = {
            "auto_delete": delete_after_processing,
            "boost_mode": boost_mode,
            "categories": categories,
            "external_id": external_id,
            "file_url": file_url,
            "file_urls": file_urls,
            "max_pages_to_process": max_pages_to_process,
            **kwargs,
        }
        return self.client._request("POST", "/documents/", payload)

    def process_documents_bulk(self, file_urls: List[str]) -> List[int]:
        """
        Submit several document URLs for processing in one request.
        If you want to use this endpoint, please contact support@veryfi.com first.
        https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

        :param file_urls: List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :return: List of document IDs being processed
        """
        return self.client._request("POST", "/documents/bulk/", {"file_urls": file_urls})

    def delete_document(self, document_id: int):
        """
        Delete a document from Veryfi.
        https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

        The document ID is sent both in the URL and in the request body (as ``id``).
        Nothing is returned.

        :param document_id: ID of the document you'd like to delete
        """
        endpoint_name = f"/documents/{document_id}/"
        self.client._request("DELETE", endpoint_name, {"id": document_id})

    def update_document(self, document_id: int, **kwargs) -> Dict:
        """
        Update fields of a previously processed document, such as `vendor`, `date` or `notes`.
        https://docs.veryfi.com/api/receipts-invoices/update-a-document/

        ```veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")```

        :param document_id: ID of the document you'd like to update
        :param kwargs: fields to update
        :return: A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
        """
        endpoint_name = f"/documents/{document_id}/"
        return self.client._request("PUT", endpoint_name, kwargs)

Ancestors (in MRO)

  • veryfi._documents.tags.Tags
  • veryfi._documents.line_items.LineItems
  • veryfi._documents.pdf_split.PDFSplit

Descendants

  • veryfi.client.Client

Class variables

DEFAULT_CATEGORIES

Methods

add_line_item

def add_line_item(
    self,
    document_id: int,
    payload: Dict
) -> Dict

Add a new line item on an existing document.

https://docs.veryfi.com/api/receipts-invoices/create-a-line-item/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to update None
payload None line item object to add None

Returns:

Type Description
None Added line item data
View Source
    def add_line_item(self, document_id: int, payload: Dict) -> Dict:
        """
        Create a line item on an existing document.
        https://docs.veryfi.com/api/receipts-invoices/create-a-line-item/

        :param document_id: ID of the document you'd like to update
        :param payload: line item object to add; sent as the request body unchanged
        :return: Added line item data
        """
        endpoint_name = f"/documents/{document_id}/line-items/"
        return self.client._request("POST", endpoint_name, payload)

add_tag

def add_tag(
    self,
    document_id,
    tag_name
)

Add a new tag on an existing document.

https://docs.veryfi.com/api/receipts-invoices/add-a-tag-to-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to update None
tag_name None name of the new tag None

Returns:

Type Description
None Added tag data
View Source
    def add_tag(self, document_id, tag_name):
        """
        Attach a single new tag to an existing document.
        https://docs.veryfi.com/api/receipts-invoices/add-a-tag-to-a-document/

        :param document_id: ID of the document you'd like to update
        :param tag_name: name of the new tag
        :return: Added tag data
        """
        return self.client._request(
            "PUT",
            f"/documents/{document_id}/tags/",
            {"name": tag_name},
        )

add_tags

def add_tags(
    self,
    document_id,
    tags
)

Add multiple tags on an existing document.

https://docs.veryfi.com/api/receipts-invoices/add-tags-to-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to update None
tags None array of strings None

Returns:

Type Description
None Added tags data
View Source
    def add_tags(self, document_id, tags):
        """
        Attach several tags to an existing document in one request.
        https://docs.veryfi.com/api/receipts-invoices/add-tags-to-a-document/

        :param document_id: ID of the document you'd like to update
        :param tags: array of strings
        :return: Added tags data
        """
        return self.client._request(
            "POST",
            f"/documents/{document_id}/tags/",
            {"tags": tags},
        )

delete_document

def delete_document(
    self,
    document_id: int
)

Delete Document from Veryfi

https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to delete None
View Source
    def delete_document(self, document_id: int):
        """
        Delete a document from Veryfi.
        https://docs.veryfi.com/api/receipts-invoices/delete-a-document/

        The document ID is sent both in the URL and in the request body (as ``id``).
        Nothing is returned.

        :param document_id: ID of the document you'd like to delete
        """
        endpoint_name = f"/documents/{document_id}/"
        request_arguments = {"id": document_id}
        self.client._request("DELETE", endpoint_name, request_arguments)

delete_line_item

def delete_line_item(
    self,
    document_id: int,
    line_item_id: int
)

Delete an existing line item on an existing document.

https://docs.veryfi.com/api/receipts-invoices/delete-a-line-item/

Parameters:

Name Type Description Default
document_id None ID of the document that contains the line item None
line_item_id None ID of the line item you'd like to delete None
View Source
    def delete_line_item(self, document_id: int, line_item_id: int):
        """
        Remove one line item from an existing document.
        https://docs.veryfi.com/api/receipts-invoices/delete-a-line-item/

        Nothing is returned.

        :param document_id: ID of the document that holds the line item
        :param line_item_id: ID of the line item you'd like to delete
        """
        endpoint_name = f"/documents/{document_id}/line-items/{line_item_id}"
        self.client._request("DELETE", endpoint_name)

delete_line_items

def delete_line_items(
    self,
    document_id: int
)

Delete all line items on an existing document.

https://docs.veryfi.com/api/receipts-invoices/delete-all-document-line-items/

Parameters:

Name Type Description Default
document_id None ID of the document whose line items you'd like to delete None
View Source
    def delete_line_items(self, document_id: int):
        """
        Remove every line item from an existing document.
        https://docs.veryfi.com/api/receipts-invoices/delete-all-document-line-items/

        Nothing is returned.

        :param document_id: ID of the document whose line items you'd like to delete
        """
        endpoint_name = f"/documents/{document_id}/line-items/"
        self.client._request("DELETE", endpoint_name)

delete_tag

def delete_tag(
    self,
    document_id,
    tag_id
)

Unlink a tag from the list of tags assigned to a specific Document.

https://docs.veryfi.com/api/receipts-invoices/unlink-a-tag-from-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document None
tag_id None ID of the tag you'd like to unlink None
View Source
    def delete_tag(self, document_id, tag_id):
        """
        Unlink one tag from the tags assigned to a specific Document.
        https://docs.veryfi.com/api/receipts-invoices/unlink-a-tag-from-a-document/

        Nothing is returned.

        :param document_id: ID of the document
        :param tag_id: ID of the tag you'd like to unlink
        """
        self.client._request("DELETE", f"/documents/{document_id}/tags/{tag_id}", {})

delete_tags

def delete_tags(
    self,
    document_id
)

Unlink all tags assigned to a specific Document.

https://docs.veryfi.com/api/receipts-invoices/unlink-all-tags-from-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document None
View Source
    def delete_tags(self, document_id):
        """
        Unlink every tag assigned to a specific Document.
        https://docs.veryfi.com/api/receipts-invoices/unlink-all-tags-from-a-document/

        Nothing is returned.

        :param document_id: ID of the document
        """
        self.client._request("DELETE", f"/documents/{document_id}/tags", {})

get_document

def get_document(
    self,
    document_id: int,
    **kwargs
) -> Dict

Retrieve document by ID

https://docs.veryfi.com/api/receipts-invoices/get-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to retrieve None
kwargs None Additional query parameters None

Returns:

Type Description
None Data extracted from the Document
View Source
    def get_document(self, document_id: int, **kwargs) -> Dict:
        """
        Fetch a single document by its ID.
        https://docs.veryfi.com/api/receipts-invoices/get-a-document/

        :param document_id: ID of the document you'd like to retrieve
        :param kwargs: Additional query parameters
        :return: Data extracted from the Document
        """
        return self.client._request("GET", f"/documents/{document_id}/", {}, kwargs)

get_documents

def get_documents(
    self,
    q: Optional[str] = None,
    external_id: Optional[str] = None,
    tag: Optional[str] = None,
    created_gt: Optional[str] = None,
    created_gte: Optional[str] = None,
    created_lt: Optional[str] = None,
    created_lte: Optional[str] = None,
    **kwargs
) -> Dict

Get list of documents.

https://docs.veryfi.com/api/receipts-invoices/search-documents/

Parameters:

Name Type Description Default
q None Search query None
external_id None Search by external ID None
tag None Search by tag None
created_gt None Search by created date greater than None
created_gte None Search by created date greater than or equal to None
created_lt None Search by created date less than None
created_lte None Search by created date less than or equal to None
kwargs None Additional query parameters None

Returns:

Type Description
None List of previously processed documents
View Source
    def get_documents(
        self,
        q: Optional[str] = None,
        external_id: Optional[str] = None,
        tag: Optional[str] = None,
        created_gt: Optional[str] = None,
        created_gte: Optional[str] = None,
        created_lt: Optional[str] = None,
        created_lte: Optional[str] = None,
        **kwargs,
    ) -> Dict:
        """
        Search previously processed documents.
        https://docs.veryfi.com/api/receipts-invoices/search-documents/

        Only filters with a truthy value are sent. ``kwargs`` are merged last,
        so they override a named filter that maps to the same query key.

        :param q: Search query
        :param external_id: Search by external ID
        :param tag: Search by tag
        :param created_gt: Search by created date greater than
        :param created_gte: Search by created date greater than or equal to
        :param created_lt: Search by created date less than
        :param created_lte: Search by created date less than or equal to
        :param kwargs: Additional query parameters
        :return: List of previously processed documents
        """
        # (query key, value) pairs in the order they are added to the query.
        named_filters = (
            ("q", q),
            ("external_id", external_id),
            ("tag", tag),
            ("created__gt", created_gt),
            ("created__gte", created_gte),
            ("created__lt", created_lt),
            ("created__lte", created_lte),
        )
        query_params = {key: value for key, value in named_filters if value}
        query_params.update(kwargs)
        return self.client._request("GET", "/documents/", {}, query_params)

get_documents_from_pdf

def get_documents_from_pdf(
    self,
    document_id: int
)

Get Documents from PDF endpoint allows you to retrieve a collection of previously processed documents.

https://docs.veryfi.com/api/receipts-invoices/get-documents-from-pdf/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to retrieve None

Returns:

Type Description
None The processed Document response.
View Source
    def get_documents_from_pdf(self, document_id: int):
        """
        Retrieve a collection of previously processed documents via the
        Get Documents from PDF endpoint.
        https://docs.veryfi.com/api/receipts-invoices/get-documents-from-pdf/

        :param document_id: ID of the document you'd like to retrieve
        :return: The processed Document response.
        """
        return self.client._request("GET", f"/documents-set/{document_id}", {})

get_line_item

def get_line_item(
    self,
    document_id: int,
    line_item_id: int
)

Retrieve a line item for existing document by ID.

https://docs.veryfi.com/api/receipts-invoices/get-a-line-item/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to retrieve None
line_item_id None ID of the line item you'd like to retrieve None

Returns:

Type Description
None Line item extracted from the document
View Source
    def get_line_item(self, document_id: int, line_item_id: int):
        """
        Fetch one line item of an existing document by its ID.
        https://docs.veryfi.com/api/receipts-invoices/get-a-line-item/

        :param document_id: ID of the document you'd like to retrieve
        :param line_item_id: ID of the line item you'd like to retrieve
        :return: Line item extracted from the document
        """
        endpoint_name = f"/documents/{document_id}/line-items/{line_item_id}"
        return self.client._request("GET", endpoint_name)

get_line_items

def get_line_items(
    self,
    document_id: int
)

Retrieve all line items for a document.

https://docs.veryfi.com/api/receipts-invoices/get-document-line-items/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to retrieve None

Returns:

Type Description
None List of line items extracted from the document
View Source
    def get_line_items(self, document_id: int):
        """
        Fetch all line items of a document.
        https://docs.veryfi.com/api/receipts-invoices/get-document-line-items/

        :param document_id: ID of the document you'd like to retrieve
        :return: List of line items extracted from the document
        """
        endpoint_name = f"/documents/{document_id}/line-items/"
        return self.client._request("GET", endpoint_name)

get_pdf

def get_pdf(
    self,
    **kwargs
)

The Get a Submitted PDF endpoint allows you to retrieve a collection of previously processed documents.

https://docs.veryfi.com/api/receipts-invoices/get-submitted-pdf/

Parameters:

Name Type Description Default
kwargs None Additional query parameters. None

Returns:

Type Description
None The processed Document response.
View Source
    def get_pdf(self, **kwargs):
        """
        Query the Get a Submitted PDF endpoint (``/documents-set/``).
        https://docs.veryfi.com/api/receipts-invoices/get-submitted-pdf/

        :param kwargs: Additional query parameters.
        :return: The processed Document response.
        """
        return self.client._request("GET", "/documents-set/", {}, kwargs)

get_tags

def get_tags(
    self,
    document_id
)

Return all Tags assigned to a specific Document.

https://docs.veryfi.com/api/receipts-invoices/get-document-tags/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to get None

Returns:

Type Description
None Tags assigned to the document
View Source
    def get_tags(self, document_id):
        """
        Return all Tags assigned to a specific Document.
        https://docs.veryfi.com/api/receipts-invoices/get-document-tags/

        :param document_id: ID of the document whose tags you'd like to get
        :return: Tags assigned to the document
        """
        return self.client._request("GET", f"/documents/{document_id}/tags", {})

process_document

def process_document(
    self,
    file_path: str,
    categories: Optional[List] = None,
    delete_after_processing: bool = False,
    **kwargs
) -> Dict

Process a document and extract all the fields from it.

https://docs.veryfi.com/api/receipts-invoices/process-a-document/

Parameters:

Name Type Description Default
file_path None Path on disk to a file to submit for data extraction None
categories None List of categories Veryfi can use to categorize the document None
delete_after_processing None Delete this document from Veryfi after data has been extracted None
kwargs None Additional body parameters None

Returns:

Type Description
None Data extracted from the document
View Source
    def process_document(
        self,
        file_path: str,
        categories: Optional[List] = None,
        delete_after_processing: bool = False,
        **kwargs,
    ) -> Dict:
        """
        Submit a local file for processing and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document;
            an empty or missing list falls back to ``DEFAULT_CATEGORIES``
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param kwargs: Additional body parameters; they override the built-in keys on collision
        :return: Data extracted from the document
        """
        chosen_categories = categories or self.DEFAULT_CATEGORIES
        base_name = os.path.basename(file_path)

        # The file content is sent base64-encoded as text in the request body.
        with open(file_path, "rb") as source_file:
            raw_bytes = source_file.read()
        encoded_content = base64.b64encode(raw_bytes).decode("utf-8")

        payload = {
            "file_name": base_name,
            "file_data": encoded_content,
            "categories": chosen_categories,
            "auto_delete": delete_after_processing,
        }
        payload.update(kwargs)
        return self.client._request("POST", "/documents/", payload)

process_document_url

def process_document_url(
    self,
    file_url: Optional[str] = None,
    categories: Optional[List[str]] = None,
    delete_after_processing: bool = False,
    boost_mode: bool = False,
    external_id: Optional[str] = None,
    max_pages_to_process: Optional[int] = None,
    file_urls: Optional[List[str]] = None,
    **kwargs
) -> Dict

Process Document from url and extract all the fields from it.

https://docs.veryfi.com/api/receipts-invoices/process-a-document/

Parameters:

Name Type Description Default
file_url None Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg". None
file_urls None Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"] None
categories None List of categories to use when categorizing the document None
delete_after_processing None Delete this document from Veryfi after data has been extracted None
max_pages_to_process None When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1. None
boost_mode None Flag that tells Veryfi whether boost mode should be enabled. When set to 1, Veryfi will skip data enrichment steps, but will process the document faster. Default value for this flag is 0 None
external_id None Optional custom document identifier. Use this if you would like to assign your own ID to documents None
kwargs None Additional body parameters None

Returns:

Type Description
None Data extracted from the document.
View Source
    def process_document_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        delete_after_processing: bool = False,
        boost_mode: bool = False,
        external_id: Optional[str] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """Process a Document referenced by URL and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/process-a-document/

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document
        :param delete_after_processing: Delete this document from Veryfi after data has been extracted
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param boost_mode: Flag that tells Veryfi whether boost mode should be enabled. When set to 1, Veryfi will skip data enrichment steps, but will process the document faster. Default value for this flag is 0
        :param external_id: Optional custom document identifier. Use this if you would like to assign your own ID to documents
        :param kwargs: Additional body parameters; they override the built-in keys on collision
        :return: Data extracted from the document.
        """
        # Arguments are forwarded as given (including None values); kwargs are
        # merged last so caller-supplied keys take precedence.
        payload = {
            "auto_delete": delete_after_processing,
            "boost_mode": boost_mode,
            "categories": categories,
            "external_id": external_id,
            "file_url": file_url,
            "file_urls": file_urls,
            "max_pages_to_process": max_pages_to_process,
            **kwargs,
        }
        return self.client._request("POST", "/documents/", payload)

process_documents_bulk

def process_documents_bulk(
    self,
    file_urls: List[str]
) -> List[int]

Process multiple documents from URLs and extract all the fields from them.

If you want to use this endpoint, please contact support@veryfi.com first. Veryfi's Bulk upload allows you to process multiple Documents. https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

Parameters:

Name Type Description Default
file_urls None List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"] None

Returns:

Type Description
None List of document IDs being processed
View Source
    def process_documents_bulk(self, file_urls: List[str]) -> List[int]:
        """
        Submit several documents by URL for processing in a single bulk request.
        If you want to use this endpoint, please contact support@veryfi.com first. Veryfi's Bulk upload allows you to process multiple Documents.
        https://docs.veryfi.com/api/receipts-invoices/bulk-process-multiple-documents/

        :param file_urls: List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :return: List of document IDs being processed
        """
        return self.client._request("POST", "/documents/bulk/", {"file_urls": file_urls})

replace_tags

def replace_tags(
    self,
    document_id,
    tags
)

Replace multiple tags on an existing document.

https://docs.veryfi.com/api/receipts-invoices/update-a-document/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to update None
tags None array of strings None

Returns:

Type Description
None Added tags data
View Source
    def replace_tags(self, document_id, tags):
        """
        Replace the tags on an existing document with the given ones.
        https://docs.veryfi.com/api/receipts-invoices/update-a-document/

        :param document_id: ID of the document you'd like to update
        :param tags: array of strings
        :return: Response of the document update request, as returned by the client
        """
        # Tags are replaced through the generic document update (PUT) endpoint.
        body = {"tags": tags}
        return self.client._request("PUT", f"/documents/{document_id}/", body)

split_and_process_pdf

def split_and_process_pdf(
    self,
    file_path: str,
    categories: Optional[List] = None,
    **kwargs
) -> Dict

Process a document and extract all the fields from it

https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

Parameters:

Name Type Description Default
file_path None Path on disk to a file to submit for data extraction None
categories None List of categories Veryfi can use to categorize the document None
kwargs None Additional body parameters None

Returns:

Type Description
None Data extracted from the document
View Source
    def split_and_process_pdf(
        self,
        file_path: str,
        categories: Optional[List] = None,
        **kwargs,
    ) -> Dict:
        """
        Submit a local PDF to the split-and-process endpoint and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

        :param file_path: Path on disk to a file to submit for data extraction
        :param categories: List of categories Veryfi can use to categorize the document;
            an empty or missing list is sent as ``[]``
        :param kwargs: Additional body parameters; they override the built-in keys on collision
        :return: Data extracted from the document
        """
        chosen_categories = categories if categories else []
        base_name = os.path.basename(file_path)

        # The file content is sent base64-encoded as text in the request body.
        with open(file_path, "rb") as pdf_file:
            raw_bytes = pdf_file.read()
        encoded_content = base64.b64encode(raw_bytes).decode("utf-8")

        payload = {
            "file_name": base_name,
            "file_data": encoded_content,
            "categories": chosen_categories,
        }
        payload.update(kwargs)
        return self.client._request("POST", "/documents-set/", payload)

split_and_process_pdf_url

def split_and_process_pdf_url(
    self,
    file_url: Optional[str] = None,
    categories: Optional[List[str]] = None,
    max_pages_to_process: Optional[int] = None,
    file_urls: Optional[List[str]] = None,
    **kwargs
) -> Dict

Process Document from url and extract all the fields from it.

https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

Parameters:

Name Type Description Default
file_url None Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg". None
file_urls None Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"] None
categories None List of categories to use when categorizing the document None
max_pages_to_process None When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1. None
kwargs None Additional body parameters None

Returns:

Type Description
None Data extracted from the document.
View Source
    def split_and_process_pdf_url(
        self,
        file_url: Optional[str] = None,
        categories: Optional[List[str]] = None,
        max_pages_to_process: Optional[int] = None,
        file_urls: Optional[List[str]] = None,
        **kwargs,
    ) -> Dict:
        """Submit a PDF referenced by URL to the split-and-process endpoint and extract all the fields from it.
        https://docs.veryfi.com/api/receipts-invoices/split-and-process-a-pdf/

        :param file_url: Required if file_urls isn't specified. Publicly accessible URL to a file, e.g. "https://cdn.example.com/receipt.jpg".
        :param file_urls: Required if file_url isn't specified. List of publicly accessible URLs to multiple files, e.g. ["https://cdn.example.com/receipt1.jpg", "https://cdn.example.com/receipt2.jpg"]
        :param categories: List of categories to use when categorizing the document;
            an empty or missing list is sent as ``[]``
        :param max_pages_to_process: When sending a long document to Veryfi for processing, this parameter controls how many pages of the document will be read and processed, starting from page 1.
        :param kwargs: Additional body parameters; they override the built-in keys on collision
        :return: Data extracted from the document.
        """
        # kwargs are merged last so caller-supplied keys take precedence.
        payload = {
            "categories": categories if categories else [],
            "file_url": file_url,
            "file_urls": file_urls,
            "max_pages_to_process": max_pages_to_process,
            **kwargs,
        }
        return self.client._request("POST", "/documents-set/", payload)

update_document

def update_document(
    self,
    document_id: int,
    **kwargs
) -> Dict

Update data for a previously processed document, including almost any field such as vendor, date, notes, etc.

https://docs.veryfi.com/api/receipts-invoices/update-a-document/

veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to update None
kwargs None fields to update None

Returns:

Type Description
None A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
View Source
    def update_document(self, document_id: int, **kwargs) -> Dict:
        """
        Update data for a previously processed document, including almost any field such as `vendor`, `date`, `notes`, etc.
        https://docs.veryfi.com/api/receipts-invoices/update-a-document/

        ```veryfi_client.update_document(id, date="2021-01-01", notes="look what I did")```

        :param document_id: ID of the document you'd like to update
        :param kwargs: fields to update
        :return: A document json with updated fields, if fields are writable. Otherwise a document with unchanged fields.
        """
        endpoint_name = f"/documents/{document_id}/"
        fields_to_update = kwargs
        return self.client._request("PUT", endpoint_name, fields_to_update)

update_line_item

def update_line_item(
    self,
    document_id: int,
    line_item_id: int,
    payload: Dict
) -> Dict

Update an existing line item on an existing document.

https://docs.veryfi.com/api/receipts-invoices/update-a-line-item/

Parameters:

Name Type Description Default
document_id None ID of the document you'd like to update None
line_item_id None ID of the line item you'd like to update None
payload None line item object to update None

Returns:

Type Description
None Line item data with updated fields, if fields are writable. Otherwise line item data with unchanged fields.
View Source
    def update_line_item(self, document_id: int, line_item_id: int, payload: Dict) -> Dict:
        """
        Update one existing line item on an existing document.
        https://docs.veryfi.com/api/receipts-invoices/update-a-line-item/

        :param document_id: ID of the document you'd like to update
        :param line_item_id: ID of the line item you'd like to update
        :param payload: line item object to update
        :return: Line item data with updated fields, if fields are writable. Otherwise line item data with unchanged fields.
        """
        endpoint_name = f"/documents/{document_id}/line-items/{line_item_id}"
        return self.client._request("PUT", endpoint_name, payload)