Parsing Documents

You can parse various Brazilian fiscal documents (boleto, NFSe, NFe), US passports, US driver's licenses, and bank statements using the abstra.ai module. The AI-powered OCR automatically extracts structured data from PDFs and images.

Quick Start

# Each parser is imported exactly once (the original list repeated
# parse_nfe and parse_us_passport).
from abstra.ai import (
    parse_bank_statement,
    parse_boleto,
    parse_nfe,
    parse_nfse,
    parse_us_driver_license,
    parse_us_passport,
)
from abstra.forms import FileInput, run

# Basic document parsing: one page containing a single file-upload widget
upload_page = [FileInput(label="Upload Document", key="document")]
# run() takes a list of pages; the uploaded file is read back by its widget key
document_file = run([upload_page])["document"]

# Parse based on document type
parsed_data = parse_boleto(document_file.path)  # or parse_nfse, parse_nfe, parse_us_passport, parse_us_driver_license, parse_bank_statement

US Passport Parsing

Extract personal information and document details from US passports:

from abstra.ai import parse_us_passport
from abstra.forms import FileInput, TextOutput, run
from datetime import datetime

def passport_verification_app():
    """Upload a US passport, parse it, and display the extracted details.

    Runs an upload form, passes the uploaded file's path to
    ``parse_us_passport`` and shows the holder's name, passport number and
    dates, followed by an expiry warning and the MRZ code when available.

    Returns:
        The parsed passport object, or ``None`` when parsing or displaying
        the results raised an exception (the error is shown to the user).
    """
    # Upload form
    upload_form = [
        FileInput(
            label="Upload US Passport (PDF, JPG, or PNG)",
            key="passport_file",
            accepted_formats=['.pdf', '.jpg', '.jpeg', '.png']
        )
    ]

    file_data = run([upload_form])
    passport_file = file_data["passport_file"]

    try:
        # Parse the passport
        passport = parse_us_passport(passport_file.path)

        # Normalize the expiration date to a calendar date.
        # NOTE(review): the field's type is not visible here; both an ISO
        # "YYYY-MM-DD" string and a date/datetime object are accepted —
        # confirm against the abstra.ai reference.
        expiration = passport.expiration_date
        if isinstance(expiration, str):
            expiration = datetime.strptime(expiration, "%Y-%m-%d")
        if isinstance(expiration, datetime):
            expiration = expiration.date()

        # Compare whole calendar days. Comparing against datetime.now()
        # (with its time of day) reported a passport expiring today as
        # already expired and one expiring tomorrow as "0 days" away.
        today = datetime.now().date()
        days_until_expiry = (expiration - today).days
        is_expired = days_until_expiry < 0

        # Display results
        results = [
            TextOutput(f"**Full Name:** {passport.given_names} {passport.family_name}"),
            TextOutput(f"**Passport Number:** {passport.document_id}"),
            TextOutput(f"**Date of Birth:** {passport.date_of_birth}"),
            TextOutput(f"**Issue Date:** {passport.issue_date}"),
            TextOutput(f"**Expiration Date:** {passport.expiration_date}"),
        ]

        # Add expiry warnings
        if is_expired:
            results.append(TextOutput("🚨 **This passport has expired!**"))
        elif days_until_expiry <= 180:  # 6 months
            results.append(TextOutput(f"⚠️ **Passport expires in {days_until_expiry} days!**"))

        # Show MRZ if available
        if passport.mrz_code:
            results.append(TextOutput(f"**MRZ Code:** `{passport.mrz_code}`"))

        run([results])
        return passport

    except Exception as e:
        # Top-level boundary of the example: show the error instead of crashing.
        # run() is given a list of pages everywhere else in this example, so
        # the single widget is wrapped in its own page here as well.
        run([[TextOutput(f"❌ **Error parsing passport:** {str(e)}")]])
        return None

# Run the app; passport_data is the parsed passport, or None if the flow failed
passport_data = passport_verification_app()

Boleto Parsing

Process Brazilian payment slips and extract banking information:

from abstra.ai import parse_boleto
from abstra.forms import FileInput, TextOutput, run
from datetime import datetime

def boleto_processor():
    """Upload a boleto, parse it, and display its payment information.

    Runs an upload form, passes the uploaded file's path to ``parse_boleto``
    and shows beneficiary, amount, due date and barcode, an overdue warning
    when the due date has passed, and any optional reference fields.

    Returns:
        The parsed boleto object, or ``None`` when parsing or displaying the
        results raised an exception (the error is shown to the user).
    """
    # Upload boleto
    upload_form = [
        FileInput(
            label="Upload Boleto Document",
            key="boleto_file",
            accepted_formats=['.pdf', '.jpg', '.jpeg', '.png']
        )
    ]

    file_data = run([upload_form])
    boleto_file = file_data["boleto_file"]

    try:
        # Parse the boleto
        boleto = parse_boleto(boleto_file.path)

        # Convert amount from centavos to reais
        # NOTE(review): assumes `valor` is expressed in centavos — confirm
        # against the abstra.ai reference.
        amount_reais = boleto.valor / 100

        # Normalize the due date to a calendar date.
        # NOTE(review): the field's type is not visible here; both an ISO
        # "YYYY-MM-DD" string and a date/datetime object are accepted.
        due_date = boleto.vencimento
        if isinstance(due_date, str):
            due_date = datetime.strptime(due_date, "%Y-%m-%d")
        if isinstance(due_date, datetime):
            due_date = due_date.date()

        # Compare whole calendar days: a boleto due today is not overdue yet.
        # Comparing against datetime.now() flagged it as "0 days overdue" as
        # soon as midnight had passed.
        days_overdue = (datetime.now().date() - due_date).days
        is_overdue = days_overdue > 0

        # Display payment information
        results = [
            TextOutput(f"**Beneficiary:** {boleto.beneficiario}"),
            TextOutput(f"**Amount:** R$ {amount_reais:.2f}"),
            TextOutput(f"**Due Date:** {boleto.vencimento}"),
            TextOutput(f"**Barcode:** `{boleto.codigo_de_barras}`"),
        ]

        # Add overdue warning
        if is_overdue:
            results.append(TextOutput(f"🚨 **This boleto is {days_overdue} days overdue!**"))

        # Additional details if available
        if boleto.nosso_numero:
            results.append(TextOutput(f"**Bank Reference:** {boleto.nosso_numero}"))
        if boleto.numero_documento:
            results.append(TextOutput(f"**Document Number:** {boleto.numero_documento}"))
        if boleto.cpf_cnpj_beneficiario:
            results.append(TextOutput(f"**Beneficiary Tax ID:** {boleto.cpf_cnpj_beneficiario}"))

        run([results])
        return boleto

    except Exception as e:
        # Top-level boundary of the example: show the error instead of crashing.
        # run() is given a list of pages everywhere else in this example, so
        # the single widget is wrapped in its own page here as well.
        run([[TextOutput(f"❌ **Error parsing boleto:** {str(e)}")]])
        return None

# Process boleto; boleto_data is the parsed boleto, or None if the flow failed
boleto_data = boleto_processor()

NFSe Parsing

Extract service invoice information from Brazilian NFSe documents:

from abstra.ai import parse_nfse
from abstra.forms import FileInput, TextOutput, run

def nfse_analyzer():
    """Upload an NFSe, parse it, and display the service-invoice details.

    Runs an upload form, passes the uploaded file's path to ``parse_nfse``
    and shows the invoice header and amounts, followed by the provider,
    recipient and service-description sections when those fields are set.

    Returns:
        The parsed NFSe object, or ``None`` when parsing or displaying the
        results raised an exception (the error is shown to the user).
    """
    # Upload NFSe
    upload_form = [
        FileInput(
            label="Upload NFSe Document",
            key="nfse_file",
            accepted_formats=['.pdf', '.jpg', '.jpeg', '.png']
        )
    ]

    file_data = run([upload_form])
    nfse_file = file_data["nfse_file"]

    try:
        # Parse the NFSe
        nfse = parse_nfse(nfse_file.path)

        # Convert amounts from centavos to reais; a missing/zero amount is shown as 0.00
        net_cents = nfse.valor_liquido_centavos
        total_cents = nfse.valor_total_centavos
        net_amount = net_cents / 100 if net_cents else 0
        total_amount = total_cents / 100 if total_cents else 0

        # Display service invoice information
        results = [
            TextOutput("## 📋 Service Invoice Details"),
            TextOutput(f"**Invoice Number:** {nfse.numero_nota}"),
            TextOutput(f"**Issue Date:** {nfse.data_emissao}"),
            TextOutput(f"**Net Amount:** R$ {net_amount:.2f}"),
            TextOutput(f"**Total Amount:** R$ {total_amount:.2f}"),
        ]

        # Service provider information (section shown only when the name is set)
        if nfse.razao_social_prestador:
            results.extend([
                TextOutput("## 🏢 Service Provider"),
                TextOutput(f"**Company:** {nfse.razao_social_prestador}"),
                TextOutput(f"**CNPJ:** {nfse.cnpj_prestador}"),
            ])
            if nfse.endereco_prestador:
                results.append(TextOutput(f"**Address:** {nfse.endereco_prestador}"))
            if nfse.email_prestador:
                results.append(TextOutput(f"**Email:** {nfse.email_prestador}"))

        # Service recipient information (section shown only when the name is set)
        if nfse.razao_social_tomador:
            results.extend([
                TextOutput("## 👤 Service Recipient"),
                TextOutput(f"**Company:** {nfse.razao_social_tomador}"),
                TextOutput(f"**CNPJ:** {nfse.cnpj_tomador}"),
            ])
            if nfse.endereco_tomador:
                results.append(TextOutput(f"**Address:** {nfse.endereco_tomador}"))

        # Service description
        if nfse.descricao:
            results.extend([
                TextOutput("## 📝 Service Description"),
                TextOutput(nfse.descricao)
            ])

        run([results])
        return nfse

    except Exception as e:
        # Top-level boundary of the example: show the error instead of crashing.
        # run() is given a list of pages everywhere else in this example, so
        # the single widget is wrapped in its own page here as well.
        run([[TextOutput(f"❌ **Error parsing NFSe:** {str(e)}")]])
        return None

# Process NFSe; nfse_data is the parsed invoice, or None if the flow failed
nfse_data = nfse_analyzer()

NFe Parsing

Extract detailed information from Brazilian electronic invoices (NFe):

from abstra.ai import parse_nfe
from abstra.forms import FileInput, TextOutput, run

def _nfe_money_line(label, amount):
    """Return a labelled BRL amount line, or a labelled N/A when amount is None."""
    if amount is None:
        return f"**{label}:** N/A"
    return f"**{label}:** R$ {amount:.2f}"


def _nfe_text_line(label, value):
    """Return a labelled text line, or a labelled N/A when value is empty."""
    if not value:
        return f"**{label}:** N/A"
    return f"**{label}:** {value}"


def nfe_processor():
    """Upload an NFe, parse it, and display the electronic-invoice details.

    Runs an upload form, passes the uploaded file's path to ``parse_nfe``
    and shows the invoice header, followed by the financial, issuer,
    recipient, product, transportation and additional-information sections
    when their leading field is set.

    Optional fields inside a section keep their label when missing
    (e.g. ``**Product Code:** N/A``); a bare "N/A" made several lines
    indistinguishable.

    Returns:
        The parsed NFe object, or ``None`` when parsing or displaying the
        results raised an exception (the error is shown to the user).
    """
    # Upload NFe
    upload_form = [
        FileInput(
            label="Upload NFe Document",
            key="nfe_file",
            accepted_formats=['.pdf', '.jpg', '.jpeg', '.png']
        )
    ]

    file_data = run([upload_form])
    nfe_file = file_data["nfe_file"]

    try:
        # Parse the NFe
        nfe = parse_nfe(nfe_file.path)

        # Display basic invoice information
        results = [
            TextOutput("## 📄 Electronic Invoice (NFe)"),
            TextOutput(f"**Invoice Number:** {nfe.numero_nota}"),
            TextOutput(f"**Series:** {nfe.serie}"),
            TextOutput(f"**Access Key:** `{nfe.chave_acesso}`"),
            TextOutput(f"**Issue Date:** {nfe.data_emissao}"),
        ]

        # Financial information
        if nfe.valor_total:
            results.extend([
                TextOutput("## 💰 Financial Information"),
                TextOutput(_nfe_money_line("Products Value", nfe.valor_produtos)),
                TextOutput(f"**Total Value:** R$ {nfe.valor_total:.2f}"),
            ])

            # Tax information (a tax of 0 is still shown; only None is skipped)
            if nfe.valor_icms is not None:
                results.append(TextOutput(f"**ICMS Tax:** R$ {nfe.valor_icms:.2f}"))
            if nfe.valor_ipi is not None:
                results.append(TextOutput(f"**IPI Tax:** R$ {nfe.valor_ipi:.2f}"))

        # Issuer information
        if nfe.razao_social_emitente:
            results.extend([
                TextOutput("## 🏭 Issuer"),
                TextOutput(f"**Company:** {nfe.razao_social_emitente}"),
                TextOutput(f"**CNPJ:** {nfe.cnpj_emitente}"),
                TextOutput(f"**Address:** {nfe.endereco_emitente}" if nfe.endereco_emitente else "Address not available"),
            ])

        # Recipient information
        if nfe.nome_destinatario:
            results.extend([
                TextOutput("## 📦 Recipient"),
                TextOutput(f"**Name:** {nfe.nome_destinatario}"),
                TextOutput(f"**Tax ID:** {nfe.cpf_cnpj_destinatario}"),
                TextOutput(f"**Address:** {nfe.endereco_destinatario}" if nfe.endereco_destinatario else "Address not available"),
            ])

        # Product information
        if nfe.descricao_produto:
            results.extend([
                TextOutput("## 📋 Product Details"),
                TextOutput(f"**Description:** {nfe.descricao_produto}"),
                TextOutput(_nfe_text_line("Product Code", nfe.codigo_produto)),
                TextOutput(_nfe_text_line("NCM/SH", nfe.ncm_sh)),
                TextOutput(_nfe_money_line("Unit Value", nfe.valor_unitario)),
            ])

        # Transportation info
        if nfe.razao_social_transportadora:
            results.extend([
                TextOutput("## 🚛 Transportation"),
                TextOutput(f"**Carrier:** {nfe.razao_social_transportadora}"),
                TextOutput(f"**CNPJ:** {nfe.cnpj_transportadora}"),
                TextOutput(_nfe_text_line("Vehicle Plate", nfe.placa_veiculo)),
            ])

        # Additional information
        if nfe.informacoes_adicionais:
            results.extend([
                TextOutput("## ℹ️ Additional Information"),
                TextOutput(nfe.informacoes_adicionais)
            ])

        run([results])
        return nfe

    except Exception as e:
        # Top-level boundary of the example: show the error instead of crashing.
        # run() is given a list of pages everywhere else in this example, so
        # the single widget is wrapped in its own page here as well.
        run([[TextOutput(f"❌ **Error parsing NFe:** {str(e)}")]])
        return None

# Process NFe; nfe_data is the parsed invoice, or None if the flow failed
nfe_data = nfe_processor()

US Driver License

Extract personal information and licensing details from US driver's licenses:

from abstra.ai import parse_us_driver_license
from abstra.forms import FileInput, TextOutput, MarkdownOutput, run
from datetime import datetime

def driver_license_processor():
    """Upload a US driver license, parse it, and display the extracted details.

    Runs an upload form, passes the uploaded file's path to
    ``parse_us_driver_license`` and shows the personal and license fields
    plus an expiration-status section. Progress is logged to stdout.

    Returns:
        The parsed license object, or ``None`` when parsing failed (the
        error is shown to the user), so callers can test the result for
        truthiness.
    """
    # Upload driver license
    upload_form = [
        FileInput(
            label="Upload US Driver License (PDF, JPG, PNG)",
            key="license_file",
            accepted_formats=['.pdf', '.jpg', '.jpeg', '.png']
        )
    ]

    print("🪪 Starting driver license processing...")
    file_data = run([upload_form])
    license_file = file_data["license_file"]
    print(f"📁 File uploaded: {license_file.name}")

    # Parse the driver license using AI
    print("🔍 Parsing driver license with AI...")
    try:
        license_data = parse_us_driver_license(license_file.path)
    except Exception as e:
        # Top-level boundary of the example: report the failure and return
        # None instead of letting the exception escape.
        print(f"❌ Could not parse driver license: {e}")
        run([[TextOutput(f"❌ **Error parsing driver license:** {str(e)}")]])
        return None
    print("✅ Driver license parsed successfully!")

    # Display license information
    results = [
        MarkdownOutput("## 🪪 US Driver License Information"),
        MarkdownOutput("---"),
        MarkdownOutput("### 👤 Personal Information"),
        TextOutput(f"**Given Names:** {license_data.given_names or 'Not specified'}"),
        TextOutput(f"**Family Name:** {license_data.family_name or 'Not specified'}"),
        TextOutput(f"**Date of Birth:** {license_data.date_of_birth or 'Not specified'}"),
        TextOutput(f"**Address:** {license_data.address or 'Not specified'}"),
        MarkdownOutput("### 📄 License Details"),
        TextOutput(f"**Document ID:** {license_data.document_id or 'Not specified'}"),
        TextOutput(f"**Issue Date:** {license_data.issue_date or 'Not specified'}"),
        TextOutput(f"**Expiration Date:** {license_data.expiration_date or 'Not specified'}"),
    ]

    # Check if license is expired (if expiration date is available)
    try:
        expiration = license_data.expiration_date
        if expiration:
            # NOTE(review): the field's type is not visible here; a date
            # object is used as-is, while an ISO "YYYY-MM-DD" string or a
            # datetime is reduced to a calendar date first.
            if isinstance(expiration, str):
                expiration = datetime.strptime(expiration, "%Y-%m-%d")
            if isinstance(expiration, datetime):
                expiration = expiration.date()

            # Read the clock once so both values describe the same day
            days_until_expiry = (expiration - datetime.now().date()).days
            is_expired = days_until_expiry < 0

            results.append(MarkdownOutput("### ⏰ Expiration Status"))

            if is_expired:
                results.append(TextOutput("🚨 **This driver license has EXPIRED!**"))
                results.append(TextOutput(f"**Expired:** {abs(days_until_expiry)} days ago"))
            elif days_until_expiry <= 60:  # 2 months
                results.append(TextOutput(f"⚠️ **License expires in {days_until_expiry} days!**"))
                results.append(TextOutput("Consider renewing soon."))
            else:
                results.append(TextOutput(f"✅ **License is valid** (expires in {days_until_expiry} days)"))
    except (ValueError, AttributeError, TypeError) as e:
        # TypeError covers an expiration value of an unexpected type, which
        # would otherwise abort the whole example at the date subtraction.
        print(f"⚠️ Could not parse expiration date: {e}")
        results.append(MarkdownOutput("### ⏰ Expiration Status"))
        results.append(TextOutput("⚠️ Unable to determine expiration status"))

    # Log the extracted information for debugging
    print("📊 Extracted data:")
    print(f"  - Name: {license_data.given_names} {license_data.family_name}")
    print(f"  - DOB: {license_data.date_of_birth}")
    print(f"  - Document ID: {license_data.document_id}")
    print(f"  - Expiration: {license_data.expiration_date}")

    run([results])
    return license_data

# Kick off the driver-license flow and log the outcome to stdout
print("🚀 Starting driver license processor...")
license_info = driver_license_processor()
# A falsy result is reported as a failure
print(
    "🏁 Processing completed successfully!"
    if license_info
    else "❌ Processing failed."
)

Bank Statement

Extract transaction details and account information from bank statements:

from abstra.ai import parse_bank_statement
from abstra.forms import FileInput, TextOutput, MarkdownOutput, run

def bank_statement_processor():
    """Upload a bank statement, parse it, and display a summary.

    Runs an upload form, passes the uploaded file's path to
    ``parse_bank_statement`` and shows bank, account, period and balance
    information plus the net balance change when it can be computed.
    Progress is logged to stdout.

    Returns:
        The parsed statement object, or ``None`` when parsing failed (the
        error is shown to the user), so callers can test the result for
        truthiness.
    """
    # Upload bank statement
    upload_form = [
        FileInput(
            label="Upload Bank Statement (PDF, JPG, PNG)",
            key="statement_file",
            accepted_formats=['.pdf', '.jpg', '.jpeg', '.png']
        )
    ]

    print("📄 Starting bank statement processing...")
    file_data = run([upload_form])
    statement_file = file_data["statement_file"]
    print(f"📁 File uploaded: {statement_file.name}")

    # Parse the bank statement using AI
    print("🔍 Parsing bank statement with AI...")
    try:
        statement = parse_bank_statement(statement_file.path)
    except Exception as e:
        # Top-level boundary of the example: report the failure and return
        # None instead of letting the exception escape.
        print(f"❌ Could not parse bank statement: {e}")
        run([[TextOutput(f"❌ **Error parsing bank statement:** {str(e)}")]])
        return None
    print("✅ Bank statement parsed successfully!")

    # Display parsed bank statement information
    results = [
        MarkdownOutput("## 🏦 Bank Statement Summary"),
        MarkdownOutput("---"),
        MarkdownOutput("### 🏦 Bank Information"),
        TextOutput(f"**Bank Name:** {statement.bank_name or 'Not specified'}"),
        TextOutput(f"**Bank Address:** {statement.bank_address or 'Not specified'}"),
        MarkdownOutput("### 👤 Account Information"),
        TextOutput(f"**Client Name:** {statement.client_name or 'Not specified'}"),
        TextOutput(f"**Client Address:** {statement.client_address or 'Not specified'}"),
        TextOutput(f"**Account Number:** {statement.account_number or 'Not specified'}"),
        TextOutput(f"**Account Type:** {statement.account_type or 'Not specified'}"),
        MarkdownOutput("### 📅 Statement Period"),
        TextOutput(f"**Start Date:** {statement.statement_start_date or 'Not specified'}"),
        TextOutput(f"**End Date:** {statement.statement_end_date or 'Not specified'}"),
        MarkdownOutput("### 💰 Balance Information"),
        TextOutput(f"**Starting Balance:** {statement.starting_balance or 'Not specified'}"),
        TextOutput(f"**Ending Balance:** {statement.ending_balance or 'Not specified'}"),
    ]

    # Calculate balance change if both balances are available and numeric
    try:
        if statement.starting_balance and statement.ending_balance:
            # Strip the dollar sign and thousands separators before converting;
            # anything that still fails to convert lands in the except below.
            start_balance = float(statement.starting_balance.replace('$', '').replace(',', '').strip())
            end_balance = float(statement.ending_balance.replace('$', '').replace(',', '').strip())
            balance_change = end_balance - start_balance

            if balance_change > 0:
                results.append(TextOutput(f"**Net Change:** +${balance_change:.2f} ✅"))
            elif balance_change < 0:
                results.append(TextOutput(f"**Net Change:** -${abs(balance_change):.2f} ⚠️"))
            else:
                results.append(TextOutput("**Net Change:** $0.00 ➡️"))
    except (ValueError, AttributeError) as e:
        print(f"⚠️ Could not calculate balance change: {e}")
        results.append(TextOutput("**Net Change:** Unable to calculate"))

    # Log the extracted information for debugging
    print("📊 Extracted data:")
    print(f"  - Client: {statement.client_name}")
    print(f"  - Account: {statement.account_number}")
    print(f"  - Period: {statement.statement_start_date} to {statement.statement_end_date}")
    print(f"  - Balance: {statement.starting_balance} → {statement.ending_balance}")

    run([results])
    return statement

# Kick off the bank-statement flow and log the outcome to stdout
print("🚀 Starting bank statement processor...")
statement_data = bank_statement_processor()
# A falsy result is reported as a failure
print(
    "🏁 Processing completed successfully!"
    if statement_data
    else "❌ Processing failed."
)

Tips for Best Results

- Image Quality: Use high-resolution scans (300+ DPI)
- File Formats: PDF files typically provide the best results
- Lighting: Ensure even lighting without shadows or glare
- Orientation: Keep documents properly oriented and flat
- File Size: Keep files under 10MB for optimal processing speed
- Complete Documents: Include all pages and sections of the document

Quick Start

US Passport Parsing

Boleto Parsing

NFSe Parsing

NFe Parsing

US Driver License

Bank Statement

Tips for Best Results