{
  "name": "Azure AI Speech to Text",
  "slug": "azure-speech-to-text",
  "website_url": "https://azure.microsoft.com/en-us/products/ai-services/ai-speech",
  "vendor_description": "\"Azure Speech in Foundry Tools provides speech to text, text to speech, and other capabilities through a Microsoft Foundry resource. You can transcribe speech to text with high accuracy, produce natural-sounding text-to-speech voices, translate spoken audio, and conduct live AI voice conversations.\"",
  "categories": [
    "speech-to-text"
  ],
  "summary": "Azure AI Speech to Text is Microsoft's cloud speech recognition service, offering real-time transcription, batch processing, speaker diarization, pronunciation assessment, and speech translation across more than 30 Azure regions. It starts at $1.00 per hour of audio with a free tier of 5 hours per month, scales via usage-based pricing, and supports self-serve signup with no sales call required. SDKs cover C#, Python, JavaScript, Java, Go, and Objective-C, and the service holds SOC 2 Type II, HIPAA, GDPR, ISO 27001, and PCI DSS certifications.",
  "capabilities": {
    "supported_actions": [
      "transcribe_realtime",
      "transcribe_batch",
      "transcribe_fast",
      "speaker_diarization",
      "language_detection",
      "word_timestamps",
      "custom_speech_model",
      "pronunciation_assessment",
      "phrase_lists",
      "speech_translation",
      "keyword_recognition",
      "llm_speech_transcription",
      "post_stream_refinement"
    ],
    "supported_regions": [
      "South Africa North",
      "East Asia",
      "Southeast Asia",
      "Australia East",
      "Central India",
      "Japan East",
      "Japan West",
      "Korea Central",
      "Canada Central",
      "Canada East",
      "North Europe",
      "West Europe",
      "France Central",
      "Germany West Central",
      "Italy North",
      "Norway East",
      "Sweden Central",
      "Switzerland North",
      "Switzerland West",
      "UK South",
      "UK West",
      "UAE North",
      "Brazil South",
      "Qatar Central",
      "Central US",
      "East US",
      "East US 2",
      "North Central US",
      "South Central US",
      "West Central US",
      "West US",
      "West US 2",
      "West US 3"
    ],
    "supported_languages": [
      "Afrikaans (South Africa)",
      "Amharic (Ethiopia)",
      "Arabic (20+ locales)",
      "Assamese (India)",
      "Azerbaijani",
      "Bulgarian",
      "Bhojpuri (India)",
      "Bengali (India)",
      "Bosnian",
      "Catalan",
      "Czech",
      "Welsh",
      "Danish",
      "German (3 locales)",
      "Greek",
      "English (15+ locales)",
      "Spanish (22 locales)",
      "Estonian",
      "Basque",
      "Persian",
      "Finnish",
      "Filipino",
      "French (4 locales)",
      "Irish",
      "Galician",
      "Gujarati",
      "Hebrew",
      "Hindi",
      "Croatian",
      "Hungarian",
      "Armenian",
      "Indonesian",
      "Icelandic",
      "Italian (2 locales)",
      "Japanese",
      "Javanese",
      "Georgian",
      "Kazakh",
      "Khmer",
      "Kannada",
      "Korean",
      "Lao",
      "Lithuanian",
      "Latvian",
      "Macedonian",
      "Malayalam",
      "Mongolian",
      "Marathi",
      "Malay",
      "Maltese",
      "Burmese",
      "Norwegian Bokmål",
      "Nepali",
      "Dutch (2 locales)",
      "Odia",
      "Punjabi",
      "Polish",
      "Pashto",
      "Portuguese (2 locales)",
      "Romanian",
      "Russian",
      "Sinhala",
      "Slovak",
      "Slovenian",
      "Somali",
      "Albanian",
      "Serbian",
      "Swedish",
      "Kiswahili",
      "Tamil",
      "Telugu",
      "Thai",
      "Turkish",
      "Ukrainian",
      "Urdu",
      "Uzbek",
      "Vietnamese",
      "Chinese Wu (Simplified)",
      "Chinese Cantonese (Simplified)",
      "Chinese Mandarin (Simplified)",
      "Chinese Southwestern Mandarin",
      "Chinese Cantonese (Traditional)",
      "Chinese Taiwanese Mandarin",
      "isiZulu - 130+ languages/locales total"
    ],
    "input_types": [
      "audio/wav (PCM, default)",
      "audio/mp3",
      "audio/ogg (OPUS)",
      "audio/flac",
      "AMR",
      "AMR-WB",
      "A-Law",
      "Mu-Law",
      "streaming via WebSocket (Speech SDK)",
      "file via Azure Blob Storage SAS URI",
      "file via public URI",
      "live microphone stream"
    ],
    "output_types": [
      "JSON (with word-level timestamps, offset, duration, speaker labels)",
      "plain text",
      "SRT/VTT captions (via post-processing)",
      "word-level timestamps (batch and real-time SDK)",
      "diarization speaker labels"
    ],
    "webhooks_supported": true,
    "sandbox_available": false,
    "sdk_languages": [
      "C#/.NET",
      "Python",
      "JavaScript",
      "Java",
      "Go",
      "Objective-C"
    ],
    "mcp_server_available": true
  },
  "integration": {
    "api_style": "rest",
    "base_url": "https://{resource}.cognitiveservices.azure.com/speechtotext/",
    "api_version": "2025-10-15",
    "versioning_scheme": "url",
    "stability": "ga",
    "deprecation_policy_url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text",
    "auth_methods": [
      "api_key",
      "oauth2"
    ],
    "auth_docs_url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text-short#authentication",
    "idempotency_supported": null,
    "error_format": "vendor-specific",
    "webhook_signing": "hmac_sha256",
    "webhook_events_url": "https://learn.microsoft.com/en-us/rest/api/speechtotext/web-hooks/create?view=rest-speechtotext-2025-10-15",
    "rate_limit_requests": 100,
    "rate_limit_window": "concurrent",
    "sdk_packages": [
      {
        "package": "Microsoft.CognitiveServices.Speech",
        "language": "C#/.NET",
        "official": true,
        "repo_url": "https://github.com/Azure-Samples/cognitive-services-speech-sdk"
      },
      {
        "package": "azure-cognitiveservices-speech",
        "language": "Python",
        "official": true,
        "repo_url": "https://github.com/Azure-Samples/cognitive-services-speech-sdk"
      },
      {
        "package": "microsoft-cognitiveservices-speech-sdk",
        "language": "JavaScript",
        "official": true,
        "repo_url": "https://github.com/microsoft/cognitive-services-speech-sdk-js"
      },
      {
        "package": "com.microsoft.cognitiveservices.speech:client-sdk",
        "language": "Java",
        "official": true,
        "repo_url": "https://github.com/Azure-Samples/cognitive-services-speech-sdk"
      },
      {
        "package": "github.com/Microsoft/cognitive-services-speech-sdk-go",
        "language": "Go",
        "official": true,
        "repo_url": "https://github.com/Microsoft/cognitive-services-speech-sdk-go"
      },
      {
        "package": null,
        "language": "Objective-C",
        "official": true,
        "repo_url": "https://github.com/Azure-Samples/cognitive-services-speech-sdk"
      }
    ],
    "mcp_url": "https://learn.microsoft.com/en-us/azure/developer/azure-mcp-server/services/azure-mcp-speech-foundry-tools",
    "quickstart_url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text"
  },
  "pricing": {
    "pricing_model": "usage_based",
    "has_published_pricing": true,
    "starting_price_usd": "1.0000",
    "price_basis": "hour of audio",
    "free_tier_available": true,
    "free_tier_limit": "5 hours/month",
    "free_tier_details": "Free (F0) tier: 5 audio hours per month for Standard and Custom Speech to Text (shared; batch transcription not available on F0). Resets monthly. Concurrent request limit of 1 (not adjustable). No SLA on F0 tier.",
    "minimum_commitment": null,
    "self_serve_signup": true,
    "requires_sales_call": false,
    "requires_verification": false,
    "enterprise_plan_available": true,
    "price_points": [
      {
        "plan": "Free (F0)",
        "item": "Standard real-time speech to text",
        "amount_usd": "0.0000",
        "amount_percent": null,
        "per": "5 audio hours per month",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Standard real-time speech to text",
        "amount_usd": "1.0000",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Fast transcription (synchronous file-based)",
        "amount_usd": "0.3600",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Batch transcription",
        "amount_usd": "0.1800",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Custom speech real-time transcription",
        "amount_usd": "1.2000",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Custom speech batch transcription",
        "amount_usd": "0.2250",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Custom model training",
        "amount_usd": "10.0000",
        "amount_percent": null,
        "per": "compute hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Custom model endpoint hosting",
        "amount_usd": "0.0538",
        "amount_percent": null,
        "per": "model per hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Language identification add-on (real-time)",
        "amount_usd": "0.3000",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Speaker diarization add-on (real-time)",
        "amount_usd": "0.3000",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Pay As You Go",
        "item": "Pronunciation assessment add-on (real-time)",
        "amount_usd": "0.3000",
        "amount_percent": null,
        "per": "audio hour",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Commitment Tier — Standard 2,000 hrs/mo",
        "item": "Standard real-time speech to text",
        "amount_usd": "1600.0000",
        "amount_percent": null,
        "per": "month (2,000 hours included; $0.80/hr effective)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Commitment Tier — Standard 10,000 hrs/mo",
        "item": "Standard real-time speech to text",
        "amount_usd": "6500.0000",
        "amount_percent": null,
        "per": "month (10,000 hours included; $0.65/hr effective)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Commitment Tier — Standard 50,000 hrs/mo",
        "item": "Standard real-time speech to text",
        "amount_usd": "25000.0000",
        "amount_percent": null,
        "per": "month (50,000 hours included; $0.50/hr effective)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Commitment Tier — Custom 2,000 hrs/mo",
        "item": "Custom speech real-time transcription",
        "amount_usd": "1920.0000",
        "amount_percent": null,
        "per": "month (2,000 hours included; $0.96/hr effective)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Commitment Tier — Custom 50,000 hrs/mo",
        "item": "Custom speech real-time transcription",
        "amount_usd": "30000.0000",
        "amount_percent": null,
        "per": "month (50,000 hours included; $0.60/hr effective)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Connected Container — Standard 2,000 hrs/mo",
        "item": "Standard real-time speech to text (connected container)",
        "amount_usd": "1520.0000",
        "amount_percent": null,
        "per": "month (2,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Connected Container — Standard 50,000 hrs/mo",
        "item": "Standard real-time speech to text (connected container)",
        "amount_usd": "23750.0000",
        "amount_percent": null,
        "per": "month (50,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Connected Container — Custom 2,000 hrs/mo",
        "item": "Custom speech real-time transcription (connected container)",
        "amount_usd": "1824.0000",
        "amount_percent": null,
        "per": "month (2,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Connected Container — Custom 50,000 hrs/mo",
        "item": "Custom speech real-time transcription (connected container)",
        "amount_usd": "28500.0000",
        "amount_percent": null,
        "per": "month (50,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Disconnected Container — Standard 120,000 hrs/yr",
        "item": "Standard real-time speech to text (disconnected/air-gapped container)",
        "amount_usd": "74100.0000",
        "amount_percent": null,
        "per": "year (120,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Disconnected Container — Standard 600,000 hrs/yr",
        "item": "Standard real-time speech to text (disconnected/air-gapped container)",
        "amount_usd": "285000.0000",
        "amount_percent": null,
        "per": "year (600,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Disconnected Container — Custom 120,000 hrs/yr",
        "item": "Custom speech real-time transcription (disconnected/air-gapped container)",
        "amount_usd": "88920.0000",
        "amount_percent": null,
        "per": "year (120,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      },
      {
        "plan": "Disconnected Container — Custom 600,000 hrs/yr",
        "item": "Custom speech real-time transcription (disconnected/air-gapped container)",
        "amount_usd": "342000.0000",
        "amount_percent": null,
        "per": "year (600,000 hours included)",
        "currency": "USD",
        "source_url": "https://azure.microsoft.com/en-us/pricing/details/speech/"
      }
    ]
  },
  "compliance": {
    "soc2": "type_2",
    "hipaa": true,
    "gdpr": true,
    "iso_27001": true,
    "pci_dss": true,
    "sla_published": true,
    "sla_url": "https://azure.microsoft.com/en-us/support/legal/sla/cognitive-services/v1_1/",
    "data_retention_policy_url": "https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/speech-service/speech-to-text/data-privacy-security",
    "documented_rate_limits": "Real-time speech to text: 100 concurrent requests per resource (base model and custom endpoint, adjustable for S0). Fast transcription: 600 requests per minute (adjustable). Batch transcription REST API: 100 requests per 10 seconds (600/min). Free (F0) concurrent request limit: 1 (not adjustable). Batch transcription: max audio file size 1 GB, max audio length 240 min (with diarization), max 1,000 files per request, max 10,000 blobs per container.",
    "known_restrictions": [
      "Free (F0) tier does not support batch transcription",
      "Free (F0) concurrent request limit of 1 is not adjustable",
      "Maximum audio file size for batch and fast transcription: 500 MB (fast) / 1 GB (batch)",
      "Maximum audio length for fast transcription: 5 hours per file",
      "Maximum diarization audio length: 240 minutes per session/file",
      "Diarization supports up to 35 speakers",
      "Real-time diarization session max: 240 minutes",
      "Data is processed only within the region of the Azure Speech resource (no cross-region processing)",
      "Sovereign cloud availability limited (Azure Government, 21Vianet)"
    ]
  },
  "developer": {
    "docs_url": "https://azure.microsoft.com/en-us/resources/developers/",
    "api_reference_url": null,
    "openapi_spec_url": null,
    "postman_collection_url": null,
    "changelog_url": null,
    "status_page_url": null,
    "llms_txt_url": "https://azure.microsoft.com/llms.txt",
    "markdown_docs_url": null,
    "rendering": "static"
  },
  "adoption": {
    "launched_at": "2018-09-24",
    "ga_date": "2018-09-24",
    "github_stars": null,
    "github_stars_at": null,
    "notable_customers": [
      "Microsoft Teams",
      "Microsoft Office 365",
      "Microsoft Edge"
    ]
  },
  "scores": {
    "agent_friendliness": 65,
    "pricing_transparency": 100,
    "setup_speed": 85,
    "docs_quality": 25,
    "procurement_friction": 100,
    "trust_readiness": 100,
    "best_for": [
      "Prototypes and side projects - free to start, no sales call",
      "Regulated or enterprise workloads - compliance attestations and an enterprise plan",
      "AI agents and automation - an agent-ready surface (MCP / llms.txt)"
    ],
    "avoid_if": []
  },
  "sources": [
    {
      "field": "categories",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-to-text",
      "excerpt": "Azure Speech in Foundry Tools service offers advanced speech to text capabilities. This feature supports both real-time and batch transcription, providing versatile solutions for converting audio streams into text.",
      "confidence": "high"
    },
    {
      "field": "supported_actions",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-to-text",
      "excerpt": "The speech to text service offers the following core features: Real-time transcription: Instant transcription with intermediate results for live audio inputs. Fast transcription: Fastest synchronous output for situations with predictable latency. Batch transcription: Efficient processing for large volumes of prerecorded audio. Custom speech: Models with enhanced accuracy for specific domains and conditions.",
      "confidence": "high"
    },
    {
      "field": "supported_languages",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support",
      "excerpt": "Speech-to-Text supports 130+ languages and locales.",
      "confidence": "high"
    },
    {
      "field": "categories",
      "url": "https://azure.microsoft.com/en-us/products/ai-services/ai-speech",
      "excerpt": "Azure AI Speech to Text is the STT SKU; Azure TTS is sold separately. Per product modeling rules, only `speech` applies.",
      "confidence": "high"
    },
    {
      "field": "vendor_description",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/overview",
      "excerpt": "Azure Speech in Foundry Tools provides speech to text, text to speech, and other capabilities through a Microsoft Foundry resource. You can transcribe speech to text with high accuracy, produce natural-sounding text-to-speech voices, translate spoken audio, and conduct live AI voice conversations.",
      "confidence": "high"
    },
    {
      "field": "supported_actions",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-to-text",
      "excerpt": "Diarization is the process of distinguishing and separating different speakers in an audio recording. This feature is particularly useful for transcribing conversations, meetings, or any multi-speaker audio content. The Speech service can identify up to 35 different speakers in an audio recording.",
      "confidence": "high"
    },
    {
      "field": "supported_languages",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/language-support",
      "excerpt": "Last Updated: June 19, 2026 (per document metadata). Multiple new MAI (Multi-Accent Intelligence) models have been added.",
      "confidence": "high"
    },
    {
      "field": "sdk_languages",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-sdk",
      "excerpt": "The Speech SDK supports the following languages and platforms: C#, C++, Go, Java, JavaScript, Objective-C, Python, Swift",
      "confidence": "high"
    },
    {
      "field": "mcp_server_available",
      "url": "https://learn.microsoft.com/en-us/azure/developer/azure-mcp-server/services/azure-mcp-speech-foundry-tools",
      "excerpt": "Azure Model Context Protocol (MCP) Server enables AI assistants like GitHub Copilot, Claude Desktop, and others to interact with Azure Speech in Foundry Tools through natural language commands.",
      "confidence": "high"
    },
    {
      "field": "pricing_model",
      "url": "https://blocksentient.com/review/microsoft-azure-speech-service/",
      "excerpt": "Standard Real-time: $1/hour. Fast Transcription: $0.36/hour. Batch Transcription: $0.18/hour. Pay-as-You-Go Rates with no mandatory recurring base fee.",
      "confidence": "high"
    },
    {
      "field": "free_tier_available",
      "url": "https://blocksentient.com/review/microsoft-azure-speech-service/",
      "excerpt": "The F0 tier provides users with 5 audio hours free per month for both standard and custom speech-to-text (batch excluded), plus one hosted custom model monthly with automatic decommissioning after 7 days if unused.",
      "confidence": "high"
    },
    {
      "field": "supported_regions",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/regions",
      "excerpt": "Azure Speech allows your application to convert audio to text, perform speech translation, and convert text to speech. Azure Speech is available in multiple regions with unique endpoints for the Speech SDK and REST APIs.",
      "confidence": "high"
    },
    {
      "field": "webhooks_supported",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/batch-transcription-create",
      "excerpt": "Instead of polling for transcription status, you can register a webhook to receive a notification when a transcription job completes (or reaches any other terminal state). The Speech service sends HTTP POST callbacks to your endpoint for transcription.created, transcription.processing, transcription.succeeded, transcription.failed, and transcription.deleted events.",
      "confidence": "high"
    },
    {
      "field": "pricing_model",
      "url": "https://azure.microsoft.com/en-us/pricing/details/speech/",
      "excerpt": null,
      "confidence": "high"
    },
    {
      "field": "has_published_pricing",
      "url": "https://azure.microsoft.com/en-us/pricing/details/speech/",
      "excerpt": null,
      "confidence": "high"
    },
    {
      "field": "free_tier_available",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits",
      "excerpt": "For the Free (F0) pricing tier, see the monthly allowances on the pricing page.",
      "confidence": "high"
    },
    {
      "field": "soc2",
      "url": "https://learn.microsoft.com/en-us/azure/compliance/offerings/offering-soc-2",
      "excerpt": "Microsoft Azure, Dynamics 365, and other Microsoft cloud services undergo rigorous independent third-party SOC 2 Type 2 audits conducted by a reputable certified public accountant (CPA) firm.",
      "confidence": "high"
    },
    {
      "field": "gdpr",
      "url": "https://learn.microsoft.com/en-us/legal/cognitive-services/speech-service/speech-to-text/data-privacy-security",
      "excerpt": "This article provides some high-level details regarding how speech to text processes data provided by customers. Note that audio data of humans speaking and the related text transcripts may be considered personal data and/or sensitive data under various privacy regulations and laws.",
      "confidence": "high"
    },
    {
      "field": "pci_dss",
      "url": "https://learn.microsoft.com/en-us/azure/compliance/offerings/offering-pci-dss",
      "excerpt": "Microsoft Azure maintains a PCI DSS validation using an approved Qualified Security Assessor (QSA), and is certified as compliant under PCI DSS version 4.0 at Service Provider Level 1.",
      "confidence": "medium"
    },
    {
      "field": "sla_published",
      "url": "https://www.azure.cn/en-us/support/sla/cognitive-services/",
      "excerpt": "We guarantee that Cognitive Services will be available at least 99.9% of the time.",
      "confidence": "high"
    },
    {
      "field": "sandbox_available",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/overview",
      "excerpt": "Run Azure Speech anywhere, in the cloud or at the edge in containers. Enable your applications, tools, and devices for speech by using the Speech CLI, Speech SDK, and REST APIs.",
      "confidence": "high"
    },
    {
      "field": "api_version",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text",
      "excerpt": "Speech to text REST API version `2025-10-15` is the latest version that's generally available.",
      "confidence": "high"
    },
    {
      "field": "stability",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text",
      "excerpt": "Speech to text REST API version `2025-10-15` is the latest version that's generally available.",
      "confidence": "high"
    },
    {
      "field": "auth_methods",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text-short#authentication",
      "excerpt": "When using the Speech SDK to access the Speech service, there are three authentication methods available: service keys, a key-based token, and Microsoft Entra ID.",
      "confidence": "high"
    },
    {
      "field": "auth_docs_url",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/how-to-configure-azure-ad-auth",
      "excerpt": "This article shows how to use Microsoft Entra authentication with the Speech SDK.",
      "confidence": "high"
    },
    {
      "field": "base_url",
      "url": "https://learn.microsoft.com/en-us/rest/api/speechtotext/web-hooks/create?view=rest-speechtotext-2025-10-15",
      "excerpt": "POST {endpoint}/speechtotext/webhooks?api-version=2025-10-15",
      "confidence": "high"
    },
    {
      "field": "sdk_packages",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text",
      "excerpt": "Python: `azure-cognitiveservices-speech` ... C#/.NET: `Microsoft.CognitiveServices.Speech` (NuGet) ... JavaScript/Node.js: `microsoft-cognitiveservices-speech-sdk` (npm) ... Java: `com.microsoft.cognitiveservices.speech:client-sdk` (Maven)",
      "confidence": "high"
    },
    {
      "field": "webhook_events_url",
      "url": "https://learn.microsoft.com/en-us/rest/api/speechtotext/web-hooks/create?view=rest-speechtotext-2025-10-15",
      "excerpt": "transcriptionCompletion | transcriptionCreation | transcriptionDeletion | transcriptionProcessing | datasetCompletion | datasetCreation | datasetDeletion | datasetProcessing | endpointCompletion | endpointCreation | endpointDeletion | endpointProcessing | evaluationCompletion | evaluationCreation | evaluationDeletion | evaluationProcessing | modelCompletion | modelCreation | modelDeletion | modelProcessing",
      "confidence": "high"
    },
    {
      "field": "rate_limit_requests",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits",
      "excerpt": "Concurrent request limit for base model endpoint | 1 This limit isn't adjustable. | 100 (default value)The rate is adjustable for Standard (S0) resources.",
      "confidence": "high"
    },
    {
      "field": "starting_price_usd",
      "url": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/speech-services/",
      "excerpt": null,
      "confidence": "medium"
    },
    {
      "field": "free_tier_limit",
      "url": "https://azure.microsoft.com/en-us/pricing/details/cognitive-services/speech-services/",
      "excerpt": null,
      "confidence": "medium"
    },
    {
      "field": "mcp_url",
      "url": "https://learn.microsoft.com/en-us/azure/developer/azure-mcp-server/services/azure-mcp-speech-foundry-tools",
      "excerpt": "Azure Model Context Protocol (MCP) Server enables AI assistants like GitHub Copilot, Claude Desktop, and others to interact with Azure Speech in Foundry Tools through natural language commands.",
      "confidence": "high"
    },
    {
      "field": "ga_date",
      "url": "https://azure.microsoft.com/en-us/updates/azure-cognitive-services-speech-service-is-now-generally-available/",
      "excerpt": null,
      "confidence": "high"
    },
    {
      "field": "quickstart_url",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/get-started-speech-to-text",
      "excerpt": null,
      "confidence": "high"
    },
    {
      "field": "documented_rate_limits",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits",
      "excerpt": "Concurrent request limit for base model endpoint: Free (F0) = 1 (not adjustable); Standard (S0) = 100 (default, adjustable). Fast transcription Maximum requests per minute: Standard (S0) = 600. Batch transcription REST API limit: Standard (S0) = 100 requests per 10 seconds.",
      "confidence": "high"
    },
    {
      "field": "hipaa",
      "url": "https://learn.microsoft.com/en-us/azure/compliance/offerings/offering-hipaa-us",
      "excerpt": "Azure has enabled the physical, technical, and administrative safeguards required by HIPAA and the HITECH Act inside the in-scope Azure services, and offers a HIPAA BAA as part of the Microsoft Product Terms to all customers who are covered entities or business associates under HIPAA.",
      "confidence": "high"
    },
    {
      "field": "iso_27001",
      "url": "https://learn.microsoft.com/en-us/azure/compliance/offerings/offering-hipaa-us",
      "excerpt": "Both Azure and Azure Government align with the NIST CSF and are certified under ISO/IEC 27001.",
      "confidence": "high"
    },
    {
      "field": "pci_dss",
      "url": "https://learn.microsoft.com/en-us/azure/compliance/offerings/cloud-services-in-audit-scope",
      "excerpt": "See Appendices A and B in Microsoft Azure Compliance Offerings for detailed insight into which cloud services are in scope for the following compliance offerings: PCI DSS.",
      "confidence": "medium"
    },
    {
      "field": "sla_published",
      "url": "https://azure.microsoft.com/en-us/support/legal/sla/cognitive-services/v1_1/",
      "excerpt": null,
      "confidence": "high"
    },
    {
      "field": "known_restrictions",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits",
      "excerpt": "Maximum audio input file size (fast transcription): < 500 MB. Maximum audio length (fast transcription): < 5 hours per file. Maximum audio length for transcriptions with diarization enabled: 240 minutes per file. Maximum number of files per transcription request: 1,000. Maximum file size for audio input (batch): 1 GB.",
      "confidence": "high"
    },
    {
      "field": "versioning_scheme",
      "url": "https://learn.microsoft.com/en-us/rest/api/speechtotext/web-hooks/create?view=rest-speechtotext-2025-10-15",
      "excerpt": "POST {endpoint}/speechtotext/webhooks?api-version=2025-10-15",
      "confidence": "high"
    },
    {
      "field": "deprecation_policy_url",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text",
      "excerpt": "Speech to text REST API `v3.0`, `3.2-preview.1`, and `3.2-preview.2` were retired on March 31, 2026.",
      "confidence": "high"
    },
    {
      "field": "auth_methods",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text-short",
      "excerpt": "Either this header or `Ocp-Apim-Subscription-Key` is required. `Authorization` | An authorization token preceded by the word `Bearer`.",
      "confidence": "high"
    },
    {
      "field": "base_url",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/rest-speech-to-text-short",
      "excerpt": "The endpoint for the REST API for short audio has this format: `https://YourResourceName.cognitiveservices.azure.com/stt/speech/recognition/conversation/cognitiveservices/v1`",
      "confidence": "high"
    },
    {
      "field": "sdk_packages",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-sdk",
      "excerpt": "C# | .NET | Windows, Linux, macOS, Mono, UWP ... Go | Go | Linux ... Java | Java | Android, Windows, Linux, macOS ... JavaScript | JavaScript | Browser, Node.js ... Python | Python | Windows, Linux, macOS",
      "confidence": "high"
    },
    {
      "field": "webhook_signing",
      "url": "https://learn.microsoft.com/en-us/rest/api/speechtotext/web-hooks/create?view=rest-speechtotext-2025-10-15",
      "excerpt": "Creates a new web hook. If the property secret in the configuration is present and contains a non-empty string, it will be used to create a SHA256 hash of the payload with the secret as HMAC key. This hash will be set as X-MicrosoftSpeechServices-Signature header when calling back into the registered URL.",
      "confidence": "high"
    },
    {
      "field": "error_format",
      "url": "https://learn.microsoft.com/en-us/rest/api/speechtotext/web-hooks/create?view=rest-speechtotext-2025-10-15",
      "excerpt": "Error | Name: code | Type: ErrorCode | Description: High level error codes. ... Name: innerError | Type: InnerError | Description: New Inner Error format which conforms to Cognitive Services API Guidelines",
      "confidence": "high"
    },
    {
      "field": "rate_limit_window",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-services-quotas-and-limits",
      "excerpt": "These limits apply to concurrent real-time speech-to-text requests and speech translation requests *combined*.",
      "confidence": "high"
    },
    {
      "field": "price_basis",
      "url": "https://learn.microsoft.com/en-us/answers/questions/2155625/speech-to-text-costing-1-hr-is-crazy-no-bulk-avail",
      "excerpt": null,
      "confidence": "medium"
    },
    {
      "field": "data_retention_policy_url",
      "url": "https://learn.microsoft.com/en-us/azure/foundry/responsible-ai/speech-service/speech-to-text/data-privacy-security",
      "excerpt": "When doing real-time speech to text, fast transcription, pronunciation assessment, and speech translation, Microsoft does not retain or store the data provided by customers.",
      "confidence": "high"
    },
    {
      "field": "notable_customers",
      "url": "https://learn.microsoft.com/en-us/azure/ai-services/speech-service/overview",
      "excerpt": "Microsoft uses Azure Speech for many scenarios, such as captioning in Microsoft Teams, dictation in Microsoft Office 365, and Read Aloud in the Microsoft Edge browser.",
      "confidence": "high"
    },
    {
      "field": "sla_url",
      "url": "https://azure.microsoft.com/en-us/support/legal/sla/cognitive-services/v1_1/",
      "excerpt": null,
      "confidence": "medium"
    }
  ],
  "fields_not_found": [
    "minimum_commitment",
    "exact_published_SLA_percentage_in_v1_1_doc (99.9% confirmed from azure.cn SLA mirror)",
    "idempotency_supported",
    "sla_uptime_percentage"
  ],
  "source_confidence": "high",
  "last_verified_at": "2026-06-21T00:00:00.000Z",
  "contribute": {
    "note": "This data is crowd-editable. Suggest corrections or leave a review via these endpoints - no auth required, requests are rate-limited, and every submission is reviewed before it goes live. For a field edit, {field} is any key in this response and a citation is required.",
    "endpoints": [
      {
        "action": "review",
        "label": "Leave a review or comment",
        "method": "POST",
        "url": "https://apio.sh/api/feedback/azure-speech-to-text",
        "example_body": {
          "kind": "review",
          "rating": 5,
          "body": "Your experience with this API…"
        }
      },
      {
        "action": "field_edit",
        "label": "Suggest a correction to a field (cite a source)",
        "method": "POST",
        "url": "https://apio.sh/api/suggest/azure-speech-to-text/{field}",
        "example_body": {
          "value": "corrected value",
          "citations": [
            {
              "url": "https://source.example/page",
              "excerpt": "supporting quote"
            }
          ],
          "note": "what changed and why"
        }
      },
      {
        "action": "new_api",
        "label": "Suggest a new API to add (only name is required - send as much as you can cite)",
        "method": "POST",
        "url": "https://apio.sh/api/suggest/api",
        "example_body": {
          "value": {
            "name": "Acme Email API",
            "website_url": "https://acme.example",
            "vendor_name": "Acme",
            "categories": [
              "email"
            ],
            "pricing_model": "usage_based",
            "has_published_pricing": true,
            "free_tier_available": true,
            "price_points": [
              {
                "plan": "Pay as you go",
                "item": "1,000 emails",
                "amount_usd": 1,
                "per": "1,000 emails",
                "source_url": "https://acme.example/pricing"
              }
            ],
            "soc2": "type_2",
            "docs_url": "https://docs.acme.example"
          },
          "citations": [
            {
              "url": "https://acme.example/pricing",
              "excerpt": "$1 per 1,000 emails"
            }
          ]
        },
        "value_schema": [
          {
            "field": "name",
            "type": "string",
            "label": "Product name (the only required field)"
          },
          {
            "field": "website_url",
            "type": "string",
            "label": "Marketing website URL"
          },
          {
            "field": "vendor_name",
            "type": "string",
            "label": "Company/vendor name behind the product"
          },
          {
            "field": "vendor_description",
            "type": "string",
            "label": "One-line description, quoted from the vendor"
          },
          {
            "field": "categories",
            "type": "string[]",
            "label": "Category/leaf slugs it belongs to, e.g. [\"email\",\"sms\"]"
          },
          {
            "field": "primary_use_cases",
            "type": "string[]",
            "label": "What it's used for"
          },
          {
            "field": "supported_actions",
            "type": "string[]",
            "label": "Concrete API actions/capabilities"
          },
          {
            "field": "supported_regions",
            "type": "string[]",
            "label": "Countries/regions/data-residency"
          },
          {
            "field": "supported_languages",
            "type": "string[]",
            "label": "Human/spoken languages supported"
          },
          {
            "field": "input_types",
            "type": "string[]",
            "label": "Accepted input types/formats"
          },
          {
            "field": "output_types",
            "type": "string[]",
            "label": "Produced output types/formats"
          },
          {
            "field": "webhooks_supported",
            "type": "bool",
            "label": "Supports webhooks?"
          },
          {
            "field": "sandbox_available",
            "type": "bool",
            "label": "Dedicated sandbox/test mode?"
          },
          {
            "field": "sdk_languages",
            "type": "string[]",
            "label": "Languages with an official SDK"
          },
          {
            "field": "mcp_server_available",
            "type": "bool",
            "label": "Official MCP server?"
          },
          {
            "field": "pricing_model",
            "type": "enum",
            "label": "Pricing model",
            "values": [
              "usage_based",
              "subscription",
              "seat_based",
              "hybrid",
              "contact_sales"
            ]
          },
          {
            "field": "has_published_pricing",
            "type": "bool",
            "label": "Concrete prices published publicly?"
          },
          {
            "field": "free_tier_available",
            "type": "bool",
            "label": "Recurring/perpetual free tier (not a trial)?"
          },
          {
            "field": "free_tier_details",
            "type": "string",
            "label": "What the free tier includes"
          },
          {
            "field": "minimum_commitment",
            "type": "string",
            "label": "Any stated minimum spend/commitment"
          },
          {
            "field": "self_serve_signup",
            "type": "bool",
            "label": "Get an API key without talking to sales?"
          },
          {
            "field": "requires_sales_call",
            "type": "bool",
            "label": "Does going live require sales?"
          },
          {
            "field": "enterprise_plan_available",
            "type": "bool",
            "label": "Named enterprise tier?"
          },
          {
            "field": "starting_price_usd",
            "type": "number",
            "label": "Lowest published paid entry price (USD number)"
          },
          {
            "field": "price_basis",
            "type": "string",
            "label": "Unit the starting price is per, e.g. '1,000 emails'"
          },
          {
            "field": "free_tier_limit",
            "type": "string",
            "label": "Free-tier ceiling in literal terms"
          },
          {
            "field": "price_points",
            "type": "object[]",
            "label": "Structured prices: [{plan,item,amount_usd,amount_percent,per,source_url}]"
          },
          {
            "field": "soc2",
            "type": "enum",
            "label": "SOC 2 status",
            "values": [
              "type_2",
              "type_1",
              "in_progress",
              "none",
              "unknown"
            ]
          },
          {
            "field": "hipaa",
            "type": "bool",
            "label": "HIPAA (e.g. BAA available)?"
          },
          {
            "field": "gdpr",
            "type": "bool",
            "label": "GDPR compliance stated?"
          },
          {
            "field": "iso_27001",
            "type": "bool",
            "label": "ISO 27001 certified?"
          },
          {
            "field": "pci_dss",
            "type": "bool",
            "label": "PCI DSS compliant?"
          },
          {
            "field": "sla_published",
            "type": "bool",
            "label": "Published SLA / uptime commitment?"
          },
          {
            "field": "documented_rate_limits",
            "type": "string",
            "label": "Documented rate limits, quoted"
          },
          {
            "field": "known_restrictions",
            "type": "string[]",
            "label": "Notable documented restrictions/limits"
          },
          {
            "field": "auth_methods",
            "type": "string[]",
            "label": "Auth methods",
            "values": [
              "api_key",
              "oauth2",
              "jwt",
              "basic",
              "hmac_signature",
              "mtls",
              "session"
            ]
          },
          {
            "field": "api_style",
            "type": "enum",
            "label": "Primary API style",
            "values": [
              "rest",
              "graphql",
              "grpc",
              "soap",
              "websocket"
            ]
          },
          {
            "field": "base_url",
            "type": "string",
            "label": "API root/base URL"
          },
          {
            "field": "api_version",
            "type": "string",
            "label": "Current API version string"
          },
          {
            "field": "quickstart_url",
            "type": "string",
            "label": "Quickstart / hello-world URL"
          },
          {
            "field": "docs_url",
            "type": "string",
            "label": "Developer docs URL"
          },
          {
            "field": "api_reference_url",
            "type": "string",
            "label": "API reference URL"
          },
          {
            "field": "openapi_spec_url",
            "type": "string",
            "label": "OpenAPI spec URL"
          },
          {
            "field": "changelog_url",
            "type": "string",
            "label": "Changelog URL"
          },
          {
            "field": "status_page_url",
            "type": "string",
            "label": "Status page URL"
          },
          {
            "field": "notable_customers",
            "type": "string[]",
            "label": "Named public customers"
          },
          {
            "field": "launched_at",
            "type": "string",
            "label": "Launch date (ISO date or year)"
          }
        ]
      },
      {
        "action": "token",
        "label": "Optional - raise your rate limit",
        "method": "GET",
        "url": "https://apio.sh/api/feedback/token"
      }
    ]
  }
}