{
  "schema": "lmruntime.package-catalog.v3",
  "version_policy": "Public documentation and install commands intentionally omit UAIX.LmRuntime package version numbers. NuGet remains authoritative for current release metadata.",
  "packages": {
    "abstractions": {
      "route": "package-abstractions",
      "id": "UAIX.LmRuntime.Abstractions",
      "name": "Abstractions",
      "tagline": "Stable runtime contracts, request and response models, diagnostics, and governance interfaces.",
      "description": "Stable public interfaces, canonical inference contracts, runtime settings, and diagnostics for pure C# local LLM runtime packages.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Abstractions",
      "dependencies": [],
      "audience": "Runtime authors, adapter authors, orchestration layers, and applications that need common contracts without taking a model-format or execution dependency.",
      "install_when": [
        "You are implementing IInferenceRuntime, IInferenceSession, IModelAdapter, ITokenizer, or IUaiMemoryStore.",
        "You need canonical chat, tool, streaming, token-count, usage, error, and runtime-setting contracts.",
        "You need budget, claim-boundary, constraint, review-gate, memory-firewall, or Teleodynamic governance contracts."
      ],
      "not_for": [
        "Reading GGUF files, running tensor kernels, or generating tokens by itself.",
        "Granting tools, commands, network access, or execution authority through memory or governance metadata."
      ],
      "key_types": [
        "IInferenceRuntime",
        "IInferenceSession",
        "IModelAdapter",
        "ITokenizer",
        "InferenceRequest",
        "InferenceResponse",
        "LlmMessage",
        "StreamingDelta",
        "RuntimeOptions",
        "IRuntimeBudgetGovernor",
        "IReviewGatePolicy",
        "IMemoryFirewall"
      ],
      "concepts": [
        {
          "title": "Contracts first",
          "text": "Applications can depend on stable requests, responses, messages, usage records, streaming deltas, model descriptors, tools, and response formats while leaving the concrete runtime replaceable."
        },
        {
          "title": "Async and streaming",
          "text": "The inference contract exposes both a complete response and IAsyncEnumerable streaming deltas, with cooperative CancellationToken flow."
        },
        {
          "title": "Governance is explicit",
          "text": "Budget, review, evidence, claim-boundary, constraint, quarantine, and memory-firewall APIs are separate policies. They do not silently alter model mathematics."
        }
      ],
      "examples": [
        {
          "title": "Create and execute an inference request",
          "description": "Accept any IInferenceRuntime implementation and construct a provider-neutral request.",
          "filename": "InferenceExample.cs",
          "code": "using UAIX.LmRuntime.Abstractions;\nusing UAIX.LmRuntime.Contracts;\n\npublic static class InferenceExample\n{\n    /// <summary>\n    /// Executes one provider-neutral inference request.\n    /// </summary>\n    /// <param name=\"runtime\">The runtime implementation that will execute the request.</param>\n    /// <param name=\"model\">The model identifier understood by the runtime.</param>\n    /// <param name=\"prompt\">The user prompt to submit.</param>\n    /// <param name=\"cancellationToken\">A token used to cancel the operation.</param>\n    /// <returns>The normalized inference response.</returns>\n    public static Task<InferenceResponse> GenerateAsync(\n        IInferenceRuntime runtime,\n        string model,\n        string prompt,\n        CancellationToken cancellationToken)\n    {\n        ArgumentNullException.ThrowIfNull(runtime);\n        ArgumentException.ThrowIfNullOrWhiteSpace(model);\n        ArgumentException.ThrowIfNullOrWhiteSpace(prompt);\n\n        var request = new InferenceRequest\n        {\n            Model = model,\n            Messages =\n            [\n                LlmMessage.System(\"Answer directly and identify uncertainty.\"),\n                LlmMessage.User(prompt)\n            ],\n            MaxOutputTokens = 256,\n            Temperature = 0,\n            TopP = 1,\n            UseMemory = false\n        };\n\n        return runtime.GenerateAsync(request, cancellationToken);\n    }\n}"
        },
        {
          "title": "Consume streaming deltas",
          "description": "Stream text without coupling the caller to a concrete runtime implementation.",
          "filename": "StreamingExample.cs",
          "code": "using System.Runtime.CompilerServices;\nusing UAIX.LmRuntime.Abstractions;\nusing UAIX.LmRuntime.Contracts;\n\npublic static class StreamingExample\n{\n    /// <summary>\n    /// Yields non-empty text fragments from a streaming inference operation.\n    /// </summary>\n    /// <param name=\"runtime\">The runtime implementation that produces deltas.</param>\n    /// <param name=\"request\">The normalized inference request.</param>\n    /// <param name=\"cancellationToken\">A token used to cancel enumeration and generation.</param>\n    /// <returns>An asynchronous sequence of text fragments.</returns>\n    public static async IAsyncEnumerable<string> StreamTextAsync(\n        IInferenceRuntime runtime,\n        InferenceRequest request,\n        [EnumeratorCancellation] CancellationToken cancellationToken)\n    {\n        ArgumentNullException.ThrowIfNull(runtime);\n        ArgumentNullException.ThrowIfNull(request);\n\n        await foreach (StreamingDelta delta in runtime\n            .StreamAsync(request, cancellationToken)\n            .WithCancellation(cancellationToken))\n        {\n            if (!string.IsNullOrEmpty(delta.Text))\n            {\n                yield return delta.Text;\n            }\n        }\n    }\n}"
        },
        {
          "title": "Count chat tokens through the common tokenizer contract",
          "description": "Use ITokenizer when an orchestration layer should not know which concrete tokenizer is active.",
          "filename": "TokenCountingExample.cs",
          "code": "using UAIX.LmRuntime.Abstractions;\nusing UAIX.LmRuntime.Contracts;\n\n/// <summary>\n/// Counts the tokens consumed by a normalized chat transcript.\n/// </summary>\n/// <param name=\"tokenizer\">The tokenizer implementation used by the target model.</param>\n/// <param name=\"messages\">The ordered chat messages to count.</param>\n/// <returns>The token count and tokenizer identity.</returns>\nstatic TokenCountResult CountChatTokens(\n    ITokenizer tokenizer,\n    IReadOnlyList<LlmMessage> messages)\n{\n    ArgumentNullException.ThrowIfNull(tokenizer);\n    ArgumentNullException.ThrowIfNull(messages);\n\n    return tokenizer.CountTokens(messages);\n}"
        },
        {
          "title": "Configure explicit runtime-policy defaults",
          "description": "RuntimeOptions exposes governance and resource settings as normal application configuration; enabling a flag does not substitute for implementing the corresponding policy.",
          "filename": "RuntimeOptionsExample.cs",
          "code": "using UAIX.LmRuntime.Contracts;\n\nvar options = new RuntimeOptions\n{\n    DefaultModel = \"local-model\",\n    MaxContextTokens = 8_192,\n    MaxMemoryEntries = 0,\n    EnableTeleodynamicGovernance = false,\n    EnableConstraintPolicy = true,\n    EnableClaimBoundaryPolicy = true,\n    EnableReviewGatePolicy = true,\n    ReturnNoOpResponseOnGovernanceDenial = true,\n    ReturnNoOpResponseOnClaimBoundaryViolation = true,\n    ReturnNoOpResponseOnReviewGateRequired = true,\n    QuarantineGeneratedNeedsHumanReview = true,\n    FailOnEvidenceLedgerError = true\n};",
          "notes": "Policy flags describe orchestration intent. The application must supply and test concrete policy implementations."
        }
      ],
      "faq": [
        {
          "q": "Can I generate text with only this package?",
          "a": "No. It defines contracts and policy interfaces. Use LocalEndpoint for the high-level local GGUF facade, or compose the lower-level GGUF, tokenization, kernel, sampling, and LLaMA packages."
        },
        {
          "q": "Are the governance interfaces mandatory?",
          "a": "No. They are explicit policy surfaces. A runtime should advertise and test the policies it actually applies rather than implying that contract presence equals enforcement."
        },
        {
          "q": "Does a memory object grant model, command, network, or tool authority?",
          "a": "No. Memory and governance evidence remain data. Execution authority belongs to separately implemented and user-approved application gates."
        }
      ],
      "required_for": "runtime-neutral contracts",
      "group": "Foundations"
    },
    "tensors": {
      "route": "package-tensors",
      "id": "UAIX.LmRuntime.Tensors",
      "name": "Tensors",
      "tagline": "Tensor shapes, data types, GGML storage traits, quantized-block metadata, and reference vector math.",
      "description": "Tensor descriptors, memory descriptors, layouts, and quantization metadata for pure C# local LLM runtime packages.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Tensors",
      "dependencies": [],
      "audience": "Model-format parsers, kernel authors, tensor inspectors, and tests that need shared storage and quantization semantics.",
      "install_when": [
        "You need TensorShape, TensorDataType, GgmlTensorType, ITensor, or shared byte-length calculations.",
        "You are validating GGML/GGUF quantized block layouts.",
        "You need small reference Dot or RmsNorm operations for correctness tests."
      ],
      "not_for": [
        "Owning model-file lifetime, tokenization, sampling, or LLaMA execution.",
        "Assuming that a storage type is executable merely because its traits are represented."
      ],
      "key_types": [
        "TensorShape",
        "GgmlTensorType",
        "TensorDataType",
        "TensorTypeTraits",
        "TensorTypeTraitsCatalog",
        "QuantizedBlockTraits",
        "ITensor",
        "VectorMath"
      ],
      "concepts": [
        {
          "title": "Logical shape",
          "text": "TensorShape keeps dimensions and checked element-count semantics separate from any particular allocation or model file."
        },
        {
          "title": "Storage traits",
          "text": "TensorTypeTraitsCatalog maps GGML storage identifiers to block element counts, block byte counts, logical data types, and quantization status."
        },
        {
          "title": "Reference math",
          "text": "VectorMath supplies small, legible operations useful for validation and parity checks; optimized model execution belongs in the kernel package."
        }
      ],
      "examples": [
        {
          "title": "Calculate tensor storage requirements",
          "description": "Derive element count and encoded byte length from a logical shape and GGML storage type.",
          "filename": "TensorStorageExample.cs",
          "code": "using UAIX.LmRuntime.Tensors;\n\nTensorShape shape = TensorShape.From(4_096, 4_096);\nTensorTypeTraits traits = TensorTypeTraitsCatalog.Get(GgmlTensorType.Q4_0);\n\nulong byteLength = TensorTypeTraitsCatalog.ComputeByteLength(\n    GgmlTensorType.Q4_0,\n    checked((ulong)shape.ElementCount));\n\nConsole.WriteLine($\"Elements: {shape.ElementCount:N0}\");\nConsole.WriteLine($\"Block elements: {traits.BlockElementCount}\");\nConsole.WriteLine($\"Block bytes: {traits.BlockByteCount}\");\nConsole.WriteLine($\"Encoded bytes: {byteLength:N0}\");"
        },
        {
          "title": "Run reference vector operations",
          "description": "Use span-based reference math without allocating intermediate arrays.",
          "filename": "VectorMathExample.cs",
          "code": "using UAIX.LmRuntime.Tensors;\n\nReadOnlySpan<float> left = [1.0f, 2.0f, 3.0f, 4.0f];\nReadOnlySpan<float> right = [0.5f, 0.25f, -1.0f, 2.0f];\n\nfloat dot = VectorMath.Dot(left, right);\n\nReadOnlySpan<float> input = [1.0f, 2.0f, 3.0f, 4.0f];\nReadOnlySpan<float> weight = [1.0f, 1.0f, 1.0f, 1.0f];\nSpan<float> normalized = stackalloc float[input.Length];\n\nVectorMath.RmsNorm(input, weight, normalized, epsilon: 1e-5f);"
        },
        {
          "title": "Inspect quantized block metadata",
          "description": "Query a quantized layout before accepting tensor dimensions or allocating a destination buffer.",
          "filename": "QuantizedTraitExample.cs",
          "code": "using UAIX.LmRuntime.Tensors;\n\nQuantizedBlockTrait trait = QuantizedBlockTraits.Get(GgmlTensorType.Q5_0);\n\nConsole.WriteLine(trait.GgmlType);\nConsole.WriteLine($\"Elements per block: {trait.BlockElementCount}\");\nConsole.WriteLine($\"Bytes per block: {trait.BlockByteCount}\");"
        }
      ],
      "faq": [
        {
          "q": "Does ComputeByteLength dequantize a tensor?",
          "a": "No. It calculates storage size from the registered block traits. Dequantization and matrix operations are in Kernels.Cpu."
        },
        {
          "q": "Why are logical element count and encoded byte length separate?",
          "a": "Quantized formats encode a block of logical values into a smaller fixed-size block. Treating encoded bytes as scalar elements causes range, alignment, and allocation errors."
        },
        {
          "q": "Does ITensor own memory?",
          "a": "No. The interface describes shape and data type only. Concrete ownership and disposal rules belong to the implementing type."
        }
      ],
      "required_for": "tensor layout and storage metadata",
      "group": "Foundations"
    },
    "acceleration": {
      "route": "package-acceleration",
      "id": "UAIX.LmRuntime.Acceleration",
      "name": "Acceleration",
      "tagline": "Explicit backend contracts, registration, capability declarations, local probing, deterministic selection, and visible CPU fallback.",
      "description": "Acceleration owns the runtime-neutral control plane for local execution backends. Hosts register backend instances, probe them in stable order, choose with a declared policy, and receive the selected backend, device, capabilities, diagnostics, and CPU-fallback state. Installing a backend package does not register it or grant execution authority.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Acceleration",
      "dependencies": [],
      "audience": "Host authors, runtime integrators, diagnostics tooling, and applications that must distinguish declared compatibility from proven local availability.",
      "install_when": [
        "You need an explicit registry of local execution backends.",
        "You need CPU/GPU selection policies with visible fallback state.",
        "You need capability, device, runtime-identifier, native-asset, and probe diagnostics.",
        "You are implementing a host-owned backend adapter against IRuntimeBackend."
      ],
      "not_for": [
        "Executing a GGUF model or performing tensor math.",
        "Loading drivers, downloading assets, invoking provider APIs, starting subprocesses, or performing remote inference.",
        "Treating a capability declaration as proof that a backend is available on the current machine."
      ],
      "key_types": [
        "IRuntimeBackend",
        "IRuntimeBackendRegistry",
        "IRuntimeBackendSelector",
        "RuntimeBackendRegistry",
        "RuntimeBackendSelector",
        "RuntimeBackendOptions",
        "RuntimeSelectionPolicy",
        "RuntimeSelectionResult",
        "RuntimeBackendProbeResult",
        "RuntimeBackendCapabilities",
        "RuntimeDeviceDescriptor",
        "DiagnosticRuntimeBackend"
      ],
      "concepts": [
        {
          "title": "Registration is explicit",
          "text": "The host controls which backend instances enter the registry. A package reference alone does not make a backend selectable."
        },
        {
          "title": "Selection is evidence-bearing",
          "text": "The result identifies the backend, device, capabilities, diagnostics, and whether a PreferGpu policy used CPU fallback."
        },
        {
          "title": "Require policies fail closed",
          "text": "RequireGpu and RequireBackendId return a failed selection when the required GPU execution class or the named backend cannot prove availability."
        }
      ],
      "examples": [
        {
          "title": "Register and select the managed CPU backend",
          "description": "Compose Acceleration with Backends.CpuManaged and require a CPU-capable local backend.",
          "filename": "RequireCpuExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCpuManagedBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireCpu\n    });\n\nif (!result.Succeeded)\n{\n    throw new InvalidOperationException(result.FailureReason);\n}\n\nConsole.WriteLine(result.SelectedBackendId);\nConsole.WriteLine(result.SelectedDevice?.DeviceId);\n",
          "notes": "This composition requires the Backends.CpuManaged package. Selection does not load a model; it establishes explicit backend identity."
        },
        {
          "title": "Prefer CUDA and expose managed CPU fallback",
          "description": "Register a diagnostic CUDA path and a working managed CPU path, then observe the fallback bit.",
          "filename": "PreferGpuFallbackExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCudaBackend();\nregistry.AddUaixCpuManagedBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.PreferGpu,\n        AllowCpuFallback = true\n    });\n\nConsole.WriteLine($\"Succeeded: {result.Succeeded}\");\nConsole.WriteLine($\"Backend: {result.SelectedBackendId}\");\nConsole.WriteLine($\"CPU fallback: {result.UsedCpuFallback}\");\nforeach (string diagnostic in result.Diagnostics)\n{\n    Console.WriteLine(diagnostic);\n}\n",
          "notes": "The supplied CUDA registration is diagnostic: its probe reports the backend as unavailable until a native adapter proves assets, runtime libraries, and a device."
        },
        {
          "title": "Require GPU and fail without proven availability",
          "description": "Use a require policy when CPU substitution would violate the host workload contract.",
          "filename": "RequireGpuExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCudaBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireGpu,\n        AllowCpuFallback = false,\n        RequireNativeAssets = true\n    });\n\nif (!result.Succeeded)\n{\n    Console.Error.WriteLine(result.FailureReason);\n    foreach (string diagnostic in result.Diagnostics)\n    {\n        Console.Error.WriteLine(diagnostic);\n    }\n}\n",
          "notes": "RequireGpu never reports a managed CPU selection as GPU execution."
        },
        {
          "title": "Probe every registered backend directly",
          "description": "Build a diagnostics view without asking the selector to choose an execution path.",
          "filename": "ProbeRegistryExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\nusing UAIX.LmRuntime.Backends.DirectML;\nusing UAIX.LmRuntime.Backends.Vulkan;\n\nvar registry = new RuntimeBackendRegistry();\nregistry\n    .AddUaixCpuManagedBackend()\n    .AddUaixDirectMlBackend()\n    .AddUaixVulkanBackend();\n\nvar probeSettings = new RuntimeBackendOptions\n{\n    RequestedRuntimeIdentifier = System.Runtime.InteropServices.RuntimeInformation.RuntimeIdentifier\n};\n\nforeach (IRuntimeBackend backend in registry.GetBackends())\n{\n    RuntimeBackendProbeResult probe = await backend.ProbeAsync(probeSettings);\n    Console.WriteLine($\"{backend.Id}: available={probe.IsAvailable}\");\n    foreach (string diagnostic in probe.Diagnostics)\n    {\n        Console.WriteLine($\"  {diagnostic}\");\n    }\n}\n",
          "notes": "Probing is local and diagnostic. The registry order is preserved so reports and selection remain deterministic."
        },
        {
          "title": "Select one required backend identifier",
          "description": "Pin selection to the stable backend ID rather than relying on registration order.",
          "filename": "RequireBackendIdExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCpuManagedBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = CpuManagedRuntimeBackend.BackendId\n    });\n\nConsole.WriteLine(result.Succeeded\n    ? $\"Selected {result.SelectedBackendId}\"\n    : result.FailureReason);\n",
          "notes": "RequireBackendId returns failure when the ID is empty, unregistered, or unavailable."
        },
        {
          "title": "Inspect declared capabilities without treating them as availability",
          "description": "Read backend capability metadata and then check the independent probe result.",
          "filename": "CapabilityAndProbeExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nIRuntimeBackend backend = new CudaRuntimeBackend();\nRuntimeBackendCapabilities declared = backend.Capabilities;\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(new RuntimeBackendOptions());\n\nConsole.WriteLine($\"API: {declared.BackendApiName}\");\nConsole.WriteLine($\"Declares GPU execution: {declared.SupportsGpuExecution}\");\nConsole.WriteLine($\"Native asset state: {declared.NativeAssetState}\");\nConsole.WriteLine($\"Available now: {probe.IsAvailable}\");\n",
          "notes": "Capabilities describe the backend family. Probe evidence decides whether execution is available in the current host."
        }
      ],
      "faq": [
        {
          "q": "Does installing a backend register it?",
          "a": "No. The host must explicitly add a backend instance to an IRuntimeBackendRegistry."
        },
        {
          "q": "Can PreferGpu silently run on CPU?",
          "a": "No. CPU fallback requires AllowCpuFallback and is reported through UsedCpuFallback and diagnostics."
        },
        {
          "q": "Does Acceleration execute inference?",
          "a": "No. It owns contracts, registration, probing, selection, and evidence. Model execution remains in the execution packages."
        },
        {
          "q": "What should a custom backend prove?",
          "a": "Its probe should distinguish native assets, runtime libraries, drivers, devices, policy restrictions, and selected device identity without exposing private prompt or path content."
        }
      ],
      "required_for": "explicit backend registration, probing, selection, and fallback evidence",
      "group": "Backend selection"
    },
    "gguf": {
      "route": "package-gguf",
      "id": "UAIX.LmRuntime.Gguf",
      "name": "GGUF",
      "tagline": "Bounded GGUF parsing, metadata and tensor catalogs, strict validation, sharding, hashing, and mapped tensor access.",
      "description": "GGUF parser, metadata model, tensor catalog, sharding, and validation for pure C# local LLM runtime packages.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Gguf",
      "dependencies": [
        "UAIX.LmRuntime.Tensors"
      ],
      "audience": "Applications that inspect or validate GGUF artifacts, tokenizer/model loaders, and low-level mapped-tensor consumers.",
      "install_when": [
        "You need to parse GGUF metadata and tensor descriptors without starting inference.",
        "You need bounded strict validation, shard information, artifact classification, or SHA-256 identity.",
        "You need mapped, segmented, or copied access to tensor payload bytes."
      ],
      "not_for": [
        "Selecting a tokenizer, binding a LLaMA graph, or generating output by itself.",
        "Treating all syntactically valid GGUF files as supported model architectures."
      ],
      "key_types": [
        "GgufReader",
        "GgufParseOptions",
        "GgufModel",
        "GgufStrictValidator",
        "GgufTensorDescriptor",
        "GgufHashingReader",
        "MappedGgufFile",
        "MappedTensorView",
        "Float32TensorReader",
        "SegmentedModelFileReader"
      ],
      "concepts": [
        {
          "title": "Bounded intake",
          "text": "GgufParseOptions places explicit limits on tensor counts, metadata counts, dimensions, strings, arrays, and nesting before dependent work proceeds."
        },
        {
          "title": "Structure before architecture",
          "text": "GgufModel represents container metadata and tensor descriptors. A structurally valid file can still be unsupported by a tokenizer or model-family loader."
        },
        {
          "title": "Mapped ownership",
          "text": "MappedGgufFile owns the mapping lifetime. Tensor spans and views must not outlive the owner, and callers should dispose the mapping deterministically."
        }
      ],
      "examples": [
        {
          "title": "Strictly validate a GGUF file",
          "description": "Report every structural validation error before any model execution is attempted.",
          "filename": "GgufValidationExample.cs",
          "code": "using UAIX.LmRuntime.Gguf;\n\nvar options = new GgufParseOptions\n{\n    MaxTensorCount = 100_000,\n    MaxMetadataCount = 100_000,\n    MaxDimensionCount = 8,\n    MaxStringBytes = 16 * 1024 * 1024,\n    MaxArrayLength = 10_000_000,\n    MaxArrayDepth = 8\n};\n\nGgufValidationReport report =\n    GgufStrictValidator.Validate(\"models/model.gguf\", options);\n\nif (!report.IsValid)\n{\n    foreach (GgufValidationError error in report.Errors)\n    {\n        Console.Error.WriteLine(\n            $\"{error.Code}: {error.Message} @ {error.ByteOffset?.ToString() ?? \"n/a\"}\");\n    }\n\n    return;\n}\n\nConsole.WriteLine($\"Tensors: {report.Model.Tensors.Count}\");\nConsole.WriteLine($\"Metadata entries: {report.Model.Metadata.Count}\");"
        },
        {
          "title": "Inspect model metadata and tensors",
          "description": "Parse the container and enumerate the metadata/tensor catalog without materializing model weights.",
          "filename": "GgufInspectionExample.cs",
          "code": "using UAIX.LmRuntime.Gguf;\n\nGgufModel model = GgufReader.Read(\n    \"models/model.gguf\",\n    new GgufParseOptions());\n\nConsole.WriteLine($\"Artifact kind: {model.ArtifactKind}\");\nConsole.WriteLine($\"GGUF container version: {model.Version}\");\nConsole.WriteLine($\"Tensor data offset: {model.TensorDataOffset}\");\nConsole.WriteLine($\"Alignment: {model.Alignment}\");\n\nif (model.TryGetString(\"general.name\", out string? modelName))\n{\n    Console.WriteLine($\"Model name: {modelName}\");\n}\n\nforeach (GgufTensorDescriptor tensor in model.Tensors)\n{\n    Console.WriteLine(\n        $\"{tensor.Name}: {tensor.GgmlType}, \" +\n        $\"{tensor.ElementCount:N0} elements, {tensor.ByteLength:N0} bytes\");\n}"
        },
        {
          "title": "Compute and compare a model identity",
          "description": "Calculate SHA-256 before accepting a local artifact into a trusted model catalog.",
          "filename": "GgufHashExample.cs",
          "code": "using UAIX.LmRuntime.Gguf;\n\npublic static class GgufIdentityVerifier\n{\n    /// <summary>\n    /// Computes the current GGUF digest and rejects an artifact that does not match the trusted identity.\n    /// </summary>\n    /// <param name=\"modelPath\">The local GGUF path to hash.</param>\n    /// <param name=\"expectedSha256\">The expected 64-character SHA-256 digest.</param>\n    /// <returns>The normalized digest computed from the current file bytes.</returns>\n    public static string VerifySha256(\n        string modelPath,\n        string expectedSha256)\n    {\n        ArgumentException.ThrowIfNullOrWhiteSpace(modelPath);\n        ArgumentException.ThrowIfNullOrWhiteSpace(expectedSha256);\n\n        string actualSha256 = GgufHashingReader.ComputeSha256(modelPath);\n\n        if (!string.Equals(\n                actualSha256,\n                expectedSha256,\n                StringComparison.OrdinalIgnoreCase))\n        {\n            throw new InvalidDataException(\n                \"The GGUF artifact hash does not match the trusted identity.\");\n        }\n\n        return actualSha256;\n    }\n}"
        },
        {
          "title": "Read a mapped float32 tensor",
          "description": "Keep the mapped file alive while reading a tensor view and copy values only when the caller needs an owned array.",
          "filename": "MappedTensorExample.cs",
          "code": "using System.Linq;\nusing UAIX.LmRuntime.Gguf;\nusing UAIX.LmRuntime.Tensors;\n\nGgufModel model = GgufReader.Read(\n    \"models/model.gguf\",\n    new GgufParseOptions());\n\nGgufTensorDescriptor descriptor = model.Tensors\n    .First(tensor => tensor.GgmlType == GgmlTensorType.F32);\n\nusing var mapped = new MappedGgufFile(model);\n\nMappedTensorView view = mapped.CreateTensorView(\n    descriptor,\n    descriptor.Dimensions);\n\nvar reader = new Float32TensorReader(view);\nfloat firstValue = reader.ReadElement(0);\nfloat[] ownedValues = reader.ToArray();\n\nConsole.WriteLine($\"First value: {firstValue}\");\nConsole.WriteLine($\"Copied values: {ownedValues.Length}\");"
        }
      ],
      "faq": [
        {
          "q": "What is the difference between Read and TryRead?",
          "a": "Read is the direct parse path and can throw for invalid input. TryRead returns a GgufParseResult containing success state, diagnostics, and the parsed model when available."
        },
        {
          "q": "Does strict validation prove that the model can run?",
          "a": "No. It proves the checked container invariants. Tokenizer compatibility, architecture support, required tensors, storage layouts, and execution still need their own validation gates."
        },
        {
          "q": "Can I hold a tensor span after disposing MappedGgufFile?",
          "a": "No. Treat every span, memory segment, and mapped tensor view as borrowing the mapping lifetime."
        },
        {
          "q": "Does this package download models?",
          "a": "No. The caller supplies local paths and owns model acquisition, licensing review, and trust policy."
        }
      ],
      "required_for": "GGUF inspection and validation",
      "group": "Model pipeline"
    },
    "sampling": {
      "route": "package-sampling",
      "id": "UAIX.LmRuntime.Sampling",
      "name": "Sampling",
      "tagline": "Greedy and probability sampling, logit transforms, deterministic random state, top-k/top-p filtering, stop matching, and generation control.",
      "description": "Samplers and logits processors for pure C# local LLM runtime generation.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Sampling",
      "dependencies": [
        "UAIX.LmRuntime.Abstractions"
      ],
      "audience": "Generation loops, model runtimes, and tests that need explicit token selection and termination behavior.",
      "install_when": [
        "You need deterministic greedy argmax selection.",
        "You need seeded temperature, top-k, top-p, minimum-p, repetition, frequency, presence, or logit-bias processing.",
        "You need byte-safe stop-sequence matching or a bounded generation controller."
      ],
      "not_for": [
        "Computing logits or decoding model weights.",
        "Treating a seed as sufficient reproducibility when model execution, tokenizer behavior, or hardware math differs."
      ],
      "key_types": [
        "GreedySampler",
        "ProbabilitySampler",
        "SamplingOptions",
        "SamplingState",
        "LogitProcessor",
        "TopKSelector",
        "StopSequenceMatcher",
        "GenerationController",
        "Xoshiro256StarStar"
      ],
      "concepts": [
        {
          "title": "Selection is separate from logits",
          "text": "The package receives a vocabulary-sized logit span. Model execution remains outside the sampling layer."
        },
        {
          "title": "State is explicit",
          "text": "SamplingState owns deterministic random state and token-frequency history, so callers control whether state persists across generation steps."
        },
        {
          "title": "Stops are byte-aware",
          "text": "StopSequenceMatcher retains partial UTF-8 byte prefixes across appended chunks and distinguishes visible output from matched stop bytes."
        }
      ],
      "examples": [
        {
          "title": "Select the greedy token",
          "description": "Choose the highest logit without allocating a probability distribution.",
          "filename": "GreedySamplingExample.cs",
          "code": "using UAIX.LmRuntime.Sampling;\n\nReadOnlySpan<float> logits = [-1.2f, 0.5f, 2.75f, 1.1f];\nint tokenId = GreedySampler.Select(logits);\n\nConsole.WriteLine($\"Selected token: {tokenId}\");"
        },
        {
          "title": "Run reproducible probability sampling",
          "description": "Create SamplingState once per generation sequence so random and repetition state advance together.",
          "filename": "ProbabilitySamplingExample.cs",
          "code": "using UAIX.LmRuntime.Sampling;\n\npublic static class ReproducibleSamplingExample\n{\n    /// <summary>\n    /// Selects one token from logits using an explicitly seeded sampling state.\n    /// </summary>\n    /// <param name=\"logits\">The vocabulary logits for the current decoding step.</param>\n    /// <returns>The selected token and candidate evidence.</returns>\n    public static SamplingDecision Select(ReadOnlySpan<float> logits)\n    {\n        if (logits.IsEmpty)\n        {\n            throw new ArgumentException(\"At least one logit is required.\", nameof(logits));\n        }\n\n        var options = new SamplingOptions\n        {\n            Temperature = 0.8f,\n            TopK = Math.Min(40, logits.Length),\n            TopP = 0.95f,\n            MinimumP = 0.05f,\n            RepetitionPenalty = 1.1f,\n            FrequencyPenalty = 0,\n            PresencePenalty = 0,\n            MinimumGeneratedTokens = 0,\n            MaximumGeneratedTokens = 128,\n            MaximumContextTokens = 8_192,\n            Seed = 42\n        };\n\n        options.Validate(vocabularySize: logits.Length);\n\n        var state = new SamplingState(options);\n        SamplingDecision decision =\n            ProbabilitySampler.Select(logits, options, state);\n\n        state.RecordToken(decision.TokenId);\n        return decision;\n    }\n}",
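          "notes": "Reproducibility also requires the same logits, tokenizer, settings, token history, and runtime math path. This single-step example constructs SamplingState inline; in a multi-step generation loop, create the state once and pass the same instance to every ProbabilitySampler.Select call so random and token-history state carry across steps."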
        },
        {
          "title": "Apply logit bias and history penalties",
          "description": "Inspect the transformed logits independently from token selection.",
          "filename": "LogitProcessingExample.cs",
          "code": "using UAIX.LmRuntime.Sampling;\n\npublic static class LogitProcessingExample\n{\n    /// <summary>\n    /// Applies caller-selected bias and history penalties to one logit vector.\n    /// </summary>\n    /// <param name=\"logits\">The unmodified vocabulary logits.</param>\n    /// <returns>A processed copy suitable for token selection.</returns>\n    public static float[] Process(ReadOnlySpan<float> logits)\n    {\n        if (logits.Length < 2)\n        {\n            throw new ArgumentException(\n                \"At least two logits are required for this example.\",\n                nameof(logits));\n        }\n\n        var options = new SamplingOptions\n        {\n            Temperature = 1.0f,\n            RepetitionPenalty = 1.15f,\n            FrequencyPenalty = 0.2f,\n            PresencePenalty = 0.1f,\n            LogitBias = new Dictionary<int, float>\n            {\n                [0] = -2.0f,\n                [1] = 1.5f\n            },\n            Seed = 7\n        };\n\n        options.Validate(vocabularySize: logits.Length);\n\n        var state = new SamplingState(options);\n        state.RecordToken(0);\n        state.RecordToken(0);\n\n        return LogitProcessor.Process(logits, options, state);\n    }\n}"
        },
        {
          "title": "Match a stop sequence across byte chunks",
          "description": "Detect a stop string even when its UTF-8 bytes span multiple generated tokens.",
          "filename": "StopSequenceExample.cs",
          "code": "using System.Text;\nusing UAIX.LmRuntime.Sampling;\n\nvar matcher = new StopSequenceMatcher(\n    stopSequences: [\"</answer>\"],\n    includeMatchedBytes: false);\n\nStopSequenceMatchResult first = matcher.Append(\n    Encoding.UTF8.GetBytes(\"Result</ans\"));\n\nStopSequenceMatchResult second = matcher.Append(\n    Encoding.UTF8.GetBytes(\"wer>ignored\"));\n\nbyte[] remaining = matcher.Complete();\n\nConsole.Write(Encoding.UTF8.GetString(first.VisibleBytes));\nConsole.Write(Encoding.UTF8.GetString(second.VisibleBytes));\nConsole.Write(Encoding.UTF8.GetString(remaining));"
        }
      ],
      "faq": [
        {
          "q": "When should I use GreedySampler?",
          "a": "Use it for deterministic argmax generation, parity tests, and local facade paths that intentionally do not expose probabilistic settings."
        },
        {
          "q": "Where should SamplingState live?",
          "a": "Create one state per generation sequence and retain it across steps. Starting a new state resets random progression and token-frequency history."
        },
        {
          "q": "Does a fixed seed guarantee identical text everywhere?",
          "a": "No. It controls the package random generator. Identical output still depends on identical logits, processing settings, token history, tokenizer, floating-point behavior, and model execution."
        },
        {
          "q": "Why match stop sequences as bytes?",
          "a": "Generated tokens may split one Unicode scalar or stop string across token boundaries. Byte-level retention avoids decoding incomplete fragments prematurely."
        }
      ],
      "required_for": "token selection and stop handling",
      "group": "Model pipeline"
    },
    "cpu-kernels": {
      "route": "package-cpu-kernels",
      "id": "UAIX.LmRuntime.Kernels.Cpu",
      "name": "CPU Kernels",
      "tagline": "Reference, portable-vector, AVX2-aware, half-precision, and quantized CPU kernels with explicit dispatch and parity checks.",
      "description": "Scalar, Vector<T>, and intrinsic-ready CPU kernels for pure C# local LLM runtime inference.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Kernels.Cpu",
      "dependencies": [
        "UAIX.LmRuntime.Tensors"
      ],
      "audience": "Model executors, kernel developers, and test suites that need managed CPU math over float and quantized tensor storage.",
      "install_when": [
        "You need scalar correctness kernels or CPU dispatch for dot, matrix-vector, RMS normalization, softmax, and RoPE primitives.",
        "You need Q4, Q5, Q6, Q8, or K-quantized dequantization/dot/matrix behavior represented by this package.",
        "You need parity comparisons between reference and selected CPU paths."
      ],
      "not_for": [
        "GPU execution or native acceleration.",
        "Assuming a requested ISA tier is selected; inspect CpuKernelSelection for the actual tier and reason."
      ],
      "key_types": [
        "CpuKernelDispatcher",
        "CpuKernelTier",
        "CpuKernelSelection",
        "ReferenceCpuKernels",
        "QuantizedCpuKernels",
        "KQuantizedCpuKernels",
        "Scalar16CpuKernels",
        "QuantizedKernelParityRunner",
        "ReferenceMatrixRowDispatcher"
      ],
      "concepts": [
        {
          "title": "Reference first",
          "text": "Scalar/reference operations remain the correctness anchor. Optimized paths should be admitted through parity evidence rather than labels alone."
        },
        {
          "title": "Dispatch is observable",
          "text": "CpuKernelDispatcher reports requested tier, selected tier, operation, and rationale so fallback behavior is not hidden."
        },
        {
          "title": "Storage-specific validation",
          "text": "Quantized kernels depend on exact block layout, row shape, byte length, alignment, and destination bounds supplied by tensor/storage metadata."
        }
      ],
      "examples": [
        {
          "title": "Dispatch a float32 dot product",
          "description": "Request the best available implemented tier and retain the actual selection evidence.",
          "filename": "CpuDotExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nReadOnlySpan<float> left = [1.0f, 2.0f, 3.0f, 4.0f];\nReadOnlySpan<float> right = [0.5f, 0.25f, -1.0f, 2.0f];\n\nfloat value = CpuKernelDispatcher.DotFloat32(\n    left,\n    right,\n    CpuKernelTier.Auto,\n    out CpuKernelSelection selection);\n\nConsole.WriteLine($\"Value: {value}\");\nConsole.WriteLine(\n    $\"{selection.RequestedTier} -> {selection.SelectedTier}: {selection.Reason}\");"
        },
        {
          "title": "Run a managed matrix-vector operation",
          "description": "Provide row-major matrix values, explicit dimensions, and caller-owned output storage.",
          "filename": "CpuMatVecExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nconst int rowCount = 2;\nconst int columnCount = 3;\n\nReadOnlySpan<float> matrix =\n[\n    1.0f, 2.0f, 3.0f,\n    4.0f, 5.0f, 6.0f\n];\n\nReadOnlySpan<float> vector = [0.5f, 1.0f, -0.5f];\nSpan<float> output = stackalloc float[rowCount];\n\nCpuKernelDispatcher.MatVecFloat32(\n    matrix,\n    rowCount,\n    columnCount,\n    vector,\n    output,\n    CpuKernelTier.Auto,\n    out CpuKernelSelection selection);\n\nConsole.WriteLine(\n    $\"{selection.RequestedTier} -> {selection.SelectedTier}: {selection.Reason}\");"
        },
        {
          "title": "Compare an optimized result with the reference",
          "description": "Make tolerance and maximum deviation visible in a kernel-admission test.",
          "filename": "KernelParityExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nReadOnlySpan<float> reference = [1.0f, 2.0f, 3.0f];\nReadOnlySpan<float> actual = [1.0f, 2.000001f, 2.999999f];\n\nQuantizedKernelParityReport report =\n    QuantizedKernelParityRunner.CompareAgainstReference(\n        reference,\n        actual,\n        tolerance: 1e-5f);\n\nif (!report.Passed)\n{\n    throw new InvalidOperationException(\n        $\"Kernel parity failed: max error {report.MaxAbsoluteError}\");\n}"
        },
        {
          "title": "Dispatch a quantized matrix row layout",
          "description": "Use the GGML tensor type and explicit dimensions to select the matching managed row implementation.",
          "filename": "QuantizedMatVecExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\nusing UAIX.LmRuntime.Tensors;\n\npublic static class QuantizedMatrixExample\n{\n    /// <summary>\n    /// Multiplies a Q4_0 matrix by a float activation vector through the reference row dispatcher.\n    /// </summary>\n    /// <param name=\"encodedMatrixBytes\">The complete encoded matrix storage.</param>\n    /// <param name=\"rowCount\">The number of matrix rows.</param>\n    /// <param name=\"columnCount\">The number of matrix columns and activation values.</param>\n    /// <param name=\"activations\">The float activation vector.</param>\n    /// <returns>A newly allocated output vector containing one value per row.</returns>\n    public static float[] MultiplyQ4_0(\n        ReadOnlySpan<byte> encodedMatrixBytes,\n        int rowCount,\n        int columnCount,\n        ReadOnlySpan<float> activations)\n    {\n        var output = new float[rowCount];\n\n        ReferenceMatrixRowDispatcher.MatVec(\n            GgmlTensorType.Q4_0,\n            encodedMatrixBytes,\n            rowCount,\n            columnCount,\n            activations,\n            output);\n\n        return output;\n    }\n}",
          "notes": "The encoded buffer must contain complete blocks for the requested storage type and logical row shape."
        },
        {
          "title": "Apply RMS normalization through explicit tier selection",
          "description": "Normalize a caller-owned activation vector while recording the selected managed kernel tier.",
          "filename": "CpuRmsNormExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nReadOnlySpan<float> input = [1.0f, -2.0f, 0.5f, 4.0f];\nReadOnlySpan<float> weight = [1.0f, 1.0f, 1.0f, 1.0f];\nSpan<float> output = stackalloc float[input.Length];\n\nCpuKernelDispatcher.RmsNorm(\n    input,\n    weight,\n    output,\n    epsilon: 1e-5f,\n    requestedTier: CpuKernelTier.Auto,\n    selection: out CpuKernelSelection selection);\n\nConsole.WriteLine($\"Selected tier: {selection.SelectedTier}\");\nConsole.WriteLine(string.Join(\", \", output.ToArray()));\n",
          "notes": "Auto records the actual managed tier. Force Scalar when building a deterministic comparison baseline."
        },
        {
          "title": "Use reference softmax and RoPE in caller-owned spans",
          "description": "Run two core reference transformations without allocating output arrays inside the kernel calls.",
          "filename": "ReferenceTransformExample.cs",
          "code": "using System.Linq;\nusing UAIX.LmRuntime.Kernels.Cpu;\n\nSpan<float> logits = stackalloc float[] { 1.25f, 0.25f, -0.5f, 2.0f };\nReferenceCpuKernels.SoftmaxInPlace(logits);\n\nSpan<float> query = stackalloc float[] { 0.5f, -0.25f, 1.0f, 0.75f };\nReferenceCpuKernels.ApplyRope(query, position: 7, theta: 10_000.0f);\n\nConsole.WriteLine($\"Probability sum: {logits.ToArray().Sum():F6}\");\nConsole.WriteLine(string.Join(\", \", query.ToArray()));\n",
          "notes": "These methods are reference transformations. Validate shape and numerical parity before replacing them with a different execution path."
        },
        {
          "title": "Decode little-endian F16 and BF16 storage",
          "description": "Convert two-byte scalar encodings and reject non-finite model values when the host requires that boundary.",
          "filename": "Scalar16DecodeExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nReadOnlySpan<byte> f16One = [0x00, 0x3C];\nReadOnlySpan<byte> bf16One = [0x80, 0x3F];\n\nfloat fromFloat16 = Scalar16CpuKernels.DecodeFloat16(\n    f16One,\n    bigEndian: false,\n    rejectNonFinite: true);\n\nfloat fromBFloat16 = Scalar16CpuKernels.DecodeBFloat16(\n    bf16One,\n    bigEndian: false,\n    rejectNonFinite: true);\n\nConsole.WriteLine($\"F16={fromFloat16}; BF16={fromBFloat16}\");\n",
          "notes": "GGUF numeric storage is normally little-endian. Pass the byte order explicitly rather than relying on host architecture."
        },
        {
          "title": "Convert a caller-owned F16 buffer to float32",
          "description": "Decode a contiguous F16 sequence into a destination span whose length defines the scalar count.",
          "filename": "Scalar16CopyExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nReadOnlySpan<byte> encoded =\n[\n    0x00, 0x3C, // 1.0\n    0x00, 0x40, // 2.0\n    0x00, 0xC2  // -3.0\n];\nSpan<float> decoded = stackalloc float[3];\n\nScalar16CpuKernels.CopyFloat16(\n    encoded,\n    decoded,\n    bigEndian: false);\n\nConsole.WriteLine(string.Join(\", \", decoded.ToArray()));\n",
          "notes": "The source must contain exactly two bytes for each destination element. The method does not retain either span."
        },
        {
          "title": "Describe quantized row storage before dispatch",
          "description": "Compute the exact byte requirement for a row-major Q4_0 matrix before accepting its encoded bytes.",
          "filename": "MatrixStorageDescriptionExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\nusing UAIX.LmRuntime.Tensors;\n\nReferenceMatrixStorageDescriptor descriptor =\n    ReferenceMatrixRowDispatcher.Describe(\n        GgmlTensorType.Q4_0,\n        rowCount: 12,\n        columnCount: 128);\n\nConsole.WriteLine($\"Type: {descriptor.GgmlType}\");\nConsole.WriteLine($\"Shape: {descriptor.RowCount} x {descriptor.ColumnCount}\");\nConsole.WriteLine($\"Required bytes: {descriptor.RequiredByteCount}\");\n",
          "notes": "Describe validates block divisibility and checked geometry before encoded storage is accepted by MatVec."
        },
        {
          "title": "Inspect exact Q4_K and Q6_K block layouts",
          "description": "Read the audited logical and physical block geometry exposed by the K-quant kernel layer.",
          "filename": "KQuantLayoutExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nKQuantizedBlockLayout q4 = KQuantizedCpuKernels.Q4KLayout;\nKQuantizedBlockLayout q6 = KQuantizedCpuKernels.Q6KLayout;\n\nConsole.WriteLine($\"{q4.Format}: {q4.ElementCount} values / {q4.ByteCount} bytes\");\nConsole.WriteLine(q4.LayoutDescription);\nConsole.WriteLine($\"{q6.Format}: {q6.ElementCount} values / {q6.ByteCount} bytes\");\nConsole.WriteLine(q6.LayoutDescription);\n",
          "notes": "The layout descriptors document storage geometry; they do not validate that an arbitrary byte block contains a compatible model tensor."
        },
        {
          "title": "Force the scalar tier for a comparison baseline",
          "description": "Run the same dot product through Auto and Scalar and compare the selected-tier result.",
          "filename": "KernelTierComparisonExample.cs",
          "code": "using UAIX.LmRuntime.Kernels.Cpu;\n\nReadOnlySpan<float> left = [0.25f, 0.5f, 1.0f, 2.0f, 4.0f];\nReadOnlySpan<float> right = [4.0f, 2.0f, 1.0f, 0.5f, 0.25f];\n\nfloat automatic = CpuKernelDispatcher.DotFloat32(\n    left,\n    right,\n    CpuKernelTier.Auto,\n    out CpuKernelSelection automaticSelection);\n\nfloat scalar = CpuKernelDispatcher.DotFloat32(\n    left,\n    right,\n    CpuKernelTier.Scalar,\n    out CpuKernelSelection scalarSelection);\n\nConsole.WriteLine($\"Auto: {automaticSelection.SelectedTier} => {automatic}\");\nConsole.WriteLine($\"Scalar: {scalarSelection.SelectedTier} => {scalar}\");\nConsole.WriteLine($\"Absolute difference: {MathF.Abs(automatic - scalar)}\");\n",
          "notes": "A matching result in one toy input is not a complete parity claim. Use representative fixtures and declared tolerances."
        }
      ],
      "faq": [
        {
          "q": "Does Auto always mean AVX2?",
          "a": "No. Auto selects the highest implemented and supported tier for that operation. Inspect CpuKernelSelection rather than inferring the result from the host CPU."
        },
        {
          "q": "Are all GGML storage types executable?",
          "a": "No. Representation in Tensors and execution in Kernels.Cpu are separate evidence levels. Check the specific dispatcher, dequantizer, or matrix source used by your model path."
        },
        {
          "q": "Can I pass overlapping input and output spans?",
          "a": "Do not assume overlap is supported unless the member documentation explicitly permits it. Use separate caller-owned output storage for matrix and normalization operations."
        },
        {
          "q": "Does this package use native libraries?",
          "a": "No. The package provides pure managed CPU kernels; it does not provide GPU or native inference backends."
        }
      ],
      "required_for": "managed CPU math",
      "group": "Model pipeline"
    },
    "tokenization": {
      "route": "package-tokenization",
      "id": "UAIX.LmRuntime.Tokenization",
      "name": "Tokenization",
      "tagline": "GGUF tokenizer metadata, tokenizer factories and engines, special-token handling, chat templates, truncation, safety, and parity tools.",
      "description": "Tokenizer implementations and chat template rendering for pure C# local LLM runtime packages.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Tokenization",
      "dependencies": [
        "UAIX.LmRuntime.Abstractions",
        "UAIX.LmRuntime.Gguf"
      ],
      "audience": "Applications and model loaders that need model-coupled encoding/decoding, chat rendering, token budgets, or tokenizer verification.",
      "install_when": [
        "Creating a tokenizer from GGUF metadata.",
        "Using SentencePiece BPE, GPT-2 BPE, RWKV-world, metadata-driven, or tokenizer.json adapter surfaces.",
        "Controlling special tokens, streaming UTF-8 decode, chat-template validation, token-budget truncation, or golden-corpus parity."
      ],
      "not_for": [
        "Assuming one tokenizer is interchangeable across model artifacts.",
        "Executing arbitrary general-purpose Jinja templates or silently repairing unsupported metadata."
      ],
      "key_types": [
        "GgufTokenizerFactory",
        "GgufTokenizerMetadataReader",
        "MetadataDrivenGgufTokenizer",
        "IGgufTokenizer",
        "TokenizationOptions",
        "DetokenizationOptions",
        "ChatTemplateRenderer",
        "ChatTemplateConformanceSuite",
        "TokenBudgetTruncator",
        "StreamingUtf8TokenDecoder",
        "TokenizerGoldenCorpus"
      ],
      "concepts": [
        {
          "title": "Tokenizer follows the artifact",
          "text": "Read and validate tokenizer metadata from the same GGUF model that supplies the weights. Vocabulary, merges, special IDs, pre-tokenizer, and chat-template behavior are model-coupled."
        },
        {
          "title": "Configuration is explicit",
          "text": "Adding BOS/EOS tokens, parsing special tokens, removing or unparsing special tokens, whitespace cleanup, traces, and invalid UTF-16 policy are caller-visible choices."
        },
        {
          "title": "Parity is testable",
          "text": "Golden records, fingerprints, reconciliation, and parity reports let maintainers compare behavior without treating a single successful prompt as broad compatibility proof."
        }
      ],
      "examples": [
        {
          "title": "Create a strict tokenizer from GGUF metadata",
          "description": "Use the common ITokenizer contract for straightforward encoding and decoding.",
          "filename": "TokenizerFactoryExample.cs",
          "code": "using UAIX.LmRuntime.Abstractions;\nusing UAIX.LmRuntime.Gguf;\nusing UAIX.LmRuntime.Tokenization;\n\nGgufModel model = GgufReader.Read(\n    \"models/model.gguf\",\n    new GgufParseOptions());\n\nITokenizer tokenizer =\n    new GgufTokenizerFactory().CreateStrict(model);\n\nIReadOnlyList<int> tokenIds = tokenizer.Encode(\n    \"Hello, local runtime.\",\n    addBos: true,\n    addEos: false);\n\nstring roundTrip = tokenizer.Decode(tokenIds);\n\nConsole.WriteLine(string.Join(\", \", tokenIds));\nConsole.WriteLine(roundTrip);"
        },
        {
          "title": "Request a detailed tokenization trace",
          "description": "Use the GGUF-specific interface when the caller needs explicit special-token and trace controls.",
          "filename": "DetailedTokenizationExample.cs",
          "code": "using UAIX.LmRuntime.Gguf;\nusing UAIX.LmRuntime.Tokenization;\n\nGgufModel model = GgufReader.Read(\n    \"models/model.gguf\",\n    new GgufParseOptions());\n\nGgufTokenizerMetadata metadata =\n    GgufTokenizerMetadataReader.ReadStrict(model);\n\nvar tokenizer = new MetadataDrivenGgufTokenizer(metadata);\n\nTokenizationResult result = tokenizer.Encode(\n    \"Explain token boundaries.\",\n    new TokenizationOptions\n    {\n        AddSpecialTokens = true,\n        ParseSpecialTokens = false,\n        EmitTrace = true,\n        InvalidUtf16Policy = InvalidUtf16Policy.Reject\n    });\n\nforeach (string traceLine in result.Trace)\n{\n    Console.WriteLine(traceLine);\n}\n\nstring decoded = tokenizer.Decode(\n    result.TokenIds,\n    new DetokenizationOptions\n    {\n        RemoveSpecialTokens = true,\n        UnparseSpecialTokens = false,\n        CleanSpaces = false\n    });"
        },
        {
          "title": "Render a bounded chat transcript",
          "description": "Use the deterministic role/content renderer when a general Jinja interpreter is neither required nor desired.",
          "filename": "ChatTemplateExample.cs",
          "code": "using UAIX.LmRuntime.Contracts;\nusing UAIX.LmRuntime.Tokenization;\n\nvar messages = new[]\n{\n    LlmMessage.System(\"Answer in one paragraph.\"),\n    LlmMessage.User(\"What is a tensor?\")\n};\n\nstring prompt = new ChatTemplateRenderer().Render(messages);\nConsole.WriteLine(prompt);"
        },
        {
          "title": "Truncate chat history to a token budget",
          "description": "Delegate counting to the active model tokenizer rather than using character length as a proxy.",
          "filename": "TokenBudgetExample.cs",
          "code": "using UAIX.LmRuntime.Abstractions;\nusing UAIX.LmRuntime.Contracts;\nusing UAIX.LmRuntime.Tokenization;\n\n/// <summary>\n/// Truncates an ordered transcript to the supplied tokenizer budget.\n/// </summary>\n/// <param name=\"messages\">The messages to fit into the budget.</param>\n/// <param name=\"tokenizer\">The tokenizer for the target model.</param>\n/// <param name=\"maximumTokens\">The maximum accepted token count.</param>\n/// <returns>The retained ordered message sequence.</returns>\nstatic IReadOnlyList<LlmMessage> FitTranscript(\n    IReadOnlyList<LlmMessage> messages,\n    ITokenizer tokenizer,\n    int maximumTokens)\n{\n    return new TokenBudgetTruncator().TruncateMessages(\n        messages,\n        tokenizer,\n        maximumTokens);\n}"
        },
        {
          "title": "Decode partial UTF-8 token bytes safely",
          "description": "Keep incomplete UTF-8 sequences buffered across token boundaries and flush at stream completion.",
          "filename": "StreamingDecodeExample.cs",
          "code": "using UAIX.LmRuntime.Tokenization;\n\nvar decoder = new StreamingUtf8TokenDecoder();\n\nstring first = decoder.Decode([0xE2, 0x82], flush: false);\nstring second = decoder.Decode([0xAC], flush: false);\nstring final = decoder.Decode(ReadOnlySpan<byte>.Empty, flush: true);\n\nConsole.Write(first);\nConsole.Write(second);\nConsole.Write(final);"
        }
      ],
      "faq": [
        {
          "q": "Should I use Create or CreateStrict?",
          "a": "Use the strict path when unsupported or inconsistent metadata must fail closed. Use the non-strict path only when its fallback behavior is understood and covered by your own compatibility tests."
        },
        {
          "q": "Why can encoding and detokenization settings change output?",
          "a": "Special-token insertion, parsing, removal, whitespace cleanup, and invalid-text policy are part of the tokenizer contract. Set them explicitly at application boundaries."
        },
        {
          "q": "Does ChatTemplateRenderer execute arbitrary templates from a model?",
          "a": "No. It is a small deterministic role/content renderer. Use the conformance surface to evaluate supported template behavior, and do not imply general Jinja compatibility."
        },
        {
          "q": "Can I reuse one tokenizer for another model with a similar name?",
          "a": "Do not assume so. Validate vocabulary, merges, special token IDs, pre-tokenizer behavior, and fingerprints against the actual artifact."
        }
      ],
      "required_for": "tokenizer and chat-template work",
      "group": "Model pipeline"
    },
    "models-llama": {
      "route": "package-models-llama",
      "id": "UAIX.LmRuntime.Models.Llama",
      "name": "Models.Llama",
      "tagline": "LLaMA-family configuration, tensor binding, mapped weight sources, reference forward execution, sessions, KV cache, generation, persistence, and parity evidence.",
      "description": "LLaMA-family graph configuration and reference forward-pass primitives for pure C# local LLM runtime inference.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Models.Llama",
      "dependencies": [
        "UAIX.LmRuntime.Abstractions",
        "UAIX.LmRuntime.Gguf",
        "UAIX.LmRuntime.Kernels.Cpu",
        "UAIX.LmRuntime.Sampling",
        "UAIX.LmRuntime.Tensors",
        "UAIX.LmRuntime.Tokenization"
      ],
      "audience": "Runtime developers and advanced applications that need direct control of the LLaMA-family model graph and deterministic reference sessions.",
      "install_when": [
        "You need to derive LLaMA configuration from GGUF metadata and validate architecture invariants.",
        "You need required-tensor registries, binding manifests, mapped/array weight sources, reference forward operations, or storage parity.",
        "You need direct mapped/reference sessions, deterministic greedy generation, KV-cache state, or session artifacts."
      ],
      "not_for": [
        "Claiming every LLaMA-named or derivative architecture is supported without model-specific validation.",
        "Using a lower-level session when the LocalEndpoint facade already provides the bounded application behavior you need."
      ],
      "key_types": [
        "LlamaModelConfig",
        "LlamaMappedModelLoader",
        "LlamaMappedModel",
        "LlamaMappedReferenceSession",
        "LlamaReferenceSession",
        "LlamaTensorBinder",
        "TensorBindingManifest",
        "ReferenceKvCache",
        "ReferenceKvCacheSerializer",
        "LlamaSessionArtifactSerializer",
        "LlamaStorageParityRunner",
        "RealModelSmokeRunner"
      ],
      "concepts": [
        {
          "title": "Architecture validation",
          "text": "LlamaModelConfig derives graph dimensions from GGUF metadata and validates head counts, dimensions, context, vocabulary, RoPE, and normalization requirements before execution."
        },
        {
          "title": "Binding is evidence",
          "text": "Required tensor roles, storage kinds, ownership, diagnostics, and manifests make missing, duplicate, incompatible, or unexpectedly materialized weights observable."
        },
        {
          "title": "Session state is explicit",
          "text": "Reference sessions own position, logits, and KV-cache state. Callers choose reset behavior and can capture, serialize, fingerprint, restore, or discard state under bounded policies."
        }
      ],
      "examples": [
        {
          "title": "Validate LLaMA configuration from GGUF",
          "description": "Separate container parsing from architecture-specific configuration checks.",
          "filename": "LlamaConfigurationExample.cs",
          "code": "using UAIX.LmRuntime.Gguf;\nusing UAIX.LmRuntime.Models.Llama;\n\nGgufModel gguf = GgufReader.Read(\n    \"models/model.gguf\",\n    new GgufParseOptions());\n\nLlamaModelConfig configuration =\n    LlamaModelConfig.FromGguf(gguf);\n\nconfiguration.Validate();\n\nConsole.WriteLine($\"Model: {configuration.ModelName}\");\nConsole.WriteLine($\"Layers: {configuration.BlockCount}\");\nConsole.WriteLine($\"Embedding: {configuration.EmbeddingLength}\");\nConsole.WriteLine($\"Heads: {configuration.AttentionHeadCount}\");\nConsole.WriteLine($\"KV heads: {configuration.AttentionKeyValueHeadCount}\");\nConsole.WriteLine($\"Context: {configuration.ContextLength}\");"
        },
        {
          "title": "Load a mapped model and decode one greedy token",
          "description": "Use direct mapped execution for diagnostics, model validation, and deterministic one-token evidence.",
          "filename": "MappedOneTokenExample.cs",
          "code": "using UAIX.LmRuntime.Models.Llama;\n\nvar loader = new LlamaMappedModelLoader();\n\nusing LlamaMappedModel model = loader.Load(\n    \"models/model.gguf\",\n    new LlamaMappedModelLoadOptions\n    {\n        RuntimeMode = LlamaRuntimeMode.DeterministicParity,\n        ComputeModelSha256 = true\n    });\n\nusing LlamaMappedReferenceSession session =\n    model.CreateReferenceSession();\n\nLlamaMappedGreedyTokenResult result =\n    session.DecodeOneGreedy(\n        \"Hello\",\n        new LlamaOneTokenOptions\n        {\n            ResetSession = true,\n            ParseSpecialTokens = false,\n            AddSpecialTokens = true,\n            EmitTokenizerTrace = false\n        });\n\nConsole.WriteLine($\"{result.TokenId}: {result.TokenText}\");\nConsole.WriteLine($\"Selected logit: {result.SelectedLogit}\");\nConsole.WriteLine($\"Position: {result.Position}\");"
        },
        {
          "title": "Generate several greedy tokens with caller-owned buffers",
          "description": "Bound output allocation and observe each committed token.",
          "filename": "MappedGenerationExample.cs",
          "code": "using UAIX.LmRuntime.Models.Llama;\nusing UAIX.LmRuntime.Tokenization;\n\npublic static class MappedGenerationExample\n{\n    /// <summary>\n    /// Generates greedy tokens into caller-owned buffers and observes each committed selection.\n    /// </summary>\n    /// <param name=\"model\">The loaded mapped model that defines vocabulary capacity.</param>\n    /// <param name=\"session\">The isolated mapped reference session.</param>\n    /// <param name=\"prompt\">The prompt to tokenize and prefill.</param>\n    /// <param name=\"maximumTokens\">The maximum number of output tokens.</param>\n    /// <param name=\"cancellationToken\">A token observed between committed model steps.</param>\n    /// <returns>The bounded greedy-generation result.</returns>\n    public static LlamaGreedyGenerationResult Generate(\n        LlamaMappedModel model,\n        LlamaMappedReferenceSession session,\n        string prompt,\n        int maximumTokens,\n        CancellationToken cancellationToken)\n    {\n        ArgumentNullException.ThrowIfNull(model);\n        ArgumentNullException.ThrowIfNull(session);\n        ArgumentException.ThrowIfNullOrWhiteSpace(prompt);\n        ArgumentOutOfRangeException.ThrowIfNegativeOrZero(maximumTokens);\n\n        int[] generatedTokenIds = new int[maximumTokens];\n        float[] finalLogits = new float[model.Configuration.VocabularySize];\n\n        return session.GenerateGreedy(\n            prompt,\n            generatedTokenIds,\n            finalLogits,\n            new LlamaGreedyGenerationOptions\n            {\n                MaximumTokens = maximumTokens,\n                ResetSession = true,\n                EndOfSequenceTokenId = null,\n                StopTokenIds = Array.Empty<int>()\n            },\n            new TokenizationOptions\n            {\n                AddSpecialTokens = true,\n                ParseSpecialTokens = false\n            },\n            token => Console.WriteLine(\n                
$\"{token.Sequence}: {token.TokenId} ({token.SelectedLogit})\"),\n            cancellationToken);\n    }\n}"
        },
        {
          "title": "Use the deterministic in-memory fixture",
          "description": "Exercise reference execution without depending on an external model artifact.",
          "filename": "ReferenceFixtureExample.cs",
          "code": "using UAIX.LmRuntime.Models.Llama;\n\nLlamaReferenceFixture fixture =\n    LlamaReferenceFixtureFactory.CreateDeterministic();\n\nLlamaReferenceSession session = fixture.CreateSession();\n\nLlamaGreedyTokenResult result = session.DecodeOneGreedy(\n    fixture.PromptTokenIds,\n    resetSession: true);\n\nConsole.WriteLine($\"Token: {result.TokenId}\");\nConsole.WriteLine($\"Position: {result.Position}\");"
        },
        {
          "title": "Save and restore session state",
          "description": "Bind persisted state to model, configuration, tokenizer, and cache-layout fingerprints, and enforce a maximum artifact size.",
          "filename": "SessionPersistenceExample.cs",
          "code": "using UAIX.LmRuntime.Models.Llama;\n\npublic static class SessionPersistenceExample\n{\n    /// <summary>\n    /// Saves a mapped reference session and immediately reloads the authenticated artifact.\n    /// </summary>\n    /// <param name=\"model\">The mapped model that supplies model identity evidence.</param>\n    /// <param name=\"session\">The session whose deterministic state will be persisted.</param>\n    /// <param name=\"statePath\">The destination path for the session artifact.</param>\n    /// <param name=\"configurationFingerprint\">The host-computed configuration fingerprint.</param>\n    /// <param name=\"tokenizerFingerprint\">The host-computed tokenizer fingerprint.</param>\n    /// <param name=\"cacheLayoutFingerprint\">The host-computed cache-layout fingerprint.</param>\n    /// <returns>The authenticated artifact loaded from disk.</returns>\n    public static LlamaSessionArtifact SaveAndReload(\n        LlamaMappedModel model,\n        LlamaMappedReferenceSession session,\n        string statePath,\n        string configurationFingerprint,\n        string tokenizerFingerprint,\n        string cacheLayoutFingerprint)\n    {\n        ArgumentNullException.ThrowIfNull(model);\n        ArgumentNullException.ThrowIfNull(session);\n        ArgumentException.ThrowIfNullOrWhiteSpace(statePath);\n\n        string? 
directory = Path.GetDirectoryName(\n            Path.GetFullPath(statePath));\n\n        if (!string.IsNullOrEmpty(directory))\n        {\n            Directory.CreateDirectory(directory);\n        }\n\n        var persistence = new LlamaSessionPersistenceOptions\n        {\n            ModelSha256 = model.Manifest.ModelSha256,\n            ConfigurationFingerprint = configurationFingerprint,\n            TokenizerFingerprint = tokenizerFingerprint,\n            CacheLayoutFingerprint = cacheLayoutFingerprint,\n            SamplerMode = \"greedy\",\n            GeneratedUtc = DateTimeOffset.UtcNow,\n            ClaimStatus = \"local-evidence\",\n            MaximumByteCount = 64 * 1024 * 1024\n        };\n\n        session.SaveState(statePath, persistence);\n\n        return session.LoadState(\n            statePath,\n            maximumByteCount: persistence.MaximumByteCount);\n    }\n}",
          "notes": "The caller supplies and validates compatibility fingerprints; persisted state should be treated as model-bound untrusted input."
        }
      ],
      "faq": [
        {
          "q": "What is the difference between mapped and materialized reference sessions?",
          "a": "A mapped session reads supported weight storage through mapped sources. A materialized session copies compatible weights into managed reference structures. The manifest and materialization records expose the chosen ownership and copied-byte behavior."
        },
        {
          "q": "Does LlamaModelConfig.FromGguf prove the model will execute?",
          "a": "No. Configuration validation is one gate. Required tensors, storage support, tokenizer compatibility, binding, context limits, and a real execution stage must also pass."
        },
        {
          "q": "Can session artifacts be restored with a different model?",
          "a": "They should not be. Use the model hash and configuration, tokenizer, and cache-layout fingerprints as strict compatibility checks, plus a bounded size and trusted path policy."
        },
        {
          "q": "Is the reference path intended as a benchmark leader?",
          "a": "No. It is a legible correctness and parity anchor. Performance claims require retained measurements for the exact model, hardware, settings, and code path."
        }
      ],
      "required_for": "LLaMA graph/session internals",
      "group": "Model pipeline"
    },
    "backends-cpu-managed": {
      "route": "package-backends-cpu-managed",
      "id": "UAIX.LmRuntime.Backends.CpuManaged",
      "name": "Backends.CpuManaged",
      "tagline": "The explicit managed .NET CPU backend registration, capability declaration, device identity, and no-native-asset compatibility lane.",
      "description": "Backends.CpuManaged supplies the backend object used to represent managed CPU execution in the Acceleration registry. Its probe is available when the package is loaded, reports the current process runtime identifier and a stable CPU device ID, and requires no GPU, driver, native inference library, provider API, subprocess, network request, or model download.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.CpuManaged",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration"
      ],
      "audience": "Applications and hosts that require a package-visible managed CPU backend, deterministic CPU selection, or explicit CPU fallback identity.",
      "install_when": [
        "You need a registered CPU backend that probes available without native assets.",
        "You need a stable managed CPU backend ID and device descriptor in diagnostics.",
        "You need PreferGpu to fall back to an explicitly registered CPU lane."
      ],
      "not_for": [
        "It does not expose the tensor kernels directly; use Kernels.Cpu for managed math APIs.",
        "It does not load a GGUF model; use LocalEndpoint or Models.Llama for model execution.",
        "It does not claim GPU or native inference."
      ],
      "key_types": [
        "CpuManagedRuntimeBackend",
        "RuntimeBackendRegistryExtensions"
      ],
      "concepts": [
        {
          "title": "Always-local probe",
          "text": "The backend is available when loaded because it has no GPU, driver, or native-asset dependency."
        },
        {
          "title": "Stable identity",
          "text": "The backend ID is uaix.lmruntime.cpu-managed and the default device ID is cpu:managed:default."
        },
        {
          "title": "Visible fallback",
          "text": "When selected after a GPU-prefer miss, Acceleration reports UsedCpuFallback rather than relabeling the CPU lane."
        }
      ],
      "examples": [
        {
          "title": "Register the managed CPU backend",
          "description": "Add the backend through the fluent registry extension and inspect deterministic order.",
          "filename": "RegisterCpuManagedExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCpuManagedBackend();\n\nforeach (IRuntimeBackend backend in registry.GetBackends())\n{\n    Console.WriteLine($\"{backend.Id} ({backend.Kind})\");\n}\n",
          "notes": "Calling the registration helper twice on the same registry throws because duplicate backend IDs are rejected."
        },
        {
          "title": "Probe managed CPU availability",
          "description": "Read the backend and device evidence returned by the local probe.",
          "filename": "ProbeCpuManagedExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\n\nvar backend = new CpuManagedRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireCpu\n    });\n\nConsole.WriteLine($\"Available: {probe.IsAvailable}\");\nConsole.WriteLine($\"Native assets: {probe.Capabilities.NativeAssetState}\");\nforeach (RuntimeDeviceDescriptor device in probe.Devices)\n{\n    Console.WriteLine($\"{device.DeviceId} / {device.RuntimeIdentifier}\");\n}\n",
          "notes": "The probe reports managed CPU availability only; it does not parse or execute a model."
        },
        {
          "title": "Require the managed CPU backend by stable ID",
          "description": "Use a backend-ID policy when the host must reject every other execution class.",
          "filename": "PinCpuManagedExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.CpuManaged;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCpuManagedBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = CpuManagedRuntimeBackend.BackendId,\n        AllowCpuFallback = false\n    });\n\nif (!result.Succeeded)\n{\n    throw new InvalidOperationException(result.FailureReason);\n}\n",
          "notes": "The stable ID pins backend selection, not a particular operating-system thread or CPU core."
        },
        {
          "title": "Assert the no-native boundary in host startup checks",
          "description": "Fail application startup when package metadata contradicts the expected managed CPU lane.",
          "filename": "AssertCpuBoundaryExample.cs",
          "code": "using UAIX.LmRuntime.Backends.CpuManaged;\n\nvar backend = new CpuManagedRuntimeBackend();\n\nif (backend.Capabilities.UsesNativeInference ||\n    backend.Capabilities.UsesGpuAcceleration ||\n    backend.Capabilities.NativeAssetState != \"not-required\")\n{\n    throw new InvalidOperationException(\n        \"The selected backend does not match the required managed CPU boundary.\");\n}\n",
          "notes": "This checks declared capability metadata. A full host readiness check should also probe and record the returned device identity."
        }
      ],
      "faq": [
        {
          "q": "Is this the same package as Kernels.Cpu?",
          "a": "No. Backends.CpuManaged represents backend selection and diagnostics. Kernels.Cpu exposes managed numerical operations."
        },
        {
          "q": "Does it require a RID-specific native package?",
          "a": "No. NativeAssetState is not-required and the probe has no native dependency."
        },
        {
          "q": "Does it download models?",
          "a": "No. Model acquisition and trust remain host responsibilities."
        }
      ],
      "required_for": "the package-visible managed CPU backend and explicit CPU fallback identity",
      "group": "Backend registrations"
    },
    "backends-cuda": {
      "route": "package-backends-cuda",
      "id": "UAIX.LmRuntime.Backends.Cuda",
      "name": "Backends.Cuda",
      "tagline": "CUDA backend registration, declared compatibility metadata, runtime identifiers, and fail-closed local probe diagnostics.",
      "description": "Backends.Cuda registers the UAIX CUDA backend family with Acceleration. It declares the intended CUDA capability surface and package runtime identifiers (win-x64, linux-x64), then probes unavailable until a host-supplied native adapter proves assets, runtime libraries, drivers, and a device. The managed package performs no hidden native inference. Modern Windows and Tesla K80 native-asset slots are published as separate package identities.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.Cuda",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration"
      ],
      "audience": "Hosts building an explicit CUDA compatibility lane, diagnostics UI, deployment validation, or future native adapter integration for NVIDIA GPU hosts.",
      "install_when": [
        "You need the stable CUDA backend ID and registry extension.",
        "You need package-visible CUDA runtime-identifier and capability declarations.",
        "You need fail-closed probe diagnostics before CUDA execution is proven."
      ],
      "not_for": [
        "Installing or registering this package does not prove CUDA execution.",
        "The supplied managed backend does not load native inference libraries or enumerate a real device.",
        "Do not report GPU availability unless a host adapter returns an available probe with concrete device evidence."
      ],
      "key_types": [
        "CudaRuntimeBackend",
        "RuntimeBackendRegistryExtensions"
      ],
      "concepts": [
        {
          "title": "Registration is not execution",
          "text": "AddUaixCudaBackend adds metadata and probe behavior to the registry; it does not start a CUDA engine."
        },
        {
          "title": "Compatibility fails closed",
          "text": "The default diagnostic probe returns unavailable and states which native evidence has not been supplied."
        },
        {
          "title": "Runtime identifiers are declared",
          "text": "The package declares win-x64, linux-x64; the current process and deployment still require independent validation."
        }
      ],
      "examples": [
        {
          "title": "Register the CUDA backend",
          "description": "Add the package-visible CUDA backend to an explicit Acceleration registry.",
          "filename": "RegisterCUDAExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCudaBackend();\n\nIRuntimeBackend backend = registry.FindById(CudaRuntimeBackend.BackendId)\n    ?? throw new InvalidOperationException(\"CUDA backend registration failed.\");\n\nConsole.WriteLine($\"{backend.Id} / {backend.Kind}\");\n",
          "notes": "Registration exposes CUDA diagnostics. It does not prove a native runtime or device."
        },
        {
          "title": "Probe the CUDA package boundary",
          "description": "Inspect declared capabilities and the independent availability result.",
          "filename": "ProbeCUDAExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar backend = new CudaRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier =\n            System.Runtime.InteropServices.RuntimeInformation.RuntimeIdentifier,\n        RequireNativeAssets = true\n    });\n\nConsole.WriteLine($\"Declared API: {probe.Capabilities.BackendApiName}\");\nConsole.WriteLine($\"Native state: {probe.Capabilities.NativeAssetState}\");\nConsole.WriteLine($\"Available: {probe.IsAvailable}\");\nforeach (string diagnostic in probe.Diagnostics)\n{\n    Console.WriteLine(diagnostic);\n}\n",
          "notes": "The default probe is expected to remain unavailable until a production adapter proves CUDA assets, runtime libraries, and a device."
        },
        {
          "title": "Require the CUDA backend by ID",
          "description": "Reject CPU substitution when the workload contract specifically requires CUDA.",
          "filename": "RequireCUDAExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixCudaBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = CudaRuntimeBackend.BackendId,\n        AllowCpuFallback = false,\n        RequireNativeAssets = true\n    });\n\nif (!result.Succeeded)\n{\n    Console.Error.WriteLine(result.FailureReason);\n}\n",
          "notes": "RequireBackendId fails instead of relabeling another backend as CUDA."
        },
        {
          "title": "List the CUDA package runtime identifiers",
          "description": "Use declared identifiers as deployment metadata, not as proof that the current machine is ready.",
          "filename": "CUDARuntimeIdentifiersExample.cs",
          "code": "using UAIX.LmRuntime.Backends.Cuda;\n\nvar backend = new CudaRuntimeBackend();\nforeach (string runtimeIdentifier in backend.Capabilities.RuntimeIdentifiers)\n{\n    Console.WriteLine(runtimeIdentifier);\n}\n",
          "notes": "The declared identifiers are win-x64, linux-x64. Probe evidence remains required for a usable CUDA execution path."
        }
      ],
      "faq": [
        {
          "q": "Does this package execute through CUDA today?",
          "a": "The managed registration package reports unavailable until a host-supplied native adapter proves CUDA assets, runtime libraries, drivers, and a device."
        },
        {
          "q": "Why does SupportsGpuExecution read true when IsAvailable is false?",
          "a": "Capabilities describe the backend family. IsAvailable describes the current proven host state."
        },
        {
          "q": "Does registration contact the network?",
          "a": "No. Registration and the supplied diagnostic probe perform no downloads, provider calls, subprocess execution, or remote inference."
        }
      ],
      "required_for": "CUDA backend registration and fail-closed CUDA diagnostics",
      "group": "Backend registrations"
    },
    "backends-directml": {
      "route": "package-backends-directml",
      "id": "UAIX.LmRuntime.Backends.DirectML",
      "name": "Backends.DirectML",
      "tagline": "DirectML backend registration, declared compatibility metadata, runtime identifiers, and fail-closed local probe diagnostics.",
      "description": "Backends.DirectML registers the UAIX DirectML backend family with Acceleration. It declares the intended DirectML capability surface and package runtime identifiers (win-x64), then probes unavailable until a host-supplied native adapter proves assets, runtime libraries, drivers, and a device. The managed package performs no hidden native inference.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.DirectML",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration"
      ],
      "audience": "Hosts building an explicit DirectML compatibility lane, diagnostics UI, deployment validation, or future native adapter integration for Windows GPU hosts.",
      "install_when": [
        "You need the stable DirectML backend ID and registry extension.",
        "You need package-visible DirectML runtime-identifier and capability declarations.",
        "You need fail-closed probe diagnostics before DirectML execution is proven."
      ],
      "not_for": [
        "Installing or registering this package does not prove DirectML execution.",
        "The supplied managed backend does not load native inference libraries or enumerate a real device.",
        "Do not report GPU availability unless a host adapter returns an available probe with concrete device evidence."
      ],
      "key_types": [
        "DirectMlRuntimeBackend",
        "RuntimeBackendRegistryExtensions"
      ],
      "concepts": [
        {
          "title": "Registration is not execution",
          "text": "AddUaixDirectMlBackend adds metadata and probe behavior to the registry; it does not start a DirectML engine."
        },
        {
          "title": "Compatibility fails closed",
          "text": "The default diagnostic probe returns unavailable and states which native evidence has not been supplied."
        },
        {
          "title": "Runtime identifiers are declared",
          "text": "The package declares win-x64; the current process and deployment still require independent validation."
        }
      ],
      "examples": [
        {
          "title": "Register the DirectML backend",
          "description": "Add the package-visible DirectML backend to an explicit Acceleration registry.",
          "filename": "RegisterDirectMlExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.DirectML;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixDirectMlBackend();\n\nIRuntimeBackend backend = registry.FindById(DirectMlRuntimeBackend.BackendId)\n    ?? throw new InvalidOperationException(\"DirectML backend registration failed.\");\n\nConsole.WriteLine($\"{backend.Id} / {backend.Kind}\");\n",
          "notes": "Registration exposes DirectML diagnostics. It does not prove a native runtime or device."
        },
        {
          "title": "Probe the DirectML package boundary",
          "description": "Inspect declared capabilities and the independent availability result.",
          "filename": "ProbeDirectMlExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.DirectML;\n\nvar backend = new DirectMlRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier =\n            System.Runtime.InteropServices.RuntimeInformation.RuntimeIdentifier,\n        RequireNativeAssets = true\n    });\n\nConsole.WriteLine($\"Declared API: {probe.Capabilities.BackendApiName}\");\nConsole.WriteLine($\"Native state: {probe.Capabilities.NativeAssetState}\");\nConsole.WriteLine($\"Available: {probe.IsAvailable}\");\nforeach (string diagnostic in probe.Diagnostics)\n{\n    Console.WriteLine(diagnostic);\n}\n",
          "notes": "The default probe is expected to remain unavailable until a production adapter proves DirectML assets, runtime libraries, and a device."
        },
        {
          "title": "Require the DirectML backend by ID",
          "description": "Reject CPU substitution when the workload contract specifically requires DirectML.",
          "filename": "RequireDirectMlExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.DirectML;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixDirectMlBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = DirectMlRuntimeBackend.BackendId,\n        AllowCpuFallback = false,\n        RequireNativeAssets = true\n    });\n\nif (!result.Succeeded)\n{\n    Console.Error.WriteLine(result.FailureReason);\n}\n",
          "notes": "RequireBackendId fails instead of relabeling another backend as DirectML."
        },
        {
          "title": "List the DirectML package runtime identifiers",
          "description": "Use declared identifiers as deployment metadata, not as proof that the current machine is ready.",
          "filename": "DirectMlRuntimeIdentifiersExample.cs",
          "code": "using UAIX.LmRuntime.Backends.DirectML;\n\nvar backend = new DirectMlRuntimeBackend();\nforeach (string runtimeIdentifier in backend.Capabilities.RuntimeIdentifiers)\n{\n    Console.WriteLine(runtimeIdentifier);\n}\n",
          "notes": "The declared identifiers are win-x64. Probe evidence remains required for a usable DirectML execution path."
        }
      ],
      "faq": [
        {
          "q": "Does this package execute through DirectML today?",
          "a": "The managed registration package reports unavailable until a host-supplied native adapter proves DirectML assets, runtime libraries, drivers, and a device."
        },
        {
          "q": "Why does SupportsGpuExecution read true when IsAvailable is false?",
          "a": "Capabilities describe the backend family. IsAvailable describes the current proven host state."
        },
        {
          "q": "Does registration contact the network?",
          "a": "No. Registration and the supplied diagnostic probe perform no downloads, provider calls, subprocess execution, or remote inference."
        }
      ],
      "required_for": "DirectML backend registration and fail-closed DirectML diagnostics",
      "group": "Backend registrations"
    },
    "backends-vulkan": {
      "route": "package-backends-vulkan",
      "id": "UAIX.LmRuntime.Backends.Vulkan",
      "name": "Backends.Vulkan",
      "tagline": "Vulkan backend registration, declared compatibility metadata, runtime identifiers, and fail-closed local probe diagnostics.",
      "description": "Backends.Vulkan registers the UAIX Vulkan backend family with Acceleration. It declares the intended Vulkan capability surface and package runtime identifiers (win-x64, linux-x64), then probes unavailable until a host-supplied native adapter proves assets, runtime libraries, drivers, and a device. The managed package performs no hidden native inference.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.Vulkan",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration"
      ],
      "audience": "Hosts building an explicit Vulkan compatibility lane, diagnostics UI, deployment validation, or future native adapter integration for vendor-diverse Windows and Linux GPU hosts.",
      "install_when": [
        "You need the stable Vulkan backend ID and registry extension.",
        "You need package-visible Vulkan runtime-identifier and capability declarations.",
        "You need fail-closed probe diagnostics before Vulkan execution is proven."
      ],
      "not_for": [
        "Installing or registering this package does not prove Vulkan execution.",
        "The supplied managed backend does not load native inference libraries or enumerate a real device.",
        "Do not report GPU availability unless a host adapter returns an available probe with concrete device evidence."
      ],
      "key_types": [
        "VulkanRuntimeBackend",
        "RuntimeBackendRegistryExtensions"
      ],
      "concepts": [
        {
          "title": "Registration is not execution",
          "text": "AddUaixVulkanBackend adds metadata and probe behavior to the registry; it does not start a Vulkan engine."
        },
        {
          "title": "Compatibility fails closed",
          "text": "The default diagnostic probe returns unavailable and states which native evidence has not been supplied."
        },
        {
          "title": "Runtime identifiers are declared",
          "text": "The package declares win-x64, linux-x64; the current process and deployment still require independent validation."
        }
      ],
      "examples": [
        {
          "title": "Register the Vulkan backend",
          "description": "Add the package-visible Vulkan backend to an explicit Acceleration registry.",
          "filename": "RegisterVulkanExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Vulkan;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixVulkanBackend();\n\nIRuntimeBackend backend = registry.FindById(VulkanRuntimeBackend.BackendId)\n    ?? throw new InvalidOperationException(\"Vulkan backend registration failed.\");\n\nConsole.WriteLine($\"{backend.Id} / {backend.Kind}\");\n",
          "notes": "Registration exposes Vulkan diagnostics. It does not prove a native runtime or device."
        },
        {
          "title": "Probe the Vulkan package boundary",
          "description": "Inspect declared capabilities and the independent availability result.",
          "filename": "ProbeVulkanExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Vulkan;\n\nvar backend = new VulkanRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier =\n            System.Runtime.InteropServices.RuntimeInformation.RuntimeIdentifier,\n        RequireNativeAssets = true\n    });\n\nConsole.WriteLine($\"Declared API: {probe.Capabilities.BackendApiName}\");\nConsole.WriteLine($\"Native state: {probe.Capabilities.NativeAssetState}\");\nConsole.WriteLine($\"Available: {probe.IsAvailable}\");\nforeach (string diagnostic in probe.Diagnostics)\n{\n    Console.WriteLine(diagnostic);\n}\n",
          "notes": "The default probe is expected to remain unavailable until a production adapter proves Vulkan assets, runtime libraries, and a device."
        },
        {
          "title": "Require the Vulkan backend by ID",
          "description": "Reject CPU substitution when the workload contract specifically requires Vulkan.",
          "filename": "RequireVulkanExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Vulkan;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixVulkanBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = VulkanRuntimeBackend.BackendId,\n        AllowCpuFallback = false,\n        RequireNativeAssets = true\n    });\n\nif (!result.Succeeded)\n{\n    Console.Error.WriteLine(result.FailureReason);\n}\n",
          "notes": "RequireBackendId fails instead of relabeling another backend as Vulkan."
        },
        {
          "title": "List the Vulkan package runtime identifiers",
          "description": "Use declared identifiers as deployment metadata, not as proof that the current machine is ready.",
          "filename": "VulkanRuntimeIdentifiersExample.cs",
          "code": "using UAIX.LmRuntime.Backends.Vulkan;\n\nvar backend = new VulkanRuntimeBackend();\nforeach (string runtimeIdentifier in backend.Capabilities.RuntimeIdentifiers)\n{\n    Console.WriteLine(runtimeIdentifier);\n}\n",
          "notes": "The declared identifiers are win-x64 and linux-x64. Probe evidence remains required for a usable Vulkan execution path."
        }
      ],
      "faq": [
        {
          "q": "Does this package execute through Vulkan today?",
          "a": "Not on its own. The managed registration package reports unavailable until a host-supplied native adapter proves Vulkan assets, runtime libraries, drivers, and a device."
        },
        {
          "q": "Why does SupportsGpuExecution read true when IsAvailable is false?",
          "a": "Capabilities describe the backend family. IsAvailable describes the current proven host state."
        },
        {
          "q": "Does registration contact the network?",
          "a": "No. Registration and the supplied diagnostic probe perform no downloads, provider calls, subprocess execution, or remote inference."
        }
      ],
      "required_for": "Vulkan backend registration and fail-closed Vulkan diagnostics",
      "group": "Backend registrations"
    },
    "backends-rocm": {
      "route": "package-backends-rocm",
      "id": "UAIX.LmRuntime.Backends.Rocm",
      "name": "Backends.Rocm",
      "tagline": "ROCm backend registration, declared compatibility metadata, runtime identifiers, and fail-closed local probe diagnostics.",
      "description": "Backends.Rocm registers the UAIX ROCm backend family with Acceleration. It declares the intended ROCm capability surface and the package runtime identifier (linux-x64), and its probe reports unavailable until a host-supplied native adapter proves assets, runtime libraries, drivers, and a device. The managed package performs no hidden native inference.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.Rocm",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration"
      ],
      "audience": "Hosts building an explicit ROCm compatibility lane, diagnostics UI, deployment validation, or future native adapter integration for AMD GPU Linux hosts.",
      "install_when": [
        "You need the stable ROCm backend ID and registry extension.",
        "You need package-visible ROCm runtime-identifier and capability declarations.",
        "You need fail-closed probe diagnostics before ROCm execution is proven."
      ],
      "not_for": [
        "Installing or registering this package does not prove ROCm execution.",
        "The supplied managed backend does not load native inference libraries or enumerate a real device.",
        "Do not report GPU availability unless a host adapter returns an available probe with concrete device evidence."
      ],
      "key_types": [
        "RocmRuntimeBackend",
        "RuntimeBackendRegistryExtensions"
      ],
      "concepts": [
        {
          "title": "Registration is not execution",
          "text": "AddUaixRocmBackend adds metadata and probe behavior to the registry; it does not start a ROCm engine."
        },
        {
          "title": "Compatibility fails closed",
          "text": "The default diagnostic probe returns unavailable and states which native evidence has not been supplied."
        },
        {
          "title": "Runtime identifiers are declared",
          "text": "The package declares linux-x64; the current process and deployment still require independent validation."
        }
      ],
      "examples": [
        {
          "title": "Register the ROCm backend",
          "description": "Add the package-visible ROCm backend to an explicit Acceleration registry.",
          "filename": "RegisterROCmExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Rocm;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixRocmBackend();\n\nIRuntimeBackend backend = registry.FindById(RocmRuntimeBackend.BackendId)\n    ?? throw new InvalidOperationException(\"ROCm backend registration failed.\");\n\nConsole.WriteLine($\"{backend.Id} / {backend.Kind}\");\n",
          "notes": "Registration exposes ROCm diagnostics. It does not prove a native runtime or device."
        },
        {
          "title": "Probe the ROCm package boundary",
          "description": "Inspect declared capabilities and the independent availability result.",
          "filename": "ProbeROCmExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Rocm;\n\nvar backend = new RocmRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier =\n            System.Runtime.InteropServices.RuntimeInformation.RuntimeIdentifier,\n        RequireNativeAssets = true\n    });\n\nConsole.WriteLine($\"Declared API: {probe.Capabilities.BackendApiName}\");\nConsole.WriteLine($\"Native state: {probe.Capabilities.NativeAssetState}\");\nConsole.WriteLine($\"Available: {probe.IsAvailable}\");\nforeach (string diagnostic in probe.Diagnostics)\n{\n    Console.WriteLine(diagnostic);\n}\n",
          "notes": "The default probe is expected to remain unavailable until a production adapter proves ROCm assets, runtime libraries, and a device."
        },
        {
          "title": "Require the ROCm backend by ID",
          "description": "Reject CPU substitution when the workload contract specifically requires ROCm.",
          "filename": "RequireROCmExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Rocm;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixRocmBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = RocmRuntimeBackend.BackendId,\n        AllowCpuFallback = false,\n        RequireNativeAssets = true\n    });\n\nif (!result.Succeeded)\n{\n    Console.Error.WriteLine(result.FailureReason);\n}\n",
          "notes": "RequireBackendId fails instead of relabeling another backend as ROCm."
        },
        {
          "title": "List the ROCm package runtime identifiers",
          "description": "Use declared identifiers as deployment metadata, not as proof that the current machine is ready.",
          "filename": "ROCmRuntimeIdentifiersExample.cs",
          "code": "using UAIX.LmRuntime.Backends.Rocm;\n\nvar backend = new RocmRuntimeBackend();\nforeach (string runtimeIdentifier in backend.Capabilities.RuntimeIdentifiers)\n{\n    Console.WriteLine(runtimeIdentifier);\n}\n",
          "notes": "The only declared identifier is linux-x64. Probe evidence remains required for a usable ROCm execution path."
        }
      ],
      "faq": [
        {
          "q": "Does this package execute through ROCm today?",
          "a": "Not on its own. The managed registration package reports unavailable until a host-supplied native adapter proves ROCm assets, runtime libraries, drivers, and a device."
        },
        {
          "q": "Why does SupportsGpuExecution read true when IsAvailable is false?",
          "a": "Capabilities describe the backend family. IsAvailable describes the current proven host state."
        },
        {
          "q": "Does registration contact the network?",
          "a": "No. Registration and the supplied diagnostic probe perform no downloads, provider calls, subprocess execution, or remote inference."
        }
      ],
      "required_for": "ROCm backend registration and fail-closed ROCm diagnostics",
      "group": "Backend registrations"
    },
    "backends-metal": {
      "route": "package-backends-metal",
      "id": "UAIX.LmRuntime.Backends.Metal",
      "name": "Backends.Metal",
      "tagline": "Metal backend registration, declared compatibility metadata, runtime identifiers, and fail-closed local probe diagnostics.",
      "description": "Backends.Metal registers the UAIX Metal backend family with Acceleration. It declares the intended Metal capability surface and package runtime identifiers (osx-arm64 and osx-x64), and its probe reports unavailable until a host-supplied native adapter proves assets, runtime libraries, drivers, and a device. The managed package performs no hidden native inference.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.Metal",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration"
      ],
      "audience": "Hosts building an explicit Metal compatibility lane, diagnostics UI, deployment validation, or future native adapter integration for Apple GPU hosts.",
      "install_when": [
        "You need the stable Metal backend ID and registry extension.",
        "You need package-visible Metal runtime-identifier and capability declarations.",
        "You need fail-closed probe diagnostics before Metal execution is proven."
      ],
      "not_for": [
        "Installing or registering this package does not prove Metal execution.",
        "The supplied managed backend does not load native inference libraries or enumerate a real device.",
        "Do not report GPU availability unless a host adapter returns an available probe with concrete device evidence."
      ],
      "key_types": [
        "MetalRuntimeBackend",
        "RuntimeBackendRegistryExtensions"
      ],
      "concepts": [
        {
          "title": "Registration is not execution",
          "text": "AddUaixMetalBackend adds metadata and probe behavior to the registry; it does not start a Metal engine."
        },
        {
          "title": "Compatibility fails closed",
          "text": "The default diagnostic probe returns unavailable and states which native evidence has not been supplied."
        },
        {
          "title": "Runtime identifiers are declared",
          "text": "The package declares osx-arm64 and osx-x64; the current process and deployment still require independent validation."
        }
      ],
      "examples": [
        {
          "title": "Register the Metal backend",
          "description": "Add the package-visible Metal backend to an explicit Acceleration registry.",
          "filename": "RegisterMetalExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Metal;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixMetalBackend();\n\nIRuntimeBackend backend = registry.FindById(MetalRuntimeBackend.BackendId)\n    ?? throw new InvalidOperationException(\"Metal backend registration failed.\");\n\nConsole.WriteLine($\"{backend.Id} / {backend.Kind}\");\n",
          "notes": "Registration exposes Metal diagnostics. It does not prove a native runtime or device."
        },
        {
          "title": "Probe the Metal package boundary",
          "description": "Inspect declared capabilities and the independent availability result.",
          "filename": "ProbeMetalExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Metal;\n\nvar backend = new MetalRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier =\n            System.Runtime.InteropServices.RuntimeInformation.RuntimeIdentifier,\n        RequireNativeAssets = true\n    });\n\nConsole.WriteLine($\"Declared API: {probe.Capabilities.BackendApiName}\");\nConsole.WriteLine($\"Native state: {probe.Capabilities.NativeAssetState}\");\nConsole.WriteLine($\"Available: {probe.IsAvailable}\");\nforeach (string diagnostic in probe.Diagnostics)\n{\n    Console.WriteLine(diagnostic);\n}\n",
          "notes": "The default probe is expected to remain unavailable until a production adapter proves Metal assets, runtime libraries, and a device."
        },
        {
          "title": "Require the Metal backend by ID",
          "description": "Reject CPU substitution when the workload contract specifically requires Metal.",
          "filename": "RequireMetalExample.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Metal;\n\nvar registry = new RuntimeBackendRegistry();\nregistry.AddUaixMetalBackend();\n\nvar selector = new RuntimeBackendSelector(registry);\nRuntimeSelectionResult result = await selector.SelectAsync(\n    new RuntimeBackendOptions\n    {\n        Policy = RuntimeSelectionPolicy.RequireBackendId,\n        PreferredBackendId = MetalRuntimeBackend.BackendId,\n        AllowCpuFallback = false,\n        RequireNativeAssets = true\n    });\n\nif (!result.Succeeded)\n{\n    Console.Error.WriteLine(result.FailureReason);\n}\n",
          "notes": "RequireBackendId fails instead of relabeling another backend as Metal."
        },
        {
          "title": "List the Metal package runtime identifiers",
          "description": "Use declared identifiers as deployment metadata, not as proof that the current machine is ready.",
          "filename": "MetalRuntimeIdentifiersExample.cs",
          "code": "using UAIX.LmRuntime.Backends.Metal;\n\nvar backend = new MetalRuntimeBackend();\nforeach (string runtimeIdentifier in backend.Capabilities.RuntimeIdentifiers)\n{\n    Console.WriteLine(runtimeIdentifier);\n}\n",
          "notes": "The declared identifiers are osx-arm64 and osx-x64. Probe evidence remains required for a usable Metal execution path."
        }
      ],
      "faq": [
        {
          "q": "Does this package execute through Metal today?",
          "a": "Not on its own. The managed registration package reports unavailable until a host-supplied native adapter proves Metal assets, runtime libraries, drivers, and a device."
        },
        {
          "q": "Why does SupportsGpuExecution read true when IsAvailable is false?",
          "a": "Capabilities describe the backend family. IsAvailable describes the current proven host state."
        },
        {
          "q": "Does registration contact the network?",
          "a": "No. Registration and the supplied diagnostic probe perform no downloads, provider calls, subprocess execution, or remote inference."
        }
      ],
      "required_for": "Metal backend registration and fail-closed Metal diagnostics",
      "group": "Backend registrations"
    },
    "backends-cuda-native-win-x64": {
      "route": "package-backends-cuda-native-win-x64",
      "id": "UAIX.LmRuntime.Backends.Cuda.Native.win-x64",
      "name": "Backends.Cuda.Native.win-x64",
      "tagline": "RID-specific NuGet package slot for modern Windows x64 CUDA native assets under runtimes/win-x64/native/.",
      "description": "This package reserves the standard NuGet runtime asset path for the Windows x64 modern CUDA native-asset package slot. It has no managed API surface. The supplied package source contains an asset manifest and no CUDA inference binary, so a package reference must not be treated as proof of native execution. Deployment and probe evidence remain required.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.Cuda.Native.win-x64",
      "dependencies": [],
      "audience": "Deployment engineers and native adapter authors separating modern Windows x64 CUDA assets from managed backend registration.",
      "install_when": [
        "You are packaging or validating the Windows x64 modern CUDA native-asset package slot under the win-x64 NuGet RID path.",
        "You need the native-asset package identity to remain separate from managed backend registration.",
        "You are testing output layout and future adapter probing without making an execution claim."
      ],
      "not_for": [
        "It contains no managed runtime API.",
        "The supplied package source embeds no CUDA inference binary.",
        "It does not register Backends.Cuda and does not make the CUDA probe available."
      ],
      "key_types": [],
      "concepts": [
        {
          "title": "RID-specific placement",
          "text": "Packable files under native/* are assigned to runtimes/win-x64/native/ by the project."
        },
        {
          "title": "Separate package identity",
          "text": "Managed registration, modern CUDA assets, and Tesla K80 legacy assets are distinct package and evidence boundaries."
        },
        {
          "title": "Execution requires proof",
          "text": "A usable adapter must prove file presence, loadability, compatible runtime libraries, device support, and execution behavior."
        }
      ],
      "examples": [
        {
          "title": "Reference the package only for win-x64 publishing",
          "description": "Keep the RID-specific package reference scoped to the Windows x64 deployment target.",
          "filename": "App.csproj",
          "code": "<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net9.0</TargetFramework>\n    <RuntimeIdentifier>win-x64</RuntimeIdentifier>\n  </PropertyGroup>\n  <ItemGroup Condition=\"'$(RuntimeIdentifier)' == 'win-x64'\">\n    <PackageReference Include=\"UAIX.LmRuntime.Backends.Cuda.Native.win-x64\" />\n  </ItemGroup>\n</Project>\n",
          "notes": "A conditional package reference controls restore topology; it does not verify the content or loadability of a native asset."
        },
        {
          "title": "Publish and inspect the native output boundary",
          "description": "Create the RID-specific output and enumerate native files before a deployment claim is made.",
          "filename": "terminal",
          "code": "dotnet publish --configuration Release --runtime win-x64\n\n# Inspect the published output with your normal release tooling.\n# A package ID or directory alone is not proof of a usable CUDA engine.\n",
          "notes": "The supplied package source contains only an asset manifest. Validate the concrete package version and output contents in the release pipeline."
        },
        {
          "title": "Keep CUDA registration and native assets as separate checks",
          "description": "Probe the managed CUDA backend after deployment rather than assuming that restore activated GPU execution.",
          "filename": "CudaDeploymentProbe.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar backend = new CudaRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier = \"win-x64\",\n        RequireNativeAssets = true,\n        NativeAssetDirectory = AppContext.BaseDirectory\n    });\n\nif (!probe.IsAvailable)\n{\n    foreach (string diagnostic in probe.Diagnostics)\n    {\n        Console.Error.WriteLine(diagnostic);\n    }\n}\n",
          "notes": "The supplied diagnostic CUDA backend does not load native files. A production native adapter must provide the proof before IsAvailable can be true."
        }
      ],
      "faq": [
        {
          "q": "Why is there no API reference?",
          "a": "This is a native-asset package slot with no managed assembly API."
        },
        {
          "q": "Does the current package prove CUDA execution?",
          "a": "No. The supplied source asset manifest explicitly states that no CUDA inference binary is embedded."
        },
        {
          "q": "Why is the K80 package separate?",
          "a": "Modern assets and Tesla K80 legacy assets have different compatibility and release-evidence requirements."
        }
      ],
      "required_for": "the Windows x64 modern CUDA native-asset package slot",
      "group": "Native asset slots"
    },
    "backends-cuda-legacy-k80-win-x64": {
      "route": "package-backends-cuda-legacy-k80-win-x64",
      "id": "UAIX.LmRuntime.Backends.Cuda.LegacyK80.win-x64",
      "name": "Backends.Cuda.LegacyK80.win-x64",
      "tagline": "RID-specific NuGet package slot for Tesla K80 legacy CUDA native assets under runtimes/win-x64/native/.",
      "description": "This package reserves the standard NuGet runtime asset path for the separate Windows x64 Tesla K80 compute-capability 3.7 native-asset package slot. It has no managed API surface. The supplied package source contains an asset manifest and no CUDA inference binary, so a package reference must not be treated as proof of native execution. Deployment and probe evidence remain required.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.Backends.Cuda.LegacyK80.win-x64",
      "dependencies": [],
      "audience": "Deployment engineers and native adapter authors separating Tesla K80 legacy CUDA assets from managed backend registration.",
      "install_when": [
        "You are packaging or validating the separate Windows x64 Tesla K80 compute-capability 3.7 native-asset package slot under the win-x64 NuGet RID path.",
        "You need the native-asset package identity to remain separate from managed backend registration.",
        "You are testing output layout and future adapter probing without making an execution claim."
      ],
      "not_for": [
        "It contains no managed runtime API.",
        "The supplied package source embeds no CUDA inference binary.",
        "It does not register Backends.Cuda and does not make the CUDA probe available."
      ],
      "key_types": [],
      "concepts": [
        {
          "title": "RID-specific placement",
          "text": "Packable files under native/* are assigned to runtimes/win-x64/native/ by the project."
        },
        {
          "title": "Separate package identity",
          "text": "Managed registration, modern CUDA assets, and Tesla K80 legacy assets are distinct package and evidence boundaries."
        },
        {
          "title": "Execution requires proof",
          "text": "A usable adapter must prove file presence, loadability, compatible runtime libraries, device support, and execution behavior."
        }
      ],
      "examples": [
        {
          "title": "Reference the package only for win-x64 publishing",
          "description": "Keep the RID-specific package reference scoped to the Windows x64 deployment target.",
          "filename": "App.csproj",
          "code": "<Project Sdk=\"Microsoft.NET.Sdk\">\n  <PropertyGroup>\n    <TargetFramework>net9.0</TargetFramework>\n    <RuntimeIdentifier>win-x64</RuntimeIdentifier>\n  </PropertyGroup>\n  <ItemGroup Condition=\"'$(RuntimeIdentifier)' == 'win-x64'\">\n    <PackageReference Include=\"UAIX.LmRuntime.Backends.Cuda.LegacyK80.win-x64\" />\n  </ItemGroup>\n</Project>\n",
          "notes": "A conditional package reference controls restore topology; it does not verify the content or loadability of a native asset."
        },
        {
          "title": "Publish and inspect the native output boundary",
          "description": "Create the RID-specific output and enumerate native files before a deployment claim is made.",
          "filename": "terminal",
          "code": "dotnet publish --configuration Release --runtime win-x64\n\n# Inspect the published output with your normal release tooling.\n# A package ID or directory alone is not proof of a usable CUDA engine.\n",
          "notes": "The supplied package source contains only an asset manifest. Validate the concrete package version and output contents in the release pipeline."
        },
        {
          "title": "Keep CUDA registration and native assets as separate checks",
          "description": "Probe the managed CUDA backend after deployment rather than assuming that restore activated GPU execution.",
          "filename": "CudaDeploymentProbe.cs",
          "code": "using UAIX.LmRuntime.Acceleration;\nusing UAIX.LmRuntime.Backends.Cuda;\n\nvar backend = new CudaRuntimeBackend();\nRuntimeBackendProbeResult probe = await backend.ProbeAsync(\n    new RuntimeBackendOptions\n    {\n        RequestedRuntimeIdentifier = \"win-x64\",\n        RequireNativeAssets = true,\n        NativeAssetDirectory = AppContext.BaseDirectory\n    });\n\nif (!probe.IsAvailable)\n{\n    foreach (string diagnostic in probe.Diagnostics)\n    {\n        Console.Error.WriteLine(diagnostic);\n    }\n}\n",
          "notes": "The supplied diagnostic CUDA backend does not load native files. A production native adapter must provide the proof before IsAvailable can be true."
        }
      ],
      "faq": [
        {
          "q": "Why is there no API reference?",
          "a": "This is a native-asset package slot with no managed assembly API."
        },
        {
          "q": "Does the current package prove CUDA execution?",
          "a": "No. The supplied source asset manifest explicitly states that no CUDA inference binary is embedded."
        },
        {
          "q": "Why is the K80 package separate?",
          "a": "Tesla K80 compute capability 3.7 can require a legacy CUDA toolkit and an sm_37-specific build that must not be mixed into the modern asset set."
        }
      ],
      "required_for": "the separate Windows x64 Tesla K80 compute-capability 3.7 native-asset package slot",
      "group": "Native asset slots"
    },
    "local-endpoint": {
      "route": "package-local-endpoint",
      "id": "UAIX.LmRuntime.LocalEndpoint",
      "name": "LocalEndpoint",
      "tagline": "The high-level local-only GGUF facade: verified files, bounded loading, isolated sessions, UAIX context evidence, and deterministic greedy generation.",
      "description": "High-level local-only managed GGUF facade for verified model loading, isolated sessions, and deterministic greedy generation.",
      "nuget": "https://www.nuget.org/packages/UAIX.LmRuntime.LocalEndpoint",
      "dependencies": [
        "UAIX.LmRuntime.Acceleration",
        "UAIX.LmRuntime.Backends.CpuManaged",
        "UAIX.LmRuntime.Models.Llama",
        "UAIX.LmRuntime.Tokenization"
      ],
      "transitive_note": "LocalEndpoint selects the managed CPU lane represented by Backends.CpuManaged. GPU registration packages remain separate and do not silently replace LocalEndpoint execution.",
      "audience": "Desktop, service, and worker applications that require local GGUF generation without assembling every lower runtime layer directly.",
      "install_when": [
        "Application integration with local managed GGUF inference.",
        "SHA-256 and byte-count verification, allowed-root checks, reparse-point rejection, execution limits, and path-safe diagnostics.",
        "One isolated LocalGgufSession per conversation or worker with deterministic token observation."
      ],
      "not_for": [
        "Downloading models, starting a server, calling provider APIs, running subprocesses, collecting telemetry, or persisting prompts/content.",
        "Reading .uaix packages, expanding .uai profiles, selecting wiki memory, assembling prompts, or granting command/network authority."
      ],
      "key_types": [
        "LocalGgufRuntime",
        "LocalGgufRuntimeScope",
        "LocalGgufModelLoadOptions",
        "LocalGgufFileExpectation",
        "LocalGgufFileIdentity",
        "LocalGgufModel",
        "LocalGgufSession",
        "LocalGgufGenerationRequest",
        "LocalGgufGenerationResult",
        "LocalUaixRuntimeContext",
        "LocalUaixRuntimeContextEvidence"
      ],
      "concepts": [
        {
          "title": "Verify before parse",
          "text": "The facade can require expected SHA-256 and byte count, constrain the canonical path to an allowed root, reject reparse points, and enforce maximum model size before loading."
        },
        {
          "title": "Session isolation",
          "text": "A loaded model can create multiple explicit sessions. Each session owns its position and UAIX context evidence; no process-global active profile is required."
        },
        {
          "title": "Closed authority boundary",
          "text": "UAIX profile and memory fields are retained as immutable evidence only. Runtime execution, policy override, commands, network, providers, website intake, telemetry, and auto-export flags must remain false."
        }
      ],
      "examples": [
        {
          "title": "Verify, load, and generate locally",
          "description": "Use a trusted model root, exact file identity, explicit UAIX evidence, bounded generation, and deterministic disposal.",
          "filename": "LocalGgufQuickStart.cs",
          "code": "using UAIX.LmRuntime.LocalEndpoint;\n\npublic static class LocalGgufQuickStart\n{\n    /// <summary>\n    /// Loads a verified local GGUF model and performs deterministic greedy generation.\n    /// </summary>\n    /// <param name=\"modelPath\">The local path to the GGUF model.</param>\n    /// <param name=\"trustedModelRoot\">The canonical root under which the model must reside.</param>\n    /// <param name=\"expectedSha256\">The expected SHA-256 digest.</param>\n    /// <param name=\"expectedByteCount\">The exact expected model byte count.</param>\n    /// <param name=\"appInstanceId\">The host application's stable instance identifier.</param>\n    /// <param name=\"importedPackageCorrelationId\">A legacy UAIX package correlation identifier; it grants no authority.</param>\n    /// <param name=\"profileId\">The already validated UAIX profile identifier.</param>\n    /// <param name=\"profileDisplayName\">The display-safe profile name.</param>\n    /// <param name=\"loadSessionId\">The host-owned profile load-session identifier.</param>\n    /// <param name=\"wikiRootId\">The host-owned long-term-memory root identifier.</param>\n    /// <param name=\"workerSessionId\">The host-owned inference session identifier.</param>\n    /// <param name=\"prompt\">The already prepared prompt; the runtime does not assemble it.</param>\n    /// <param name=\"cancellationToken\">A token used to cancel between committed inference steps.</param>\n    /// <returns>The deterministic generation result.</returns>\n    public static LocalGgufGenerationResult Generate(\n        string modelPath,\n        string trustedModelRoot,\n        string expectedSha256,\n        long expectedByteCount,\n        string appInstanceId,\n        string importedPackageCorrelationId,\n        string profileId,\n        string profileDisplayName,\n        string loadSessionId,\n        string wikiRootId,\n        string workerSessionId,\n        string prompt,\n        
CancellationToken cancellationToken)\n    {\n        var runtime = new LocalGgufRuntime(\n            new LocalGgufRuntimeScope\n            {\n                ApplicationInstanceId = appInstanceId,\n                UaixPackageId = importedPackageCorrelationId\n            });\n\n        var uaixContext = new LocalUaixRuntimeContext\n        {\n            LoadedUaixProfilePresent = true,\n            LoadedUaixProfileId = profileId,\n            LoadedUaixProfileDisplayName = profileDisplayName,\n            LoadedUaixLoadSessionId = loadSessionId,\n            LoadedUaixUaiRelativePath = $\"Memories/Profiles/{profileId}/.uai\",\n            LoadedUaixSessionRelativePath = $\"Memories/Sessions/{loadSessionId}.json\",\n            LongTermMemoryRootId = wikiRootId,\n            LongTermMemoryRootRelativePath = $\"Profiles/{profileId}\",\n            LongTermMemoryMode = LocalUaixLongTermMemoryMode.Isolated,\n            RuntimeExecutionAllowed = false,\n            MemoryCanOverridePolicy = false,\n            CommandExecutionAllowed = false,\n            NetworkAccessAllowed = false,\n            ProviderApisAllowed = false,\n            WebsitePromptIntakeAllowed = false,\n            TelemetryEnabled = false,\n            AutoExportAllowed = false\n        };\n\n        LocalUaixRuntimeContextEvidence contextEvidence =\n            LocalGgufRuntime.VerifyUaixRuntimeContext(uaixContext);\n\n        using LocalGgufModel model = runtime.LoadVerifiedModel(\n            modelPath,\n            new LocalGgufFileExpectation\n            {\n                ModelSha256 = expectedSha256,\n                ModelByteCount = expectedByteCount\n            },\n            new LocalGgufModelLoadOptions\n            {\n                AllowedRootDirectory = trustedModelRoot,\n                RejectReparsePoints = true,\n                MaximumModelBytes = expectedByteCount,\n                ExecutionLimits = new LocalGgufExecutionLimits\n                {\n                    
MaximumPromptCharacters = 32_768,\n                    MaximumGeneratedTokens = 256,\n                    MaximumStopTokenCount = 32\n                }\n            });\n\n        using LocalGgufSession session = model.CreateSession(\n            new LocalGgufSessionContext\n            {\n                SessionId = workerSessionId,\n                UaixRuntimeContext = uaixContext\n            });\n\n        return session.GenerateGreedy(\n            new LocalGgufGenerationRequest\n            {\n                Prompt = prompt,\n                MaximumTokens = 128,\n                ResetSession = true,\n                AddSpecialTokens = false,\n                ParseSpecialTokens = false,\n                EmitTokenizerTrace = false,\n                RemoveSpecialTokens = false,\n                UnparseSpecialTokens = true,\n                CleanSpaces = false\n            },\n            cancellationToken);\n    }\n}",
          "notes": "The host validates and prepares profile, memory, and prompt data. The runtime retains context as evidence only; every authority flag remains false."
        },
        {
          "title": "Observe generated tokens synchronously",
          "description": "Map each committed token to the host worker protocol without giving the runtime ownership of transport or persistence.",
          "filename": "TokenObservationExample.cs",
          "code": "using UAIX.LmRuntime.LocalEndpoint;\n\npublic static class TokenObservationExample\n{\n    /// <summary>\n    /// Generates a bounded response and observes each selected token synchronously.\n    /// </summary>\n    /// <param name=\"session\">The isolated local GGUF session.</param>\n    /// <param name=\"preparedPrompt\">The host-prepared prompt.</param>\n    /// <param name=\"cancellationToken\">A token observed between committed model steps.</param>\n    /// <returns>The complete deterministic generation result.</returns>\n    public static LocalGgufGenerationResult Generate(\n        LocalGgufSession session,\n        string preparedPrompt,\n        CancellationToken cancellationToken)\n    {\n        ArgumentNullException.ThrowIfNull(session);\n        ArgumentException.ThrowIfNullOrWhiteSpace(preparedPrompt);\n\n        return session.GenerateGreedy(\n            new LocalGgufGenerationRequest\n            {\n                Prompt = preparedPrompt,\n                MaximumTokens = 64,\n                ResetSession = true,\n                AddSpecialTokens = true,\n                ParseSpecialTokens = false,\n                RemoveSpecialTokens = true\n            },\n            token => Console.WriteLine(\n                $\"{session.SessionId}:{token.Sequence}:{token.TokenId}:{token.SelectedLogit}\"),\n            cancellationToken);\n    }\n}"
        },
        {
          "title": "Verify associated artifacts without granting execution authority",
          "description": "Verify the SHA-256 digest and exact byte count of extra local artifacts while keeping them outside the generation path.",
          "filename": "AssociatedArtifactExample.cs",
          "code": "using UAIX.LmRuntime.LocalEndpoint;\n\npublic static class AssociatedArtifactExample\n{\n    /// <summary>\n    /// Verifies a LocalEndpoint-reviewed associated artifact without granting execution authority.\n    /// </summary>\n    /// <param name=\"runtime\">The local runtime facade performing byte verification.</param>\n    /// <param name=\"artifactPath\">The local associated-artifact path.</param>\n    /// <param name=\"trustedModelRoot\">The canonical root that must contain the artifact.</param>\n    /// <param name=\"expectedSha256\">The expected SHA-256 digest.</param>\n    /// <param name=\"expectedByteCount\">The exact expected byte count.</param>\n    /// <returns>Path-free immutable artifact identity evidence.</returns>\n    public static IReadOnlyList<LocalGgufAssociatedArtifactIdentity> Verify(\n        LocalGgufRuntime runtime,\n        string artifactPath,\n        string trustedModelRoot,\n        string expectedSha256,\n        long expectedByteCount)\n    {\n        ArgumentNullException.ThrowIfNull(runtime);\n\n        string fileName = Path.GetFileName(artifactPath);\n\n        return runtime.VerifyAssociatedArtifacts(\n            [\n                new LocalGgufAssociatedArtifactLoadInput\n                {\n                    ArtifactPath = artifactPath,\n                    Role = \"projector\",\n                    FileName = fileName,\n                    ArtifactSha256 = expectedSha256,\n                    ArtifactByteCount = expectedByteCount\n                }\n            ],\n            new LocalGgufModelLoadOptions\n            {\n                AllowedRootDirectory = trustedModelRoot,\n                RejectReparsePoints = true\n            });\n    }\n}",
          "notes": "Associated artifacts are verified evidence only; the facade does not use them for generation unless a future release adds an explicitly supported path for them."
        },
        {
          "title": "Inspect the local-only capability boundary",
          "description": "Read the facade capability object instead of inferring behavior from package names.",
          "filename": "CapabilityBoundaryExample.cs",
          "code": "using UAIX.LmRuntime.LocalEndpoint;\n\nvar runtime = new LocalGgufRuntime();\nLocalGgufRuntimeCapabilities capabilities = runtime.Capabilities;\n\nConsole.WriteLine($\"Managed execution: {capabilities.IsManagedExecution}\");\nConsole.WriteLine($\"Explicit verification: {capabilities.SupportsExplicitFileVerification}\");\nConsole.WriteLine($\"Network access: {capabilities.UsesNetworkAccess}\");\nConsole.WriteLine($\"Subprocesses: {capabilities.UsesSubprocesses}\");\nConsole.WriteLine($\"Telemetry: {capabilities.UsesTelemetry}\");\nConsole.WriteLine($\"Server: {capabilities.HostsServer}\");\nConsole.WriteLine($\"Global UAIX profile: {capabilities.UsesGlobalUaixProfile}\");\nConsole.WriteLine($\"Memory grants authority: {capabilities.UaixMemoryGrantsAuthority}\");"
        },
        {
          "title": "Reuse a session for a multi-turn prepared prompt",
          "description": "Preserve model state only when the host deliberately chooses not to reset the session.",
          "filename": "SessionReuseExample.cs",
          "code": "using UAIX.LmRuntime.LocalEndpoint;\n\npublic static class SessionReuseExample\n{\n    /// <summary>\n    /// Executes an initial prepared prompt, then continues the same isolated model session.\n    /// </summary>\n    /// <param name=\"session\">The isolated local GGUF session.</param>\n    /// <param name=\"firstPreparedPrompt\">The complete first-turn prompt prepared by the host.</param>\n    /// <param name=\"continuationPreparedPrompt\">The continuation prompt prepared by the host.</param>\n    /// <param name=\"cancellationToken\">A token observed between committed model steps.</param>\n    /// <returns>The first and continuation generation results.</returns>\n    public static (\n        LocalGgufGenerationResult First,\n        LocalGgufGenerationResult Continuation) GenerateTwoTurns(\n        LocalGgufSession session,\n        string firstPreparedPrompt,\n        string continuationPreparedPrompt,\n        CancellationToken cancellationToken)\n    {\n        ArgumentNullException.ThrowIfNull(session);\n\n        LocalGgufGenerationResult first = session.GenerateGreedy(\n            new LocalGgufGenerationRequest\n            {\n                Prompt = firstPreparedPrompt,\n                MaximumTokens = 64,\n                ResetSession = true,\n                AddSpecialTokens = true\n            },\n            cancellationToken);\n\n        LocalGgufGenerationResult continuation = session.GenerateGreedy(\n            new LocalGgufGenerationRequest\n            {\n                Prompt = continuationPreparedPrompt,\n                MaximumTokens = 64,\n                ResetSession = false,\n                AddSpecialTokens = false\n            },\n            cancellationToken);\n\n        return (first, continuation);\n    }\n}",
          "notes": "The host owns conversation formatting and must ensure that continuation prompts and special-token settings match the model template."
        }
      ],
      "faq": [
        {
          "q": "Which package should an application install first?",
          "a": "Start with LocalEndpoint when the bounded local GGUF facade matches the application. Install lower-level packages directly only when you need their specific extension or inspection surfaces."
        },
        {
          "q": "Why does CreateSession require LocalGgufSessionContext?",
          "a": "The explicit context binds a host session ID and immutable UAIX evidence to the runtime session. The parameterless CreateSession member is a fail-closed compatibility boundary and should not be used for normal integration."
        },
        {
          "q": "Does the facade read .uaix or .uai files?",
          "a": "No. The host validates and loads those artifacts. The runtime validates a display-safe context object and retains evidence without reading profiles, memory documents, or prompt sources."
        },
        {
          "q": "Can memory enable commands, network, providers, telemetry, or exports?",
          "a": "No. Those authority flags are required to remain false. Any future capability needs a separate host-owned, user-approved gate."
        },
        {
          "q": "Does LocalEndpoint host an HTTP server or OpenAI-compatible endpoint?",
          "a": "No. It is an in-process managed facade. Transport, worker protocols, JSONL events, registries, audit, UI, and persistence remain host responsibilities."
        },
        {
          "q": "How should models be disposed?",
          "a": "Dispose LocalGgufSession before LocalGgufModel. The model owns mapped resources and tracks active sessions; use C# using declarations, which dispose in reverse declaration order, or an equivalent deterministic lifetime."
        }
      ],
      "required_for": "application integration",
      "group": "Application facade"
    }
  }
}