<?xml version="1.0" encoding="UTF-8"?>
<!--
  RSS 2.0 feed for inferno.rs: one <item> per chapter, in reading order.
  Each item's <guid> is a permalink identical to its <link>, and its
  <description> repeats the <title> verbatim.
  Keep every text value on a single line: whitespace inside <title> and
  <description> is delivered to feed readers as data, so line breaks belong
  only between elements.
-->
<rss version="2.0">
  <channel>
    <title>inferno.rs</title>
    <description>Chapters on building an LLM inference stack in Rust — tensors through an OpenAI-compatible server.</description>
    <link>https://inference.yashkothari.dev/</link>
    <language>en-us</language>
    <item>
      <title>I.1: GGUF</title>
      <link>https://inference.yashkothari.dev/post/act-i-01-gguf/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-i-01-gguf/</guid>
      <description>I.1: GGUF</description>
    </item>
    <item>
      <title>I.2: Tokenizer</title>
      <link>https://inference.yashkothari.dev/post/act-i-02-tokenizer/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-i-02-tokenizer/</guid>
      <description>I.2: Tokenizer</description>
    </item>
    <item>
      <title>I.3: Tensor</title>
      <link>https://inference.yashkothari.dev/post/act-i-03-tensor/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-i-03-tensor/</guid>
      <description>I.3: Tensor</description>
    </item>
    <item>
      <title>I.4: Backend</title>
      <link>https://inference.yashkothari.dev/post/act-i-04-backend/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-i-04-backend/</guid>
      <description>I.4: Backend</description>
    </item>
    <item>
      <title>I.5: Qwen3 forward</title>
      <link>https://inference.yashkothari.dev/post/act-i-05-qwen3-forward/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-i-05-qwen3-forward/</guid>
      <description>I.5: Qwen3 forward</description>
    </item>
    <item>
      <title>I.6: Greedy generation</title>
      <link>https://inference.yashkothari.dev/post/act-i-06-greedy-generate/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-i-06-greedy-generate/</guid>
      <description>I.6: Greedy generation</description>
    </item>
    <item>
      <title>II.1: Benchmark harness</title>
      <link>https://inference.yashkothari.dev/post/act-ii-01-benchmark-harness/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-ii-01-benchmark-harness/</guid>
      <description>II.1: Benchmark harness</description>
    </item>
    <item>
      <title>II.2: KV cache</title>
      <link>https://inference.yashkothari.dev/post/act-ii-02-kv-cache/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-ii-02-kv-cache/</guid>
      <description>II.2: KV cache</description>
    </item>
    <item>
      <title>II.3: SIMD CPU backend</title>
      <link>https://inference.yashkothari.dev/post/act-ii-03-simd-cpu/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-ii-03-simd-cpu/</guid>
      <description>II.3: SIMD CPU backend</description>
    </item>
    <item>
      <title>II.4: Multithreaded CPU backend</title>
      <link>https://inference.yashkothari.dev/post/act-ii-04-multithreaded-cpu/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-ii-04-multithreaded-cpu/</guid>
      <description>II.4: Multithreaded CPU backend</description>
    </item>
    <item>
      <title>II.5: Metal GPU backend</title>
      <link>https://inference.yashkothari.dev/post/act-ii-05-metal-gpu/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-ii-05-metal-gpu/</guid>
      <description>II.5: Metal GPU backend</description>
    </item>
    <item>
      <title>II.6: Q8_0 quantization</title>
      <link>https://inference.yashkothari.dev/post/act-ii-06-q8-0-quantization/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-ii-06-q8-0-quantization/</guid>
      <description>II.6: Q8_0 quantization</description>
    </item>
    <item>
      <title>III.1: Chat pipeline</title>
      <link>https://inference.yashkothari.dev/post/act-iii-01-chat-pipeline/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-01-chat-pipeline/</guid>
      <description>III.1: Chat pipeline</description>
    </item>
    <item>
      <title>III.2: HTTP API</title>
      <link>https://inference.yashkothari.dev/post/act-iii-02-http-api/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-02-http-api/</guid>
      <description>III.2: HTTP API</description>
    </item>
    <item>
      <title>III.3: SSE streaming</title>
      <link>https://inference.yashkothari.dev/post/act-iii-03-sse-streaming/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-03-sse-streaming/</guid>
      <description>III.3: SSE streaming</description>
    </item>
    <item>
      <title>III.4: Paged KV cache</title>
      <link>https://inference.yashkothari.dev/post/act-iii-04-paged-kv-cache/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-04-paged-kv-cache/</guid>
      <description>III.4: Paged KV cache</description>
    </item>
    <item>
      <title>III.5: Radix prefix cache</title>
      <link>https://inference.yashkothari.dev/post/act-iii-05-radix-prefix-cache/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-05-radix-prefix-cache/</guid>
      <description>III.5: Radix prefix cache</description>
    </item>
    <item>
      <title>III.6: Decode scheduler</title>
      <link>https://inference.yashkothari.dev/post/act-iii-06-decode-scheduler/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-06-decode-scheduler/</guid>
      <description>III.6: Decode scheduler</description>
    </item>
    <item>
      <title>III.7: Batched decode</title>
      <link>https://inference.yashkothari.dev/post/act-iii-07-batched-decode/</link>
      <guid isPermaLink="true">https://inference.yashkothari.dev/post/act-iii-07-batched-decode/</guid>
      <description>III.7: Batched decode</description>
    </item>
  </channel>
</rss>