
{"id":6548,"date":"2025-10-09T07:24:56","date_gmt":"2025-10-08T23:24:56","guid":{"rendered":"https:\/\/infernews.com\/?page_id=6548"},"modified":"2025-12-23T02:05:01","modified_gmt":"2025-12-22T18:05:01","slug":"%e6%b7%b1%e5%ba%a6%e5%ad%b8%e7%bf%92%e8%b3%87%e6%ba%90","status":"publish","type":"page","link":"https:\/\/infernews.com\/blog\/%e6%b7%b1%e5%ba%a6%e5%ad%b8%e7%bf%92%e8%b3%87%e6%ba%90\/","title":{"rendered":"\u6a5f\u5668\u5b78\u7fd2 \/ \u6df1\u5ea6\u5b78\u7fd2"},"content":{"rendered":"<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"\u3010\u751f\u6210\u5f0f\u4eba\u5de5\u667a\u6167\u8207\u6a5f\u5668\u5b78\u7fd2\u5c0e\u8ad62025\u3011\u7b2c 10 \u8b1b\uff1a\u8a9e\u97f3\u8a9e\u8a00\u6a21\u578b\u767c\u5c55\u53f2 (\u672c\u8ab2\u7a0b\u524d\u6bb5\u5167\u5bb9\u70ba\u6b77\u53f2\u56de\u9867\uff0c2025 \u5e74\u7684\u6280\u8853\u5f9e 1:42:00 \u958b\u59cb)\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_CbIPjrOj2Tc\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FCbIPjrOj2Tc%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/CbIPjrOj2Tc\" \/><meta itemprop=\"duration\" content=\"PT2H29M2S\" \/><meta itemprop=\"uploadDate\" content=\"2025-12-22T08:03:27Z\" \/><\/div><meta itemprop=\"accessibilityFeature\" content=\"captions\" \/><div id=\"lyte_CbIPjrOj2Tc\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FCbIPjrOj2Tc%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" 
itemprop=\"name\">\u3010\u751f\u6210\u5f0f\u4eba\u5de5\u667a\u6167\u8207\u6a5f\u5668\u5b78\u7fd2\u5c0e\u8ad62025\u3011\u7b2c 10 \u8b1b\uff1a\u8a9e\u97f3\u8a9e\u8a00\u6a21\u578b\u767c\u5c55\u53f2 (\u672c\u8ab2\u7a0b\u524d\u6bb5\u5167\u5bb9\u70ba\u6b77\u53f2\u56de\u9867\uff0c2025 \u5e74\u7684\u6280\u8853\u5f9e 1:42:00 \u958b\u59cb)<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/CbIPjrOj2Tc\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FCbIPjrOj2Tc%2F0.jpg\" alt=\"\u3010\u751f\u6210\u5f0f\u4eba\u5de5\u667a\u6167\u8207\u6a5f\u5668\u5b78\u7fd2\u5c0e\u8ad62025\u3011\u7b2c 10 \u8b1b\uff1a\u8a9e\u97f3\u8a9e\u8a00\u6a21\u578b\u767c\u5c55\u53f2 (\u672c\u8ab2\u7a0b\u524d\u6bb5\u5167\u5bb9\u70ba\u6b77\u53f2\u56de\u9867\uff0c2025 \u5e74\u7684\u6280\u8853\u5f9e 1:42:00 \u958b\u59cb)\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"\u8ab2\u7a0b\u7db2\u9801\uff1ahttps:\/\/speech.ee.ntu.edu.tw\/~hylee\/GenAI-ML\/2025-fall.php \u5f71\u7247\u526a\u8f2f\uff1a\u674e\u4e00\u99ff\u52a9\u6559 \u5ef6\u4f38\u95b1\u8b80 On The Landscape of Spoken Language Models: A Comprehensive Survey https:\/\/arxiv.org\/abs\/2504.08528\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"MIT Introduction to Deep Learning (2025) | 6.S191\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_alfdI7S6wCY\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta 
itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FalfdI7S6wCY%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/alfdI7S6wCY\" \/><meta itemprop=\"duration\" content=\"PT1H9M26S\" \/><meta itemprop=\"uploadDate\" content=\"2025-03-03T16:00:07Z\" \/><\/div><div id=\"lyte_alfdI7S6wCY\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FalfdI7S6wCY%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">MIT Introduction to Deep Learning (2025) | 6.S191<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/alfdI7S6wCY\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FalfdI7S6wCY%2F0.jpg\" alt=\"MIT Introduction to Deep Learning (2025) | 6.S191\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"MIT Introduction to Deep Learning 6.S191: Lecture 1 Foundations of Deep Learning Lecturer: Alexander Amini ** New 2025 Edition ** For all lectures, slides, and lab materials: http:\/\/introtodeeplearning.com\/ Subscribe to stay up to date with new deep learning lectures at MIT, or follow us on @MITDeepLearning on Twitter and Instagram to stay fully-connected!!\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 
wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"Become AI Researcher From Scratch - Full Course - LLM, Math, Pytorch, Neural Networks, Transformers\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_44l-xL7OKLo\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2F44l-xL7OKLo%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/44l-xL7OKLo\" \/><meta itemprop=\"duration\" content=\"PT3H8M59S\" \/><meta itemprop=\"uploadDate\" content=\"2025-11-18T16:02:40Z\" \/><\/div><div id=\"lyte_44l-xL7OKLo\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2F44l-xL7OKLo%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">Become AI Researcher From Scratch - Full Course - LLM, Math, Pytorch, Neural Networks, Transformers<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/44l-xL7OKLo\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2F44l-xL7OKLo%2F0.jpg\" alt=\"Become AI Researcher From Scratch - Full Course - LLM, Math, Pytorch, Neural Networks, Transformers\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"Beam AI - https:\/\/beam.ai\/see-a-demo Skool (become AI researcher) - https:\/\/www.skool.com\/become-ai-researcher-2669\/about github - https:\/\/github.com\/vukrosic\/become-elite-ai-researcher Playlist for machine learning fundamentals - 
https:\/\/www.youtube.com\/playlist?list=PL-9_KFQd8ssLS8TV5-GsYhp6fgZ3gZM_a Discord (Open Superintelligence Lab) - https:\/\/discord.gg\/6AbXGpKTwN X - https:\/\/x.com\/VukRosic99 0:00 - Course Introduction 0:22 - AI Math Roadmap 1:40 - PyTorch Module Overview 2:05 - Neural Networks Overview 3:55 - Transformers Overview 4:45 - Sponsor: Beam AI 7:46 - Math for AI 8:31 - Setup &amp; Requirements 11:30 - Math Curriculum 12:30 - Mathematical Functions 21:26 - Understanding Derivatives 35:39 - Understanding Vectors 48:19 - Understanding Gradients 59:04 - Understanding Matrices 1:10:50 - Understanding Probability 1:21:33 - PyTorch Introduction 1:23:21 - Tensor Operations 1:35:01 - Creating Tensors 1:39:30 - Matrix Multiplication 1:41:03 - Transposing &amp; Permuting 1:43:42 - Indexing &amp; Slicing 1:50:20 - Concatenating Tensors 1:52:43 - Special Tensors 1:56:07 - Single Neuron Explained 2:00:24 - Activation Functions 2:05:24 - Neural Network Layers 2:10:25 - Activation Function Examples 2:13:40 - How Networks Learn 2:14:13 - Attention Mechanism 2:29:35 - Self-Attention Code 2:35:48 - GPT Architecture Overview 2:43:04 - Positional Embeddings 2:47:03 - Causal Self-Attention 2:56:59 - Building the GPT\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n\n<p><a href=\"https:\/\/github.com\/The-Pocket\/PocketFlow-Tutorial-Video-Generator\/tree\/main\/docs\/llm\">Pocket Flow<\/a> \u6559\u5b78\u5c08\u6848\u300cPocketFlow-Tutorial-Video-Generator\u300d\u3002Pocket Flow \u672c\u8eab\u662f\u4e00\u500b\u6975\u7c21\u7684\u5716\u5f62\u5de5\u4f5c\u6d41\u6846\u67b6\uff0c\u6bcf\u500b node \u662f\u4e00\u500b\u6b65\u9a5f\uff0cflow \u8ca0\u8cac\u628a\u9019\u4e9b\u6b65\u9a5f\u4e32\u8d77\u4f86\uff0c\u9069\u5408\u505a\u591a\u6b65\u9a5f\u7684\u4ee3\u7406\u3001RAG\u3001\u6559\u5b78\u751f\u6210\u7b49\u61c9\u7528\u3002<\/p>\n\n\n\n<hr 
class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"Give me 50 min, I will make Diffusion Model click forever\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_JOqU8_qJQg4\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FJOqU8_qJQg4%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/JOqU8_qJQg4\" \/><meta itemprop=\"duration\" content=\"PT47M11S\" \/><meta itemprop=\"uploadDate\" content=\"2025-12-04T06:46:42Z\" \/><\/div><meta itemprop=\"accessibilityFeature\" content=\"captions\" \/><div id=\"lyte_JOqU8_qJQg4\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FJOqU8_qJQg4%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">Give me 50 min, I will make Diffusion Model click forever<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/JOqU8_qJQg4\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FJOqU8_qJQg4%2F0.jpg\" alt=\"Give me 50 min, I will make Diffusion Model click forever\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"*Text:* https:\/\/github.com\/The-Pocket\/PocketFlow-Tutorial-Video-Generator\/blob\/main\/docs\/llm\/diffusion.md 0:00:00 - 
Introduction 0:03:57 - The Configuration 0:05:58 - The Forward Process 0:11:16 - Noise Scheduling Code 0:15:55 - The Reverse Process 0:20:14 - The Training Loop 0:24:08 - The UNet Architecture 0:29:15 - Time Embeddings 0:30:49 - The Residual Block 0:34:28 - Sampling Theory 0:39:30 - Sampling Implementation 0:45:36 - Stable Diffusion &amp; Latents *Social media:* X: https:\/\/x.com\/ZacharyHuang12 LinkedIn: https:\/\/www.linkedin.com\/in\/zachary-h-23aa37172\/ Github: https:\/\/github.com\/zachary62 Discord: https:\/\/discord.com\/invite\/hUHHE9Sa6T Medium: https:\/\/medium.com\/@zh2408 Substack: https:\/\/zacharyhuang.substack.com\/ *About Me:* \ud83d\udc4b I&#039;m Zach, an AI researcher at Microsoft Research AI Frontiers. I currently work on LLM Agents &amp; Systems. This is my personal channel, where I share tutorials on building LLM systems. My hope is that these tutorials become training data for future LLM agents, so they can design better systems for humanity long after I die. Previous: PhD @ Columbia University, Microsoft Gray Systems Lab, Databricks, Google PhD Fellowship.\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"\u5b66\u4e60Transformer\uff0c\u5e94\u8be5\u4ece\u8bcd\u5d4c\u5165WordEmbedding\u5f00\u59cb\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_rvvKXXdAxs8\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FrvvKXXdAxs8%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" 
content=\"https:\/\/www.youtube.com\/embed\/rvvKXXdAxs8\" \/><meta itemprop=\"duration\" content=\"PT12M2S\" \/><meta itemprop=\"uploadDate\" content=\"2024-08-08T03:00:29Z\" \/><\/div><div id=\"lyte_rvvKXXdAxs8\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FrvvKXXdAxs8%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">\u5b66\u4e60Transformer\uff0c\u5e94\u8be5\u4ece\u8bcd\u5d4c\u5165WordEmbedding\u5f00\u59cb<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/rvvKXXdAxs8\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FrvvKXXdAxs8%2F0.jpg\" alt=\"\u5b66\u4e60Transformer\uff0c\u5e94\u8be5\u4ece\u8bcd\u5d4c\u5165WordEmbedding\u5f00\u59cb\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"\u5b66\u4e60Transformer\uff0c\u5e94\u8be5\u4ece\u8bcd\u5d4c\u5165WordEmbedding\u5f00\u59cb\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n\n<p>\u5927\u6a21\u578b\u5fae\u8c03\u5168\u6d41\u7a0b\u8be6\u89e3<\/p>\n\n\n\n<p><a href=\"https:\/\/www.youtube.com\/watch?v=kYcGLp-PfKc\">https:\/\/www.youtube.com\/watch?v=kYcGLp-PfKc<\/a><\/p>\n\n\n\n<hr class=\"wp-block-separator has-alpha-channel-opacity\"\/>\n\n\n<div class=\"vlp-link-container vlp-layout-spotlight-clone wp-block-visual-link-preview-link\"><a href=\"https:\/\/www.youtube.com\/@Deepia-ls2fo\" class=\"vlp-link\" title=\"Deepia\" rel=\"nofollow\" target=\"_blank\"><\/a><div class=\"vlp-layout-zone-main\"><span class=\"vlp-block-0 vlp-link-title\">Deepia<\/span><div 
class=\"vlp-block-1 vlp-link-summary\">Welcome to Deepia, where I animate deep learning concepts with Manim.<\/div><div class=\"vlp-block-2 vlp-link-image\"><img decoding=\"async\" src=\"https:\/\/yt3.googleusercontent.com\/1faLulk5gQAo22JNrz5du3ZmdCE_mhjx6GHq4sSbo0UExnaDP695HEKeTyDWtfEOdjXQXr4LQg=s900-c-k-c0x00ffffff-no-rj\" style=\"max-width: 1024px; max-height: 1024px\" \/><\/div><\/div><\/div>\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"I Visualised Attention in Transformers\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_RNF0FvRjGZk\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FRNF0FvRjGZk%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/RNF0FvRjGZk\" \/><meta itemprop=\"duration\" content=\"PT13M1S\" \/><meta itemprop=\"uploadDate\" content=\"2025-06-30T16:24:54Z\" \/><\/div><div id=\"lyte_RNF0FvRjGZk\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FRNF0FvRjGZk%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">I Visualised Attention in Transformers<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/RNF0FvRjGZk\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FRNF0FvRjGZk%2F0.jpg\" alt=\"I Visualised Attention in Transformers\" width=\"853\" height=\"460\" \/><br 
\/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"To try everything Brilliant has to offer\u2014free\u2014for a full 30 days, visit https:\/\/brilliant.org\/GalLahat\/ . You\u2019ll also get 20% off an annual premium subscription. Voice type with Peach Beta \ud83c\udf51: https:\/\/peach-voice.com This video was sponsored by Brilliant The music is created by my partner (AI) and me, feel free to use it commercially for your own projects but make sure to credit this video when you do: https:\/\/drive.google.com\/drive\/folders\/1WSKmpkQGOHb_jij4Ut2NpivTcljc7qLS?usp=sharing\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"Attention in transformers, step-by-step | Deep Learning Chapter 6\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_eMlx5fFNoYc\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FeMlx5fFNoYc%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/eMlx5fFNoYc\" \/><meta itemprop=\"duration\" content=\"PT26M10S\" \/><meta itemprop=\"uploadDate\" content=\"2024-04-07T12:53:54Z\" \/><\/div><meta itemprop=\"accessibilityFeature\" content=\"captions\" \/><div id=\"lyte_eMlx5fFNoYc\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FeMlx5fFNoYc%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">Attention in transformers, step-by-step | Deep Learning Chapter 6<\/div><\/div><div 
class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/eMlx5fFNoYc\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FeMlx5fFNoYc%2F0.jpg\" alt=\"Attention in transformers, step-by-step | Deep Learning Chapter 6\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"Demystifying attention, the key mechanism inside transformers and LLMs. Instead of sponsored ad reads, these lessons are funded directly by viewers: https:\/\/3b1b.co\/support Special thanks to these supporters: https:\/\/www.3blue1brown.com\/lessons\/attention#thanks An equally valuable form of support is to simply share the videos. Demystifying self-attention, multiple heads, and cross-attention. Instead of sponsored ad reads, these lessons are funded directly by viewers: https:\/\/3b1b.co\/support The first pass for the translated subtitles here is machine-generated and, therefore, notably imperfect. To contribute edits or fixes, visit https:\/\/www.criblate.com \u0417\u0432\u0443\u043a\u043e\u0432\u0430\u044f \u0434\u043e\u0440\u043e\u0436\u043a\u0430 \u043d\u0430 \u0440\u0443\u0441\u0441\u043a\u043e\u043c \u044f\u0437\u044b\u043a\u0435: \u0412\u043b\u0430\u0434 \u0411\u0443\u0440\u043c\u0438\u0441\u0442\u0440\u043e\u0432. 
------------------ Here are a few other relevant resources Build a GPT from scratch, by Andrej Karpathy https:\/\/youtu.be\/kCc8FmEb1nY If you want a conceptual understanding of language models from the ground up, @vcubingx just started a short series of videos on the topic: https:\/\/youtu.be\/1il-s4mgNdI?si=XaVxj6bsdy3VkgEX If you&#039;re interested in the herculean task of interpreting what these large networks might actually be doing, the Transformer Circuits posts by Anthropic are great. In particular, it was only after reading one of these that I started thinking of the combination of the value and output matrices as being a combined low-rank map from the embedding space to itself, which, at least in my mind, made things much clearer than other sources. https:\/\/transformer-circuits.pub\/2021\/framework\/index.html Site with exercises related to ML programming and GPTs https:\/\/www.gptandchill.ai\/codingproblems History of language models by Brit Cruise, \u00a0@ArtOfTheProblem\u00a0 https:\/\/youtu.be\/OFS90-FX6pg An early paper on how directions in embedding spaces have meaning: https:\/\/arxiv.org\/pdf\/1301.3781.pdf ------------------ Timestamps: 0:00 - Recap on embeddings 1:39 - Motivating examples 4:29 - The attention pattern 11:08 - Masking 12:42 - Context size 13:10 - Values 15:44 - Counting parameters 18:21 - Cross-attention 19:19 - Multiple heads 22:16 - The output matrix 23:19 - Going deeper 24:54 - Ending ------------------ These animations are largely made using a custom Python library, manim. See the FAQ comments here: https:\/\/3b1b.co\/faq#manim https:\/\/github.com\/3b1b\/manim https:\/\/github.com\/ManimCommunity\/manim\/ All code for specific videos is visible here: https:\/\/github.com\/3b1b\/videos\/ The music is by Vincent Rubinetti. 
https:\/\/www.vincentrubinetti.com https:\/\/vincerubinetti.bandcamp.com\/album\/the-music-of-3blue1brown https:\/\/open.spotify.com\/album\/1dVyjwS8FBqXhRunaG5W5u ------------------ 3blue1brown is a channel about animating math, in all senses of the word animate. If you&#039;re reading the bottom of a video description, I&#039;m guessing you&#039;re more interested than the average viewer in lessons here. It would mean a lot to me if you chose to stay up to date on new ones, either by subscribing here on YouTube or otherwise following on whichever platform below you check most regularly. Mailing list: https:\/\/3blue1brown.substack.com Twitter: https:\/\/twitter.com\/3blue1brown Instagram: https:\/\/www.instagram.com\/3blue1brown Reddit: https:\/\/www.reddit.com\/r\/3blue1brown Facebook: https:\/\/www.facebook.com\/3blue1brown Patreon: https:\/\/patreon.com\/3blue1brown Website: https:\/\/www.3blue1brown.com\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n<div class=\"vlp-link-container vlp-layout-spotlight-clone wp-block-visual-link-preview-link\"><a href=\"https:\/\/developer.aliyun.com\/article\/180611\" class=\"vlp-link\" title=\"\u5f00\u53d1\u8005\u5165\u95e8\u5fc5\u8bfb\uff1a\u6700\u503c\u5f97\u770b\u7684\u5341\u5927\u673a\u5668\u5b66\u4e60\u516c\u5f00\u8bfe-\u963f\u91cc\u4e91\u5f00\u53d1\u8005\u793e\u533a\" rel=\"nofollow\" target=\"_blank\"><\/a><div class=\"vlp-layout-zone-main\"><span class=\"vlp-block-0 vlp-link-title\">\u5f00\u53d1\u8005\u5165\u95e8\u5fc5\u8bfb\uff1a\u6700\u503c\u5f97\u770b\u7684\u5341\u5927\u673a\u5668\u5b66\u4e60\u516c\u5f00\u8bfe-\u963f\u91cc\u4e91\u5f00\u53d1\u8005\u793e\u533a<\/span><div class=\"vlp-block-1 
vlp-link-summary\">\u5728\u5f53\u4e0b\u7684\u673a\u5668\u5b66\u4e60\u70ed\u6f6e\uff0c\u4eba\u624d\u532e\u4e4f\u5341\u5206\u663e\u8457\u3002\u622a\u81f3\u76ee\u524d\uff0c\u56fd\u5185\u5f00\u8bbe\u4eba\u5de5\u667a\u80fd\uff08AI\uff09\u4e13\u4e1a\u7684\u9ad8\u6821\u4e0d\u591a\uff0c\u76f8\u5f53\u591a\u7684\u5f00\u53d1\u8005\u662f\u8de8\u754c\u5165\u95e8\uff0c\u9700\u8981\u81ea\u5b66\u5927\u91cf\u77e5\u8bc6\u5e76\u6478\u7d22\u3002\u56e0\u800c\u4f18\u8d28\u7684\u5b66\u4e60\u8d44\u6e90\u81f3\u5173\u91cd\u8981\u3002\u56e0\u6b64\uff0c\u96f7\u950b\u7f51\u641c\u96c6\u4e86\u5168\u4e16\u754c\u8303\u56f4\u5185\u6700\u53d7\u6b22\u8fce\u7684\u673a\u5668\u5b66\u4e60\u8bfe\u7a0b\uff0c\u6574\u7406\u6210\u8fd9\u4efd\u201c\u673a\u5668\u5b66\u4e60\u5341\u5927\u5165\u95e8\u516c\u5f00\u8bfe\u201d\u76d8\u70b9\uff0c\u96c6\u4e2d\u5448\u73b0\u7ed9\u5404\u4f4d\u3002<\/div><div class=\"vlp-block-2 vlp-link-image\"><img decoding=\"async\" src=\"https:\/\/img.alicdn.com\/tfs\/TB1LCE1aQ5E3KVjSZFCXXbuzXXa-200-200.png\" style=\"max-width: 1024px; max-height: 1024px\" \/><\/div><\/div><\/div>\n\n<div class=\"vlp-link-container vlp-layout-spotlight-clone wp-block-visual-link-preview-link\"><a href=\"https:\/\/medium.com\/data-science-collective\/ml-foundations-for-ai-engineers-bda353152d24#3463\" class=\"vlp-link\" title=\"ML Foundations for AI Engineers\" rel=\"nofollow\" target=\"_blank\"><\/a><div class=\"vlp-layout-zone-main\"><span class=\"vlp-block-0 vlp-link-title\">ML Foundations for AI Engineers<\/span><div class=\"vlp-block-1 vlp-link-summary\">A practical guide to teaching computers anything<\/div><div class=\"vlp-block-2 vlp-link-image\"><img decoding=\"async\" src=\"https:\/\/miro.medium.com\/v2\/resize:fit:1200\/1*AMgA4ii72Dd6Z0AFZ2Okag.png\" style=\"max-width: 1024px; max-height: 1024px\" \/><\/div><\/div><\/div>\n\n\n<p><\/p>\n\n\n\n<p>\u5168\u9762\u7684 \u6df1\u5ea6\u5b66\u4e60 \u7b14\u8bb0\uff08\u5305\u542bCV\u3001NLP\uff09- <a 
href=\"https:\/\/github.com\/AccumulateMore\/CV\">https:\/\/github.com\/AccumulateMore\/CV<\/a><\/p>\n\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"\u9762\u8bd5\u5fc5\u5237\uff1a\u5927\u6a21\u578b\u4e3a\u4ec0\u4e48\u6df1\u5c42\u7f51\u7edc\u9700\u8981\u6b63\u5219\u5316\u548c\u5f52\u4e00\u5316\u7ec4\u5408\uff1f\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_AmXPuhRyW3s\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FAmXPuhRyW3s%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/AmXPuhRyW3s\" \/><meta itemprop=\"duration\" content=\"PT4M9S\" \/><meta itemprop=\"uploadDate\" content=\"2025-10-08T06:30:06Z\" \/><\/div><div id=\"lyte_AmXPuhRyW3s\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FAmXPuhRyW3s%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">\u9762\u8bd5\u5fc5\u5237\uff1a\u5927\u6a21\u578b\u4e3a\u4ec0\u4e48\u6df1\u5c42\u7f51\u7edc\u9700\u8981\u6b63\u5219\u5316\u548c\u5f52\u4e00\u5316\u7ec4\u5408\uff1f<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/AmXPuhRyW3s\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FAmXPuhRyW3s%2F0.jpg\" 
alt=\"\u9762\u8bd5\u5fc5\u5237\uff1a\u5927\u6a21\u578b\u4e3a\u4ec0\u4e48\u6df1\u5c42\u7f51\u7edc\u9700\u8981\u6b63\u5219\u5316\u548c\u5f52\u4e00\u5316\u7ec4\u5408\uff1f\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"Github\u7b14\u8bb0\uff1ahttps:\/\/github.com\/AccumulateMore\/CV \u4e92\u76f8\u966a\u4f34\uff0c\u4e00\u8d77\u8fdb\u6b65\uff01\u6b22\u8fce\u70b9\u8d5e\u8bc4\u8bba\u50ac\u66f4\uff01 #\u6df1\u5ea6\u5b66\u4e60 #\u673a\u5668\u5b66\u4e60 #\u5927\u6a21\u578b #\u4eba\u5de5\u667a\u80fd #\u7b97\u6cd5 #\u7a0b\u5e8f\u5458 #\u9762\u8bd5 #ai #\u7f16\u7a0b\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>","protected":false},"excerpt":{"rendered":"<p>Pocket Flow \u6559\u5b78\u5c08\u6848\u300cPocketFlow-Tutorial-Video-Generator\u300d\u3002Pocket Flow \u672c\u8eab\u662f\u4e00\u500b\u6975\u7c21\u7684\u5716\u5f62\u5de5\u4f5c\u6d41\u6846\u67b6\uff0c\u6bcf\u500b node \u662f\u4e00\u500b\u6b65\u9a5f\uff0cflow \u8ca0\u8cac\u628a\u9019\u4e9b\u6b65\u9a5f\u4e32\u8d77\u4f86\uff0c\u9069\u5408\u505a\u591a\u6b65\u9a5f\u7684\u4ee3\u7406\u3001RAG\u3001\u6559\u5b78\u751f\u6210\u7b49\u61c9\u7528\u3002 \u5927\u6a21\u578b\u5fae\u8c03\u5168\u6d41\u7a0b\u8be6\u89e3 https:\/\/www.youtube.com\/watch?v=kYcGLp-PfKc \u5168\u9762\u7684 \u6df1\u5ea6\u5b66\u4e60 \u7b14\u8bb0\uff08\u5305\u542bCV\u3001NLP\uff09- 
https:\/\/github.com\/AccumulateMore\/CV<\/p>\n","protected":false},"author":1,"featured_media":0,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"googlesitekit_rrm_CAowvqSiDA:productID":"","footnotes":""},"class_list":["post-6548","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/pages\/6548","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/comments?post=6548"}],"version-history":[{"count":0,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/pages\/6548\/revisions"}],"wp:attachment":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/media?parent=6548"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}