
{"id":5251,"date":"2025-04-02T17:27:57","date_gmt":"2025-04-02T09:27:57","guid":{"rendered":"https:\/\/infernews.com\/?page_id=5251"},"modified":"2025-07-01T03:44:05","modified_gmt":"2025-06-30T19:44:05","slug":"google-%e7%9a%84-transformer-%e6%98%af%e5%a6%82%e4%bd%95%e9%81%8b%e4%bd%9c","status":"publish","type":"page","link":"https:\/\/infernews.com\/blog\/google-%e7%9a%84-transformer-%e6%98%af%e5%a6%82%e4%bd%95%e9%81%8b%e4%bd%9c\/","title":{"rendered":"Google \u7684 Transformer \u662f\u5982\u4f55\u904b\u4f5c"},"content":{"rendered":"\n<p>Google \u7684 Transformer \u6a21\u578b\u662f\u4e00\u500b\u5728\u81ea\u7136\u8a9e\u8a00\u8655\u7406\uff08NLP\uff09\u9818\u57df\u4e2d\u6975\u5177\u5f71\u97ff\u529b\u7684\u6df1\u5ea6\u5b78\u7fd2\u67b6\u69cb\uff0c\u65bc 2017 \u5e74\u7531 Google \u7814\u7a76\u5718\u968a\u63d0\u51fa\uff0c\u767c\u8868\u5728\u8ad6\u6587\u300aAttention is All You Need\u300b\u4e2d\u3002\u9019\u500b\u6a21\u578b\u6700\u521d\u8a2d\u8a08\u7528\u65bc\u8655\u7406\u5e8f\u5217\u6578\u64da\uff0c\u4f8b\u5982\u6a5f\u5668\u7ffb\u8b6f\u4efb\u52d9\uff0c\u4f46\u5176\u61c9\u7528\u5df2\u64f4\u5c55\u5230\u6587\u672c\u751f\u6210\u3001\u6458\u8981\u3001\u554f\u7b54\u7cfb\u7d71\u7b49\u9818\u57df\u3002\u4ee5\u4e0b\u5c07\u8a73\u7d30\u8aaa\u660e Transformer \u7684\u904b\u4f5c\u539f\u7406\u3002<\/p>\n\n\n\n<body>\n    <div id=\"info\">Neural Network &#8211; Multi-Layer Signal Propagation<\/div>\n    <div id=\"container\"><\/div>\n\n    <!-- Use Import Map for Three.js Modules -->\n    <script type=\"importmap\">\n        {\n            \"imports\": {\n                \"three\": \"https:\/\/unpkg.com\/three@0.161.0\/build\/three.module.js\",\n                \"three\/addons\/\": \"https:\/\/unpkg.com\/three@0.161.0\/examples\/jsm\/\"\n            }\n        }\n    <\/script>\n\n    <script type=\"module\">\n        import * as THREE from 'three';\n        import { OrbitControls } from 'three\/addons\/controls\/OrbitControls.js';\n\n        let scene, camera, renderer, controls;\n   
     let network = { layers: [] }; \/\/ Our network data structure\n        let neuronMeshes = [];        \/\/ Array of arrays: neuronMeshes[layerIdx][neuronIdx]\n        let connectionGroup;          \/\/ Group for connection lines\n        let signals = [];             \/\/ Array to hold active signal animations\n        const clock = new THREE.Clock();\n\n        \/\/ --- Configuration ---\n        const layerSizes = [1, 7, 6, 3]; \/\/ Input(1), Hidden, Hidden, Output layer sizes\n        const layerSpacing = 8;\n        const neuronSpacing = 3;\n        const neuronRadius = 0.5;\n        const signalSpeed = 8; \/\/ Faster signals for better visualization flow\n        const signalRadius = 0.15;\n        const fireInterval = 800; \/\/ Fire input neurons more frequently\n        const flashColor = 0xff4444; \/\/ Color neuron flashes to on hit (RED)\n        const flashDuration = 0.12; \/\/ Shorter flash\n\n        function init() {\n            \/\/ --- Basic Setup ---\n            const container = document.getElementById('container');\n            scene = new THREE.Scene();\n            camera = new THREE.PerspectiveCamera(75, window.innerWidth \/ window.innerHeight, 0.1, 1000);\n            camera.position.z = 20; \/\/ Adjust initial camera\n            camera.position.y = 7;\n\n            renderer = new THREE.WebGLRenderer({ antialias: true });\n            renderer.setSize(window.innerWidth, window.innerHeight);\n            renderer.setPixelRatio(window.devicePixelRatio);\n            renderer.setClearColor(0x111111);\n            container.appendChild(renderer.domElement);\n\n            \/\/ --- Lighting ---\n            const ambientLight = new THREE.AmbientLight(0xcccccc, 0.6);\n            scene.add(ambientLight);\n            const directionalLight = new THREE.DirectionalLight(0xffffff, 0.9);\n            directionalLight.position.set(1, 1.5, 1).normalize();\n            scene.add(directionalLight);\n\n            \/\/ --- Controls ---\n            
controls = new OrbitControls(camera, renderer.domElement);\n            controls.enableDamping = true;\n            controls.dampingFactor = 0.05;\n            controls.screenSpacePanning = false;\n            controls.minDistance = 5;\n            controls.maxDistance = 70;\n\n            \/\/ --- Network Creation ---\n            createNetworkStructure(layerSizes);\n            createVisuals();\n\n            \/\/ --- Event Listeners ---\n            window.addEventListener('resize', onWindowResize, false);\n\n            \/\/ --- Start Animation ---\n            animate();\n\n            \/\/ --- Start Firing Input Neurons Periodically ---\n            \/\/ No longer random, always fire the first (only) input neuron\n            setInterval(fireInputNeuron, fireInterval);\n        }\n\n        function createNetworkStructure(sizes) {\n            network.layers = [];\n            const totalLayers = sizes.length;\n            \/\/ Use Math.max even with 1 input, in case other layers are wider\n            const networkWidth = (Math.max(...sizes) - 1) * neuronSpacing;\n\n            for (let i = 0; i < totalLayers; i++) {\n                const layer = { neurons: [] };\n                const numNeurons = sizes[i];\n                const layerDepth = (i - (totalLayers - 1) \/ 2) * layerSpacing;\n                const layerWidth = (numNeurons - 1) * neuronSpacing;\n\n                for (let j = 0; j < numNeurons; j++) {\n                    const neuron = {\n                        id: `L${i}N${j}`,\n                        layerIndex: i, \/\/ Store index directly in data\n                        neuronIndex: j, \/\/ Store index directly in data\n                        position: new THREE.Vector3(\n                            \/\/ If only one neuron, X will be 0, otherwise centered\n                            (numNeurons > 1) ? 
(j * neuronSpacing) - layerWidth \/ 2 : 0,\n                            0,\n                            layerDepth\n                        ),\n                        connections: []\n                    };\n\n                    if (i < totalLayers - 1) {\n                        for (let k = 0; k < sizes[i + 1]; k++) {\n                            neuron.connections.push({\n                                targetLayerIndex: i + 1,\n                                targetNeuronIndex: k,\n                                weight: Math.random() * 2 - 1 \/\/ Random weight -1 to 1\n                            });\n                        }\n                    }\n                    layer.neurons.push(neuron);\n                }\n                network.layers.push(layer);\n            }\n        }\n\n\n        function createVisuals() {\n            neuronMeshes = [];\n            if (connectionGroup) scene.remove(connectionGroup);\n\n            connectionGroup = new THREE.Group();\n            scene.add(connectionGroup);\n\n            const neuronGeometry = new THREE.SphereGeometry(neuronRadius, 16, 16);\n\n            network.layers.forEach((layer, i) => {\n                const layerNeuronMeshes = [];\n                layer.neurons.forEach((neuron, j) => {\n                    \/\/ --- Create Neuron Mesh ---\n                    let neuronBaseColor = 0x00dd00; \/\/ Input\n                    if (i > 0 && i < network.layers.length - 1) neuronBaseColor = 0x0088ff; \/\/ Hidden\n                    if (i === network.layers.length - 1) neuronBaseColor = 0xff4444; \/\/ Output\n\n                    const neuronMaterial = new THREE.MeshPhongMaterial({\n                         color: neuronBaseColor,\n                         shininess: 30\n                        });\n                    const neuronMesh = new THREE.Mesh(neuronGeometry, neuronMaterial);\n                    neuronMesh.position.copy(neuron.position);\n\n                    \/\/ Store reference data 
including indices and original color\n                    neuronMesh.userData = {\n                        id: neuron.id,\n                        type: 'neuron',\n                        layerIndex: i,        \/\/ <-- Store layer index\n                        neuronIndex: j,       \/\/ <-- Store neuron index\n                        originalColor: neuronBaseColor,\n                        flashEndTime: undefined\n                    };\n\n                    scene.add(neuronMesh);\n                    layerNeuronMeshes.push(neuronMesh);\n\n                    \/\/ --- Create Connection Lines ---\n                    if (neuron.connections.length > 0) {\n                        neuron.connections.forEach(conn => {\n                            const nextNeuronObject = network.layers[conn.targetLayerIndex].neurons[conn.targetNeuronIndex];\n                            const startPos = neuron.position;\n                            const endPos = nextNeuronObject.position;\n\n                            const points = [startPos, endPos];\n                            const lineGeometry = new THREE.BufferGeometry().setFromPoints(points);\n\n                            const weightAbs = Math.abs(conn.weight);\n                            const lineWeightInfluence = Math.min(weightAbs * 2, 1.0);\n                            const color = new THREE.Color();\n                            if (conn.weight > 0) {\n                                color.lerpColors(new THREE.Color(0x555555), new THREE.Color(0x8888ff), lineWeightInfluence); \/\/ Grey to Blueish\n                            } else {\n                                color.lerpColors(new THREE.Color(0x555555), new THREE.Color(0xff8888), lineWeightInfluence); \/\/ Grey to Reddish\n                            }\n                            const lineMaterial = new THREE.LineBasicMaterial({\n                                color: color,\n                                linewidth: 1, \/\/ Note: linewidth > 1 not reliable\n 
                               transparent: true,\n                                opacity: 0.2 + weightAbs * 0.5 \/\/ Opacity based on weight\n                            });\n                            const line = new THREE.Line(lineGeometry, lineMaterial);\n                            connectionGroup.add(line);\n                        });\n                    }\n                });\n                neuronMeshes.push(layerNeuronMeshes);\n            });\n        }\n\n        function fireNeuron(layerIndex, neuronIndex) {\n            \/\/ Basic check: Ensure the neuron exists in our network data\n            if (layerIndex >= network.layers.length || neuronIndex >= network.layers[layerIndex].neurons.length) {\n                 console.warn(`Attempted to fire non-existent neuron: L${layerIndex}N${neuronIndex}`);\n                 return;\n            }\n\n            const neuron = network.layers[layerIndex].neurons[neuronIndex];\n            \/\/ console.log(`Firing ${neuron.id}`); \/\/ Uncomment for debugging\n\n            \/\/ Check if this neuron has outgoing connections (i.e., not an output neuron)\n            if (!neuron.connections || neuron.connections.length === 0) {\n                return; \/\/ Don't fire from output layer or if no connections defined\n            }\n\n             \/\/ Check if the MESH for this neuron exists\n             if (!neuronMeshes[layerIndex] || !neuronMeshes[layerIndex][neuronIndex]) {\n                 console.error(\"Could not find source neuron MESH for firing:\", layerIndex, neuronIndex);\n                 return;\n             }\n             const sourcePosition = neuronMeshes[layerIndex][neuronIndex].position; \/\/ Use mesh position as start\n\n\n            neuron.connections.forEach(conn => {\n                \/\/ Get the actual MESH of the target neuron\n                const targetNeuronMesh = neuronMeshes[conn.targetLayerIndex]?.[conn.targetNeuronIndex];\n\n                 if (!targetNeuronMesh) {\n         
           console.error(\"Could not find target neuron mesh for connection:\", conn);\n                    return; \/\/ Skip if target mesh doesn't exist\n                }\n\n                const startPos = sourcePosition.clone(); \/\/ Start from the firing neuron's mesh position\n                const endPos = targetNeuronMesh.position.clone(); \/\/ End at the target neuron's mesh position\n                const weight = conn.weight;\n\n                \/\/ --- Create Signal Mesh ---\n                const signalColor = weight > 0 ? 0xccccff : 0xffcccc; \/\/ Light blue\/red signals\n                const signalGeometry = new THREE.SphereGeometry(signalRadius, 8, 8);\n                const signalMaterial = new THREE.MeshBasicMaterial({\n                     color: signalColor,\n                     transparent: true,\n                     opacity: 0.9\n                    });\n                const signalMesh = new THREE.Mesh(signalGeometry, signalMaterial);\n                signalMesh.position.copy(startPos);\n                scene.add(signalMesh);\n\n                \/\/ --- Store Signal Animation Data ---\n                signals.push({\n                    mesh: signalMesh,\n                    start: startPos,\n                    end: endPos,\n                    weight: weight,\n                    progress: 0,\n                    totalDistance: startPos.distanceTo(endPos),\n                    targetMesh: targetNeuronMesh \/\/ Store reference to the target MESH\n                });\n            });\n        }\n\n        function updateSignals(deltaTime, currentTime) {\n            const signalsToRemove = [];\n            const neuronsToFire = []; \/\/ Collect neurons to fire after iterating\n\n            signals.forEach((signal, index) => {\n                const distanceToTravel = signalSpeed * deltaTime;\n                const progressIncrement = signal.totalDistance > 0 ? 
(distanceToTravel \/ signal.totalDistance) : 1;\n\n                signal.progress += progressIncrement;\n\n                if (signal.progress >= 1) {\n                    signal.progress = 1;\n                    signalsToRemove.push(index); \/\/ Mark for removal\n\n                    \/\/ --- Trigger Flash Effect ---\n                    const targetMesh = signal.targetMesh;\n                    if (targetMesh && targetMesh.material) {\n                        targetMesh.material.color.setHex(flashColor); \/\/ Use the configured flash color\n                        targetMesh.userData.flashEndTime = currentTime + flashDuration;\n\n                        \/\/ --- !! Check if target should fire !! ---\n                        const targetLayerIndex = targetMesh.userData.layerIndex;\n                        const targetNeuronIndex = targetMesh.userData.neuronIndex;\n\n                        \/\/ Only fire if it's not an output neuron and indices are valid\n                        if (targetLayerIndex !== undefined && targetNeuronIndex !== undefined &&\n                            targetLayerIndex < network.layers.length - 1) \/\/ Don't fire from the last layer\n                        {\n                           \/\/ Store to fire *after* this loop\n                           neuronsToFire.push({ layerIndex: targetLayerIndex, neuronIndex: targetNeuronIndex });\n                        }\n                    }\n\n                    \/\/ Clean up signal mesh\n                    scene.remove(signal.mesh);\n                    if (signal.mesh.geometry) signal.mesh.geometry.dispose();\n                    if (signal.mesh.material) signal.mesh.material.dispose();\n\n                } else {\n                    \/\/ Update position using lerp only if not finished\n                    signal.mesh.position.lerpVectors(signal.start, signal.end, signal.progress);\n                }\n            });\n\n            \/\/ Remove finished signals\n            for (let i = 
signalsToRemove.length - 1; i >= 0; i--) {\n                signals.splice(signalsToRemove[i], 1);\n            }\n\n            \/\/ Fire the neurons that were hit in this frame\n            \/\/ Use Set to avoid firing the same neuron multiple times in one frame\n            const uniqueNeuronsToFire = new Set(neuronsToFire.map(n => `L${n.layerIndex}N${n.neuronIndex}`));\n            uniqueNeuronsToFire.forEach(neuronId => {\n                const parts = neuronId.match(\/L(\\d+)N(\\d+)\/);\n                if(parts && parts.length === 3) {\n                    const layerIdx = parseInt(parts[1], 10);\n                    const neuronIdx = parseInt(parts[2], 10);\n                    fireNeuron(layerIdx, neuronIdx); \/\/ Fire the next neuron immediately\n                }\n            });\n        }\n\n        \/\/ Handles resetting flash effect\n        function updateNeuronEffects(currentTime) {\n             if (!neuronMeshes || neuronMeshes.length === 0) return;\n             neuronMeshes.forEach(layerMeshes => {\n                if (!layerMeshes) return;\n                layerMeshes.forEach(mesh => {\n                    \/\/ Check mesh, userData, flashEndTime, material, and originalColor before accessing\n                    if (mesh?.userData?.flashEndTime !== undefined && currentTime >= mesh.userData.flashEndTime) {\n                        if (mesh.material && typeof mesh.userData.originalColor === 'number') {\n                            mesh.material.color.setHex(mesh.userData.originalColor);\n                        }\n                        delete mesh.userData.flashEndTime; \/\/ Use delete to remove the property\n                    }\n                });\n            });\n        }\n\n        \/\/ Renamed function for clarity, always fires the first neuron\n        function fireInputNeuron() {\n            if (network.layers.length === 0 || network.layers[0].neurons.length === 0) return;\n            fireNeuron(0, 0); \/\/ Always fire Layer 0, 
Neuron 0\n        }\n\n        function onWindowResize() {\n            camera.aspect = window.innerWidth \/ window.innerHeight;\n            camera.updateProjectionMatrix();\n            renderer.setSize(window.innerWidth, window.innerHeight);\n        }\n\n        function animate() {\n            requestAnimationFrame(animate);\n            const deltaTime = clock.getDelta();\n            const currentTime = clock.elapsedTime;\n\n            controls.update(); \/\/ Update orbit controls\n\n            updateSignals(deltaTime, currentTime); \/\/ Update signals, trigger flashes, and queue next firings\n\n            updateNeuronEffects(currentTime); \/\/ Reset finished flashes\n\n            renderer.render(scene, camera);\n        }\n\n        \/\/ --- Start ---\n        init();\n\n    <\/script>\n<\/body>\n\n\n\n<h2 class=\"wp-block-heading\">\u6838\u5fc3\u6982\u5ff5\uff1a\u6ce8\u610f\u529b\u6a5f\u5236<\/h2>\n\n\n\n<p>Transformer \u7684\u6838\u5fc3\u662f <strong>\u6ce8\u610f\u529b\u6a5f\u5236\uff08Attention Mechanism\uff09<\/strong>\uff0c\u7279\u5225\u662f <strong>\u81ea\u6ce8\u610f\u529b\uff08Self-Attention\uff09<\/strong>\u3002\u8207\u50b3\u7d71\u7684\u5faa\u74b0\u795e\u7d93\u7db2\u7d61\uff08RNN\uff09\u6216\u5377\u7a4d\u795e\u7d93\u7db2\u7d61\uff08CNN\uff09\u4e0d\u540c\uff0cTransformer \u5b8c\u5168\u4f9d\u8cf4\u6ce8\u610f\u529b\u6a5f\u5236\u4f86\u6355\u6349\u5e8f\u5217\u4e2d\u4e0d\u540c\u4f4d\u7f6e\u4e4b\u9593\u7684\u4f9d\u8cf4\u95dc\u4fc2\u3002\u81ea\u6ce8\u610f\u529b\u5141\u8a31\u6a21\u578b\u5728\u8655\u7406\u67d0\u500b\u8a5e\u6642\uff0c\u52d5\u614b\u5730\u95dc\u6ce8\u5e8f\u5217\u4e2d\u7684\u5176\u4ed6\u8a5e\uff0c\u5f9e\u800c\u7406\u89e3\u4e0a\u4e0b\u6587\u3002<\/p>\n\n\n<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"I Visualised Attention in Transformers\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" 
id=\"WYL_RNF0FvRjGZk\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FRNF0FvRjGZk%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/RNF0FvRjGZk\" \/><meta itemprop=\"duration\" content=\"PT13M1S\" \/><meta itemprop=\"uploadDate\" content=\"2025-06-30T16:24:54Z\" \/><\/div><div id=\"lyte_RNF0FvRjGZk\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FRNF0FvRjGZk%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">I Visualised Attention in Transformers<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/RNF0FvRjGZk\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FRNF0FvRjGZk%2F0.jpg\" alt=\"I Visualised Attention in Transformers\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"To try everything Brilliant has to offer\u2014free\u2014for a full 30 days, visit https:\/\/brilliant.org\/GalLahat\/ . You\u2019ll also get 20% off an annual premium subscription. 
Voice type with Peach Beta \ud83c\udf51: https:\/\/peach-voice.com This video was sponsored by Brilliant The music is created by my partner (AI) and me, feel free to use it commercially for your own projects but make sure to credit this video when you do: https:\/\/drive.google.com\/drive\/folders\/1WSKmpkQGOHb_jij4Ut2NpivTcljc7qLS?usp=sharing\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption><\/figcaption><\/figure>\n\n\n<h2 class=\"wp-block-heading\">\u6574\u9ad4\u67b6\u69cb<\/h2>\n\n\n\n<p>Transformer \u7531\u5169\u500b\u4e3b\u8981\u90e8\u5206\u7d44\u6210\uff1a<strong>\u7de8\u78bc\u5668\uff08Encoder\uff09<\/strong> \u548c <strong>\u89e3\u78bc\u5668\uff08Decoder\uff09<\/strong>\u3002<\/p>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u7de8\u78bc\u5668<\/strong>\uff1a\u5c07\u8f38\u5165\u5e8f\u5217\u8f49\u63db\u70ba\u4e00\u7cfb\u5217\u9ad8\u7dad\u8868\u793a\uff0c\u6355\u6349\u8f38\u5165\u7684\u4e0a\u4e0b\u6587\u4fe1\u606f\u3002<\/li>\n\n\n\n<li><strong>\u89e3\u78bc\u5668<\/strong>\uff1a\u6839\u64da\u7de8\u78bc\u5668\u7684\u8f38\u51fa\u751f\u6210\u76ee\u6a19\u5e8f\u5217\uff0c\u4f8b\u5982\u5c07\u6e90\u8a9e\u8a00\u7ffb\u8b6f\u6210\u76ee\u6a19\u8a9e\u8a00\u3002<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\">\u7de8\u78bc\u5668\u7d50\u69cb<\/h3>\n\n\n\n<p>\u7de8\u78bc\u5668\u7531\u591a\u500b\u76f8\u540c\u7684\u5c64\uff08\u901a\u5e38\u662f 6 \u5c64\uff09\u5806\u758a\u800c\u6210\uff0c\u6bcf\u5c64\u5305\u542b\u5169\u500b\u95dc\u9375\u5b50\u5c64\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u591a\u982d\u81ea\u6ce8\u610f\u529b\u6a5f\u5236\uff08Multi-Head Self-Attention\uff09<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u81ea\u6ce8\u610f\u529b\u901a\u904e\u8a08\u7b97\u4e09\u500b\u5411\u91cf\u4f86\u5be6\u73fe\uff1a\n<ul 
class=\"wp-block-list\">\n<li><strong>Query\uff08\u67e5\u8a62\uff09<\/strong>\uff1a\u8868\u793a\u7576\u524d\u8a5e\u7684\u67e5\u8a62\u3002<\/li>\n\n\n\n<li><strong>Key\uff08\u9375\uff09<\/strong>\uff1a\u8868\u793a\u6240\u6709\u8a5e\u7684\u9375\uff0c\u7528\u65bc\u5339\u914d\u3002<\/li>\n\n\n\n<li><strong>Value\uff08\u503c\uff09<\/strong>\uff1a\u8868\u793a\u6240\u6709\u8a5e\u7684\u503c\uff0c\u7528\u65bc\u52a0\u6b0a\u3002<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>\u904b\u4f5c\u904e\u7a0b\uff1a\n<ol class=\"wp-block-list\">\n<li>\u5c0d\u6bcf\u500b\u8a5e\u8a08\u7b97 Query\u3001Key \u548c Value \u5411\u91cf\u3002<\/li>\n\n\n\n<li>\u5c07 Query \u8207\u6240\u6709\u8a5e\u7684 Key \u5411\u91cf\u9032\u884c\u9ede\u7a4d\uff0c\u7d93\u7e2e\u653e\u8207 softmax \u6b78\u4e00\u5316\u5f8c\u5f97\u5230\u6ce8\u610f\u529b\u6b0a\u91cd\u3002<\/li>\n\n\n\n<li>\u7528\u9019\u4e9b\u6b0a\u91cd\u5c0d Value \u5411\u91cf\u52a0\u6b0a\u6c42\u548c\uff0c\u751f\u6210\u8a72\u8a5e\u7684\u4e0a\u4e0b\u6587\u8868\u793a\u3002<\/li>\n<\/ol>\n<\/li>\n\n\n\n<li><strong>\u591a\u982d<\/strong>\uff1a\u5c07\u81ea\u6ce8\u610f\u529b\u4e26\u884c\u57f7\u884c\u591a\u6b21\uff08\u4f8b\u5982 8 \u6b21\uff09\uff0c\u6bcf\u6b21\u4f7f\u7528\u4e0d\u540c\u7684\u7dda\u6027\u8b8a\u63db\uff0c\u6700\u5f8c\u62fc\u63a5\u7d50\u679c\uff0c\u589e\u5f37\u6a21\u578b\u6355\u6349\u591a\u65b9\u9762\u95dc\u4fc2\u7684\u80fd\u529b\u3002<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u524d\u994b\u795e\u7d93\u7db2\u7d61\uff08Feed-Forward Neural Network\uff09<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u4e00\u500b\u7c21\u55ae\u7684\u5168\u9023\u63a5\u7db2\u7d61\uff0c\u5c0d\u6bcf\u500b\u8a5e\u7684\u8868\u793a\u7368\u7acb\u9032\u884c\u8f49\u63db\uff0c\u5305\u542b\u5169\u500b\u7dda\u6027\u5c64\u548c\u4e00\u500b ReLU \u6fc0\u6d3b\u51fd\u6578\uff0c\u63d0\u5347\u6a21\u578b\u7684\u8868\u9054\u80fd\u529b\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u6bcf\u500b\u5b50\u5c64\u5f8c\u9762\u9084\u6709 <strong>\u6b98\u5dee\u9023\u63a5\uff08Residual Connection\uff09<\/strong> \u548c <strong>\u5c64\u6b78\u4e00\u5316\uff08Layer 
Normalization\uff09<\/strong>\uff0c\u4ee5\u7a69\u5b9a\u8a13\u7df4\u4e26\u63d0\u9ad8\u6027\u80fd\u3002\u7de8\u78bc\u5668\u6700\u7d42\u8f38\u51fa\u4e00\u7d44\u9ad8\u7dad\u5411\u91cf\uff0c\u8868\u793a\u8f38\u5165\u5e8f\u5217\u7684\u4e0a\u4e0b\u6587\u4fe1\u606f\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\">\u89e3\u78bc\u5668\u7d50\u69cb<\/h3>\n\n\n\n<p>\u89e3\u78bc\u5668\u540c\u6a23\u7531\u591a\u500b\u76f8\u540c\u5c64\u5806\u758a\u800c\u6210\uff08\u901a\u5e38\u4e5f\u662f 6 \u5c64\uff09\uff0c\u4f46\u6bcf\u5c64\u6709\u4e09\u500b\u5b50\u5c64\uff1a<\/p>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u63a9\u78bc\u591a\u982d\u81ea\u6ce8\u610f\u529b\u6a5f\u5236\uff08Masked Multi-Head Self-Attention\uff09<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u8207\u7de8\u78bc\u5668\u7684\u81ea\u6ce8\u610f\u529b\u985e\u4f3c\uff0c\u4f46\u52a0\u5165\u4e86\u201c\u63a9\u78bc\u201d\uff08Mask\uff09\uff0c\u78ba\u4fdd\u6a21\u578b\u5728\u751f\u6210\u67d0\u500b\u8a5e\u6642\u53ea\u80fd\u95dc\u6ce8\u7576\u524d\u4f4d\u7f6e\u53ca\u4e4b\u524d\u7684\u8a5e\uff0c\u9632\u6b62\u201c\u770b\u5230\u672a\u4f86\u201d\u7684\u4fe1\u606f\u6d29\u6f0f\u3002<\/li>\n<\/ul>\n\n\n\n<ol start=\"2\" class=\"wp-block-list\">\n<li><strong>\u7de8\u78bc\u5668-\u89e3\u78bc\u5668\u6ce8\u610f\u529b\u5c64\uff08Encoder-Decoder Attention\uff09<\/strong><\/li>\n<\/ol>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u9019\u4e00\u5c64\u8b93\u89e3\u78bc\u5668\u95dc\u6ce8\u7de8\u78bc\u5668\u7684\u8f38\u51fa\u3002<\/li>\n\n\n\n<li>Query \u4f86\u81ea\u89e3\u78bc\u5668\u7684\u81ea\u6ce8\u610f\u529b\u5c64\uff0cKey \u548c Value \u4f86\u81ea\u7de8\u78bc\u5668\u7684\u8f38\u51fa\uff0c\u5e6b\u52a9\u89e3\u78bc\u5668\u52d5\u614b\u9078\u64c7\u8207\u7576\u524d\u751f\u6210\u4efb\u52d9\u76f8\u95dc\u7684\u8f38\u5165\u90e8\u5206\u3002<\/li>\n<\/ul>\n\n\n\n<ol start=\"3\" class=\"wp-block-list\">\n<li><strong>\u524d\u994b\u795e\u7d93\u7db2\u7d61<\/strong><\/li>\n<\/ol>\n\n\n\n<ul 
class=\"wp-block-list\">\n<li>\u8207\u7de8\u78bc\u5668\u4e2d\u7684\u524d\u994b\u7db2\u7d61\u76f8\u540c\uff0c\u5c0d\u6bcf\u500b\u4f4d\u7f6e\u7684\u8868\u793a\u9032\u884c\u7368\u7acb\u8f49\u63db\u3002<\/li>\n<\/ul>\n\n\n\n<p>\u89e3\u78bc\u5668\u540c\u6a23\u4f7f\u7528\u6b98\u5dee\u9023\u63a5\u548c\u5c64\u6b78\u4e00\u5316\u3002\u5b83\u7684\u4efb\u52d9\u662f\u6839\u64da\u7de8\u78bc\u5668\u7684\u8f38\u51fa\u9010\u6b65\u751f\u6210\u76ee\u6a19\u5e8f\u5217\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u4f4d\u7f6e\u7de8\u78bc\uff08Positional Encoding\uff09<\/h2>\n\n\n\n<p>\u7531\u65bc Transformer \u4e0d\u542b RNN \u6216 CNN\uff0c\u7121\u6cd5\u81ea\u7136\u6355\u6349\u8a5e\u7684\u9806\u5e8f\u4fe1\u606f\uff0c\u56e0\u6b64\u5f15\u5165\u4e86 <strong>\u4f4d\u7f6e\u7de8\u78bc<\/strong>\u3002\u4f4d\u7f6e\u7de8\u78bc\u662f\u4e00\u7d44\u5411\u91cf\uff08\u901a\u5e38\u7531\u6b63\u5f26\u548c\u9918\u5f26\u51fd\u6578\u751f\u6210\uff09\uff0c\u8207\u8a5e\u5d4c\u5165\u76f8\u52a0\uff0c\u70ba\u6bcf\u500b\u8a5e\u6dfb\u52a0\u7368\u7279\u7684\u4f4d\u7f6e\u4fe1\u606f\u3002\u9019\u4f7f\u5f97\u6a21\u578b\u80fd\u5920\u7406\u89e3\u5e8f\u5217\u4e2d\u8a5e\u7684\u76f8\u5c0d\u4f4d\u7f6e\u548c\u9806\u5e8f\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u8a13\u7df4\u8207\u63a8\u7406<\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li><strong>\u8a13\u7df4<\/strong>\uff1a\u4f7f\u7528 <strong>\u6559\u5e2b\u5f37\u5236\uff08Teacher Forcing\uff09<\/strong> \u65b9\u6cd5\uff0c\u5373\u89e3\u78bc\u5668\u7684\u8f38\u5165\u662f\u771f\u5be6\u7684\u76ee\u6a19\u5e8f\u5217\uff0c\u800c\u4e0d\u662f\u6a21\u578b\u751f\u6210\u7684\u5e8f\u5217\uff0c\u4ee5\u52a0\u901f\u6536\u6582\u3002<\/li>\n\n\n\n<li><strong>\u63a8\u7406<\/strong>\uff1a\u63a1\u7528 <strong>\u81ea\u56de\u6b78\uff08Auto-Regressive\uff09<\/strong> 
\u65b9\u5f0f\uff0c\u5f9e\u4e00\u500b\u958b\u59cb\u6a19\u8a18\uff08\u4f8b\u5982\u201c&lt;sos&gt;\u201d\uff09\u958b\u59cb\uff0c\u9010\u6b65\u751f\u6210\u6bcf\u500b\u8a5e\uff0c\u4e26\u5c07\u751f\u6210\u7684\u8a5e\u4f5c\u70ba\u4e0b\u4e00\u500b\u6642\u9593\u6b65\u7684\u8f38\u5165\uff0c\u76f4\u5230\u751f\u6210\u7d50\u675f\u6a19\u8a18\uff08\u4f8b\u5982\u201c&lt;eos&gt;\u201d\uff09\u3002<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\">\u904b\u4f5c\u512a\u52e2<\/h2>\n\n\n\n<ol class=\"wp-block-list\">\n<li><strong>\u4e26\u884c\u8a08\u7b97<\/strong>\uff1a\u4e0d\u50cf RNN \u6309\u9806\u5e8f\u8655\u7406\u5e8f\u5217\uff0cTransformer \u7684\u6ce8\u610f\u529b\u6a5f\u5236\u5141\u8a31\u540c\u6642\u8655\u7406\u6574\u500b\u5e8f\u5217\uff0c\u5927\u5e45\u63d0\u9ad8\u8a08\u7b97\u6548\u7387\u3002<\/li>\n\n\n\n<li><strong>\u9577\u8ddd\u96e2\u4f9d\u8cf4<\/strong>\uff1a\u81ea\u6ce8\u610f\u529b\u6a5f\u5236\u80fd\u6709\u6548\u6355\u6349\u5e8f\u5217\u4e2d\u9060\u8ddd\u96e2\u7684\u95dc\u4fc2\uff0c\u89e3\u6c7a RNN \u7684\u9577\u7a0b\u4f9d\u8cf4\u554f\u984c\u3002<\/li>\n\n\n\n<li><strong>\u9748\u6d3b\u6027<\/strong>\uff1aTransformer \u7684\u8a2d\u8a08\u4f7f\u5176\u9069\u7528\u65bc\u591a\u7a2e\u4efb\u52d9\uff0c\u50ac\u751f\u4e86 BERT\u3001GPT \u7b49\u5f8c\u7e8c\u6a21\u578b\u3002<\/li>\n<\/ol>\n\n\n\n<h2 class=\"wp-block-heading\">\u7e3d\u7d50<\/h2>\n\n\n\n<p>Google \u7684 Transformer \u901a\u904e\u7de8\u78bc\u5668\u548c\u89e3\u78bc\u5668\u7684\u5354\u4f5c\uff0c\u5229\u7528\u81ea\u6ce8\u610f\u529b\u6a5f\u5236\u548c\u524d\u994b\u795e\u7d93\u7db2\u7d61\uff0c\u5c07\u8f38\u5165\u5e8f\u5217\u8f49\u63db\u70ba\u9ad8\u7dad\u8868\u793a\u4e26\u751f\u6210\u8f38\u51fa\u5e8f\u5217\u3002\u4f4d\u7f6e\u7de8\u78bc\u89e3\u6c7a\u4e86\u9806\u5e8f\u554f\u984c\uff0c\u800c\u591a\u982d\u6ce8\u610f\u529b\u589e\u5f37\u4e86\u4e0a\u4e0b\u6587\u7406\u89e3\u80fd\u529b\u3002\u5176\u9ad8\u6548\u7684\u4e26\u884c\u8a08\u7b97\u548c\u5f37\u5927\u7684\u8868\u9054\u80fd\u529b\u4f7f\u5176\u6210\u70ba\u73fe\u4ee3 NLP 
\u7684\u57fa\u77f3\uff0c\u4e26\u5f71\u97ff\u4e86\u8a08\u7b97\u6a5f\u8996\u89ba\u7b49\u5176\u4ed6\u9818\u57df\u7684\u767c\u5c55\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>Google \u7684 Transformer \u6a21\u578b\u662f\u4e00\u500b\u5728\u81ea\u7136\u8a9e\u8a00\u8655\u7406\uff08NLP\uff09\u9818\u57df\u4e2d\u6975\u5177\u5f71\u97ff\u529b\u7684\u6df1\u5ea6\u5b78\u7fd2\u67b6\u69cb\uff0c\u65bc 2017 \u5e74\u7531 Google \u7814\u7a76\u5718\u968a\u63d0\u51fa\uff0c\u767c\u8868\u5728\u8ad6\u6587\u300aAttention is All You Need\u300b\u4e2d\u3002\u9019\u500b\u6a21\u578b\u6700\u521d\u8a2d\u8a08\u7528\u65bc\u8655\u7406\u5e8f\u5217\u6578\u64da\uff0c\u4f8b\u5982\u6a5f\u5668\u7ffb\u8b6f\u4efb\u52d9\uff0c\u4f46\u5176\u61c9\u7528\u5df2\u64f4\u5c55\u5230\u6587\u672c\u751f\u6210\u3001\u6458\u8981\u3001\u554f\u7b54\u7cfb\u7d71\u7b49\u9818\u57df\u3002\u4ee5\u4e0b\u5c07\u8a73\u7d30\u8aaa\u660e Transformer \u7684\u904b\u4f5c\u539f\u7406\u3002 Neural Network &#8211; Multi-Layer Signal Propagation \u6838\u5fc3\u6982\u5ff5\uff1a\u6ce8\u610f\u529b\u6a5f\u5236 Transformer \u7684\u6838\u5fc3\u662f \u6ce8\u610f\u529b\u6a5f\u5236\uff08Attention Mechanism\uff09\uff0c\u7279\u5225\u662f \u81ea\u6ce8\u610f\u529b\uff08Self-Attention\uff09\u3002\u8207\u50b3\u7d71\u7684\u5faa\u74b0\u795e\u7d93\u7db2\u7d61\uff08RNN\uff09\u6216\u5377\u7a4d\u795e\u7d93\u7db2\u7d61\uff08CNN\uff09\u4e0d\u540c\uff0cTransformer \u5b8c\u5168\u4f9d\u8cf4\u6ce8\u610f\u529b\u6a5f\u5236\u4f86\u6355\u6349\u5e8f\u5217\u4e2d\u4e0d\u540c\u4f4d\u7f6e\u4e4b\u9593\u7684\u4f9d\u8cf4\u95dc\u4fc2\u3002\u81ea\u6ce8\u610f\u529b\u5141\u8a31\u6a21\u578b\u5728\u8655\u7406\u67d0\u500b\u8a5e\u6642\uff0c\u52d5\u614b\u5730\u95dc\u6ce8\u5e8f\u5217\u4e2d\u7684\u5176\u4ed6\u8a5e\uff0c\u5f9e\u800c\u7406\u89e3\u4e0a\u4e0b\u6587\u3002 \u6574\u9ad4\u67b6\u69cb Transformer \u7531\u5169\u500b\u4e3b\u8981\u90e8\u5206\u7d44\u6210\uff1a\u7de8\u78bc\u5668\uff08Encoder\uff09 \u548c \u89e3\u78bc\u5668\uff08Decoder\uff09\u3002 \u7de8\u78bc\u5668\u7d50\u69cb 
\u7de8\u78bc\u5668\u7531\u591a\u500b\u76f8\u540c\u7684\u5c64\uff08\u901a\u5e38\u662f 6 \u5c64\uff09\u5806\u758a\u800c\u6210\uff0c\u6bcf\u5c64\u5305\u542b\u5169\u500b\u95dc\u9375\u5b50\u5c64\uff1a \u6bcf\u500b\u5b50\u5c64\u5f8c\u9762\u9084\u6709 \u6b98\u5dee\u9023\u63a5\uff08Residual Connection\uff09 \u548c \u5c64\u6b78\u4e00\u5316\uff08Layer Normalization\uff09\uff0c\u4ee5\u7a69\u5b9a\u8a13\u7df4\u4e26\u63d0\u9ad8\u6027\u80fd\u3002\u7de8\u78bc\u5668\u6700\u7d42\u8f38\u51fa\u4e00\u7d44\u9ad8\u7dad\u5411\u91cf\uff0c\u8868\u793a\u8f38\u5165\u5e8f\u5217\u7684\u4e0a\u4e0b\u6587\u4fe1\u606f\u3002 \u89e3\u78bc\u5668\u7d50\u69cb \u89e3\u78bc\u5668\u540c\u6a23\u7531\u591a\u500b\u76f8\u540c\u5c64\u5806\u758a\u800c\u6210\uff08\u901a\u5e38\u4e5f\u662f 6 \u5c64\uff09\uff0c\u4f46\u6bcf\u5c64\u6709\u4e09\u500b\u5b50\u5c64\uff1a \u89e3\u78bc\u5668\u540c\u6a23\u4f7f\u7528\u6b98\u5dee\u9023\u63a5\u548c\u5c64\u6b78\u4e00\u5316\u3002\u5b83\u7684\u4efb\u52d9\u662f\u6839\u64da\u7de8\u78bc\u5668\u7684\u8f38\u51fa\u9010\u6b65\u751f\u6210\u76ee\u6a19\u5e8f\u5217\u3002 \u4f4d\u7f6e\u7de8\u78bc\uff08Positional Encoding\uff09 \u7531\u65bc Transformer \u4e0d\u542b RNN \u6216 CNN\uff0c\u7121\u6cd5\u81ea\u7136\u6355\u6349\u8a5e\u7684\u9806\u5e8f\u4fe1\u606f\uff0c\u56e0\u6b64\u5f15\u5165\u4e86 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"googlesitekit_rrm_CAowvqSiDA:productID":"","footnotes":""},"class_list":["post-5251","page","type-page","status-publish","hentry"],"_links":{"self":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/pages\/5251","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/comments?post=5251"}],"version-history":[{"count":0,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/pages\/5251\/revisions"}],"wp:attachment":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/media?parent=5251"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}