
{"id":3088,"date":"2024-06-09T00:48:42","date_gmt":"2024-06-08T16:48:42","guid":{"rendered":"https:\/\/infernews.com\/?p=3088"},"modified":"2024-06-09T00:53:41","modified_gmt":"2024-06-08T16:53:41","slug":"multimodal-rag","status":"publish","type":"post","link":"https:\/\/infernews.com\/blog\/multimodal-rag\/","title":{"rendered":"Multimodal RAG \u591a\u6a21\u614b\u5411\u91cf\u8cc7\u6599\u5eab ChromaDB"},"content":{"rendered":"<figure class=\"wp-block-embed-youtube wp-block-embed is-type-video is-provider-youtube wp-embed-aspect-16-9 wp-has-aspect-ratio\"><div class=\"lyte-wrapper\" title=\"Multimodal RAG!? - Pushing the Boundaries of AI\" style=\"width:853px;max-width:100%;margin:5px auto;\"><div class=\"lyMe\" id=\"WYL_OPGmeFmFyq0\" itemprop=\"video\" itemscope itemtype=\"https:\/\/schema.org\/VideoObject\"><div><meta itemprop=\"thumbnailUrl\" content=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FOPGmeFmFyq0%2Fhqdefault.jpg\" \/><meta itemprop=\"embedURL\" content=\"https:\/\/www.youtube.com\/embed\/OPGmeFmFyq0\" \/><meta itemprop=\"duration\" content=\"PT22M20S\" \/><meta itemprop=\"uploadDate\" content=\"2024-06-07T13:01:09Z\" \/><\/div><div id=\"lyte_OPGmeFmFyq0\" data-src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FOPGmeFmFyq0%2Fhqdefault.jpg\" class=\"pL\"><div class=\"tC\"><div class=\"tT\" itemprop=\"name\">Multimodal RAG!? - Pushing the Boundaries of AI<\/div><\/div><div class=\"play\"><\/div><div class=\"ctrl\"><div class=\"Lctrl\"><\/div><div class=\"Rctrl\"><\/div><\/div><\/div><noscript><a href=\"https:\/\/youtu.be\/OPGmeFmFyq0\" rel=\"nofollow\"><img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/infernews.com\/blog\/wp-content\/plugins\/wp-youtube-lyte\/lyteCache.php?origThumbUrl=https%3A%2F%2Fi.ytimg.com%2Fvi%2FOPGmeFmFyq0%2F0.jpg\" alt=\"Multimodal RAG!? - Pushing the Boundaries of AI\" width=\"853\" height=\"460\" \/><br \/>Watch this video on YouTube<\/a><\/noscript><meta itemprop=\"description\" content=\"If you thought that RAG was only possible with text based documents like boring pdfs, think again! In this video we go over all you need to know about setting up a RAG based application that allows querying and context over photos! We take advantage of lesser known CLIP models and new ChromaDB integrations to walk through an end to end example of setting up your own multimodal retrieval augmented generation flow. Code Available Here: https:\/\/github.com\/ALucek\/multimodal-rag Resources: ChromaDB: https:\/\/www.trychroma.com\/ CLIP OpenAI Blog Post: https:\/\/openai.com\/index\/clip\/ OpenCLIP: https:\/\/github.com\/mlfoundations\/open_clip Fashionpedia Dataset: https:\/\/huggingface.co\/datasets\/detection-datasets\/fashionpedia LangChain multimodal prompts: https:\/\/python.langchain.com\/v0.2\/docs\/how_to\/multimodal_prompts\/ LAION: https:\/\/laion.ai\/blog\/laion-5b\/ Chapters: 00:00 - Intro &amp; Overview 00:54 - Context: What are CLIP Models? 03:21 - MultiModal RAG Diagram 03:40 - Choosing a Dataset: Fashionpedia 05:14 - Loading &amp; Preparing the Dataset 06:55 - Context: OpenCLIP Embedding Models 08:09 - Setting Up Your VectorDB 09:01 - Adding Images to Your VectorDB 10:06 - Test: Querying the VectorDB 12:23 - Context: Choosing a Vision Model 13:39 - Setting Up RAG Prompt, Model, &amp; Chain 16:01 - Formatting Pictures &amp; Prompt for Vision Models 18:47 - Putting Everything Together 19:48 - Executing the Multimodal RAG Flow! 21:34 - Conclusion &amp; Closing Thoughts #multimodal #artificialintelligence #RAG\"><\/div><\/div><div class=\"lL\" style=\"max-width:100%;width:853px;margin:5px auto;\"><\/div><figcaption>\u82f1\u8a9e\u6559\u5b78<\/figcaption><\/figure>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"googlesitekit_rrm_CAowvqSiDA:productID":"","footnotes":""},"categories":[27],"tags":[],"class_list":["post-3088","post","type-post","status-publish","format-standard","hentry","category-paper"],"_links":{"self":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/posts\/3088","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/comments?post=3088"}],"version-history":[{"count":0,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/posts\/3088\/revisions"}],"wp:attachment":[{"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/media?parent=3088"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/categories?post=3088"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/infernews.com\/blog\/wp-json\/wp\/v2\/tags?post=3088"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}