diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index e8b730a42..b703e3893 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -100,6 +100,7 @@ nav:
               - Quickstart: reranking/index.md
               - Cohere Reranker: reranking/cohere.md
               - Linear Combination Reranker: reranking/linear_combination.md
+              - Reciprocal Rank Fusion Reranker: reranking/rrf.md
               - Cross Encoder Reranker: reranking/cross_encoder.md
               - ColBERT Reranker: reranking/colbert.md
               - Jina Reranker: reranking/jina.md
@@ -185,6 +186,7 @@ nav:
           - Quickstart: reranking/index.md
           - Cohere Reranker: reranking/cohere.md
           - Linear Combination Reranker: reranking/linear_combination.md
+          - Reciprocal Rank Fusion Reranker: reranking/rrf.md
           - Cross Encoder Reranker: reranking/cross_encoder.md
           - ColBERT Reranker: reranking/colbert.md
           - Jina Reranker: reranking/jina.md
diff --git a/docs/src/notebooks/lancedb_reranking.ipynb b/docs/src/notebooks/lancedb_reranking.ipynb
index 14adf8af3..0c9346ced 100644
--- a/docs/src/notebooks/lancedb_reranking.ipynb
+++ b/docs/src/notebooks/lancedb_reranking.ipynb
@@ -13,42 +13,33 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
+      "execution_count": 16,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         },
         "id": "6gUUIxGP0n1Z",
-        "outputId": "96e24cff-abfa-46dd-ada5-28b6c15b4f47"
+        "outputId": "0319735d-5986-470b-ad7a-3e6a9a4032f6"
       },
       "outputs": [
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m20.9/20.9 MB\u001b[0m \u001b[31m19.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m227.1/227.1 kB\u001b[0m \u001b[31m16.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.5/4.5 MB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.0/40.0 MB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.8/173.8 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m25.5/25.5 MB\u001b[0m \u001b[31m54.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.2/139.2 kB\u001b[0m \u001b[31m17.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m61.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m73.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.7/82.7 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m177.4/177.4 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.2/139.2 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m16.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.6/75.6 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.4/12.4 MB\u001b[0m \u001b[31m51.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.7/82.7 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
             "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m11.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 kB\u001b[0m \u001b[31m12.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.3/21.3 MB\u001b[0m \u001b[31m75.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25h\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
-            "cudf-cu12 24.4.1 requires pyarrow<15.0.0a0,>=14.0.1, but you have pyarrow 13.0.0 which is incompatible.\u001b[0m\u001b[31m\n",
-            "\u001b[0m"
+            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+            "\u001b[?25h"
           ]
         }
       ],
       "source": [
-        "!pip install lancedb sentence-transformers tantivy pyarrow==13.0.0 cohere -q"
+        "!pip install lancedb sentence-transformers cohere tantivy pyarrow==13.0.0 -q"
       ]
     },
     {
@@ -113,23 +104,23 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "f_qnH-Dfhi9Z",
-        "outputId": "9d41e17e-c994-473d-cf17-93568e025252"
+        "outputId": "1e22e1b1-a821-4ccb-ff30-1b2d6f8b824e"
       },
       "outputs": [
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
-            "--2024-06-26 13:31:34--  https://raw.githubusercontent.com/AyushExel/assets/main/data_qa.csv\n",
+            "--2024-07-24 14:22:47--  https://raw.githubusercontent.com/AyushExel/assets/main/data_qa.csv\n",
             "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n",
             "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n",
             "HTTP request sent, awaiting response... 200 OK\n",
             "Length: 680439 (664K) [text/plain]\n",
             "Saving to: ‘data_qa.csv’\n",
             "\n",
-            "data_qa.csv         100%[===================>] 664.49K  --.-KB/s    in 0.009s  \n",
+            "data_qa.csv         100%[===================>] 664.49K  --.-KB/s    in 0.03s   \n",
             "\n",
-            "2024-06-26 13:31:34 (71.7 MB/s) - ‘data_qa.csv’ saved [680439/680439]\n",
+            "2024-07-24 14:22:48 (19.9 MB/s) - ‘data_qa.csv’ saved [680439/680439]\n",
             "\n"
           ]
         }
@@ -160,19 +151,57 @@
           "height": 580
         },
         "id": "4Bp9Fdhz7QsM",
-        "outputId": "4cb6b384-7991-4911-e5fd-7fbd42487916"
+        "outputId": "fdcbc090-d526-4dcb-98a2-c0d8090f295d"
       },
       "outputs": [
         {
+          "output_type": "execute_result",
           "data": {
-            "application/vnd.google.colaboratory.intrinsic+json": {
-              "summary": "{\n  \"name\": \"data\",\n  \"rows\": 220,\n  \"fields\": [\n    {\n      \"column\": \"Unnamed: 0\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 63,\n        \"min\": 0,\n        \"max\": 219,\n        \"num_unique_values\": 220,\n        \"samples\": [\n          132,\n          148,\n          93\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"query\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 220,\n        \"samples\": [\n          \"What type of examination did scholars perform on ChatGPT, and when was the resulting scholarly paper published?\",\n          \"How do the performance capabilities of the different models compare in evaluating tasks associated with logical reasoning and reading comprehension, specifically noted in tests like LSAT and SAT?\",\n          \"What steps are recommended for users to ensure the responsible use of AI models like Llama 2 in projects or commercial applications?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"context\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 110,\n        \"samples\": [\n          \"Dialogue Turn Baseline + GAtt\\n2 100% 100%\\n4 10% 100%\\n6 0% 100%\\n20 0% 100%\\nTable30: GAttresults. Llama 2-Chat withGAttisabletorefertoattributes100%ofthetime,forupto20\\nturns from our human evaluation. We limited the evaluated attributes to public figures and hobbies.\\nTheattentionnowspansbeyond20turns. Wetestedthemodelabilitytorememberthesystemarguments\\ntroughahumanevaluation. Thearguments(e.g. hobbies,persona)aredefinedduringthefirstmessage,and\\nthen from turn 2 to 20. We explicitly asked the model to refer to them (e.g. 
\\u201cWhat is your favorite hobby?\\u201d,\\n\\u201cWhatisyourname?\\u201d),tomeasurethemulti-turnmemoryabilityof Llama 2-Chat . Wereporttheresults\\ninTable30. EquippedwithGAtt, Llama 2-Chat maintains100%accuracy,alwaysreferringtothedefined\\nattribute,andso,upto20turns(wedidnotextendthehumanevaluationmore,andalltheexampleshad\\nlessthan4048tokensintotalovertheturns). Asacomparison, Llama 2-Chat withoutGAttcannotanymore\\nrefer to the attributes after only few turns: from 100% at turn t+1, to 10% at turn t+3 and then 0%.\\nGAttZero-shotGeneralisation. Wetriedatinferencetimetosetconstrainnotpresentinthetrainingof\\nGAtt. For instance, \\u201canswer in one sentence only\\u201d, for which the model remained consistent, as illustrated in\\nFigure 28.\\nWe applied first GAtt to Llama 1 , which was pretrained with a context length of 2048 tokens and then\\nfine-tuned with 4096 max length. We tested if GAtt works beyond 2048 tokens, and the model arguably\\nmanaged to understand attributes beyond this window. This promising result indicates that GAtt could be\\nadapted as an efficient technique for long context attention.\\nA.3.6 How Far Can Model-Based Evaluation Go?\\nTo measure the robustness of our reward model, we collected a test set of prompts for both helpfulness and\\nsafety,andaskedannotatorstojudgequalityoftheanswersbasedona7pointLikert-scale(thehigherthe\\nbetter)usingtriplereviews. AsillustratedinFigure29(inAppendix),weobservethatourrewardmodels\\noverallarewellcalibratedwithhumanpreference. Notethatthisenablesustousetherewardasapoint-wise\\nmetric, despite being trained with a Pairwise Ranking Loss.\\n0.0% 2.0% 4.0% 6.0% 8.0%\\nDensity0.00.20.40.60.81.0Reward Model ScoreNo Margin\\n0.0% 2.0% 4.0% 6.0% 8.0%\\nDensity0.00.20.40.60.81.0\\nMargin Small\\n0.0% 2.0% 4.0% 6.0% 8.0%\\nDensity0.00.20.40.60.81.0\\nMargin Large\\nFigure 27: Reward model score distribution shift caused by incorporating preference rating based margin\\ninrankingloss. 
Withthemarginterm, weobserveabinary splitpatterninrewarddistribution, especially\\nwith a larger margin.\\n54\",\n          \"Model Size CodeCommonsense\\nReasoningWorld\\nKnowledgeReading\\nComprehensionMath MMLU BBH AGI Eval\\nMPT7B 20.5 57.4 41.0 57.5 4.9 26.8 31.0 23.5\\n30B 28.9 64.9 50.0 64.7 9.1 46.9 38.0 33.8\\nFalcon7B 5.6 56.1 42.8 36.0 4.6 26.2 28.0 21.2\\n40B 15.2 69.2 56.7 65.7 12.6 55.4 37.1 37.0\\nLlama 17B 14.1 60.8 46.2 58.5 6.95 35.1 30.3 23.9\\n13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\\nLlama 27B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\\n70B37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\\nTable3: Overallperformanceongroupedacademicbenchmarkscomparedtoopen-sourcebasemodels.\\n\\u2022Popular Aggregated Benchmarks . We report the overall results for MMLU (5 shot) (Hendrycks\\net al., 2020), Big Bench Hard (BBH) (3 shot) (Suzgun et al., 2022), and AGI Eval (3\\u20135 shot) (Zhong\\net al., 2023). For AGI Eval, we only evaluate on the English tasks and report the average.\\nAs shown in Table 3, Llama 2 models outperform Llama 1 models. In particular, Llama 2 70B improves the\\nresultsonMMLUandBBHby \\u22485and\\u22488points,respectively,comparedto Llama 1 65B.Llama 2 7Band30B\\nmodelsoutperformMPTmodelsofthecorrespondingsizeonallcategoriesbesidescodebenchmarks. Forthe\\nFalcon models, Llama 2 7B and 34B outperform Falcon 7B and 40B models on all categories of benchmarks.\\nAdditionally, Llama 2 70B model outperforms all open-source models.\\nIn addition to open-source models, we also compare Llama 2 70B results to closed-source models. As shown\\nin Table 4, Llama 2 70B is close to GPT-3.5 (OpenAI, 2023) on MMLU and GSM8K, but there is a significant\\ngaponcodingbenchmarks. Llama 2 70BresultsareonparorbetterthanPaLM(540B)(Chowdheryetal.,\\n2022)onalmostallbenchmarks. 
Thereisstillalargegapinperformancebetween Llama 2 70BandGPT-4\\nand PaLM-2-L.\\nWe also analysed the potential data contamination and share the details in Section A.6.\",\n          \"Figure 1: Helpfulness human evaluation results for Llama\\n2-Chatcomparedtootheropen-sourceandclosed-source\\nmodels. Human raters compared model generations on ~4k\\npromptsconsistingofbothsingleandmulti-turnprompts.\\nThe95%confidenceintervalsforthisevaluationarebetween\\n1%and2%. MoredetailsinSection3.4.2. Whilereviewing\\nthese results, it is important to note that human evaluations\\ncanbenoisyduetolimitationsofthepromptset,subjectivity\\nof the review guidelines, subjectivity of individual raters,\\nand the inherent difficulty of comparing generations.\\nFigure 2: Win-rate % for helpfulness and\\nsafety between commercial-licensed base-\\nlines and Llama 2-Chat , according to GPT-\\n4. Tocomplementthehumanevaluation,we\\nused a more capable model, not subject to\\nourownguidance. Greenareaindicatesour\\nmodelisbetteraccordingtoGPT-4. Toremove\\nties, we used win/ (win+loss). The orders in\\nwhichthemodelresponsesarepresentedto\\nGPT-4arerandomlyswappedtoalleviatebias.\\n1 Introduction\\nLarge Language Models (LLMs) have shown great promise as highly capable AI assistants that excel in\\ncomplex reasoning tasks requiring expert knowledge across a wide range of fields, including in specialized\\ndomains such as programming and creative writing. They enable interaction with humans through intuitive\\nchat interfaces, which has led to rapid and widespread adoption among the general public.\\nThecapabilitiesofLLMsareremarkableconsideringtheseeminglystraightforwardnatureofthetraining\\nmethodology. 
Auto-regressivetransformersarepretrainedonanextensivecorpusofself-superviseddata,\\nfollowed by alignment with human preferences via techniques such as Reinforcement Learning with Human\\nFeedback(RLHF).Althoughthetrainingmethodologyissimple,highcomputationalrequirementshave\\nlimited the development of LLMs to a few players. There have been public releases of pretrained LLMs\\n(such as BLOOM (Scao et al., 2022), LLaMa-1 (Touvron et al., 2023), and Falcon (Penedo et al., 2023)) that\\nmatch the performance of closed pretrained competitors like GPT-3 (Brown et al., 2020) and Chinchilla\\n(Hoffmann et al., 2022), but none of these models are suitable substitutes for closed \\u201cproduct\\u201d LLMs, such\\nasChatGPT,BARD,andClaude. TheseclosedproductLLMsareheavilyfine-tunedtoalignwithhuman\\npreferences, which greatly enhances their usability and safety. This step can require significant costs in\\ncomputeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin\\nthe community to advance AI alignment research.\\nIn this work, we develop and release Llama 2, a family of pretrained and fine-tuned LLMs, Llama 2 and\\nLlama 2-Chat , at scales up to 70B parameters. On the series of helpfulness and safety benchmarks we tested,\\nLlama 2-Chat models generally perform better than existing open-source models. They also appear to\\nbe on par with some of the closed-source models, at least on the human evaluations we performed (see\\nFigures1and3). Wehavetakenmeasurestoincreasethesafetyofthesemodels,usingsafety-specificdata\\nannotation and tuning, as well as conducting red-teaming and employing iterative evaluations. Additionally,\\nthispapercontributesathoroughdescriptionofourfine-tuningmethodologyandapproachtoimproving\\nLLM safety. 
We hope that this openness will enable the community to reproduce fine-tuned LLMs and\\ncontinue to improve the safety of those models, paving the way for more responsible development of LLMs.\\nWealsosharenovelobservationswemadeduringthedevelopmentof Llama 2 andLlama 2-Chat ,suchas\\nthe emergence of tool usage and temporal organization of knowledge.\\n3\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 220,\n        \"samples\": [\n          \"Scholars performed a diagnostic analysis to investigate the AI ethics associated with ChatGPT. Their findings were compiled into a research paper that became accessible as a preprint on arXiv in January 2023.\",\n          \"The MPT 30B model demonstrates considerable proficiency in logical reasoning and reading comprehension tasks, scoring highly on LSAT-LR, LSAT-RC, and SAT-en tests compared to its peers, such as Falcon 40B and Llama 17B. This is indicative of its advanced analytical and comprehension abilities. Conversely, while Falcon 40B shows strengths in LSAT-LR with a score second only to MPT 30B, it trails in SAT-en performance. This variability underscores the diverse capabilities of models based on their structural design and training paradigms.\",\n          \"Users intending to deploy models like Llama 2 are advised to strictly adhere to guidelines laid out in the Responsible Use Guide. This includes employing enhanced safety measures at both the input and output stages of model interaction, as well as carefully tuning the model according to specific use-case requirements to prevent any potential misuse. Additionally, users must comply with the terms set in the Acceptable Use Policy, ensuring their applications do not contravene applicable laws, regulations, and ethical standards. 
Leveraging provided code examples can further assist developers in replicating the necessary safety protocols and maintaining ethical integrity in their applications.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}",
-              "type": "dataframe",
-              "variable_name": "data"
-            },
+            "text/plain": [
+              "     Unnamed: 0                                              query  \\\n",
+              "0             0  How does the performance of Llama 2-Chat model...   \n",
+              "1             1  What benefits does the enhancement and safety ...   \n",
+              "2             2  How does one ensure the reliability and robust...   \n",
+              "3             3  What methodologies are employed to align machi...   \n",
+              "4             4  What are some of the primary insights gained f...   \n",
+              "..          ...                                                ...   \n",
+              "215         215  How are the terms 'clean', 'not clean', 'dirty...   \n",
+              "216         216  How does the size of the model influence the a...   \n",
+              "217         217  What impact does the model contamination have ...   \n",
+              "218         218  What are the different sizes and types availab...   \n",
+              "219         219  Could you discuss the sustainability measures ...   \n",
+              "\n",
+              "                                               context  \\\n",
+              "0    Llama 2 : Open Foundation and Fine-Tuned Chat ...   \n",
+              "1    Llama 2 : Open Foundation and Fine-Tuned Chat ...   \n",
+              "2    Contents\\n1 Introduction 3\\n2 Pretraining 5\\n2...   \n",
+              "3    Contents\\n1 Introduction 3\\n2 Pretraining 5\\n2...   \n",
+              "4    . . . . . . . . 23\\n4.3 Red Teaming . . . . . ...   \n",
+              "..                                                 ...   \n",
+              "215  Giventhe\\nembarrassinglyparallelnatureofthetas...   \n",
+              "216  Dataset Model Subset Type Avg. Contam. % n ¯X ...   \n",
+              "217  Dataset Model Subset Type Avg. Contam. % n ¯X ...   \n",
+              "218  A.7 Model Card\\nTable 52 presents a model card...   \n",
+              "219  A.7 Model Card\\nTable 52 presents a model card...   \n",
+              "\n",
+              "                                                answer  \n",
+              "0    Llama 2-Chat models have shown to exceed the p...  \n",
+              "1    The safety and enhancement measures implemente...  \n",
+              "2    In the initial steps of model development, the...  \n",
+              "3    Machine learning models can be aligned with de...  \n",
+              "4    The key insights gained from evaluating platfo...  \n",
+              "..                                                 ...  \n",
+              "215  In the discussed dataset analysis, samples are...  \n",
+              "216  The size of the model significantly influences...  \n",
+              "217  Model contamination affects various contaminat...  \n",
+              "218  Llama 2 is available in three distinct paramet...  \n",
+              "219  Throughout the training of Llama 2, which invo...  \n",
+              "\n",
+              "[220 rows x 4 columns]"
+            ],
             "text/html": [
               "\n",
-              "  <div id=\"df-5416cb69-2ef7-4c8e-b9d7-d0e3f9a5b07c\" class=\"colab-df-container\">\n",
+              "  <div id=\"df-7f0cc1a4-3f03-452b-a274-5569309539c0\" class=\"colab-df-container\">\n",
               "    <div>\n",
               "<style scoped>\n",
               "    .dataframe tbody tr th:only-of-type {\n",
@@ -282,7 +311,7 @@
               "    <div class=\"colab-df-buttons\">\n",
               "\n",
               "  <div class=\"colab-df-container\">\n",
-              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5416cb69-2ef7-4c8e-b9d7-d0e3f9a5b07c')\"\n",
+              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7f0cc1a4-3f03-452b-a274-5569309539c0')\"\n",
               "            title=\"Convert this dataframe to an interactive table.\"\n",
               "            style=\"display:none;\">\n",
               "\n",
@@ -334,12 +363,12 @@
               "\n",
               "    <script>\n",
               "      const buttonEl =\n",
-              "        document.querySelector('#df-5416cb69-2ef7-4c8e-b9d7-d0e3f9a5b07c button.colab-df-convert');\n",
+              "        document.querySelector('#df-7f0cc1a4-3f03-452b-a274-5569309539c0 button.colab-df-convert');\n",
               "      buttonEl.style.display =\n",
               "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
               "      async function convertToInteractive(key) {\n",
-              "        const element = document.querySelector('#df-5416cb69-2ef7-4c8e-b9d7-d0e3f9a5b07c');\n",
+              "        const element = document.querySelector('#df-7f0cc1a4-3f03-452b-a274-5569309539c0');\n",
               "        const dataTable =\n",
               "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
               "                                                    [key], {});\n",
@@ -359,8 +388,8 @@
               "  </div>\n",
               "\n",
               "\n",
-              "<div id=\"df-19164947-38ec-4b89-bcac-4c485ea11f8d\">\n",
-              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-19164947-38ec-4b89-bcac-4c485ea11f8d')\"\n",
+              "<div id=\"df-dfb13a8d-ae02-4de8-bb5e-8000c749f494\">\n",
+              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-dfb13a8d-ae02-4de8-bb5e-8000c749f494')\"\n",
               "            title=\"Suggest charts\"\n",
               "            style=\"display:none;\">\n",
               "\n",
@@ -479,14 +508,14 @@
               "    }\n",
               "    (() => {\n",
               "      let quickchartButtonEl =\n",
-              "        document.querySelector('#df-19164947-38ec-4b89-bcac-4c485ea11f8d button');\n",
+              "        document.querySelector('#df-dfb13a8d-ae02-4de8-bb5e-8000c749f494 button');\n",
               "      quickchartButtonEl.style.display =\n",
               "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "    })();\n",
               "  </script>\n",
               "</div>\n",
               "\n",
-              "  <div id=\"id_64f81a80-8121-47e6-9fad-d9652b9cd6db\">\n",
+              "  <div id=\"id_9ea37551-f803-4082-aeab-8b7746977268\">\n",
               "    <style>\n",
               "      .colab-df-generate {\n",
               "        background-color: #E8F0FE;\n",
@@ -530,7 +559,7 @@
               "    <script>\n",
               "      (() => {\n",
               "      const buttonEl =\n",
-              "        document.querySelector('#id_64f81a80-8121-47e6-9fad-d9652b9cd6db button.colab-df-generate');\n",
+              "        document.querySelector('#id_9ea37551-f803-4082-aeab-8b7746977268 button.colab-df-generate');\n",
               "      buttonEl.style.display =\n",
               "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
               "\n",
@@ -544,52 +573,14 @@
               "    </div>\n",
               "  </div>\n"
             ],
-            "text/plain": [
-              "     Unnamed: 0                                              query  \\\n",
-              "0             0  How does the performance of Llama 2-Chat model...   \n",
-              "1             1  What benefits does the enhancement and safety ...   \n",
-              "2             2  How does one ensure the reliability and robust...   \n",
-              "3             3  What methodologies are employed to align machi...   \n",
-              "4             4  What are some of the primary insights gained f...   \n",
-              "..          ...                                                ...   \n",
-              "215         215  How are the terms 'clean', 'not clean', 'dirty...   \n",
-              "216         216  How does the size of the model influence the a...   \n",
-              "217         217  What impact does the model contamination have ...   \n",
-              "218         218  What are the different sizes and types availab...   \n",
-              "219         219  Could you discuss the sustainability measures ...   \n",
-              "\n",
-              "                                               context  \\\n",
-              "0    Llama 2 : Open Foundation and Fine-Tuned Chat ...   \n",
-              "1    Llama 2 : Open Foundation and Fine-Tuned Chat ...   \n",
-              "2    Contents\\n1 Introduction 3\\n2 Pretraining 5\\n2...   \n",
-              "3    Contents\\n1 Introduction 3\\n2 Pretraining 5\\n2...   \n",
-              "4    . . . . . . . . 23\\n4.3 Red Teaming . . . . . ...   \n",
-              "..                                                 ...   \n",
-              "215  Giventhe\\nembarrassinglyparallelnatureofthetas...   \n",
-              "216  Dataset Model Subset Type Avg. Contam. % n ¯X ...   \n",
-              "217  Dataset Model Subset Type Avg. Contam. % n ¯X ...   \n",
-              "218  A.7 Model Card\\nTable 52 presents a model card...   \n",
-              "219  A.7 Model Card\\nTable 52 presents a model card...   \n",
-              "\n",
-              "                                                answer  \n",
-              "0    Llama 2-Chat models have shown to exceed the p...  \n",
-              "1    The safety and enhancement measures implemente...  \n",
-              "2    In the initial steps of model development, the...  \n",
-              "3    Machine learning models can be aligned with de...  \n",
-              "4    The key insights gained from evaluating platfo...  \n",
-              "..                                                 ...  \n",
-              "215  In the discussed dataset analysis, samples are...  \n",
-              "216  The size of the model significantly influences...  \n",
-              "217  Model contamination affects various contaminat...  \n",
-              "218  Llama 2 is available in three distinct paramet...  \n",
-              "219  Throughout the training of Llama 2, which invo...  \n",
-              "\n",
-              "[220 rows x 4 columns]"
-            ]
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "dataframe",
+              "variable_name": "data",
+              "summary": "{\n  \"name\": \"data\",\n  \"rows\": 220,\n  \"fields\": [\n    {\n      \"column\": \"Unnamed: 0\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 63,\n        \"min\": 0,\n        \"max\": 219,\n        \"num_unique_values\": 220,\n        \"samples\": [\n          132,\n          148,\n          93\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"query\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 220,\n        \"samples\": [\n          \"What type of examination did scholars perform on ChatGPT, and when was the resulting scholarly paper published?\",\n          \"How do the performance capabilities of the different models compare in evaluating tasks associated with logical reasoning and reading comprehension, specifically noted in tests like LSAT and SAT?\",\n          \"What steps are recommended for users to ensure the responsible use of AI models like Llama 2 in projects or commercial applications?\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"context\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 110,\n        \"samples\": [\n          \"Dialogue Turn Baseline + GAtt\\n2 100% 100%\\n4 10% 100%\\n6 0% 100%\\n20 0% 100%\\nTable30: GAttresults. Llama 2-Chat withGAttisabletorefertoattributes100%ofthetime,forupto20\\nturns from our human evaluation. We limited the evaluated attributes to public figures and hobbies.\\nTheattentionnowspansbeyond20turns. Wetestedthemodelabilitytorememberthesystemarguments\\ntroughahumanevaluation. Thearguments(e.g. hobbies,persona)aredefinedduringthefirstmessage,and\\nthen from turn 2 to 20. We explicitly asked the model to refer to them (e.g. 
\\u201cWhat is your favorite hobby?\\u201d,\\n\\u201cWhatisyourname?\\u201d),tomeasurethemulti-turnmemoryabilityof Llama 2-Chat . Wereporttheresults\\ninTable30. EquippedwithGAtt, Llama 2-Chat maintains100%accuracy,alwaysreferringtothedefined\\nattribute,andso,upto20turns(wedidnotextendthehumanevaluationmore,andalltheexampleshad\\nlessthan4048tokensintotalovertheturns). Asacomparison, Llama 2-Chat withoutGAttcannotanymore\\nrefer to the attributes after only few turns: from 100% at turn t+1, to 10% at turn t+3 and then 0%.\\nGAttZero-shotGeneralisation. Wetriedatinferencetimetosetconstrainnotpresentinthetrainingof\\nGAtt. For instance, \\u201canswer in one sentence only\\u201d, for which the model remained consistent, as illustrated in\\nFigure 28.\\nWe applied first GAtt to Llama 1 , which was pretrained with a context length of 2048 tokens and then\\nfine-tuned with 4096 max length. We tested if GAtt works beyond 2048 tokens, and the model arguably\\nmanaged to understand attributes beyond this window. This promising result indicates that GAtt could be\\nadapted as an efficient technique for long context attention.\\nA.3.6 How Far Can Model-Based Evaluation Go?\\nTo measure the robustness of our reward model, we collected a test set of prompts for both helpfulness and\\nsafety,andaskedannotatorstojudgequalityoftheanswersbasedona7pointLikert-scale(thehigherthe\\nbetter)usingtriplereviews. AsillustratedinFigure29(inAppendix),weobservethatourrewardmodels\\noverallarewellcalibratedwithhumanpreference. Notethatthisenablesustousetherewardasapoint-wise\\nmetric, despite being trained with a Pairwise Ranking Loss.\\n0.0% 2.0% 4.0% 6.0% 8.0%\\nDensity0.00.20.40.60.81.0Reward Model ScoreNo Margin\\n0.0% 2.0% 4.0% 6.0% 8.0%\\nDensity0.00.20.40.60.81.0\\nMargin Small\\n0.0% 2.0% 4.0% 6.0% 8.0%\\nDensity0.00.20.40.60.81.0\\nMargin Large\\nFigure 27: Reward model score distribution shift caused by incorporating preference rating based margin\\ninrankingloss. 
Withthemarginterm, weobserveabinary splitpatterninrewarddistribution, especially\\nwith a larger margin.\\n54\",\n          \"Model Size CodeCommonsense\\nReasoningWorld\\nKnowledgeReading\\nComprehensionMath MMLU BBH AGI Eval\\nMPT7B 20.5 57.4 41.0 57.5 4.9 26.8 31.0 23.5\\n30B 28.9 64.9 50.0 64.7 9.1 46.9 38.0 33.8\\nFalcon7B 5.6 56.1 42.8 36.0 4.6 26.2 28.0 21.2\\n40B 15.2 69.2 56.7 65.7 12.6 55.4 37.1 37.0\\nLlama 17B 14.1 60.8 46.2 58.5 6.95 35.1 30.3 23.9\\n13B 18.9 66.1 52.6 62.3 10.9 46.9 37.0 33.9\\n33B 26.0 70.0 58.4 67.6 21.4 57.8 39.8 41.7\\n65B 30.7 70.7 60.5 68.6 30.8 63.4 43.5 47.6\\nLlama 27B 16.8 63.9 48.9 61.3 14.6 45.3 32.6 29.3\\n13B 24.5 66.9 55.4 65.8 28.7 54.8 39.4 39.1\\n34B 27.8 69.9 58.7 68.0 24.2 62.6 44.1 43.4\\n70B37.5 71.9 63.6 69.4 35.2 68.9 51.2 54.2\\nTable3: Overallperformanceongroupedacademicbenchmarkscomparedtoopen-sourcebasemodels.\\n\\u2022Popular Aggregated Benchmarks . We report the overall results for MMLU (5 shot) (Hendrycks\\net al., 2020), Big Bench Hard (BBH) (3 shot) (Suzgun et al., 2022), and AGI Eval (3\\u20135 shot) (Zhong\\net al., 2023). For AGI Eval, we only evaluate on the English tasks and report the average.\\nAs shown in Table 3, Llama 2 models outperform Llama 1 models. In particular, Llama 2 70B improves the\\nresultsonMMLUandBBHby \\u22485and\\u22488points,respectively,comparedto Llama 1 65B.Llama 2 7Band30B\\nmodelsoutperformMPTmodelsofthecorrespondingsizeonallcategoriesbesidescodebenchmarks. Forthe\\nFalcon models, Llama 2 7B and 34B outperform Falcon 7B and 40B models on all categories of benchmarks.\\nAdditionally, Llama 2 70B model outperforms all open-source models.\\nIn addition to open-source models, we also compare Llama 2 70B results to closed-source models. As shown\\nin Table 4, Llama 2 70B is close to GPT-3.5 (OpenAI, 2023) on MMLU and GSM8K, but there is a significant\\ngaponcodingbenchmarks. Llama 2 70BresultsareonparorbetterthanPaLM(540B)(Chowdheryetal.,\\n2022)onalmostallbenchmarks. 
Thereisstillalargegapinperformancebetween Llama 2 70BandGPT-4\\nand PaLM-2-L.\\nWe also analysed the potential data contamination and share the details in Section A.6.\",\n          \"Figure 1: Helpfulness human evaluation results for Llama\\n2-Chatcomparedtootheropen-sourceandclosed-source\\nmodels. Human raters compared model generations on ~4k\\npromptsconsistingofbothsingleandmulti-turnprompts.\\nThe95%confidenceintervalsforthisevaluationarebetween\\n1%and2%. MoredetailsinSection3.4.2. Whilereviewing\\nthese results, it is important to note that human evaluations\\ncanbenoisyduetolimitationsofthepromptset,subjectivity\\nof the review guidelines, subjectivity of individual raters,\\nand the inherent difficulty of comparing generations.\\nFigure 2: Win-rate % for helpfulness and\\nsafety between commercial-licensed base-\\nlines and Llama 2-Chat , according to GPT-\\n4. Tocomplementthehumanevaluation,we\\nused a more capable model, not subject to\\nourownguidance. Greenareaindicatesour\\nmodelisbetteraccordingtoGPT-4. Toremove\\nties, we used win/ (win+loss). The orders in\\nwhichthemodelresponsesarepresentedto\\nGPT-4arerandomlyswappedtoalleviatebias.\\n1 Introduction\\nLarge Language Models (LLMs) have shown great promise as highly capable AI assistants that excel in\\ncomplex reasoning tasks requiring expert knowledge across a wide range of fields, including in specialized\\ndomains such as programming and creative writing. They enable interaction with humans through intuitive\\nchat interfaces, which has led to rapid and widespread adoption among the general public.\\nThecapabilitiesofLLMsareremarkableconsideringtheseeminglystraightforwardnatureofthetraining\\nmethodology. 
Auto-regressivetransformersarepretrainedonanextensivecorpusofself-superviseddata,\\nfollowed by alignment with human preferences via techniques such as Reinforcement Learning with Human\\nFeedback(RLHF).Althoughthetrainingmethodologyissimple,highcomputationalrequirementshave\\nlimited the development of LLMs to a few players. There have been public releases of pretrained LLMs\\n(such as BLOOM (Scao et al., 2022), LLaMa-1 (Touvron et al., 2023), and Falcon (Penedo et al., 2023)) that\\nmatch the performance of closed pretrained competitors like GPT-3 (Brown et al., 2020) and Chinchilla\\n(Hoffmann et al., 2022), but none of these models are suitable substitutes for closed \\u201cproduct\\u201d LLMs, such\\nasChatGPT,BARD,andClaude. TheseclosedproductLLMsareheavilyfine-tunedtoalignwithhuman\\npreferences, which greatly enhances their usability and safety. This step can require significant costs in\\ncomputeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin\\nthe community to advance AI alignment research.\\nIn this work, we develop and release Llama 2, a family of pretrained and fine-tuned LLMs, Llama 2 and\\nLlama 2-Chat , at scales up to 70B parameters. On the series of helpfulness and safety benchmarks we tested,\\nLlama 2-Chat models generally perform better than existing open-source models. They also appear to\\nbe on par with some of the closed-source models, at least on the human evaluations we performed (see\\nFigures1and3). Wehavetakenmeasurestoincreasethesafetyofthesemodels,usingsafety-specificdata\\nannotation and tuning, as well as conducting red-teaming and employing iterative evaluations. Additionally,\\nthispapercontributesathoroughdescriptionofourfine-tuningmethodologyandapproachtoimproving\\nLLM safety. 
We hope that this openness will enable the community to reproduce fine-tuned LLMs and\\ncontinue to improve the safety of those models, paving the way for more responsible development of LLMs.\\nWealsosharenovelobservationswemadeduringthedevelopmentof Llama 2 andLlama 2-Chat ,suchas\\nthe emergence of tool usage and temporal organization of knowledge.\\n3\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"answer\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 220,\n        \"samples\": [\n          \"Scholars performed a diagnostic analysis to investigate the AI ethics associated with ChatGPT. Their findings were compiled into a research paper that became accessible as a preprint on arXiv in January 2023.\",\n          \"The MPT 30B model demonstrates considerable proficiency in logical reasoning and reading comprehension tasks, scoring highly on LSAT-LR, LSAT-RC, and SAT-en tests compared to its peers, such as Falcon 40B and Llama 17B. This is indicative of its advanced analytical and comprehension abilities. Conversely, while Falcon 40B shows strengths in LSAT-LR with a score second only to MPT 30B, it trails in SAT-en performance. This variability underscores the diverse capabilities of models based on their structural design and training paradigms.\",\n          \"Users intending to deploy models like Llama 2 are advised to strictly adhere to guidelines laid out in the Responsible Use Guide. This includes employing enhanced safety measures at both the input and output stages of model interaction, as well as carefully tuning the model according to specific use-case requirements to prevent any potential misuse. Additionally, users must comply with the terms set in the Acceptable Use Policy, ensuring their applications do not contravene applicable laws, regulations, and ethical standards. 
Leveraging provided code examples can further assist developers in replicating the necessary safety protocols and maintaining ethical integrity in their applications.\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
+            }
           },
-          "execution_count": 4,
           "metadata": {},
-          "output_type": "execute_result"
+          "execution_count": 4
         }
       ],
       "source": [
@@ -618,81 +609,81 @@
           "base_uri": "https://localhost:8080/",
           "height": 336,
           "referenced_widgets": [
-            "91f35d9568ec46459fab4efe6a95e734",
-            "c95044413a9b439fbfcd0632a70944a8",
-            "6c7346ac20744690956df20d3d03680c",
-            "1b4311d56bb04fedaa7b64059401d965",
-            "c98336949ffc4ceda45c2c3dcad2a6a1",
-            "6c2c03fc41af44339e845fe0a825bea2",
-            "e7b07a0c8fe14726a611212a8a239baf",
-            "528bac44c27c4d63ba507abf5525d1b8",
-            "7f32d9d86346453292783c856be99825",
-            "9f17cef9530f40d5932107809403efa0",
-            "494400f3134c4fb4907d607beedeaf81",
-            "1853dce77eb94f879ffdfd78de2a0efa",
-            "df5d4ecafccf45a8a1584664a6f723aa",
-            "a165638ba18a48deb8db106a1b5b2290",
-            "813dcc840ffc463593fcc426b5496295",
-            "33d9a142bbf44fa4a90855e27520bb16",
-            "bb4dba99a16949f1ae327eccc219b6de",
-            "5f4cda0c0861409a951291cd6c3366f1",
-            "f267da4ad02349e79c9ba7a0f325b540",
-            "644944bb940046adb953b22f11fcd7b4",
-            "4e3fa16f33204bbfb156114dabb28c04",
-            "054df2d05c7e46638cb31006d67fd367",
-            "81b9ad10745a4a06a11cc26377154214",
-            "f2486513d4b54ddca27ee691e5346a46",
-            "57dbc355b039461684573b7c7e07af3a",
-            "f5400cefb2154be9a4a659acce6edd5e",
-            "febf788f5ad941c5aca7be98645936ad",
-            "de37cca307d1471f9342302b7f3429a9",
-            "c3982bc50b6e4e84b0ad10e5ba523e5d",
-            "2bb82aec434c4d53a36090c9ec6323ae",
-            "5b3f477e3b1d4e2f8a9e254401c6c954",
-            "36f2266eae21439ab778a816044a879a",
-            "fab2133c1dcb4a329ee331686233c4fd",
-            "76636be73f4d42e39719dfa089c4716b",
-            "c74262510b754e3b846751a7216c7dbf",
-            "92c398c9e36542f8b61aa8795facb421",
-            "cd67a7a4ce9a4774b692039c517a882d",
-            "280a79e1521a4cc0924f065f2cb5bd87",
-            "e1733e1b9d49420da2fb5397613cd9f9",
-            "090c494d7e954fa2be0e9fb91f30d9b8",
-            "df7b8f33120c4e10990e17418f8941cf",
-            "6565dc6bca3348de941027ea2573d5cf",
-            "970c0d55bbde4f92ba30900281abe1b3",
-            "eebec9aee091453c8bfbe48539a240ea",
-            "ced1b556ccd94eccbf3edd1aefc7df40",
-            "deef03a9155c45f78712712998cf660d",
-            "48e2a8f80d8c418792734c88e797e982",
-            "fcaa21fb513846ecabf64b88aafe2cd0",
-            "f95a2f8628584be4b9d35b7cfe125939",
-            "ab132a8c917e4151ae2891616b79ee07",
-            "3cc4457279334a2da1fccfbb99b27e9a",
-            "f22c56f6666a462e833bb92fff69f00b",
-            "283857c87a674aa2b42c7dcf858679ec",
-            "9bf91263f8d64ce18aa0275edd09644d",
-            "3280f3b54f1c40b6ad66fac4d4403031",
-            "c5358677106d4c2aa30991b11bdb8ea4",
-            "c7f4de8f49884cd585a95af2aaaa470d",
-            "7a5c63dfc9d74a218c8269a72439e57a",
-            "d49b8015f4a2463885ae6266c7af2f65",
-            "35f968dcb12f49f582bf353605126a81",
-            "4ad87837299b4f12ae33e54b7a9be457",
-            "b5c20304a60a40d5b134c896bc903f6f",
-            "99955dde5d33494684403ac0e8ff119c",
-            "1dd100c461d949ff9b2fb448a9f321f8",
-            "7630b6b8ba764c98a3f1a986c6cb91e4",
-            "d618cacb4426451ab3c32be08f7ad07e"
+            "7d93a81fcd5f4f9c8952396a9f72be02",
+            "ae8bc663ba0e44ddb830a5b50b2e92f8",
+            "de0d2e4cb7b346a4ac0b55b095caff98",
+            "52441756f18a4a52a2a6c839c4ff892c",
+            "d143d1522f564b78a24e92bd0290f4b5",
+            "a4decda69da348dfa0f0ec38c5ceb9d6",
+            "1b95c82d481b4159bf7be3aefa4c0258",
+            "7e49893b47174c138237e9a29584c0d0",
+            "e36ba6906dd74973a48dba81ebb1f799",
+            "f3e98d664e2441ad9198ee0ee947b27e",
+            "fcf4a6a5fd3a45908a7727c4abefde44",
+            "dec5401f1de14ba690c3e829fe4fe0ae",
+            "19f5d1e903ea4f4faf62c179e7669234",
+            "74ad6e3b67554d33ac6422de1e3e475a",
+            "59915c4b816e420b942e6b0996c279f1",
+            "f5e61067348c4b01b3c0ba09e5a52f87",
+            "4a5fdf23f24c4f21896705734bc1e031",
+            "3c419d422bd34cec867538615193558a",
+            "4a6a4f7be2e44ade93e46702f037ebc1",
+            "c460e5a56c624f43baaed1fff6aa72e2",
+            "8a4e12a8f5da4f9990498c562a94116c",
+            "c2253beb48cd4ce7a2eb3f1ce130f520",
+            "1a1832edb11e4363af5a0c55ba013e91",
+            "5e0cd1901ad444d28336c69d75b84e91",
+            "1797c51500d0425496e390ebcf9729ac",
+            "4993c83bb4eb429797bb9928a7c86547",
+            "8f2f21ce257f457e962624bba5c3ca71",
+            "c64af78fb7424710ac50c20293719123",
+            "e2189d31c33f4d02983681d814d7ec28",
+            "2c6fcec076ae49fb9def49c54169e0e3",
+            "40782ef0cbfc4b24839015796c303869",
+            "674ddce0b8cc4fad93c863469fb7496c",
+            "2115bc215a574a4c90accc8e643ccc5c",
+            "4a3f41c780d940eda128cc1efe82cb46",
+            "b18a507ffc8e4b0ba825fdecac8980e1",
+            "796ba03995264bddad91fcc999e0f073",
+            "083b7d262f8945cc9b8fe928dbf9cee1",
+            "aeddd98812264d939c86b026d63682f0",
+            "ac96917d1e2341a5acf2b0236344b57d",
+            "20b2149ba0984c23ad726cc72f21ae6d",
+            "640859301f01490eb5041bd73667bdd0",
+            "be70a0355abf4bed9c8b887917721879",
+            "5bbbad4117bd4949adf34d58bb29d312",
+            "e640e77458964a1ba16654655bbe4ee1",
+            "31f10c25646949868814da02b21c8de2",
+            "eac57eb6d624437b9c04cddcdf1f53f4",
+            "0fb15815e4724685904aada95b00b1b5",
+            "cb45a898fb1745ff94bce928c64bfab5",
+            "bdacf681835145dda8867513e301f403",
+            "9b49cbad496d4e9ab5af69668e842ba6",
+            "6d1a94ae94e548058d34a5df5dbd563d",
+            "455abd9dfd614a53b4fa35f55542a9e9",
+            "878ca6976a414ba2a43e86dbf75ce45c",
+            "6133892fd2da4778bc0cc08667cb2673",
+            "4f30757e8c7f4b4b925d249a5369ed51",
+            "7c8689c8604c4626b6092d33d80ff6cd",
+            "f21f2f75e7ea42d5a6979ee722e80fe2",
+            "019aa571fc8f4f40afd65f728327e0b4",
+            "9b2206d9cd6d415bb4ce0d806aaaf473",
+            "50f3506a5bbc4a529d20b3f85fa78260",
+            "a03c9bd4a5e74a6182bbe774411899a0",
+            "2da0ec344438406ca44fa75e6523867b",
+            "4164c24fbe004abbbb7312da0682c8fc",
+            "db17d3f6979d4ba2b8184603f226e910",
+            "19d1e2c150f44fa08941117a42a2505b",
+            "d23e52576ac74f3a9962ff68f786343b"
           ]
         },
         "id": "B_g5pIkBQ66h",
-        "outputId": "c18d729f-de1e-4970-eb1a-a590ef2a9658"
+        "outputId": "ff31e6b0-745c-4e90-9da7-9e8c7b3c9b6f"
       },
       "outputs": [
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
             "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n",
             "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
@@ -703,88 +694,88 @@
           ]
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "91f35d9568ec46459fab4efe6a95e734",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "7d93a81fcd5f4f9c8952396a9f72be02"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "1853dce77eb94f879ffdfd78de2a0efa",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "dec5401f1de14ba690c3e829fe4fe0ae"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "81b9ad10745a4a06a11cc26377154214",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "1a1832edb11e4363af5a0c55ba013e91"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "76636be73f4d42e39719dfa089c4716b",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "4a3f41c780d940eda128cc1efe82cb46"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "ced1b556ccd94eccbf3edd1aefc7df40",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "31f10c25646949868814da02b21c8de2"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "c5358677106d4c2aa30991b11bdb8ea4",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "7c8689c8604c4626b6092d33d80ff6cd"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         }
       ],
       "source": [
@@ -824,12 +815,12 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "NsL0jWp-Sr1h",
-        "outputId": "6b2a0c84-909c-4c08-e71b-51531819353f"
+        "outputId": "b19c3c19-a192-4adf-cbe1-df24be0b21da"
       },
       "outputs": [
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
             "[{'text': 'Llama 2 : Open Foundation and Fine-Tuned Chat Models\\nHugo Touvron∗Louis Martin†Kevin Stone†\\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev\\nPunit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich\\nYinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra\\nIgor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi\\nAlan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\\nRoss Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\\nAngela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\\nSergey Edunov Thomas Scialom∗\\nGenAI, Meta\\nAbstract\\nIn this work, we develop and release Llama 2, a collection of pretrained and fine-tuned\\nlarge language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\\nOur fine-tuned LLMs, called Llama 2-Chat , are optimized for dialogue use cases. Our\\nmodels outperform open-source chat models on most benchmarks we tested, and based on\\nourhumanevaluationsforhelpfulnessandsafety,maybeasuitablesubstituteforclosed-\\nsource models. 
We provide a detailed description of our approach to fine-tuning and safety\\nimprovements of Llama 2-Chat in order to enable the community to build on our work and\\ncontribute to the responsible development of LLMs.\\n∗Equal contribution, corresponding authors: {tscialom, htouvron}@meta.com\\n†Second author\\nContributions for all the authors can be found in Section A.1.arXiv:2307.09288v2  [cs.CL]  19 Jul 2023'}, {'text': 'Contents\\n1 Introduction 3\\n2 Pretraining 5\\n2.1 Pretraining Data . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5\\n2.2 Training Details . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 5\\n2.3 Llama 2 Pretrained Model Evaluation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 7\\n3 Fine-tuning 8\\n3.1 Supervised Fine-Tuning (SFT) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 9\\n3.2 Reinforcement Learning with Human Feedback (RLHF) . . . . . . . . . . . . . . . . . . . . . 9\\n3.3 System Message for Multi-Turn Consistency . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 16\\n3.4 RLHF Results . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 17\\n4 Safety 20\\n4.1 Safety in Pretraining . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 20\\n4.2 Safety Fine-Tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 23\\n4.3 Red Teaming . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28\\n4.4 Safety Evaluation of Llama 2-Chat . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .'}, {'text': '. . . . . . . . 23\\n4.3 Red Teaming . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 28\\n4.4 Safety Evaluation of Llama 2-Chat . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
29\\n5 Discussion 32\\n5.1 Learnings and Observations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 32\\n5.2 Limitations and Ethical Considerations . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 34\\n5.3 Responsible Release Strategy . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 35\\n6 Related Work 35\\n7 Conclusion 36\\nA Appendix 46\\nA.1 Contributions . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 46\\nA.2 Additional Details for Pretraining . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 47\\nA.3 Additional Details for Fine-tuning . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 51\\nA.4 Additional Details for Safety . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 58\\nA.5 Data Annotation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 72\\nA.6 Dataset Contamination . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .'}, {'text': '. . . . . . 58\\nA.5 Data Annotation . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 72\\nA.6 Dataset Contamination . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 75\\nA.7 Model Card . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 77\\n2'}, {'text': 'Figure 1: Helpfulness human evaluation results for Llama\\n2-Chatcomparedtootheropen-sourceandclosed-source\\nmodels. Human raters compared model generations on ~4k\\npromptsconsistingofbothsingleandmulti-turnprompts.\\nThe95%confidenceintervalsforthisevaluationarebetween\\n1%and2%. MoredetailsinSection3.4.2. 
Whilereviewing\\nthese results, it is important to note that human evaluations\\ncanbenoisyduetolimitationsofthepromptset,subjectivity\\nof the review guidelines, subjectivity of individual raters,\\nand the inherent difficulty of comparing generations.\\nFigure 2: Win-rate % for helpfulness and\\nsafety between commercial-licensed base-\\nlines and Llama 2-Chat , according to GPT-\\n4. Tocomplementthehumanevaluation,we\\nused a more capable model, not subject to\\nourownguidance. Greenareaindicatesour\\nmodelisbetteraccordingtoGPT-4. Toremove\\nties, we used win/ (win+loss). The orders in\\nwhichthemodelresponsesarepresentedto\\nGPT-4arerandomlyswappedtoalleviatebias.\\n1 Introduction\\nLarge Language Models (LLMs) have shown great promise as highly capable AI assistants that excel in\\ncomplex reasoning tasks requiring expert knowledge across a wide range of fields, including in specialized\\ndomains such as programming and creative writing. They enable interaction with humans through intuitive\\nchat interfaces, which has led to rapid and widespread adoption among the general public.\\nThecapabilitiesofLLMsareremarkableconsideringtheseeminglystraightforwardnatureofthetraining\\nmethodology. Auto-regressivetransformersarepretrainedonanextensivecorpusofself-superviseddata,\\nfollowed by alignment with human preferences via techniques such as Reinforcement Learning with Human\\nFeedback(RLHF).Althoughthetrainingmethodologyissimple,highcomputationalrequirementshave\\nlimited the development of LLMs to a few players. There have been public releases of pretrained LLMs\\n(such as BLOOM (Scao et al., 2022), LLaMa-1 (Touvron et al., 2023), and Falcon (Penedo et al., 2023)) that\\nmatch the performance of closed pretrained competitors like GPT-3 (Brown et al., 2020) and Chinchilla\\n(Hoffmann et al., 2022), but none of these models are suitable substitutes for closed “product” LLMs, such\\nasChatGPT,BARD,andClaude. 
TheseclosedproductLLMsareheavilyfine-tunedtoalignwithhuman\\npreferences, which greatly enhances their usability and safety. This step can require significant costs in\\ncomputeandhumanannotation,andisoftennottransparentoreasilyreproducible,limitingprogresswithin\\nthe community to advance AI alignment research.\\nIn this work, we develop and release Llama 2, a family of pretrained and fine-tuned LLMs, Llama 2 and\\nLlama 2-Chat , at scales up to 70B parameters. On the series of helpfulness and safety benchmarks we tested,\\nLlama 2-Chat models generally perform better than existing open-source models. They also appear to\\nbe on par with some of the closed-source models, at least on the human evaluations we performed (see\\nFigures1and3). Wehavetakenmeasurestoincreasethesafetyofthesemodels,usingsafety-specificdata\\nannotation and tuning, as well as conducting red-teaming and employing iterative evaluations. Additionally,\\nthispapercontributesathoroughdescriptionofourfine-tuningmethodologyandapproachtoimproving\\nLLM safety. We hope that this openness will enable the community to reproduce fine-tuned LLMs and\\ncontinue to improve the safety of those models, paving the way for more responsible development of LLMs.\\nWealsosharenovelobservationswemadeduringthedevelopmentof Llama 2 andLlama 2-Chat ,suchas\\nthe emergence of tool usage and temporal organization of knowledge.\\n3'}]\n"
           ]
@@ -850,7 +841,7 @@
       },
       "source": [
         "## Different Query types in LanceDB\n",
-        "LanceDB allows switching query types with by setting `query_type` argument, which defaults to `vector` when using Embedding API. In this example we'll use `CohereReranker` which is one of many rerankers supported by LanceDB\n",
+        "LanceDB allows switching query types by setting the `query_type` argument, which defaults to `vector` when using the Embedding API. In this example we'll use `JinaReranker`, which is one of many rerankers supported by LanceDB.\n",
         "\n",
         "### Vector search:\n",
         "Vector search\n",
@@ -861,7 +852,7 @@
         "\n",
         "Vector search with Reranking\n",
         "```\n",
-        "reranker = CohereReranker()\n",
+        "reranker = JinaReranker()\n",
         "table.search(query).rerank(reranker=reranker)\n",
         "```\n",
         "\n",
@@ -990,20 +981,20 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "iZzAVl2kJ5mV",
-        "outputId": "88ee5b31-cc3c-402d-d414-d10e3c7cc324"
+        "outputId": "0f4d6e5b-3096-4f58-fc36-c7909b475cfc"
       },
       "outputs": [
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
-            "100%|██████████| 220/220 [00:09<00:00, 22.46it/s]\n",
-            "100%|██████████| 220/220 [00:00<00:00, 330.74it/s]"
+            "100%|██████████| 220/220 [00:10<00:00, 21.62it/s]\n",
+            "100%|██████████| 220/220 [00:00<00:00, 358.03it/s]"
           ]
         },
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
             "\n",
             " Vector Search Hit Rate: 0.6409090909090909\n",
@@ -1011,8 +1002,8 @@
           ]
         },
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
             "\n"
           ]
@@ -1026,6 +1017,24 @@
         "print(f\"FTS Search Hit Rate: {hit_rate_fts}\")"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "2. Reranked vector search\n"
+      ],
+      "metadata": {
+        "id": "-1B5OPDuI8NE"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "ngbS5kvnI6N_"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -1044,27 +1053,27 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "ydLNeAr4acYj",
-        "outputId": "34263ad1-1c21-43a5-f592-16e7f7ca8c45"
+        "outputId": "0e455b2f-a10c-4ad2-ce36-52a90829dd10"
       },
       "outputs": [
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
-            "100%|██████████| 220/220 [00:10<00:00, 20.37it/s]"
+            "100%|██████████| 220/220 [00:10<00:00, 20.60it/s]"
           ]
         },
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
             "\n",
             " Hybrid Search with LinearCombinationReranker Hit Rate: 0.6454545454545455\n"
           ]
         },
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
             "\n"
           ]
@@ -1100,169 +1109,168 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 13,
+      "execution_count": 12,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/",
-          "height": 286,
+          "height": 248,
           "referenced_widgets": [
-            "1833d9358c9b4cb581cc618ac68d7e30",
-            "8609f6b2059943d9bad3254b3d7ec629",
-            "25aefdd4c5644c709e03115bc8ae5f99",
-            "7ba55480b151488782c14afdc459b182",
-            "5414385b19d54478a98969b4d45736fa",
-            "c17520bc49fd4539bceaa78318cce796",
-            "8b7c9f8f944e4c4eb4cff354b42e086a",
-            "559ce10ceb2e4b558ae7124b2d359031",
-            "db0cca3c1f9a4fc695bb7362409b2cbe",
-            "27d1b3b7b4cd4588910f3aa07ab4fc3e",
-            "caab49ce2bf940eb8ac988f88a0dadef",
-            "6e37c2f25d3946fca0564fe3363f4725",
-            "08d187281e1e463785b7a5e88f47f316",
-            "f43a7a4b03b240b7bcef5befa7c91b46",
-            "c800dfef86de45a8b13b6621ef4e1a84",
-            "7926d3c178a949e182328fef895b9cbe",
-            "42caf4b7c7804a91bf320f38f7481ecb",
-            "45c142aca09946929dd91e2ad8463fac",
-            "e215079b606d43738715e21d784b8d2c",
-            "bd455814e03b4dea8169ba39b782e5dc",
-            "3f6c357e36ad48c2a73cbf7f9fe74716",
-            "d202239dc6ac4107b5c5f930cee827d7",
-            "e5e24aa6f68f41e3ba3f7f0585eddf8f",
-            "426a927035474cb2ad4936ca2041d2e1",
-            "cf476ad98f34482ebc14638fa5b7a35f",
-            "52f061513e3845d7819d0d103afe8258",
-            "5c476489e4964893a70aef719c38ac46",
-            "4f7ce5f08915421eb115d3e98cf60007",
-            "ea80b988836247838d21652c75b319ea",
-            "306c491ee6a04737aa400d6ff0a8c463",
-            "f6ecbdd2cad14aa4ad2b184f1889c8f5",
-            "e48fd3d0d8054fb297361c1e2d17f6ba",
-            "a9cb97c0b36e4da7bae4571baf0ec9d4",
-            "607b5519211d47a6ad75d5b2739ee907",
-            "079979603b3a46c89934f3718cdde9a5",
-            "3cca14175fd54dc3b2967d0e14a0df66",
-            "53d7258eb0924da38642be8c714bc6c3",
-            "5b5c3f2bbdbd456ebc26c6069869dcbd",
-            "d82d228598ae4eedb1a92c18aa03e985",
-            "2b361f073b1c49129ede34d359caff88",
-            "d2948d1af00e42e78058248f45059c30",
-            "48fee23cb275447bb43942a38cb94a17",
-            "ea726496c3e8476091d7be2c96def0d7",
-            "0d6b1481bbcd4a949f48396b7a385f6f",
-            "c70a82bbcb974214b1563babd29d50ef",
-            "43447d181ae44d71a449957f5eed54d6",
-            "706d91b06672487aa62dc974b4b28342",
-            "7c8b3122064a4289a9d8b246f9c37dfe",
-            "3c1b70e0586945d5a240a769a61f1ef0",
-            "44907a6e01ee434383566a39de8c05e8",
-            "b2358f595c3f47399530d363f63166a0",
-            "08b6d24ba33942b7bb4f47a7f43c4ad5",
-            "1b5fb3da3e98440aba828920d512c7fa",
-            "ae501dd3797e40b3bd0a018b6212f80e",
-            "8aa85ff7d417493ea7bf43e347fca68d"
+            "d718ecc1163942e4a5dd4cea73a90e8e",
+            "b33b19b3d1984052a7f189045a8cf881",
+            "ec4f2cb69b034876a42404016cd56336",
+            "299a94f5d724418cbfc3ff570f6fc51e",
+            "09d3de90349c4402a763ca9ee05872f1",
+            "f074dee7a025499ab26a16b811967b0d",
+            "5dc2560eb8d441c5b3c19f4dbb082402",
+            "4d055c5e91a14c789d53c41027c13f79",
+            "45a86acf89d74e88b05b236064abfe9e",
+            "d618cebde90545fe9d99255511dd842e",
+            "8ab0e516471a4b82b205227692a9c08c",
+            "77ed41da01bd4032aef1ad5d471e49b8",
+            "fd45203f991d420cbca1de00404fb92d",
+            "270e8fc6c4ed4583ad78e53a6048af39",
+            "4e8e3b7d32a542cc8b8c60bddf03b2fd",
+            "a72c96893f65478f8a618c6bed76a5a6",
+            "d6f67e57cbf64403844ac492fe33a37c",
+            "e71a9e5429a24743bb2f6672a675a0ea",
+            "8909f3170d344efdb39f5d95cc388606",
+            "54658b4b7ab543209f4de19a0b7c7477",
+            "a11e3a6b07114d56bb7ea4ff54f2dfba",
+            "036c0495ef404234a99a0d0945bfb137",
+            "481da1832f694ef6ac05e4d3efd67ac2",
+            "3ddd20f3287d4c53b9a8aa9dc376cf3b",
+            "2d23114ea17646069d8d228775f503a2",
+            "1583bd5518554acdb4201da0262dce80",
+            "cc1ca1cd82864701b376cb77f62bb189",
+            "6c95c1d838794b81a0ca58d97fc4d4bd",
+            "e7d30ffd3a5d4029bc94371e89d39df2",
+            "063e9d8117864292a1f4f7db6bf39fc5",
+            "4098367692a34e59a5f7875546187471",
+            "9fe27f7c08fd4e4690a0ed2a289176ee",
+            "e48ed4e228804a56af1b995b1533eda7",
+            "bc0b3747df404a2ea47be94b90b1bd96",
+            "17d0c553a19d4b55a03a95b68d24734f",
+            "c11f882bb2e04059b92006609471be1c",
+            "bfff2a4f749c46aaa2c1a0a131f13ca4",
+            "72cb183fdc3c444d85a08aa378e48a78",
+            "7fab79549d3a4bc2a2a38e761c85e3bd",
+            "48237a9c806440e0b100f52445502db7",
+            "0f48274c81ee44bb807a7d75ee0762fd",
+            "fc242de73006403886268a2ca9913375",
+            "387599dd3ff9491a8527a3a90e612c82",
+            "0f1a6b08ce6747c383c080ccd8c7783a",
+            "b6448167f80b40508070d3aa8cbc2ea0",
+            "24f579f32a3f48559ecc9b38f39f77cd",
+            "6e58fc22c84c48ba9d38770bf6665ee2",
+            "be452568af594df5884cbebbb23b1a47",
+            "6db85f74ef4c4edb8667d373bd3f96a4",
+            "be30e00481f24011a444511c896336d3",
+            "2d14ed52a93d4299b5de69c89e74ce13",
+            "7e196a41516f4550aa0b94d042d59756",
+            "4ba2b40b6b7945dfba573c9f80465155",
+            "7e761767401c4d9b97c06237b1e3b6eb",
+            "e5765e9cf70240c4ac11bdce2d1eaac8"
           ]
         },
         "id": "dd0jh4gNtm41",
-        "outputId": "731f7c32-838d-4e18-c080-83bece99f506"
+        "outputId": "aa734304-533a-4061-f6e4-17ee167b1933"
       },
       "outputs": [
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
-            "\r  0%|          | 0/220 [00:00<?, ?it/s]/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n",
-            "  warnings.warn(\n"
+            "\r  0%|          | 0/220 [00:00<?, ?it/s]"
           ]
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "1833d9358c9b4cb581cc618ac68d7e30",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "d718ecc1163942e4a5dd4cea73a90e8e"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "6e37c2f25d3946fca0564fe3363f4725",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "pytorch_model.bin:   0%|          | 0.00/268M [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "77ed41da01bd4032aef1ad5d471e49b8"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "e5e24aa6f68f41e3ba3f7f0585eddf8f",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "tokenizer_config.json:   0%|          | 0.00/541 [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "481da1832f694ef6ac05e4d3efd67ac2"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "607b5519211d47a6ad75d5b2739ee907",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "bc0b3747df404a2ea47be94b90b1bd96"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
+          "output_type": "display_data",
           "data": {
-            "application/vnd.jupyter.widget-view+json": {
-              "model_id": "c70a82bbcb974214b1563babd29d50ef",
-              "version_major": 2,
-              "version_minor": 0
-            },
             "text/plain": [
               "special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]"
-            ]
+            ],
+            "application/vnd.jupyter.widget-view+json": {
+              "version_major": 2,
+              "version_minor": 0,
+              "model_id": "b6448167f80b40508070d3aa8cbc2ea0"
+            }
           },
-          "metadata": {},
-          "output_type": "display_data"
+          "metadata": {}
         },
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
-            "100%|██████████| 220/220 [00:52<00:00,  4.16it/s]"
+            "100%|██████████| 220/220 [01:03<00:00,  3.44it/s]"
           ]
         },
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
             " \n",
             " Hybrid Search with CrossEncoderReranker Hit Rate: 0.6772727272727272\n"
           ]
         },
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
             "\n"
           ]
@@ -1270,7 +1278,7 @@
       ],
       "source": [
         "#WARNING:  This cell takes a long time without CUDA\n",
-        "from lancedb.rerankers import CohereReranker, CrossEncoderReranker\n",
+        "from lancedb.rerankers import JinaReranker, CrossEncoderReranker, CohereReranker\n",
         "\n",
         "reranker = CrossEncoderReranker()\n",
         "hit_rate_hybrid = hit_rate(data, tbl, \"hybrid\", reranker=reranker)\n",
@@ -1283,7 +1291,7 @@
         "id": "3lhePSeMQN-p"
       },
       "source": [
-        "2. Cohere Reranker\n"
+        "2. Jina AI Reranker\n"
       ]
     },
     {
@@ -1294,45 +1302,60 @@
           "base_uri": "https://localhost:8080/"
         },
         "id": "O4L0Lqi2tvZn",
-        "outputId": "beed3339-225d-4255-9f9b-0367d300e177"
+        "outputId": "8ccb169c-0632-4ee0-c039-cac29956af08"
       },
       "outputs": [
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
-            "100%|██████████| 220/220 [01:26<00:00,  2.55it/s]"
+            "100%|██████████| 220/220 [01:24<00:00,  2.60it/s]"
           ]
         },
         {
-          "name": "stdout",
           "output_type": "stream",
+          "name": "stdout",
           "text": [
             " \n",
-            " Hybrid Search with CohereReranker Hit Rate: 0.759090909090909\n"
+            " Hybrid Search with JinaReranker Hit Rate: 0.7681818181818182\n"
           ]
         },
         {
-          "name": "stderr",
           "output_type": "stream",
+          "name": "stderr",
           "text": [
             "\n"
           ]
         }
       ],
       "source": [
-        "# Cohere Reranker\n",
+        "# Jina AI Reranker\n",
         "import os\n",
-        "from lancedb.rerankers import CohereReranker\n",
+        "from lancedb.rerankers import JinaReranker\n",
         "\n",
         "# Colab secret setup\n",
         "from google.colab import userdata\n",
+        "os.environ[\"JINA_API_KEY\"] = userdata.get('JINA_API_KEY')\n",
+        "\n",
+        "reranker = JinaReranker(model_name=\"jina-reranker-v2-base-multilingual\")\n",
+        "hit_rate_hybrid = hit_rate(data, tbl, \"hybrid\", reranker=reranker)\n",
+        "print(f\" \\n Hybrid Search with JinaReranker Hit Rate: {hit_rate_hybrid}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
         "os.environ[\"COHERE_API_KEY\"] = userdata.get('COHERE_API_KEY')\n",
         "\n",
-        "reranker = CohereReranker(model_name=\"rerank-english-v3.0\")\n",
+        "reranker = CohereReranker()\n",
         "hit_rate_hybrid = hit_rate(data, tbl, \"hybrid\", reranker=reranker)\n",
         "print(f\" \\n Hybrid Search with CohereReranker Hit Rate: {hit_rate_hybrid}\")"
-      ]
+      ],
+      "metadata": {
+        "id": "n6VZEU9-HnDp"
+      },
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
@@ -1346,9 +1369,10 @@
         "| --- | --- |\n",
         "| Vector |  0.640 |\n",
         "| FTS   |  0.595  |\n",
-        "| Reranked vector | 0.677    |\n",
-        "| Reranked fts  | 0.672    |\n",
-        "| Hybrid | 0.759 |\n"
+        "| Reranked vector (Cohere Reranker) | 0.677    |\n",
+        "| Reranked fts (Cohere Reranker)  | 0.672    |\n",
+        "| Hybrid (Cohere Reranker) | 0.759 |\n",
+        "| Hybrid (Jina Reranker) | 0.768 |\n"
       ]
     },
     {
@@ -1458,7 +1482,9 @@
     },
     {
       "cell_type": "markdown",
-      "metadata": {},
+      "metadata": {
+        "id": "PJ0qSdCgCGi4"
+      },
       "source": []
     }
   ],
@@ -1478,4 +1504,4 @@
   },
   "nbformat": 4,
   "nbformat_minor": 0
-}
+}
\ No newline at end of file
diff --git a/docs/src/reranking/rrf.md b/docs/src/reranking/rrf.md
new file mode 100644
index 000000000..972c2443d
--- /dev/null
+++ b/docs/src/reranking/rrf.md
@@ -0,0 +1,53 @@
+# Reciprocal Rank Fusion Reranker
+
+Reciprocal Rank Fusion (RRF) is an algorithm that computes a relevance score for each document from its position (rank) in the search results, rather than from the raw search scores. The implementation follows this [paper](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf).
+
+
+!!! note
+    Supported Query Types: Hybrid
+
+
+```python
+import numpy
+import lancedb
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+from lancedb.rerankers import RRFReranker
+
+embedder = get_registry().get("sentence-transformers").create()
+db = lancedb.connect("~/.lancedb")
+
+class Schema(LanceModel):
+    text: str = embedder.SourceField()
+    vector: Vector(embedder.ndims()) = embedder.VectorField()
+
+data = [
+    {"text": "hello world"},
+    {"text": "goodbye world"}
+    ]
+tbl = db.create_table("test", schema=Schema, mode="overwrite")
+tbl.add(data)
+reranker = RRFReranker()
+
+# Run hybrid search with a reranker
+tbl.create_fts_index("text", replace=True)
+result = tbl.search("hello", query_type="hybrid").rerank(reranker=reranker).to_list()
+
+```
+
+Accepted Arguments
+----------------
+| Argument | Type | Default | Description |
+| --- | --- | --- | --- |
+| `K` | `int` | `60` | A constant used in the RRF formula. Experiments indicate that `K = 60` is near-optimal, but that the choice is not critical. |
+| `return_score` | `str` | `"relevance"` | The type of score to return. Options are `"relevance"` or `"all"`. If `"relevance"`, only the `_relevance_score` is returned. If `"all"`, all scores from the vector and FTS search are returned along with the relevance score. |
+
+
+## Supported Scores for each query type
+You can specify the type of scores you want the reranker to return. The following are the supported scores for each query type:
+
+### Hybrid Search
+|`return_score`| Status | Description |
+| --- | --- | --- |
+| `relevance` | ✅ Supported | Returned rows only have the `_relevance_score` column |
+| `all` | ✅ Supported | Returned rows have the vector search distance (`_distance`) and the FTS score (`score`) along with the hybrid search score (`_relevance_score`) |
\ No newline at end of file