diff --git a/docs/src/notebooks/hybrid_search.ipynb b/docs/src/notebooks/hybrid_search.ipynb index 6a3cc32b..d61c3612 100644 --- a/docs/src/notebooks/hybrid_search.ipynb +++ b/docs/src/notebooks/hybrid_search.ipynb @@ -3,11 +3,13 @@ { "cell_type": "markdown", "id": "0daef1cd-9130-46b8-8eb8-1b721860e239", - "metadata": {}, + "metadata": { + "id": "0daef1cd-9130-46b8-8eb8-1b721860e239" + }, "source": [ "# Example - Airbnb financial data search\n", "\n", - "\"Open \n", + "\"Open\n", "\n", "The code below is an example of hybrid search, a search algorithm that combines FTS and vector search in LanceDB.\n", "\n", @@ -18,24 +20,36 @@ "cell_type": "code", "execution_count": null, "id": "819fa612", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "819fa612", + "outputId": "f5593c76-573f-4a04-d0ce-aac7b7ee1466" + }, "outputs": [], "source": [ "# Setup\n", - "!pip install lancedb pandas langchain langchain-community pypdf openai cohere tiktoken sentence_transformers tantivy==0.20.1" + "!pip install lancedb pandas langchain langchain_openai langchain-community pypdf openai cohere tiktoken sentence_transformers tantivy==0.20.1" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "b6864d97-7f85-4d9c-bf05-e9cf9db29e81", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "b6864d97-7f85-4d9c-bf05-e9cf9db29e81", + "outputId": "6c6dd78d-3213-4bd8-9e74-5faba902e546" + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " ········\n" + "··········\n" ] } ], @@ -49,24 +63,28 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "id": "cfce9804-cd1c-48c3-acd2-e74eb4e290c7", - "metadata": {}, + "metadata": { + "id": "cfce9804-cd1c-48c3-acd2-e74eb4e290c7" + }, "outputs": [], "source": [ "def pretty_print(docs):\n", " for doc in docs:\n", - " print(doc + \"\\n\\n\") " + " print(doc + \"\\n\\n\")" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "efb22cec-5a06-46ac-91c3-53f9b9090109", - "metadata": {}, + "metadata": { + "id": "efb22cec-5a06-46ac-91c3-53f9b9090109" + }, "outputs": [], "source": [ - "from langchain.document_loaders import PyPDFLoader\n", + "from langchain_community.document_loaders import PyPDFLoader\n", "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "\n", "# Load $ABNB's financial report. This may take 1-2 minutes since the PDF is large\n", @@ -85,64 +103,60 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "d3c5ce69-0f75-44cb-9e49-9be665fc156e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[2024-02-12T20:00:04Z WARN lance::dataset] No existing dataset at /Users/ayushchaurasia/langchain/airbnb.lance, it will be created\n" - ] - } - ], + "metadata": { + "id": "d3c5ce69-0f75-44cb-9e49-9be665fc156e" + }, + "outputs": [], "source": [ "from langchain_community.vectorstores import LanceDB\n", - "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain_openai import OpenAIEmbeddings\n", "import lancedb\n", - "from lancedb.embeddings import get_registry\n", - "from lancedb.pydantic import Vector, LanceModel\n", "\n", - "openai = get_registry().get(\"openai\").create()\n", - "\n", - "class Schema(LanceModel):\n", - " text: str = openai.SourceField()\n", - " vector: Vector(openai.ndims()) = openai.VectorField()\n", "\n", "embedding_function = OpenAIEmbeddings()\n", "\n", "db = lancedb.connect(\"~/langchain\")\n", - "table = db.create_table(\n", - " \"airbnb\",\n", - " schema=Schema,\n", - " mode=\"overwrite\",\n", - ")\n", "\n", "# Load the document into LanceDB\n", - "db = LanceDB.from_documents(docs, embedding_function, connection=table)" + "db = LanceDB.from_documents(docs, embedding_function, connection=db)" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "4284e67e-3a39-4486-a060-11a18f7c0e1f", + "execution_count": null, + "id": "4bd12fb8", "metadata": {}, "outputs": [], "source": [ + "table = db._table\n", "table.create_fts_index(\"text\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 19, "id": "d959a80f-d568-48f4-9d14-7367bcc1ce8d", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 293 + }, + "id": "d959a80f-d568-48f4-9d14-7367bcc1ce8d", + "outputId": "b02f837d-7fa6-4ec2-b283-e170f5f67637" + }, "outputs": [ { "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"table\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"ddcfa6b1-3de8-4933-a187-6aa7b7ae87b4\",\n \"47f5dd55-b3e7-4879-afba-5ca9eea7341b\",\n \"c391b1e1-6f66-41f2-82ff-18db5a218303\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Class A common stock, par value $0.0001 per share ABNB The Nasdaq Stock Market\\nSecurities registered pursuant to Section 12(g) of the Act:\\nNone______________\\nIndicate by check mark if the registrant is a well-known seasoned issuer, as defined in Rule 405 of the Securities Act. Yes \\u2612 No \\u2610 \\nIndicate by check mark if the registrant is not required to file reports pursuant to Section 13 or Section 15(d) of the Exchange Act. Yes \\u2610 No \\u2612 \\nIndicate by check mark whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 during the preceding 12\\nmonths (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the past 90 days. Yes \\u2612 No \\u2610 \\nIndicate by check mark whether the registrant has submitted electronically every Interactive Data File required to be submitted pursuant to Rule 405 of Regulation S-T (\\u00a7 232.405 of\",\n \"As of June 30, 2022, the aggregate market value of the Class A common stock held by non-affiliates of the registrant was approximately $35.1 billion based upon the closing price\\nreported for such date on the NASDAQ Global Select Market.\\nAs of February 3, 2023, 408,928,427 shares of the registrant's Class A common stock were outstanding 222,400,067 shares of the registrant's Class B common stock were\\noutstanding, no shares of the registrant\\u2019s Class C common stock were outstanding, and 9,200,000 shares of the registrant\\u2019s Class H common stock were outstanding.\\n______________\\nDOCUMENTS INCORPORATED BY REFERENCE\\nThe information required by Part III of this Report, to the extent not set forth herein, is incorporated herein by reference from the registrant\\u2019s definitive proxy statement relating to the\",\n \"this chapter) during the preceding 12 months (or for such shorter period that the registrant was required to submit such files). Yes \\u2612 No \\u2610 \\nIndicate by check mark whether the registrant is a large accelerated filer, an accelerated filer, a non-accelerated filer, a smaller reporting company, or an emerging growth company.\\nSee the definitions of \\u201clarge accelerated filer,\\u201d \\u201caccelerated filer,\\u201d \\u201csmaller reporting company\\u201d and \\u201cemerging growth company\\u201d in Rule 12b-2 of the Exchange Act.\\nLarge accelerated filer \\u2612 Accelerated filer\\u2610 \\nNon-accelerated filer \\u2610 Smaller reporting company\\u2610 \\nEmerging growth company\\u2610 \\nIf an emerging growth company, indicate by check mark if the registrant has elected not to use the extended transition period for complying with any new or revised financial\\naccounting standards provided pursuant to Section 13(a) of the Exchange Act. \\u2610\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"metadata\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], "text/plain": [ + " vector \\\n", + "0 [-0.0016961554, -0.03531899, 0.011809787, -0.0... \n", + "1 [-0.021446472, -0.021045355, 0.010823516, -0.0... \n", + "2 [-0.020018686, -0.014233166, -0.010991167, -0.... \n", + "3 [-0.019061018, -0.0022632438, -0.011158161, -0... \n", + "4 [-0.015733723, -0.012287037, -0.004055117, -0.... \n", + "\n", + " id \\\n", + "0 5c66d086-0fed-4270-a91b-c2b67b3ed052 \n", + "1 ddcfa6b1-3de8-4933-a187-6aa7b7ae87b4 \n", + "2 c391b1e1-6f66-41f2-82ff-18db5a218303 \n", + "3 3e896a62-8631-4a54-86bd-ee2f69f3b373 \n", + "4 47f5dd55-b3e7-4879-afba-5ca9eea7341b \n", + "\n", " text \\\n", "0 Table of Contents\\nUNITED STATES\\nSECURITIES A... \n", "1 Class A common stock, par value $0.0001 per sh... \n", @@ -202,15 +451,15 @@ "3 Indicate by check mark whether the registrant ... \n", "4 As of June 30, 2022, the aggregate market valu... \n", "\n", - " vector \n", - "0 [-0.003405824, -0.03212391, 0.012812538, -0.02... \n", - "1 [-0.019193485, -0.02273649, 0.009623382, -0.02... \n", - "2 [-0.020692078, -0.016187502, -0.008877442, -0.... \n", - "3 [-0.019304628, -0.0034501317, -0.011525051, -0... \n", - "4 [-0.014594535, -0.011274607, -0.007967828, -0.... " + " metadata \n", + "0 {'page': 0, 'source': 'https://d18rn0p25nwr6d.... \n", + "1 {'page': 0, 'source': 'https://d18rn0p25nwr6d.... \n", + "2 {'page': 0, 'source': 'https://d18rn0p25nwr6d.... \n", + "3 {'page': 0, 'source': 'https://d18rn0p25nwr6d.... \n", + "4 {'page': 1, 'source': 'https://d18rn0p25nwr6d.... " ] }, - "execution_count": 12, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -222,7 +471,9 @@ { "cell_type": "markdown", "id": "667f4e4a-6ff1-4f1c-ad57-4a2a8b036670", - "metadata": {}, + "metadata": { + "id": "667f4e4a-6ff1-4f1c-ad57-4a2a8b036670" + }, "source": [ "## Vector Search\n", "\n", @@ -231,27 +482,35 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 30, "id": "8a5ab2de-6d75-4785-b838-ed6a825dfa6e", - "metadata": {}, + "metadata": { + "id": "8a5ab2de-6d75-4785-b838-ed6a825dfa6e" + }, "outputs": [], "source": [ - "query = \"What are the specific factors contributing to Airbnb's increased operational expenses in the last fiscal year?\"\n", - "docs = table.search(query).limit(5).to_pandas()[\"text\"].to_list()" + "str_query = \"What are the specific factors contributing to Airbnb's increased operational expenses in the last fiscal year?\"\n", + "query = embedding_function.embed_query(str_query)\n", + "docs = table.search(query, query_type=\"vector\").limit(5).to_pandas()[\"text\"].to_list()" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 31, "id": "5423d333-0f6d-4951-ab3f-6941ad30ba8a", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5423d333-0f6d-4951-ab3f-6941ad30ba8a", + "outputId": "79557d98-85d1-4a18-db42-10d7adffb7c2" + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "In addition, the number of listings on Airbnb may decline as a result of a number of other factors affecting Hosts, including: the COVID-19 pandemic; enforcement or threatened\n", - "enforcement of laws and regulations, including short-term occupancy and tax laws; private groups, such as homeowners, landlords, and condominium and neighborhood\n", + "In addition, the number of listings on Airbnb may decline as a result of a number of other factors affecting Hosts, including: the COVID-19 pandemic; enforcement or threatenedenforcement of laws and regulations, including short-term occupancy and tax laws; private groups, such as homeowners, landlords, and condominium and neighborhood\n", "associations, adopting and enforcing contracts that prohibit or restrict home sharing; leases, mortgages, and other agreements, or regulations that purport to ban or otherwise restrict\n", "home sharing; Hosts opting for long-term rentals on other third-party platforms as an alternative to listing on our platform; economic, social, and political factors; perceptions of trust\n", "and safety on and off our platform; negative experiences with guests, including guests who damage Host property, throw unauthorized parties, or engage in violent and unlawful\n", @@ -271,12 +530,20 @@ "\n", "Our success depends significantly on existing guests continuing to book and attracting new guests to book on our platform. Our ability to attract and retain guests could be materially\n", "adversely affected by a number of factors discussed elsewhere in these “Risk Factors,” including:\n", - "•events beyond our control such as the ongoing COVID-19 pandemic, other pandemics and health concerns, restrictions on travel, immigration, trade disputes, economic\n", - "downturns, and the impact of climate change on travel including the availability of preferred destinations and the increase in the frequency and severity of weather-related\n", - "events, including fires, floods, droughts, extreme temperatures and ambient temperature increases, severe weather and other natural disasters, and the impact of other\n", + "• events beyond our control such as the ongoing COVID-19 pandemic, other pandemics and health concerns, restrictions on travel, immigration, trade disputes, economic\n", + "downturns, and the impact of climate change on travel including the availability of preferred destinations and the increase in the frequency and severity of weather-relatedevents, including fires, floods, droughts, extreme temperatures and ambient temperature increases, severe weather and other natural disasters, and the impact of other\n", "climate change on seasonal destinations;\n", - "•political, social, or economic instability;\n", - "•Hosts failing to meet guests’ expectations, including increased expectations for cleanliness in light of the COVID-19 pandemic;\n", + "• political, social, or economic instability;\n", + "\n", + "\n", + "• Hosts failing to meet guests’ expectations, including increased expectations for cleanliness in light of the COVID-19 pandemic;• increased competition and use of our competitors’ platforms and services;\n", + "• Hosts failing to provide differentiated, high-quality, and an adequate supply of stays or experiences at competitive prices;\n", + "• guests not receiving timely and adequate community support from us;\n", + "• our failure to provide new or enhanced offerings, tiers, or features that guests value;\n", + "• declines or inefficiencies in our marketing efforts;• negative associations with, or reduced awareness of, our brand;\n", + "• actual or perceived discrimination by Hosts in deciding whether to accept a requested reservation;\n", + "• negative perceptions of the trust and safety on our platform; and\n", + "• macroeconomic and other conditions outside of our control affecting travel and hospitality industries generally.\n", "\n", "\n", "Table of Contents\n", @@ -305,15 +572,6 @@ "Basic $ (16.12)$ (0.57)$ 2.97 \n", "Diluted $ (16.12)$ (0.57)$ 2.79\n", "\n", - "\n", - "Our future revenue growth depends on the growth of supply and demand for listings on our platform, and our business is affected by general economic and business conditions\n", - "worldwide as well as trends in the global travel and hospitality industries and the short and long-term accommodation regulatory landscape. In addition, we believe that our revenue\n", - "growth depends upon a number of factors, including:\n", - "•global macroeconomic conditions, including inflation and rising interest rates and recessionary concerns;\n", - "•our ability to retain and grow the number of guests and Nights and Experiences Booked;\n", - "•our ability to retain and grow the number of Hosts and the number of available listings on our platform;\n", - "•events beyond our control such as pandemics and other health concerns, restrictions on travel and immigration, political, social or economic instability, including international\n", - "\n", "\n" ] } @@ -325,7 +583,9 @@ { "cell_type": "markdown", "id": "8b0150fe-00dc-4aa0-9c8f-33cbf2ed5ac6", - "metadata": {}, + "metadata": { + "id": "8b0150fe-00dc-4aa0-9c8f-33cbf2ed5ac6" + }, "source": [ "## Hybrid Search\n", "LanceDB support hybrid search with custom Rerankers. Here's the summary of latency numbers of some of the Reranking methods available\n", @@ -342,63 +602,62 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 32, "id": "d2aa5893-30c4-4beb-9dae-a55665bd82c7", - "metadata": {}, + "metadata": { + "id": "d2aa5893-30c4-4beb-9dae-a55665bd82c7" + }, "outputs": [], "source": [ - "docs = table.search(query, query_type=\"hybrid\").limit(5).to_pandas()[\"text\"].to_list()" + "docs = table.search(query_type=\"hybrid\").vector(query).text(str_query).limit(5).to_pandas()[\"text\"].to_list()" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 33, "id": "8d6a99c3-92ef-4677-96bb-9b54a11a79fe", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8d6a99c3-92ef-4677-96bb-9b54a11a79fe", + "outputId": "72f74e97-efb9-4bf2-c612-3dbbf33312bc" + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "In addition, the number of listings on Airbnb may decline as a result of a number of other factors affecting Hosts, including: the COVID-19 pandemic; enforcement or threatened\n", - "enforcement of laws and regulations, including short-term occupancy and tax laws; private groups, such as homeowners, landlords, and condominium and neighborhood\n", + "In addition, the number of listings on Airbnb may decline as a result of a number of other factors affecting Hosts, including: the COVID-19 pandemic; enforcement or threatenedenforcement of laws and regulations, including short-term occupancy and tax laws; private groups, such as homeowners, landlords, and condominium and neighborhood\n", "associations, adopting and enforcing contracts that prohibit or restrict home sharing; leases, mortgages, and other agreements, or regulations that purport to ban or otherwise restrict\n", "home sharing; Hosts opting for long-term rentals on other third-party platforms as an alternative to listing on our platform; economic, social, and political factors; perceptions of trust\n", "and safety on and off our platform; negative experiences with guests, including guests who damage Host property, throw unauthorized parties, or engage in violent and unlawful\n", "\n", "\n", - "(a) The Borrower may, at its election, deliver a Pricing Certificate to the Administrative Agent in respect of the most recently\n", - "ended fiscal year, commencing with the fiscal year ended December 31, 2022, on any date prior to the date that is 270 days following the last\n", - "day of such fiscal year (the\n", - "-50-\n", + "“Initial Delivery Date”); provided that the Pricing Certificate for any fiscal year may be delivered on any date following the Initial DeliveryDate that is prior to the date that is 365 days following the last day of the preceding fiscal year, so long as such Pricing Certificate includes acertification that delivery of such Pricing Certificate on or before the Initial Delivery Date was not possible because (i) the informationrequired to calculate the KPI Metrics for such preceding fiscal year was not available at such time or (ii) the report of the KPI Metrics Auditor,if relevant, was not available at such time (the date of the Administrative Agent’s receipt thereof, each a “Pricing Certificate Date”). Upondelivery of a Pricing Certificate in respect of a fiscal year, (i) the Applicable Rate for the Loans incurred by the Borrower shall be increased ordecreased (or neither increased nor decreased), as applicable, pursuant to the Sustainability Margin Adjustment as set forth in the KPI MetricsCertificate\n", "\n", "\n", - "“Initial Delivery Date”); provided that the Pricing Certificate for any fiscal year may be delivered on any date following the Initial Delivery\n", - "Date that is prior to the date that is 365 days following the last day of the preceding fiscal year, so long as such Pricing Certificate includes a\n", - "certification that delivery of such Pricing Certificate on or before the Initial Delivery Date was not possible because (i) the information\n", - "required to calculate the KPI Metrics for such preceding fiscal year was not available at such time or (ii) the report of the KPI Metrics Auditor,\n", - "if relevant, was not available at such time (the date of the Administrative Agent’s receipt thereof, each a “Pricing Certificate Date”). Upon\n", - "delivery of a Pricing Certificate in respect of a fiscal year, (i) the Applicable Rate for the Loans incurred by the Borrower shall be increased or\n", - "decreased (or neither increased nor decreased), as applicable, pursuant to the Sustainability Margin Adjustment as set forth in the KPI Metrics\n", + "Made Possible by Hosts, Strangers, AirCover, Categories, and OMG marketing campaigns and launches, a $67.9 million increase in our search engine marketing and advertising\n", + "spend, a $25.1 million increase in payroll-related expenses due to growth in headcount and increase in compensation costs, a $22.0 million increase in third-party service provider\n", + "expenses, and a $11.1 million increase in coupon expense in line with increase in revenue and launch of AirCover for guests, partially offset by a decrease of $22.9 million related to\n", + "the changes in the fair value of contingent consideration related to a 2019 acquisition.\n", + "General and Administrative\n", + "2021 2022 % Change\n", + "(in millions, except percentages)\n", + "General and administrative $ 836 $ 950 14 %\n", + "Percentage of revenue 14 % 11 %\n", + "General and administrative expense increased $114.0 million, or 14%, in 2022 compared to 2021, primarily due to an increase in other business and operational taxes of $41.3\n", "\n", "\n", - "including such Sustainability Pricing Adjustment Date and ending on the date immediately preceding the next Sustainability Pricing\n", - "Adjustment Date.\n", - "(b) For the avoidance of doubt, only one Pricing Certificate may be delivered in respect of any fiscal year. It is further understood\n", - "and agreed that the Applicable Rate for Loans incurred by the Borrower will never be reduced or increased by more than 0.050% and that the\n", - "Applicable Rate for the Revolving Commitment Fee will never be reduced or increased by more than 0.010%, pursuant to the Sustainability\n", - "Margin Adjustment and the Sustainability Fee Adjustment, respectively, on any Sustainability Pricing Adjustment Date. For the avoidance of\n", - "doubt, any adjustment to the Applicable Rate for such Loans or such Revolving Commitment Fee by reason of meeting one or both KPI\n", - "Metrics in any fiscal year shall not be cumulative year-over-year. The adjustments pursuant to this Section made on any Sustainability Pricing\n", + "(c) If, for any fiscal year, either (i) no Pricing Certificate shall have been delivered for such fiscal year or (ii) the PricingCertificate delivered for such fiscal year shall fail to include the Diverse Supplier Spend Percentage or GHG Emissions Intensity for suchfiscal year, then the Sustainability Margin Adjustment will be positive 0.050% and/or the Sustainability Fee Adjustment will be positive0.010%, as applicable, in each case commencing on the last day such Pricing Certificate could have been delivered in accordance with theterms of clause (a) above (it being understood that, in the case of the foregoing clause (ii), the Sustainability Margin Adjustment or theSustainability Fee Adjustment will be determined in accordance with such Pricing Certificate to the extent the (A) Sustainability MarginAdjustment or the Sustainability Fee Adjustment is included in such Pricing Certificate and (B) the Administrative Agent has separatelyreceived the Diverse Supplier Spend Percentage and/or GHG Emissions\n", "\n", "\n", - "Adjustment Date shall only apply for the period until the date immediately preceding the next Sustainability Pricing Adjustment Date.\n", - "(c) If, for any fiscal year, either (i) no Pricing Certificate shall have been delivered for such fiscal year or (ii) the Pricing\n", - "Certificate delivered for such fiscal year shall fail to include the Diverse Supplier Spend Percentage or GHG Emissions Intensity for such\n", - "fiscal year, then the Sustainability Margin Adjustment will be positive 0.050% and/or the Sustainability Fee Adjustment will be positive\n", - "0.010%, as applicable, in each case commencing on the last day such Pricing Certificate could have been delivered in accordance with the\n", - "terms of clause (a) above (it being understood that, in the case of the foregoing clause (ii), the Sustainability Margin Adjustment or the\n", - "Sustainability Fee Adjustment will be determined in accordance with such Pricing Certificate to the extent the (A) Sustainability Margin\n", + "Our success depends significantly on existing guests continuing to book and attracting new guests to book on our platform. Our ability to attract and retain guests could be materially\n", + "adversely affected by a number of factors discussed elsewhere in these “Risk Factors,” including:\n", + "• events beyond our control such as the ongoing COVID-19 pandemic, other pandemics and health concerns, restrictions on travel, immigration, trade disputes, economic\n", + "downturns, and the impact of climate change on travel including the availability of preferred destinations and the increase in the frequency and severity of weather-relatedevents, including fires, floods, droughts, extreme temperatures and ambient temperature increases, severe weather and other natural disasters, and the impact of other\n", + "climate change on seasonal destinations;\n", + "• political, social, or economic instability;\n", "\n", "\n" ] @@ -411,7 +670,9 @@ { "cell_type": "markdown", "id": "c4d3e0f3-8d96-47f5-ad1d-514475f1ae55", - "metadata": {}, + "metadata": { + "id": "c4d3e0f3-8d96-47f5-ad1d-514475f1ae55" + }, "source": [ "### Cohere Reranker\n", "This uses Cohere's Reranking API to re-rank the results. It accepts the reranking model name as a parameter. By Default it uses the english-v3 model but you can easily switch to a multi-lingual model.\n", @@ -421,15 +682,21 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 34, "id": "ce2c43c7-1a96-4856-ad9b-28385164f187", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ce2c43c7-1a96-4856-ad9b-28385164f187", + "outputId": "5316fdc4-8930-45aa-af1d-6f1faef4e97e" + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " ········\n" + "··········\n" ] } ], @@ -440,22 +707,30 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 35, "id": "4adbb3f1-4d21-427b-9bf0-3d7bebf68cf6", - "metadata": {}, + "metadata": { + "id": "4adbb3f1-4d21-427b-9bf0-3d7bebf68cf6" + }, "outputs": [], "source": [ "from lancedb.rerankers import CohereReranker\n", "\n", "reranker = CohereReranker()\n", - "docs = table.search(query, query_type=\"hybrid\").limit(5).rerank(reranker=reranker).to_pandas()[\"text\"].to_list()" + "docs = table.search(query_type=\"hybrid\").vector(query).text(str_query).limit(5).rerank(reranker).to_pandas()[\"text\"].to_list()" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 36, "id": "a071b3e7-3b8b-42e4-a089-4d6c4094873f", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "a071b3e7-3b8b-42e4-a089-4d6c4094873f", + "outputId": "2d9066f3-8290-431d-ae08-0d17dad805f7" + }, "outputs": [ { "name": "stdout", @@ -480,6 +755,16 @@ "General and administrative expense increased $114.0 million, or 14%, in 2022 compared to 2021, primarily due to an increase in other business and operational taxes of $41.3\n", "\n", "\n", + "• Hosts failing to meet guests’ expectations, including increased expectations for cleanliness in light of the COVID-19 pandemic;• increased competition and use of our competitors’ platforms and services;\n", + "• Hosts failing to provide differentiated, high-quality, and an adequate supply of stays or experiences at competitive prices;\n", + "• guests not receiving timely and adequate community support from us;\n", + "• our failure to provide new or enhanced offerings, tiers, or features that guests value;\n", + "• declines or inefficiencies in our marketing efforts;• negative associations with, or reduced awareness of, our brand;\n", + "• actual or perceived discrimination by Hosts in deciding whether to accept a requested reservation;\n", + "• negative perceptions of the trust and safety on our platform; and\n", + "• macroeconomic and other conditions outside of our control affecting travel and hospitality industries generally.\n", + "\n", + "\n", "Table of Contents\n", "Airbnb, Inc.\n", "Consolidated Statements of Operations\n", @@ -509,19 +794,10 @@ "\n", "Our success depends significantly on existing guests continuing to book and attracting new guests to book on our platform. Our ability to attract and retain guests could be materially\n", "adversely affected by a number of factors discussed elsewhere in these “Risk Factors,” including:\n", - "•events beyond our control such as the ongoing COVID-19 pandemic, other pandemics and health concerns, restrictions on travel, immigration, trade disputes, economic\n", - "downturns, and the impact of climate change on travel including the availability of preferred destinations and the increase in the frequency and severity of weather-related\n", - "events, including fires, floods, droughts, extreme temperatures and ambient temperature increases, severe weather and other natural disasters, and the impact of other\n", + "• events beyond our control such as the ongoing COVID-19 pandemic, other pandemics and health concerns, restrictions on travel, immigration, trade disputes, economic\n", + "downturns, and the impact of climate change on travel including the availability of preferred destinations and the increase in the frequency and severity of weather-relatedevents, including fires, floods, droughts, extreme temperatures and ambient temperature increases, severe weather and other natural disasters, and the impact of other\n", "climate change on seasonal destinations;\n", - "•political, social, or economic instability;\n", - "•Hosts failing to meet guests’ expectations, including increased expectations for cleanliness in light of the COVID-19 pandemic;\n", - "\n", - "\n", - "In addition, the number of listings on Airbnb may decline as a result of a number of other factors affecting Hosts, including: the COVID-19 pandemic; enforcement or threatened\n", - "enforcement of laws and regulations, including short-term occupancy and tax laws; private groups, such as homeowners, landlords, and condominium and neighborhood\n", - "associations, adopting and enforcing contracts that prohibit or restrict home sharing; leases, mortgages, and other agreements, or regulations that purport to ban or otherwise restrict\n", - "home sharing; Hosts opting for long-term rentals on other third-party platforms as an alternative to listing on our platform; economic, social, and political factors; perceptions of trust\n", - "and safety on and off our platform; negative experiences with guests, including guests who damage Host property, throw unauthorized parties, or engage in violent and unlawful\n", + "• political, social, or economic instability;\n", "\n", "\n" ] @@ -534,21 +810,36 @@ { "cell_type": "markdown", "id": "6630f0c0-6070-4ea7-a191-99092e69ca05", - "metadata": {}, + "metadata": { + "id": "6630f0c0-6070-4ea7-a191-99092e69ca05" + }, "source": [ "Relevance score is returned by Cohere API and is independent of individual FTS and vector search scores." ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 38, "id": "80dc61bb-929c-4fbb-b2cb-20c5d31bc65c", - "metadata": {}, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 293 + }, + "id": "80dc61bb-929c-4fbb-b2cb-20c5d31bc65c", + "outputId": "d09dab34-7756-4c58-8731-5683b3ca7044" + }, "outputs": [ { "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"table\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"id\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"a91b3506-39a2-4b19-8409-08333d83a1c6\",\n \"1694d5a5-7ece-40b8-8022-dc3fa9aaa05a\",\n \"fcc532b9-347b-4e36-8ae8-5a2a726bf574\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Made Possible by Hosts, Strangers, AirCover, Categories, and OMG marketing campaigns and launches, a $67.9 million increase in our search engine marketing and advertising\\nspend, a $25.1 million increase in payroll-related expenses due to growth in headcount and increase in compensation costs, a $22.0 million increase in third-party service provider\\nexpenses, and a $11.1 million increase in coupon expense in line with increase in revenue and launch of AirCover for guests, partially offset by a decrease of $22.9 million related to\\nthe changes in the fair value of contingent consideration related to a 2019 acquisition.\\nGeneral and Administrative\\n2021 2022 % Change\\n(in millions, except percentages)\\nGeneral and administrative $ 836 $ 950 14 %\\nPercentage of revenue 14 % 11 %\\nGeneral and administrative expense increased $114.0 million, or 14%, in 2022 compared to 2021, primarily due to an increase in other business and operational taxes of $41.3\",\n \"Our success depends significantly on existing guests continuing to book and attracting new guests to book on our platform. Our ability to attract and retain guests could be materially\\nadversely affected by a number of factors discussed elsewhere in these \\u201cRisk Factors,\\u201d including:\\n\\u2022 events beyond our control such as the ongoing COVID-19 pandemic, other pandemics and health concerns, restrictions on travel, immigration, trade disputes, economic\\ndownturns, and the impact of climate change on travel including the availability of preferred destinations and the increase in the frequency and severity of weather-relatedevents, including fires, floods, droughts, extreme temperatures and ambient temperature increases, severe weather and other natural disasters, and the impact of other\\nclimate change on seasonal destinations;\\n\\u2022 political, social, or economic instability;\",\n \"\\u2022 Hosts failing to meet guests\\u2019 expectations, including increased expectations for cleanliness in light of the COVID-19 pandemic;\\u2022 increased competition and use of our competitors\\u2019 platforms and services;\\n\\u2022 Hosts failing to provide differentiated, high-quality, and an adequate supply of stays or experiences at competitive prices;\\n\\u2022 guests not receiving timely and adequate community support from us;\\n\\u2022 our failure to provide new or enhanced offerings, tiers, or features that guests value;\\n\\u2022 declines or inefficiencies in our marketing efforts;\\u2022 negative associations with, or reduced awareness of, our brand;\\n\\u2022 actual or perceived discrimination by Hosts in deciding whether to accept a requested reservation;\\n\\u2022 negative perceptions of the trust and safety on our platform; and\\n\\u2022 macroeconomic and other conditions outside of our control affecting travel and hospitality industries generally.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"metadata\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_relevance_score\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 5,\n \"samples\": [\n 0.9790357351303101,\n 0.5007786750793457,\n 0.961605966091156\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe" + }, "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], "text/plain": [ + " vector \\\n", + "0 [0.0034929817, -0.024774546, 0.012623285, -0.0... \n", + "1 [-0.0042489874, -0.005382498, 0.007190078, -0.... \n", + "2 [0.0076079983, -0.013340506, 0.018701892, -0.0... \n", + "3 [-0.008694107, -0.01993283, 0.014201017, -0.02... \n", + "4 [0.005813433, -0.028278675, 0.018041687, -0.02... \n", + "\n", + " id \\\n", + "0 18d4a926-99d9-447f-8b57-264d7a148bd7 \n", + "1 a91b3506-39a2-4b19-8409-08333d83a1c6 \n", + "2 fcc532b9-347b-4e36-8ae8-5a2a726bf574 \n", + "3 72b844e2-cc93-4495-bb67-c2c1a1fd6532 \n", + "4 1694d5a5-7ece-40b8-8022-dc3fa9aaa05a \n", + "\n", " text \\\n", "0 Increased operating expenses, decreased revenu... \n", "1 Made Possible by Hosts, Strangers, AirCover, C... \n", - "2 Table of Contents\\nAirbnb, Inc.\\nConsolidated ... \n", - "3 Our success depends significantly on existing ... \n", - "4 In addition, the number of listings on Airbnb ... \n", + "2 • Hosts failing to meet guests’ expectations, ... \n", + "3 Table of Contents\\nAirbnb, Inc.\\nConsolidated ... \n", + "4 Our success depends significantly on existing ... \n", "\n", - " vector _relevance_score \n", - "0 [0.0034929817, -0.024774546, 0.012623285, -0.0... 0.985328 \n", - "1 [-0.0042489874, -0.005382498, 0.007190078, -0.... 0.979036 \n", - "2 [-0.008569201, -0.019810658, 0.014144964, -0.0... 0.696578 \n", - "3 [0.0027109187, -0.028220002, 0.022864284, -0.0... 0.539923 \n", - "4 [0.0068983347, -0.0147690065, 0.042441186, -0.... 0.460713 " + " metadata _relevance_score \n", + "0 {'page': 18, 'source': 'https://d18rn0p25nwr6d... 0.985328 \n", + "1 {'page': 62, 'source': 'https://d18rn0p25nwr6d... 0.979036 \n", + "2 {'page': 11, 'source': 'https://d18rn0p25nwr6d... 0.961606 \n", + "3 {'page': 72, 'source': 'https://d18rn0p25nwr6d... 0.696578 \n", + "4 {'page': 11, 'source': 'https://d18rn0p25nwr6d... 0.500779 " ] }, - "execution_count": 26, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "table.search(query, query_type=\"hybrid\").limit(5).rerank(reranker=reranker).to_pandas()" + "table.search(query_type=\"hybrid\").vector(query).text(str_query).limit(5).rerank(reranker).to_pandas()" ] }, { "cell_type": "markdown", "id": "41147a46-7ef8-4266-9cec-08a992697de2", - "metadata": {}, + "metadata": { + "id": "41147a46-7ef8-4266-9cec-08a992697de2" + }, "source": [ "### ColBERT Reranker\n", "Colber Reranker is powered by ColBERT model. It runs locally using the huggingface implementation.\n", @@ -646,65 +1174,246 @@ }, { "cell_type": "code", - "execution_count": 29, - "id": "91b06b43-c971-4177-b62f-f941bbbc2ef4", - "metadata": {}, - "outputs": [], - "source": [ - "from lancedb.rerankers import ColbertReranker\n", - "\n", - "reranker = ColbertReranker()\n", - "docs = table.search(query, query_type=\"hybrid\").limit(5).rerank(reranker=reranker).to_pandas()[\"text\"].to_list()" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "e42c46bd-7cdd-4d31-9dbb-ddd1bdf979fa", - "metadata": {}, + "execution_count": 48, + "id": "zsV14JRXB0Xs", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zsV14JRXB0Xs", + "outputId": "4900098d-0dd9-4a5b-9bec-14bad0a81f23" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting rerankers\n", + " Downloading rerankers-0.6.0-py3-none-any.whl.metadata (28 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from rerankers) (2.9.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from rerankers) (4.66.6)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->rerankers) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.23.4 in /usr/local/lib/python3.10/dist-packages (from pydantic->rerankers) (2.23.4)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic->rerankers) (4.12.2)\n", + "Downloading rerankers-0.6.0-py3-none-any.whl (41 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/41.1 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.1/41.1 kB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: rerankers\n", + "Successfully installed rerankers-0.6.0\n" + ] + } + ], + "source": [ + "!pip install rerankers" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "91b06b43-c971-4177-b62f-f941bbbc2ef4", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 334, + "referenced_widgets": [ + "d7770b21d69c4c45b6779a0a79a8d9c2", + "054a0b2d45914048a8ec508d9803c56c", + "add7e408b1734ea9a1d046ce4c39ef9e", + "47d9fc2c4dc44cc8a4d0f4a75f942c8a", + "a503998424394f34ba6f7c112f1c6efa", + "55c408272dd2489a99c831ff20ec9340", + "045197c90d5a4ccf8c0221a4020235da", + "3de12712877c483e8bb984d6eb004847", + "b26c3c7727204c22bd53e0111d221b86", + "fa2e6681eb8a41f1ba4cb22d46e1b7bc", + "c1b1423adfe140ec992dcd1a4ad36b1b", + "55a4200d05054bb295dc66d24acb10e8", + "3b396c309a054c1b8d16dc3ee3a97483", + "56ca1e56bc534e0db28f7e353e58b994", + "75a5a250d8dd408295e2897c22486091", + "fb8ffbf0ab8d4f43a09ee3a11ccdb950", + "1951dc337f584cafac599de1b73288ad", + "9ecf864eed024658bd5201d04d462564", + "7a532995568348b7b608844198848c9f", + "44b7024056de482a858b87ea8c2188f1", + "9acf476e0a7c4df59abd7ef2972775bf", + "c18d77fb1b8541eb937bcf4308992645", + "880662def20c41d1a419774b19ca259d", + "fc696ae381a54f6780f676981716e0f9", + "61f71d9f955c460283524a992d0492e1", + "7b5c08fc36f24bda9c44a89595bce448", + "d4c9ee37d58440b8a69207c90df95736", + "5f2fdccfc6f0445dbd08d92b389d96fc", + "7b5540ac6b4c4fd783e37765f4c51d77", + "5a4c3f55d98d4a3e97bc3dfef3a55dcf", + "44ba4ebbc71d4226a22ec4fc23bc8768", + "24d18127b4404cddb12b4a611bd20be8", + "ceb13b8e32774db183607d1424b9474f", + "b1b529b66911404792162c32b26245a1", + "63473a0152974d22aaa9e9396ef3e728", + "5defb1cd326f4b709d465e2b48a9edef", + "cb2942f98e354d788ae32d01fc59f5d5", + "b4a3413fddb84b62878a66241dbd4efb", + "6fb0e36f0283419f85a2b842b5a2b921", + "b5d6eb4b788946a8bf02c7ddb868eb32", + "7f138f0139294632bb50d446c164c967", + "cde7e23ade984cae9271d5db2a34a197", + "0851ea7f08724be4aa539fc91d66c642", + "a3d614e482994e28bad9644a6787b8db", + "d32c13984f204801860c6ac4563061bb", + "b49348ffe6ad4c5988dbc951f0bd22fd", + "3176ffcda7554b989b0cd23ad340bd92", + "6879ceca561f49e487cf05a3e7920f60", + "df82d77a80c14b9382c8674447447327", + "1cce1e346ff34714a489da3df078a703", + "e0f6ca6fe4e14037a3d513fbce236449", + "5d93708be5a94059b98de774aa5daa71", + "cd3763632f854fe4ada2952ca09245fb", + "69b7322fdcbe49f8a667707e3e0a3249", + "317ed2e5ae444ac5ba4fc07d360644b0", + "a80141d12cb94a2ca9564dd32af3b261", + "622bc2e5edee47c494265d35d0287d39", + "4299ffb4264c46b281ec1dda63a67e5c", + "77d0016687644feabf563a66eb40da85", + "47f0601a33c24f06b3bb10c6cd8dd9b3", + "dfdb3742e4174ab7b67dd83a5ae99711", + "a900bdccbb5141699ab7954a83a06d18", + "09ea0032eddc47bb84bc7d7f3cf7a2d6", + "7f9eacfcf78b42aa91fbc2641050c91c", + "acb2518611a24967a85afb0b60c6e9f5", + "3c728cf1d7f842529116636c1e1c929a" + ] + }, + "id": "91b06b43-c971-4177-b62f-f941bbbc2ef4", + "outputId": "ad72e836-4832-4d25-f64c-e439ef9f37d1" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading ColBERTRanker model colbert-ir/colbertv2.0 (this message can be suppressed by setting verbose=0)\n", + "No device set\n", + "Using device cpu\n", + "No dtype set\n", + "Using dtype torch.float32\n", + "Loading model colbert-ir/colbertv2.0, this might take a while...\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d7770b21d69c4c45b6779a0a79a8d9c2", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/405 [00:00