update build

ucl-dark · Sep 29, 2024 · dddcd03 · dddcd03
1 parent db55d48
commit dddcd03
Show file tree

Hide file tree

Showing 6 changed files with 345 additions and 6 deletions.
diff --git a/index.html b/index.html
@@ -185,6 +185,9 @@ <h2 class="text-center">News</h2>
               <div class="col-12 card-text">
                 <ul class="news-list" style="font-family: 'Exo'">
 
+                        <li style="margin-bottom: 10px"><span class="news-item-date" style="font-weight: bold; font-family: 'Lato'; display: inline-block; width: 96px;">
+                            25/09/2024:</span>  <p><a href="https://arxiv.org/abs/2402.16822">Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts</a> has been accepted to NeurIPS 2024.</p></li>
+
                         <li style="margin-bottom: 10px"><span class="news-item-date" style="font-weight: bold; font-family: 'Lato'; display: inline-block; width: 96px;">
                             19/02/2024:</span>  <p><a href="https://arxiv.org/abs/2306.00867">IQL-TD-MPC: Implicit Q-Learning for Hierarchical Model Predictive Control</a> has been accepted to ICRA 2024 (oral).</p></li>
 
@@ -206,9 +209,6 @@ <h2 class="text-center">News</h2>
                         <li style="margin-bottom: 10px"><span class="news-item-date" style="font-weight: bold; font-family: 'Lato'; display: inline-block; width: 96px;">
                             24/01/2023:</span>  <p><a href="https://arxiv.org/abs/2303.03376">MAESTRO: Open-Ended Environment Design for Multi-Agent Reinforcement Learning</a> has been accepted to ICLR 2023.</p></li>
 
-                        <li style="margin-bottom: 10px"><span class="news-item-date" style="font-weight: bold; font-family: 'Lato'; display: inline-block; width: 96px;">
-                            24/01/2023:</span>  <p><a href="https://arxiv.org/abs/2208.10291">Efficient Planning in a Compact Latent Action Space</a> has been accepted to ICLR 2023.</p></li>
-
                 </ul>
               </div>
           </div>

diff --git a/papers.json b/papers.json
diff --git a/poster_samvelyan2024rainbow.html b/poster_samvelyan2024rainbow.html
@@ -0,0 +1,339 @@
+
+
+
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+
+
+
+    <!-- Required meta tags -->
+    <meta charset="utf-8" />
+    <meta
+      name="viewport"
+      content="width=device-width, initial-scale=1, shrink-to-fit=no"
+    />
+
+    <link rel="stylesheet" href="static/css/main.css" type="text/css"/>
+    <link rel="stylesheet" href="static/css/lazy_load.css" />
+    <link rel="stylesheet" href="static/css/typeahead.css" />
+    <link rel="icon" href="static/images/logo.png">
+
+    <!-- External Javascript libs  -->
+    <script src="https://cdn.jsdelivr.net/npm/d3@5/dist/d3.min.js"></script>
+
+    <script src="https://cdn.jsdelivr.net/npm/handlebars@4.7.3/dist/handlebars.min.js" integrity="sha256-/PJBs6QWvXijOFIX04kZpLb6ZtSQckdOIavLWKKOgXU=" crossorigin="anonymous"></script>
+
+    <script src="https://cdn.jsdelivr.net/npm/jquery@3.4.1/dist/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
+    <script src="https://kit.fontawesome.com/c59ce62110.js" crossorigin="anonymous"></script>
+
+    <script
+      src="https://cdn.jsdelivr.net/npm/popper.js@1.16.0/dist/umd/popper.min.js"
+      integrity="sha384-Q6E9RHvbIyZFJoft+2mJbHaEWldlvI9IOYy5n3zV9zzTtmI3UksdQRVvoxMfooAo"
+      crossorigin="anonymous"
+    ></script>
+
+
+    <script src="https://cdn.jsdelivr.net/npm/bootstrap@4.4.1/dist/js/bootstrap.min.js" integrity="sha256-WqU1JavFxSAMcLP2WIOI+GB2zWmShMI82mTpLDcqFUg=" crossorigin="anonymous"></script>
+
+    <script src="https://cdn.jsdelivr.net/npm/moment@2.24.0/min/moment.min.js" integrity="sha256-4iQZ6BVL4qNKlQ27TExEhBN1HFPvAvAMbFavKKosSWQ=" crossorigin="anonymous"></script>
+
+    <script src="https://cdn.jsdelivr.net/npm/moment-timezone@0.5.28/builds/moment-timezone-with-data.min.js" integrity="sha256-IWYg4uIC8/erItNXYvLtyYHioRi2zT1TFva8qaAU/ww=" crossorigin="anonymous"></script>
+
+
+    <!-- Library libs -->
+    <script src="static/js/typeahead.bundle.js"></script>
+
+
+
+    <!-- External CSS -->
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@4.4.1/dist/css/bootstrap.min.css" integrity="sha256-YLGeXaapI0/5IgZopewRJcFXomhRMlYYjugPLSyNjTY=" crossorigin="anonymous">
+
+    <!-- External Fonts (no google for china) -->
+    <link
+      href="static/css/Lato.css"
+      rel="stylesheet"
+    />
+    <link href="static/css/Exo.css" rel="stylesheet" />
+    <link
+      href="static/css/Cuprum.css"
+      rel="stylesheet"
+    />
+
+    <title>UCL DARK Lab: Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts</title>
+
+<meta name="citation_title" content="Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts" />
+
+<meta name="citation_author" content="Mikayel Samvelyan" />
+
+<meta name="citation_author" content="Sharath Chandra Raparthy" />
+
+<meta name="citation_author" content="Andrei Lupu" />
+
+<meta name="citation_author" content="Eric Hambro" />
+
+<meta name="citation_author" content="Aram H. Markosyan" />
+
+<meta name="citation_author" content="Manish Bhatt" />
+
+<meta name="citation_author" content="Yuning Mao" />
+
+<meta name="citation_author" content="Minqi Jiang" />
+
+<meta name="citation_author" content="Jack Parker-Holder" />
+
+<meta name="citation_author" content="Jakob Foerster" />
+
+<meta name="citation_author" content="Tim Rocktäschel" />
+
+<meta name="citation_author" content="Roberta Raileanu" />
+
+<meta name="citation_publication_date" content="None" />
+<meta name="citation_conference_title" content="UCL Deciding, Acting, and Reasoning with Knowledge (DARK) Lab" />
+<meta name="citation_inbook_title" content="None" />
+<meta name="citation_abstract" content="As large language models (LLMs) become increasingly prevalent across many real-world applications, understanding and enhancing their robustness to adversarial attacks is of paramount importance. Existing methods for identifying adversarial prompts tend to focus on specific domains, lack diversity, or require extensive human annotations. To address these limitations, we present Rainbow Teaming, a novel black-box approach for producing a diverse collection of adversarial prompts. Rainbow Teaming casts adversarial prompt generation as a quality-diversity problem, and uses open-ended search to generate prompts that are both effective and diverse. Focusing on the safety domain, we use Rainbow Teaming to target various state-of-the-art LLMs, including the Llama 2 and Llama 3 models. Our approach reveals hundreds of effective adversarial prompts, with an attack success rate exceeding 90% across all tested models. Furthermore, we demonstrate that prompts generated by Rainbow Teaming are highly transferable and that fine-tuning models with synthetic data generated by our method significantly enhances their safety without sacrificing general performance or helpfulness. We additionally explore the versatility of Rainbow Teaming by applying it to question answering and cybersecurity, showcasing its potential to drive robust open-ended self-improvement in a wide range of applications." />
+
+<meta name="citation_keywords" content="open-endednes" />
+
+<meta name="citation_keywords" content="large language models" />
+
+<meta name="citation_keywords" content="safety" />
+
+<meta name="citation_keywords" content="diversity" />
+
+<meta name="citation_pdf_url" content="https://arxiv.org/abs/2402.16822" />
+
+
+  </head>
+
+  <body>
+    <!-- NAV -->
+
+    <nav
+      class="navbar sticky-top navbar-expand-lg navbar-light bg-light mr-auto"
+      id="main-nav"
+    >
+        <div class="container">
+        <!--
+        <a class="navbar-brand" href="index.html">
+          <img
+             class="logo" src="static/images/logo.png"
+             height="auto"
+             width="130px"
+          />
+        </a>
+        -->
+
+        <button
+          class="navbar-toggler"
+          type="button"
+          data-toggle="collapse"
+          data-target="#navbarNav"
+          aria-controls="navbarNav"
+          aria-expanded="false"
+          aria-label="Toggle navigation"
+        >
+          <span class="navbar-toggler-icon"></span>
+        </button>
+        <div
+          class="collapse navbar-collapse text-right flex-grow-1"
+          id="navbarNav"
+        >
+          <ul class="navbar-nav ml-auto">
+
+            <li class="nav-item ">
+              <a class="nav-link" href="index.html">Home</a>
+            </li>
+
+            <li class="nav-item ">
+              <a class="nav-link" href="papers.html">Publications</a>
+            </li>
+
+            <li class="nav-item ">
+              <a class="nav-link" href="speakers.html">Speakers</a>
+            </li>
+
+            <li class="nav-item ">
+              <a class="nav-link" href="https://blog.ucldark.com/">Blog</a>
+            </li>
+
+          </ul>
+        </div>
+      </div>
+    </nav>
+
+
+
+    <!-- User Overrides -->
+
+
+    <div class="container">
+      <!-- Tabs -->
+      <div class="tabs">
+
+      </div>
+      <!-- Content -->
+      <div class="content">
+
+
+<!-- Title -->
+<div class="pp-card m-3" style="">
+  <div class="card-header">
+    <h2 class="card-title main-title text-center" style="">
+      Rainbow Teaming: Open-Ended Generation of Diverse Adversarial Prompts
+    </h2>
+    <h3 class="card-subtitle mb-2 text-muted text-center">
+
+      <a href="papers.html?filter=authors&search=Mikayel Samvelyan" class="text-muted"
+        >Mikayel Samvelyan</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Sharath Chandra Raparthy" class="text-muted"
+        >Sharath Chandra Raparthy</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Andrei Lupu" class="text-muted"
+        >Andrei Lupu</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Eric Hambro" class="text-muted"
+        >Eric Hambro</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Aram H. Markosyan" class="text-muted"
+        >Aram H. Markosyan</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Manish Bhatt" class="text-muted"
+        >Manish Bhatt</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Yuning Mao" class="text-muted"
+        >Yuning Mao</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Minqi Jiang" class="text-muted"
+        >Minqi Jiang</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Jack Parker-Holder" class="text-muted"
+        >Jack Parker-Holder</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Jakob Foerster" class="text-muted"
+        >Jakob Foerster</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Tim Rocktäschel" class="text-muted"
+        >Tim Rocktäschel</a
+      >,
+
+      <a href="papers.html?filter=authors&search=Roberta Raileanu" class="text-muted"
+        >Roberta Raileanu</a
+      >
+
+    </h3>
+    <p class="card-text text-center">
+      <span class="">Keywords:</span>
+
+      <a
+        href="papers.html?filter=keywords&search=open-endednes"
+        class="text-secondary text-decoration-none"
+        >open-endednes</a
+      >,
+
+      <a
+        href="papers.html?filter=keywords&search=large language models"
+        class="text-secondary text-decoration-none"
+        >large language models</a
+      >,
+
+      <a
+        href="papers.html?filter=keywords&search=safety"
+        class="text-secondary text-decoration-none"
+        >safety</a
+      >,
+
+      <a
+        href="papers.html?filter=keywords&search=diversity"
+        class="text-secondary text-decoration-none"
+        >diversity</a
+      >
+
+    </p>
+    <div class="text-center p-3">
+
+      <a class="card-link" data-toggle="collapse" role="button" href="#details">
+        Abstract
+      </a>
+
+      <a class="card-link" target="_blank" href="https://arxiv.org/abs/2402.16822">
+        Paper
+      </a>
+
+    </div>
+  </div>
+</div>
+
+    <div id="details" class="pp-card m-3">
+      <div class="card-body">
+        <div class="card-text">
+          <div id="abstractExample">
+            <span class="font-weight-bold">Abstract:</span>
+            As large language models (LLMs) become increasingly prevalent across many real-world applications, understanding and enhancing their robustness to adversarial attacks is of paramount importance. Existing methods for identifying adversarial prompts tend to focus on specific domains, lack diversity, or require extensive human annotations. To address these limitations, we present Rainbow Teaming, a novel black-box approach for producing a diverse collection of adversarial prompts. Rainbow Teaming casts adversarial prompt generation as a quality-diversity problem, and uses open-ended search to generate prompts that are both effective and diverse. Focusing on the safety domain, we use Rainbow Teaming to target various state-of-the-art LLMs, including the Llama 2 and Llama 3 models. Our approach reveals hundreds of effective adversarial prompts, with an attack success rate exceeding 90% across all tested models. Furthermore, we demonstrate that prompts generated by Rainbow Teaming are highly transferable and that fine-tuning models with synthetic data generated by our method significantly enhances their safety without sacrificing general performance or helpfulness. We additionally explore the versatility of Rainbow Teaming by applying it to question answering and cybersecurity, showcasing its potential to drive robust open-ended self-improvement in a wide range of applications.
+          </div>
+        </div>
+        <p></p>
+      </div>
+    </div>
+
+
+
+      </div>
+    </div>
+
+
+
+    <!-- Google Analytics: asynchronous gtag.js loader followed by the inline
+         bootstrap snippet that queues the first two commands.
+         NOTE(review): both the loader URL and the config call carry the bare
+         id "UA-" with no property number after it; presumably a build-time
+         setting was left empty. Confirm before relying on analytics here. -->
+    <script
+      async
+      src="https://www.googletagmanager.com/gtag/js?id=UA-"
+    ></script>
+    <script>
+      // Reuse dataLayer if something already defined it; otherwise start an empty queue.
+      window.dataLayer = window.dataLayer || [];
+      // Pushes the call's `arguments` object (not a copied array) onto dataLayer.
+      function gtag() {
+        dataLayer.push(arguments);
+      }
+      gtag("js", new Date());
+      gtag("config", "UA-");
+    </script>
+
+    <!-- Footer -->
+    <footer class="footer bg-light p-4">
+      <div class="container">
+        <p class="float-right"><a href="#">Back to Top</a></p>
+        <p class="text-center">© 2020 UCL DARK Lab</p>
+      </div>
+    </footer>
+
+    <!-- Tab deep-linking: on load, show the tab whose link href equals the URL
+         fragment; afterwards, mirror each newly shown tab back into the
+         fragment without letting the page scroll position change. -->
+    <script>
+      $(document).ready(function () {
+        if (location.hash !== "") {
+          // Compare href values directly instead of splicing location.hash into
+          // a selector string: a fragment containing a quote or bracket would
+          // make jQuery throw and skip the listener registration below.
+          $("a")
+            .filter(function () {
+              return this.getAttribute("href") === location.hash;
+            })
+            .tab("show");
+        }
+
+        $("a[data-toggle='tab']").on("shown.bs.tab", function (e) {
+          var hash = $(e.target).attr("href");
+          // Only in-page targets ("#pane") are mirrored; a toggle without an
+          // href yields undefined and is ignored rather than throwing.
+          if (typeof hash === "string" && hash.charAt(0) === "#") {
+            // Changing the fragment can scroll the page to the target element,
+            // so capture the current offset and restore it afterwards.
+            var position = $(window).scrollTop();
+            location.replace(hash);
+            $(window).scrollTop(position);
+          }
+        });
+      });
+    </script>
+    <script src="static/js/lazy_load.js"></script>
+
+  </body>
+</html>