Skip to content

Commit

Permalink
fixup data
Browse files Browse the repository at this point in the history
  • Loading branch information
thisiszy committed Oct 2, 2024
1 parent b684bbf commit 841da59
Showing 1 changed file with 70 additions and 52 deletions.
122 changes: 70 additions & 52 deletions docs/source/main_page/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,7 @@ <h3 class="text-subtitle">IDM-Single (Accuracy %)</h3>
<th><div class="sticky-header-content">Mind2Web</div></th>
<th><div class="sticky-header-content">AITW</div></th>
<th><div class="sticky-header-content">VWA</div></th>
<th><div class="sticky-header-content">AgentStudio</div></th>
<th><div class="sticky-header-content">Total</div></th>
<th><div class="sticky-header-content">Date</div></th>
<!-- <th><div class="sticky-header-content">Logs</div></th> -->
Expand All @@ -645,7 +646,8 @@ <h3 class="text-subtitle">IDM-Single (Accuracy %)</h3>
<td><p class="number">73.0</p></td>
<td><p class="number">56.0</p></td>
<td><p class="number">50.0</p></td>
<td><p class="number">59.7</p></td>
<td><p class="number">72.0</p></td>
<td><p class="number">61.4</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -656,7 +658,8 @@ <h3 class="text-subtitle">IDM-Single (Accuracy %)</h3>
<td><p class="number">70.0</p></td>
<td><p class="number">56.0</p></td>
<td><p class="number">45.0</p></td>
<td><p class="number">57.0</p></td>
<td><p class="number">78.0</p></td>
<td><p class="number">60.0</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -667,7 +670,8 @@ <h3 class="text-subtitle">IDM-Single (Accuracy %)</h3>
<td><p class="number">62.0</p></td>
<td><p class="number">51.0</p></td>
<td><p class="number">46.0</p></td>
<td><p class="number">53.0</p></td>
<td><p class="number">48.0</p></td>
<td><p class="number">52.3</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -678,7 +682,8 @@ <h3 class="text-subtitle">IDM-Single (Accuracy %)</h3>
<td><p class="number">65.0</p></td>
<td><p class="number">34.0</p></td>
<td><p class="number">31.0</p></td>
<td><p class="number">43.3</p></td>
<td><p class="number">60.0</p></td>
<td><p class="number">45.7</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -689,7 +694,8 @@ <h3 class="text-subtitle">IDM-Single (Accuracy %)</h3>
<td><p class="number">37.0</p></td>
<td><p class="number">20.0</p></td>
<td><p class="number">5.0</p></td>
<td><p class="number">20.7</p></td>
<td><p class="number">20.0</p></td>
<td><p class="number">20.6</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -709,6 +715,7 @@ <h3 class="text-subtitle">IDM-Multiple (Accuracy %)</h3>
<th><div class="sticky-header-content">Mind2Web</div></th>
<th><div class="sticky-header-content">AITW</div></th>
<th><div class="sticky-header-content">VWA</div></th>
<th><div class="sticky-header-content">AgentStudio</div></th>
<th><div class="sticky-header-content">Total</div></th>
<th><div class="sticky-header-content">Date</div></th>
<!-- <th><div class="sticky-header-content">Logs</div></th> -->
Expand All @@ -720,7 +727,8 @@ <h3 class="text-subtitle">IDM-Multiple (Accuracy %)</h3>
<td><p class="number">18.0</p></td>
<td><p class="number">8.0</p></td>
<td><p class="number">7.0</p></td>
<td><p class="number">11.0</p></td>
<td><p class="number">22.2</p></td>
<td><p class="number">12.5</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -731,7 +739,8 @@ <h3 class="text-subtitle">IDM-Multiple (Accuracy %)</h3>
<td><p class="number">13.0</p></td>
<td><p class="number">8.0</p></td>
<td><p class="number">2.0</p></td>
<td><p class="number">7.7</p></td>
<td><p class="number">20.0</p></td>
<td><p class="number">9.3</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -742,7 +751,8 @@ <h3 class="text-subtitle">IDM-Multiple (Accuracy %)</h3>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><p class="number">1.0</p></td>
<td><p class="number">0.3</p></td>
<td><p class="number">2.2</p></td>
<td><p class="number">0.6</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -754,6 +764,7 @@ <h3 class="text-subtitle">IDM-Multiple (Accuracy %)</h3>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -765,6 +776,7 @@ <h3 class="text-subtitle">IDM-Multiple (Accuracy %)</h3>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><p class="number">0.0</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -784,6 +796,7 @@ <h3 class="text-subtitle">IDM-Multiple (Edit Distance)</h3>
<th><div class="sticky-header-content">Mind2Web</div></th>
<th><div class="sticky-header-content">AITW</div></th>
<th><div class="sticky-header-content">VWA</div></th>
<th><div class="sticky-header-content">AgentStudio</div></th>
<th><div class="sticky-header-content">Total</div></th>
<th><div class="sticky-header-content">Date</div></th>
<!-- <th><div class="sticky-header-content">Logs</div></th> -->
Expand All @@ -795,6 +808,7 @@ <h3 class="text-subtitle">IDM-Multiple (Edit Distance)</h3>
<td><p class="number">2.0</p></td>
<td><p class="number">2.1</p></td>
<td><p class="number">2.9</p></td>
<td><p class="number">1.6</p></td>
<td><p class="number">2.3</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
Expand All @@ -806,7 +820,8 @@ <h3 class="text-subtitle">IDM-Multiple (Edit Distance)</h3>
<td><p class="number">2.1</p></td>
<td><p class="number">2.2</p></td>
<td><p class="number">3.5</p></td>
<td><p class="number">2.6</p></td>
<td><p class="number">2.0</p></td>
<td><p class="number">2.5</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -817,7 +832,8 @@ <h3 class="text-subtitle">IDM-Multiple (Edit Distance)</h3>
<td><p class="number">6.0</p></td>
<td><p class="number">4.4</p></td>
<td><p class="number">7.0</p></td>
<td><p class="number">5.8</p></td>
<td><p class="number">3.8</p></td>
<td><p class="number">5.5</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -828,7 +844,8 @@ <h3 class="text-subtitle">IDM-Multiple (Edit Distance)</h3>
<td><p class="number">5.1</p></td>
<td><p class="number">15.4</p></td>
<td><p class="number">5.8</p></td>
<td><p class="number">8.8</p></td>
<td><p class="number">6.3</p></td>
<td><p class="number">8.4</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -839,7 +856,8 @@ <h3 class="text-subtitle">IDM-Multiple (Edit Distance)</h3>
<td><p class="number">294.5</p></td>
<td><p class="number">7.2</p></td>
<td><p class="number">7.2</p></td>
<td><p class="number">103.0</p></td>
<td><p class="number">7.8</p></td>
<td><p class="number">90.6</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand Down Expand Up @@ -884,10 +902,10 @@ <h3 class="text-subtitle">With Observation-Action Pairs (Accuracy %)</h3>
<td>
<p class="model-type">gemini-1.5-pro-001</p>
</td>
<td><p class="number">73.5</p></td>
<td><p class="number">82.0</p></td>
<td><p class="number">62.0</p></td>
<td><p class="number">71.4</p></td>
<td><p class="number">72.5</p></td>
<td><p class="number">84.2</p></td>
<td><p class="number">52.5</p></td>
<td><p class="number">69.7</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -897,10 +915,10 @@ <h3 class="text-subtitle">With Observation-Action Pairs (Accuracy %)</h3>
<td>
<p class="model-type">gemini-1.5-flash-001</p>
</td>
<td><p class="number">68.5</p></td>
<td><p class="number">82.0</p></td>
<td><p class="number">65.0</p></td>
<td><p class="number">69.4</p></td>
<td><p class="number">65.6</p></td>
<td><p class="number">85.2</p></td>
<td><p class="number">57.8</p></td>
<td><p class="number">67.3</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -910,10 +928,10 @@ <h3 class="text-subtitle">With Observation-Action Pairs (Accuracy %)</h3>
<td>
<p class="model-type">claude-3-5-sonnet-20240620</p>
</td>
<td><p class="number">70.0</p></td>
<td><p class="number">92.0</p></td>
<td><p class="number">55.0</p></td>
<td><p class="number">68.9</p></td>
<td><p class="number">68.4</p></td>
<td><p class="number">92.9</p></td>
<td><p class="number">36.6</p></td>
<td><p class="number">65.6</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -923,10 +941,10 @@ <h3 class="text-subtitle">With Observation-Action Pairs (Accuracy %)</h3>
<td>
<p class="model-type">gpt-4o-2024-05-13</p>
</td>
<td><p class="number">68.5</p></td>
<td><p class="number">90.0</p></td>
<td><p class="number">58.0</p></td>
<td><p class="number">68.6</p></td>
<td><p class="number">68.0</p></td>
<td><p class="number">91.5</p></td>
<td><p class="number">41.7</p></td>
<td><p class="number">66.5</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -936,10 +954,10 @@ <h3 class="text-subtitle">With Observation-Action Pairs (Accuracy %)</h3>
<td>
<p class="model-type">Qwen-VL-Chat</p>
</td>
<td><p class="number">52.0</p></td>
<td><p class="number">38.0</p></td>
<td><p class="number">48.0</p></td>
<td><p class="number">48.9</p></td>
<td><p class="number">55.6</p></td>
<td><p class="number">55.1</p></td>
<td><p class="number">64.4</p></td>
<td><p class="number">58.5</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand Down Expand Up @@ -969,10 +987,10 @@ <h3 class="text-subtitle">With Observations Only (Accuracy %)</h3>
<td>
<p class="model-type">gemini-1.5-pro-001</p>
</td>
<td><p class="number">65.0</p></td>
<td><p class="number">86.0</p></td>
<td><p class="number">59.0</p></td>
<td><p class="number">66.3</p></td>
<td><p class="number">61.1</p></td>
<td><p class="number">88.1</p></td>
<td><p class="number">48.1</p></td>
<td><p class="number">62.9</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -982,10 +1000,10 @@ <h3 class="text-subtitle">With Observations Only (Accuracy %)</h3>
<td>
<p class="model-type">gemini-1.5-flash-001</p>
</td>
<td><p class="number">67.0</p></td>
<td><p class="number">72.0</p></td>
<td><p class="number">62.0</p></td>
<td><p class="number">66.3</p></td>
<td><p class="number">64.1</p></td>
<td><p class="number">75.0</p></td>
<td><p class="number">52.5</p></td>
<td><p class="number">63.1</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -995,10 +1013,10 @@ <h3 class="text-subtitle">With Observations Only (Accuracy %)</h3>
<td>
<p class="model-type">claude-3-5-sonnet-20240620</p>
</td>
<td><p class="number">66.0</p></td>
<td><p class="number">86.0</p></td>
<td><p class="number">58.0</p></td>
<td><p class="number">66.6</p></td>
<td><p class="number">64.6</p></td>
<td><p class="number">87.3</p></td>
<td><p class="number">47.5</p></td>
<td><p class="number">64.2</p></td>
<td><span class="label-date">2024-08-17</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -1008,10 +1026,10 @@ <h3 class="text-subtitle">With Observations Only (Accuracy %)</h3>
<td>
<p class="model-type">gpt-4o-2024-05-13</p>
</td>
<td><p class="number">63.5</p></td>
<td><p class="number">84.0</p></td>
<td><p class="number">57.0</p></td>
<td><p class="number">64.6</p></td>
<td><p class="number">61.4</p></td>
<td><p class="number">85.7</p></td>
<td><p class="number">39.4</p></td>
<td><p class="number">60.8</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand All @@ -1021,10 +1039,10 @@ <h3 class="text-subtitle">With Observations Only (Accuracy %)</h3>
<td>
<p class="model-type">Qwen-VL-Chat</p>
</td>
<td><p class="number">54.5</p></td>
<td><p class="number">58.0</p></td>
<td><p class="number">52.0</p></td>
<td><p class="number">54.3</p></td>
<td><p class="number">63.2</p></td>
<td><p class="number">73.4</p></td>
<td><p class="number">67.6</p></td>
<td><p class="number">66.2</p></td>
<td><span class="label-date">2024-06-06</span></td>
<!-- <td><p style="text-align: center;">
<a href="">🔗</a>
Expand Down

0 comments on commit 841da59

Please sign in to comment.