-
Notifications
You must be signed in to change notification settings - Fork 21
Expand file tree
/
Copy pathreport-interoperability.html
More file actions
579 lines (511 loc) · 35.8 KB
/
report-interoperability.html
File metadata and controls
579 lines (511 loc) · 35.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>WebAgents Community Group Report on Interoperability for Agents on the Web</title>
<script
src="https://www.w3.org/Tools/respec/respec-w3c"
class="remove"
defer
></script>
<script class="remove">
// All config options at https://respec.org/docs/
// ReSpec configuration for this report. ReSpec reads the global
// `respecConfig` object when the page loads.
var respecConfig = {
  // Publication metadata.
  specStatus: "CG-DRAFT",
  latestVersion: null,
  edDraftURI: "https://w3c-cg.github.io/webagents/TaskForces/Interoperability/Reports/report-interoperability.html",
  // NOTE(review): this still looks like the template placeholder — replace
  // with the actual editor(s) before publishing.
  editors: [{ name: "Your Name", url: "https://your-site.com" }],
  authors: [
    {
      name: "Jérémy Lemée",
      url: "https://www.alexandria.unisg.ch/entities/person/Jeremy_Lemee"
    }
  ],
  github: "https://github.com/w3c-cg/webagents/",
  shortName: "webagents-interop",
  xref: "web-platform",
  group: "webagents",
  // Local bibliography: each key is cited in the document body as [[KEY]].
  // Every entry should carry at least a `title` and a resolvable `href`.
  localBiblio: {
    ANTHROPIC24: {
      title: "Building Effective Agents",
      date: "2024",
      href: "https://www.anthropic.com/engineering/building-effective-agents",
      publisher: "Anthropic",
    },
    AUML: {
      authors: [
        "James J. Odell",
        "H. Van Dyke Parunak",
        "Bernhard Bauer",
      ],
      title: "Representing Agent Interaction Protocols in UML",
      date: "2002",
      href: "https://doi.org/10.1007/3-540-44564-1_8",
      publisher: "Springer",
    },
    TMLR23: {
      authors: [ "G. Mialon" ],
      etAl: true,
      title: "Augmented Language Models: A Survey",
      date: "2023",
      href: "https://arxiv.org/abs/2302.07842",
      publisher: "Transactions on Machine Learning Research (TMLR)",
    },
    FRANKLIN96: {
      authors: [
        "S. Franklin",
        "A. Graesser",
      ],
      title: "Is It an agent, or just a program?: A taxonomy for autonomous agents",
      date: "1996",
      href: "https://doi.org/10.1007/BFb0013570",
      publisher: "Springer",
    },
    JACAMO: {
      authors: [
        "Olivier Boissier",
        "Rafael H. Bordini",
        "Jomi F. Hübner",
        "Alessandro Ricci",
      ],
      title: "Multi-agent oriented programming: programming multi-agent systems using JaCaMo",
      date: "2020",
      href: "https://mitpress.mit.edu/9780262044578/",
      publisher: "MIT Press",
    },
    BSPL: {
      authors: [
        "Munindar P. Singh",
      ],
      title: "Information-driven interaction-oriented programming: BSPL, the blindingly simple protocol language",
      date: "2011",
      href: "https://dl.acm.org/doi/abs/10.5555/2031678.2031687",
      publisher: "IFAAMAS",
    },
    AGORA: {
      authors: [
        "Marro, Samuele", "La Malfa, Emanuele", "Wright, Jesse", "Li, Guohao", "Shadbolt, Nigel", "Wooldridge, Michael", "Torr, Philip"
      ],
      title: "A scalable communication protocol for networks of large language models",
      date: "2024",
      href: "https://arxiv.org/pdf/2410.11905",
    },
    COALA: {
      authors: [
        "Sumers, Theodore","Yao, Shunyu", "Narasimhan, Karthik", "Griffiths, Thomas"
      ],
      title: "Cognitive architectures for language agents",
      date: "2023",
      href: "https://openreview.net/pdf?id=1i6ZCvflQJ",
      publisher: "Transactions on Machine Learning Research"
    },
    TOOL: {
      authors: [
        "Wang, Zhiruo", "Cheng, Zhoujun", "Zhu, Hao", "Fried, Daniel", "Neubig, Graham"
      ],
      title: "What are tools anyway? a survey from the language model perspective",
      date: "2024",
      // Was the citation string "arXiv preprint arXiv:2403.15452", which is
      // not a URL and produced a broken link in the rendered references.
      href: "https://arxiv.org/abs/2403.15452",
    },
    ACP: {
      // A title is needed so the rendered reference has text to display;
      // the name matches how the protocol is introduced in the body.
      title: "Agent Communication Protocol (ACP)",
      href: "https://agentcommunicationprotocol.dev/introduction/welcome"
    }
  }
};
</script>
</head>
<body>
<section id="abstract">
<p>Advances in language agents that can follow instructions and use tools have renewed interest in autonomous agents and multi-agent systems. Like previous generations of agents, language agents are designed for specific tasks, highlighting the need for open networks of agents that complement each other's abilities to tackle more complex problems. New protocols are rapidly emerging to allow agents to discover and use tools, or to discover and interact with other agents. Some of these protocols build on Web standards to promote interoperability, but their alignments, misalignments, and overlaps are unclear. This report synthesizes the large body of research on autonomous agents and multi-agent systems (MAS) to define a conceptual model for understanding Web-based MAS. We use this conceptual model to classify existing technologies and frameworks, to identify relevant standards within the W3C, and to discover standardization gaps (if any).</p>
</section>
<section id="sotd">
<aside class="note" title="A Living Report">
<p>This report is a living document meant to evolve as the group's investigation develops. Versions of the report may be released as separate documents.</p>
</aside>
</section>
<section>
<h2>Introduction</h2>
</section>
<section data-dfn-for="Foo">
<h2>Terminology</h2>
<dl>
<dt><dfn id="dfn-agent">Agent</dfn></dt>
<dd>An entity <a href="#dfn-situated">situated</a> in an environment that perceives its environment and acts on it, over time, in pursuit of its goals. For a detailed discussion of agent definitions, see [[FRANKLIN96]].</dd>
<dt><dfn id="dfn-aip">Agent Interaction Protocol</dfn></dt>
<dd>A specification of communication among two or more <a href="#dfn-agent">agents</a> that states who can say what to whom and when — for example, as message sequence diagrams [[AUML]] or information flows [[BSPL]].</dd>
<dt><dfn id="dfn-artifact">Artifact</dfn> or <dfn id="dfn-tool">Tool</dfn></dt>
<dd>A <a href="https://www.w3.org/TR/webarch/#def-resource">resource</a> [[WEBARCH]] that can be shared and used by <a href="#dfn-agent">agents</a> to support their activities. In some <a href="#dfn-mas">multi-agent systems</a>, agents can construct artifacts to instrument their environments [[JACAMO]]. In the context of agentic AI, a tool is a functional interface to a program that a language model can use. A tool can enable an LLM to perceive or act in an environment or to perform computations [[TOOL]].</dd>
<dt><dfn id="dfn-augmented-llm">Augmented Language Model</dfn> or <dfn id="dfn-language-agent">Language Agent</dfn></dt>
<dd>A language model augmented with abilities such as reasoning, tool use, information retrieval, or storing context across interactions. Unlike an <a href="#dfn-agent">agent</a>, an augmented language model does not actively pursue goals and is not <a href="#dfn-situated">situated</a> in an environment. See also [[TMLR23]] and [[ANTHROPIC24]]. A language agent is an <a href="#dfn-agent">agent</a> that relies on a language model to interact with its environment. The language model can be used to process observations represented in natural or formal languages, generate the actions to perform, and make decisions [[COALA]]. These agents can be created using an <a href="#dfn-augmented-llm">augmented language model</a> as a building block [[ANTHROPIC24]].</dd>
<dt><dfn id="dfn-mas">Multi-Agent System (MAS)</dfn></dt>
<dd>A system composed of <a href="#dfn-agent">agents</a> that are situated in a shared environment and interact with one another to achieve individual or collective goals. Agents can work in collaboration, cooperation, and/or competition. A MAS can be either an open or a closed system. This report is primarily concerned with open MAS.</dd>
<dt><dfn id="dfn-situated">Situatedness</dfn></dt>
<dd>The ability of an <a href="#dfn-agent">agent</a> to interact with its environment directly through perception and action, and to respond in a timely fashion to sensory input.</dd>
<dt>[Term]</dt>
<dd>[To be added]</dd>
</dl>
</section>
<section>
<h2>Agents on the Web</h2>
<section data-dfn-for="Foo">
<h3>Visions of Agents on the Web</h3>
<!-- <aside class="issue">
<p>The vision of agents on the Web is almost as old as the Web itself. This section is to give a concise overview for contextualization. Relevant: WWW94 keynote, DARPA research programs (CoABS and DAML) and the Semantic Web, AgentCities and FIPA, and the recent Dagstuhl seminars.</p>
</aside> -->
<p>The vision of intelligent agents on the Web is almost as old as the Web itself: in a <a href="https://videos.cern.ch/record/2671957">keynote at WWW'94</a>, Sir Tim Berners-Lee was noting that documents on the Web describe real objects and relationships among them, and if the semantics of these objects are represented explicitly then machines can browse through and manipulate reality. This vision was published in 2001 as the Semantic Web [Berners-Lee et al., 2001] — and is now closer to its realization through the standardization of the <a href="https://www.w3.org/WoT/">Web of Things (WoT)</a> at the W3C and the IETF.</p>
<p>In the AI community, the vision of a world-wide open network of intelligent agents can be traced back to the late '90s. In 2002, the <a href="https://cordis.europa.eu/project/id/IST-2000-28385/">AgentCities</a> initiative was reporting a network of 41 agent platforms deployed in 21 countries [Willmott et al., 2002] — with up to 60 registered platforms reported in 2003 [Dale et al., 2003] and 160 platforms in 2005 [Bellifemine et al., 2005]. The network was based on the <a href="http://www.fipa.org/repository/standardspecs.html">standards produced by the Foundation for Intelligent Physical Agents (FIPA)</a>, but quickly faded after the mid-2000s as industry attention shifted to Web services. Another prominent initiative was the DARPA Control of Agent-Based Systems (CoABS) research program [TODO], which investigated the control, coordination, and management of large systems of autonomous software agents in military applications. Central to this program, CoABS Grid was the middleware integrating heterogeneous agent-based systems, object-based applications, and legacy systems using remote method invocation as a client-server style for network-based interaction.</p>
<p>The DARPA CoABS program demonstrated the use of agent technology in large-scale practical applications, but also raised a number of challenges, such as enabling software agents to dynamically identify and understand information sources [TODO]. To address these, DARPA launched the Agent Markup Language (DAML) research program, which built on top of existing Web standards and paved the way for the Web Ontology Language (OWL), Semantic Markup for Web Services (OWL-S), and other cornerstones of the Semantic Web. The DAML program thus advanced the original vision of the Web as an information space not only for people but also for intelligent agents, and promoted a shift from custom-built middleware for MAS — such as CoABS Grid or FIPA implementations — to offloading many of those responsibilities to the existing Web infrastructure. Web-based MAS received significant attention over the years, especially with the advent of service-oriented computing in the early 2000s [Singh and Huhns, 2006].</p>
<p>Recent years have brought renewed interest in Web-based MAS, as evidenced by the <a href="https://www.dagstuhl.de/21072">Dagstuhl Seminar 21072</a> (Feb. 2021) and <a href="https://dagstuhl.de/23081">Dagstuhl Seminar 23081</a> (Feb. 2023) on "Agents on the Web" that led to the creation of the <a href="https://www.w3.org/community/webagents/">W3C Autonomous Agents on the Web (WebAgents) Community Group</a>. One key development is the Web of Things (WoT) [TODO], which unlocks new practical use cases for agents on the Web — and implements several visionary ideas expressed in the motivating scenarios from the original Semantic Web paper [Berners-Lee et al., 2001]. Another key development is the recent progress in language agents that can follow instructions and use tools: just like previous generations of agents, language agents are designed for specific tasks, highlighting the need for open networks of agents that complement each other's abilities to tackle more complex problems. New protocols and frameworks are rapidly emerging to allow agents to discover and use tools, or to discover and interact with other agents — and many of these initiatives build on Web standards to promote interoperability (e.g., see the <a href="https://modelcontextprotocol.io/">Model Context Protocol</a>, <a href="https://google-a2a.github.io/A2A/">Agent2Agent Protocol</a>, <a href="https://agent-network-protocol.com/">Agent Network Protocol</a>, <a href="https://eclipse.dev/lmos/">Eclipse LMOS</a>).</p>
</section>
<section data-dfn-for="Foo">
<h3>State of Web-based Multi-Agent Systems</h3>
<aside class="issue">
<p>This section is to summarize the large body of related work. We provide a concise overview and defer the details to annexes and existing literature.</p>
</aside>
<aside class="issue">
<p>What follows is a first draft of a comparison table for existing works. This table will evolve with the report. This first draft has not received any feedback yet; it is only meant to start the discussion — and is bound to be incomplete and inaccurate:</p>
</aside>
<table class="index">
<tr>
<th></th>
<th>Relevant Concepts</th>
<th>Agent Interaction</th>
<th>Tool Use</th>
<th>Identifiers</th>
<th>Descriptions</th>
<th>Discovery Mechanisms</th>
<th>Arch. Style</th>
</tr>
<tr>
<td>MCP</td>
<td><a target="_blank" href="https://modelcontextprotocol.io/docs/concepts/tools">Tool</a>,</br>
<a target="_blank" href="https://modelcontextprotocol.io/docs/concepts/resources">Resource</a>,</br>
<a target="_blank" href="https://modelcontextprotocol.io/docs/concepts/prompts">Prompt</a></td>
<td>N/A</td>
<td>Function calling</td>
<td>Strings (Tools and Prompts),</br>URIs (Resources)</td>
<td><a target="_blank" href="https://modelcontextprotocol.io/docs/concepts/tools#tool-definition-structure">Tool definition</a>,</br>
<a target="_blank" href="https://modelcontextprotocol.io/docs/concepts/resources#resource-discovery">Resource descriptions</a>,</br>
<a target="_blank" href="https://modelcontextprotocol.io/docs/concepts/prompts#prompt-structure">Prompt definitions</a>,</br>(JSON)</td>
<td>Directories (via */list)</td>
<td>Client-Server with streaming RPC connectors (JSON-RPC 2.0, Streamable HTTP)</td>
</tr>
<tr>
<td>UTCP</td>
<td>Manual</td>
<td>N/A</td>
<td>Function calling</td>
<td>URIs (for manuals), strings (for tools described within a manual)</td>
<td>UTCP Manual</td>
<td>/utcp endpoint to expose a manual describing the tools available</td>
<td>REST (for discovering manuals), any (to interact with the tool)</td>
</tr>
<tr>
<td>NLWeb</td>
<td>Natural-language query endpoint</td>
<td>N/A</td>
<td>Function calling via MCP</td>
<td>URIs (Resources)</td>
<td>JSON with schema.org</td>
<td>N/A</td>
<td>Client-Server with streaming RPC connectors through MCP, REST API for human interaction, Web Syndication with RSS</td>
</tr>
<tr>
<td>A2A</td>
<td><a target="_blank" href="https://google.github.io/A2A/topics/key-concepts/#fundamental-communication-elements">Agent Card</a>,</br>
<a target="_blank" href="https://google.github.io/A2A/topics/key-concepts/#fundamental-communication-elements">Task</a></br>
<!-- <a target="_blank" href="https://google.github.io/A2A/topics/key-concepts/#fundamental-communication-elements">Message</a>,</br> -->
<!-- <a target="_blank" href="https://google.github.io/A2A/topics/key-concepts/#fundamental-communication-elements">Part</a>,</br> -->
<!-- <a target="_blank" href="https://google.github.io/A2A/topics/key-concepts/#fundamental-communication-elements">Artifact</a> -->
</td>
<td>Task invocation</td>
<td>N/A</td>
<td>Strings?</td>
<td><a target="_blank" href="https://google.github.io/A2A/specification/#5-agent-discovery-the-agent-card">Agent Card</a>,</br>
<a target="_blank" href="https://google.github.io/A2A/specification/#61-task-object">Task description</a>,</br>(JSON)</td>
<td>Well-known URIs,</br>Directories</td>
<td>Async. Client-Server with streaming RPC connectors and webhooks (JSON-RPC 2.0, HTTP+SSE)</td>
</tr>
<tr>
<td>Agora</td>
<td>Agent,</br><a target="_blank" href="https://agoraprotocol.org/docs/protocol/specification#8-protocol-documents-and-hashing">Protocol Document</a>, </br><a href="https://agoraprotocol.org/docs/protocol/specification#6-message-structure">Message</a></br>Communication Protocol</td>
<td>Communication protocols with protocol negotiation</td>
<td>N/A</td>
<td>N/A</td>
<td><a target="_blank" href="https://agoraprotocol.org/docs/protocol/specification#8-protocol-documents-and-hashing">Protocol Document</a>, </br><a href="https://agoraprotocol.org/docs/protocol/specification#6-message-structure">Message</a></td>
<td>N/A</td>
<td>Client-Server(HTTPS)</td>
</tr>
<tr>
<td>ANP</td>
<td>Agent,</br><a target="_blank" href="https://agent-network-protocol.com/specs/agent-description.html">Agent Description</a>,</br>Communication Protocol</td>
<td>Communication protocols with protocol negotiation</td>
<td>N/A</td>
<td>W3C DID with custom <a target="_blank" href="https://agent-network-protocol.com/specs/did-method.html">Web-based Agent DID Method</a></td>
<td><a target="_blank" href="https://agent-network-protocol.com/specs/agent-description.html">Agent Description</a> (RDF/JSON-LD)</td>
<td>Directories</td>
<td>Peer-to-Peer?</br>(WebSocket subprotocol)</td>
</tr>
<tr>
<td>LMOS</td>
<td><a target="_blank" href="https://eclipse.dev/lmos/docs/agent/">Agent</a>,</br>
<a target="_blank" href="https://eclipse.dev/lmos/docs/multi_agent_system/group_management/">Agent Group</a>,
<a target="_blank" href="https://eclipse.dev/lmos/docs/tool/">Tool</a>,</br>
<a target="_blank" href="https://eclipse.dev/lmos/docs/lmos_protocol/agent_description/">Agent Description</a>,</br>
<a target="_blank" href="https://eclipse.dev/lmos/docs/lmos_protocol/tool_description/">Tool Description</a></td>
<td>Message passing?</br>(in principle: TD interaction affordances)</td>
<td>Property Affordances,</br>Event Affordances,</br>Action Affordances</br>(<a target="_blank" href="https://www.w3.org/TR/wot-thing-description11/">W3C WoT TD</a>)</td>
<td>Uniform identifiers (IRIs, W3C DIDs)</td>
<td><a target="_blank" href="https://eclipse.dev/lmos/docs/lmos_protocol/agent_description/">Agent Description</a>,</br>
<a target="_blank" href="https://eclipse.dev/lmos/docs/lmos_protocol/tool_description/">Tool Description</a></br>(W3C WoT TD; JSON, RDF/JSON-LD)</td>
<td>DNS-SD/mDNS,</br>Well-known URIs,</br>Directories</br>(<a target="_blank" href="https://www.w3.org/TR/wot-discovery/">W3C WoT Discovery</a>)</td>
<td>W3C WoT Arch.? with protocol bindings for HTTP and WebSocket subprotocol</td>
</tr>
<tr>
<td>FIPA</td>
<td>Agent,</br>Agent Directory,</br>Service Directory,</br>Agent Communication Language,</br>Interaction Protocol</br></td>
<td><a target="_blank" href="http://www.fipa.org/specs/fipa00037/SC00037J.html">FIPA Agent Communication Language</a>,</br><a target="_blank" href="http://www.fipa.org/repository/standardspecs.html">FIPA Agent Interaction Protocols</a></td>
<td>N/A</td>
<td>FIPA Agent Name</td>
<td><a target="_blank" href="http://www.fipa.org/specs/fipa00023/SC00023K.html#_Toc75951012">FIPA Agent Identifier Description</a></td>
<td>Directories</td>
<td>TODO</td>
</tr>
<!-- <tr>
<td>Solid</td>
<td></td>
<td></td>
<td></td>
<td>URI</td>
<td>FOAF Profile</td>
<td></td>
</tr> -->
<tr>
<td>hMAS</td>
<td><a target="_blank" href="http://purl.org/hmas/Agent">Agent</a>,</br>
<a target="_blank" href="http://purl.org/hmas/Artifact">Artifact</a>,</br>
Agent Body,</br>
<a target="_blank" href="http://purl.org/hmas/Workspace">Workspace</a>,</br>
<a target="_blank" href="http://purl.org/hmas/Signifier">Signifier</a>,</br>
<a target="_blank" href="http://purl.org/hmas/Role">Role</a>,</br>
<a target="_blank" href="http://purl.org/hmas/Group">Group</a>,</br>
<a target="_blank" href="http://purl.org/hmas/Organization">Organization</a>,</br>
<!-- <a target="_blank" href="http://purl.org/hmas/HypermediaMASPlatform">hMAS Platform</a>,</br> -->
<a target="_blank" href="http://purl.org/hmas/ResourceProfile">Resource Profile</a></td>
<td>Message passing,</br>Signifiers for agent body affordances</td>
<td>Signifiers</br>(<a target="_blank" href="https://www.w3.org/TR/wot-thing-description11/">W3C WoT TD</a>, <a target="_blank" href="http://purl.org/hmas/">hMAS ontology</a>)</td>
<td>Uniform identifiers (IRIs, W3C DIDs)</td>
<td><a target="_blank" href="http://purl.org/hmas/ResourceProfile">Resource Profile</a></br>(<a target="_blank" href="https://www.w3.org/TR/wot-thing-description11/">W3C WoT TD</a> or <a target="_blank" href="http://purl.org/hmas/">hMAS ontology</a>; RDF/Turtle)</td>
<td>Hypermedia crawling,</br>Search engines,</br>Directories</td>
<td>Async. Client-Server with REST connectors (HTTP) and brokered pub/sub (W3C WebSub)</td>
</tr>
<tr>
<td>Multi-Agent MicroServices (MAMS)</td>
<td>Agent,</br>
Agent Body,</br>
Resource,
Microservices</td>
<td>FIPA ACL (over HTTP), REST, HTTP API, JMS</td>
<td>REST, HTTP API, JMS,
<a target="_blank" href="https://www.w3.org/TR/wot-thing-description11/">W3C WoT TD</a></td>
<td>URIs (Agents, Agent Bodies, Resources)</td>
<td>Agent Bodies (JSON,
JSON-LD (inc <a target="_blank" href="https://www.w3.org/2019/wot/hypermedia">W3C WoT Hypermedia Controls Ontology</a>),
<a target="_blank" href="https://www.ietf.org/archive/id/draft-kelly-json-hal-11.html">HAL</a>)</td>
<td>Service Registries (Netflix Eureka), Link Crawling, Link Sharing</td>
<td>Microservices Architecture, Event Driven Architecture, REST</td>
</tr>
</table>
<section data-dfn-for="Foo">
<h3>Agents and Web Services</h3>
<aside class="issue">
<p>This section is to summarize the research on Web-based MAS, from service-oriented computing in the early 2000s to current work on Hypermedia MAS.</p>
</aside>
</section>
<section data-dfn-for="Foo">
<h3>Agents and the Decentralized Social Web</h3>
<aside class="issue">
<p>This section discusses agents in the context of the decentralized Social Web — and especially the Solid ecosystem.</p>
</aside>
</section>
<section data-dfn-for="Foo">
<h3>Agentic AI</h3>
<aside class="issue">
<p>This section is to summarize relevant developments around AI agents and agentic AI (e.g., MCP, A2A, ANP, LMOS, etc.).</p>
</aside>
<p>The concept of Agentic AI refers to AI systems that are able to take autonomous decisions in order to achieve goals. The term is commonly used to refer more specifically to autonomous generative AI systems. </p>
<p>Large Language Models (LLMs) are a core technology to create agentic AI systems. More precisely, a core component to create <a href="#dfn-language-agent">language agents</a>, is an <a href="#dfn-augmented-llm">Augmented Language Model</a> (ALM), which is an LLM extended with the ability to reason and the ability to use <a>tools</a> [[TMLR23]]. These ALMs are building blocks to create agents [[ANTHROPIC24]]. The <a href="https://modelcontextprotocol.io/">Model Context Protocol (MCP)</a> is a protocol to enable ALMs and language agents to connect with external tools and data sources. The protocol thus enables a separation of concerns between agents and tools/data sources. In practice, MCP servers can be run on the same machine or can be accessed through the Internet (e.g., via server sent events or streamable HTTP). <a href="https://news.microsoft.com/source/features/company-news/introducing-nlweb-bringing-conversational-interfaces-directly-to-the-web/">NLWeb</a> relies on MCP to integrate conversational interfaces within websites, thus aiming to become the HTML of the Agentic Web. Another extension is the <a href="https://www.utcp.io/">Universal Tool Calling Protocol (UTCP)</a>, which aims to provide a universal interface for different tools, including MCP tools, and tools directly usable with the HTTP protocol and providing an OpenAPI description. UTCP relies on manuals that indicate how to interact with a set of tools. The protocol relies on this manual to interact with tools and provide information to LLM agents on the nature of the tools. </p>
<p> Agentic AI is also considering communication among language agents. Different protocols are being developed to enable communication of language agents on the Web. The <a href="https://www.a2aprotocol.net/docs/introduction">Agent2Agent (A2A)</a> protocol is a protocol that is meant as a complement to MCP for agent communication. Agents using this protocol describe themselves and their capabilities in an Agent Card that is available on the Web for other agents to read and use. The protocol defines tasks that an agent can achieve on behalf of another and messages to support communication among agents. The protocol relies on <a href="https://www.jsonrpc.org/specification">JSON-RPC</a> for communication. The Agent Communication Protocol (ACP) was another protocol intended to enable communication between LLM agents, which directly used HTTP APIs for agent interactions [[ACP]]. However, the project has been integrated within the A2A project under control by the Linux Foundation [[ACP]]. The <a href="https://agoraprotocol.org/docs/getting-started">Agora protocol</a> is a protocol for communication among language agents meant to be as versatile, efficient, and portable as possible, within the limit of the Agent Communication Trilemma between these three properties [[AGORA]]. The Agora protocol enables agents to choose at run time which specific protocol to use for interaction [[AGORA]]. The <a href="https://agent-network-protocol.com/specs/white-paper.html">Agent Network Protocol (ANP)</a> is another protocol for agents on the Web. ANP defines three layers: the Identity layer, the Meta-Protocol layer, and the Application layer. The Identity layer relies on <a href="https://www.w3.org/TR/did-1.0/">Decentralized Identifiers (DID)</a> to identify the agents. ANP defines a custom DID method <code>did:wba</code>, for Web-based Agents, to enable agents to prove their identities without relying on a central authority.
The Meta-Protocol layer enables agents to select which protocol to use for communication. Once a protocol has been selected, the agents communicate using that protocol. Finally, the Application layer defines a JSON-LD Agent Description (AD) to enable agents to provide information about themselves to other agents and an Agent Discovery Protocol to enable agents to discover the ADs of other agents. <a href="https://eclipse.dev/lmos/">Eclipse LMOS (Language Model Operating System)</a> is another project to build an Internet of Agents. Eclipse LMOS relies on DIDs to identify software agents. It also defines an Agent Description Format to describe agents and a Tool Description Format to describe tools. Both description formats are defined as built on top of the <a href="https://www.w3.org/TR/wot-thing-description/">Thing Description (TD) Format</a>. Eclipse LMOS also defines mechanisms for discovery, and it can rely on WebSocket as a communication protocol.</p>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Conceptual Overview and Modeling Dimensions</h3>
<aside class="issue">
<p>We need a conceptual model for Web-based MAS to organize the discussion and emerging technologies. We can follow the four modelling dimensions used in research on engineering MAS: agent, environment, interaction, and organization.</p>
</aside>
<div class="figure">
<img class="figure" src="images/MAS-modelling-dimensions.png" alt="Modelling dimensions for Multi-Agent Systems" width="500" />
<p class="caption">Modelling Dimensions for Engineering Multi-Agent Systems [Demazeau, 1995]</p>
</div>
</section>
<section data-dfn-for="Foo">
<h3>Architectural Considerations</h3>
<aside class="issue">
<p>Different approaches choose different distribution styles, e.g. MCP uses an RPC-style of distribution. This section discusses these different styles with reference to the Web architecture. The discussion could go into many directions, what emerged in our meetings is a need to look deeper than the agent level of abstraction. What seems important is to identify alignments and misalignments with the Web architecture.</p>
</aside>
</section>
</section>
<section data-dfn-for="Foo">
<h2>Identification</h2>
<aside class="issue">
<p>We need to identify agents, tools, protocols, and other entities in a Web-based MAS in a uniform way and independent of context. Additional requirements may be relevant depending on the degree of openness, e.g. see the <a href="https://www.w3.org/TR/did-1.0/#design-goals">requirements identified by the DID WG</a>.</p>
</aside>
<section data-dfn-for="Foo">
<h3>Relevant Standards and Initiatives</h3>
<section data-dfn-for="Foo">
<h3>Agent Identification</h3>
<aside class="issue">
<p>E.g., FIPA, SemWeb/FOAF, hMAS, LMOS, A2A.</p>
</aside>
</section>
<section data-dfn-for="Foo">
<h3>Tool Identification</h3>
<aside class="issue">
<p>E.g., MCP, hMAS, MAMS, LMOS.</p>
</aside>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Profiles</h2>
<aside class="issue">
<p>We need to describe non-information resources that we interact with in a Web-based MAS: agents, tools, etc.</p>
</aside>
<section data-dfn-for="Foo">
<h3>Relevant Standards and Initiatives</h3>
<section data-dfn-for="Foo">
<h3>Agent Profiles</h3>
</section>
<section data-dfn-for="Foo">
<h3>Tool Profiles</h3>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Verifiable Credentials</h2>
<aside class="issue">
<p>See <a href="https://github.com/w3c-cg/webagents/issues/83">GitHub Issue 83</a></p>
</aside>
<section data-dfn-for="Foo">
<h3>Relevant Standards</h3>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Discovery</h2>
<section data-dfn-for="Foo">
<h3>Relevant Standards and Initiatives</h3>
<section data-dfn-for="Foo">
<h3>Agent Discovery</h3>
</section>
<section data-dfn-for="Foo">
<h3>Tool Discovery</h3>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Agent-to-Agent Interaction</h2>
<section data-dfn-for="Foo">
<h3>Relevant Standards and Initiatives</h3>
<section>
<h3>Agents and People</h3>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Agent-Environment Interaction</h2>
<section data-dfn-for="Foo">
<h3>Relevant Standards and Initiatives</h3>
<section>
<h3>Tool Use</h3>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Norms, Policies, and Organizations</h2>
<aside class="issue">
<p>See <a href="https://github.com/w3c/odrl/issues/112">ODRL Issue 112</a></p>
</aside>
<section data-dfn-for="Foo">
<h3>Relevant Standards and Initiatives</h3>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Security and Privacy</h2>
<section data-dfn-for="Foo">
<h3>Relevant Standards</h3>
<section>
<h3>Authentication and Authorization</h3>
</section>
</section>
<section data-dfn-for="Foo">
<h3>Discussion</h3>
<aside class="issue">
<p>Identify commonalities among the different initiatives and potential gaps.</p>
</aside>
</section>
</section>
<section>
<h2>Conclusions: A Strategy for Agents on the Web</h2>
</section>
<section>
<h2>Acknowledgements</h2>
</section>
<!-- <section id="conformance">
<p>
This is required for specifications that contain normative material.
</p>
</section> -->
</body>
</html>