%%%%%%%%%%%%%%%%%%%%%%%%%% To allow same version tex / htx
% \ifhtx is a macro that expands to \iffalse, so under LaTeX the text
% between \ifhtx and its \else is skipped and only the \else branch is
% processed.
% NOTE(review): the HTX (HTML) converter presumably supplies its own
% \ifhtx that selects the other branch -- confirm against the HTX tool.
% Caution: \ifhtx is not a primitive conditional, so TeX does not count it
% while skipping the false branch of an enclosing conditional; do not nest
% \ifhtx ... \fi inside an \iffalse ... \fi block.
\def\ifhtx{\iffalse}    % Lines used only for the HTML version
\ifhtx			% Declarations for HTX (HTML)
\begin{head}
  \tag{META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=iso-8859-1"}
  \begin{title}VOTable \Version\ Proposal\end{title}
  \tag{META NAME="robots" content="index"}
  \tag{META NAME="description" content="VOTable Proposal"}
  \tag{META REV="made" HREF="mailto:VOTable@ivoa.net"}
  \tag{META NAME="GENERATOR" CONTENT="StarOffice/5.2 (Linux)"}
  \tag{META NAME="AUTHOR" CONTENT="Francois Ochsenbein, Roy Williams"}
  \tag{META NAME="CREATED" CONTENT="20020315;23390000"}
  \tag{META NAME="CHANGEDBY" CONTENT="Francois Ochsenbein"}
  \tag{META NAME="CHANGED" CONTENT="20020315"}
\end{head}
\Beg{body}{\bg{white}}
\def\Arefx#1#2{\Aref{#1}{#2}}
\else			% Declarations for LaTeX
% Document class and engine-independent packages.
\documentclass[10pt,notitlepage,onecolumn]{ivoa}
%\documentclass[12pt,notitlepage,onecolumn]{ivoa}
%\documentclass{article}
%\usepackage[pdftex]{graphicx}	% Accept Images
\usepackage{color}		% Accept Colors
\usepackage{verbatim}		% Verbatim / comment environments

%% Comment/uncomment lines below to follow your LateX distribution...
%%
%% If document is processed with latex, dvips and ps2pdf
%%
%% Engine-dependent setup of graphicx and hyperref.
%% \pdftexversion is undefined when the document is run through latex
%% (DVI output, then dvips and ps2pdf) and defined when run through pdflatex.
\ifx\pdftexversion\undefined
  \usepackage[dvips]{graphicx}
  \DeclareGraphicsExtensions{.eps,.ps}
%% Uncomment following line if you want PDF thumbnails
%  \usepackage[ps2pdf]{thumbpdf}
% for old hyperref, use:
  \usepackage[ps2pdf]{hyperref}
%% for recent hyperref, use:
%  \usepackage[ps2pdf,bookmarks=true,bookmarksnumbered=true,hypertexnames=false,breaklinks=true,%
%  colorlinks,linkcolor=blue,urlcolor=blue]{hyperref}

%%
%% else if document is processed with pdflatex
%%
\else
  \usepackage[pdftex]{graphicx} %% graphics for pdftex (supports .pdf .jpg .png)
  \usepackage{epstopdf}         %% requires epstopdf
%% this is to support .ps files :
%% (adds .ps to the extensions searched by \includegraphics and converts
%%  such files on the fly with ps2pdf)
  \makeatletter
  \g@addto@macro\Gin@extensions{,.ps}
  \@namedef{Gin@rule@.ps}#1{{pdf}{.pdf}{`ps2pdf #1}}
  \makeatother
%% comment above lines if you have included ps files
%\DeclareGraphicsExtensions{.pdf,.jpg,.png}
%% Uncomment following line if you want PDF thumbnails
%  \usepackage[pdftex]{thumbpdf}
%% for old hyperref, use:
%% (the driver must be pdftex in this branch: the ps2pdf driver writes
%%  dvips/pdfmark specials, which pdfTeX does not turn into links)
  \usepackage[pdftex]{hyperref}
% for recent hyperref, use:
%  \usepackage[pdftex,bookmarks=true,bookmarksnumbered=true,hypertexnames=false,breaklinks=true,%
%  colorlinks,linkcolor=blue,urlcolor=blue]{hyperref}
  \pdfadjustspacing=1
%% \A{url}{text}: print the text and put the URL in a footnote.
%% NOTE(review): \A is defined only in this pdflatex branch; under
%% latex+dvips it must come from elsewhere (presumably the ivoa class) -- confirm.
  \def\A#1#2{{#2}\footnote{#1}}
\fi

% Aliases shared with the HTX markup, page geometry and named colours.
\let\fg=\color			% fg = foreground color
% \Beg is an alias of \begin, so \Beg{env} behaves as \begin{env} under LaTeX
\let\Beg=\begin
\topmargin=-1cm
\raggedbottom
\oddsidemargin=0cm
\textwidth=17.5cm		% The default width is too small...
\textheight=23.5cm		% The default height is too small...
% Thicker table rules and taller table rows
\arrayrulewidth=0.75pt\renewcommand{\arraystretch}{1.2}
% Colours used by the markup macros of this preamble
\definecolor{DarkRed}{rgb}{0.5,0,0}
\definecolor{DarkBlue}{rgb}{0,0,0.5}
\definecolor{DarkGreen}{rgb}{0,0.5,0}
\definecolor{DarkPurple}{rgb}{0.3,0.1,0.5}
\definecolor{DarkGoldenrod}{rgb}{0.72,0.5,0.05}
% Inline markup for XML vocabulary (\fg is \let to \color in this preamble).
% \slash: a blue "/" (e.g. as last item of the second \elemdef argument).
% NOTE(review): this replaces the LaTeX kernel's own \slash -- confirm intended.
\def\slash {{\fg{blue}/}}
% \attr{name}: attribute name in dark-red typewriter type.
\def\attr#1{{\tt{\fg{DarkRed}#1}}}
% \requiredattr{name}: attribute name in dark-purple bold sans-serif.
\def\requiredattr#1{{\sf\bf{\fg{DarkPurple}#1}}}
% \elem{NAME}: element name, same rendering as \attr.
\def\elem#1{{\tt{\fg{DarkRed}#1}}}
% \attrval{name}{value}: prints name="value" (name dark red, value dark purple).
\def\attrval#1#2{{\tt{\fg{DarkRed}#1}="{\fg{DarkPurple}#2}"}}
% \elemdef{NAME}{attributes}: prints <NAME attributes> with blue angle brackets.
\def\elemdef#1#2{{\fg{blue}$<$}{\tt{\fg{DarkRed}#1}#2}{\fg{blue}$>$}}
% Cross-reference helpers: \Xref{label}{text}.
% Only #1 (the label) is used here; #2 is discarded in the LaTeX version.
% NOTE(review): #2 is presumably the link text of the HTX version -- confirm.
\def\Aref#1#2{section~\ref{#1}}
% No space before the tie: "appendix ~\ref" printed a breakable space
% followed by an unbreakable one (double gap, line break allowed).
\def\Arefx#1#2{appendix~\ref{#1}}
\def\Tref#1#2{Table~\ref{#1}}
\def\Fref#1#2{Figure~\ref{#1}}
% \value{v}: prints "v" with typewriter quotes and the value in dark purple.
\def\value#1{{\tt"}{\fg{DarkPurple}#1}{\tt"}}
% Symbol shorthands, each followed by a space:
% \order prints a circled plus, \unorder a large circle, \choice a mapsto arrow.
\def\order{$\oplus$ }
\def\unorder{{\large $\circ$ }}
\def\choice{{$\mapsto$ }}
% Disabled macro for inputting a file verbatim:
%\def\inputverbatim#1{%
%\begingroup \catcode``=13 \@noligs \tt \let\do\@makeother \dospecials
%\obeylines \obeyspaces \frenchspacing \input #1 \endgroup }
% \thickrule: unindented 1pt rule spanning the text width.
\def\thickrule{\noindent\rule{\textwidth}{1pt}}
% \Plain{text}: text in sans-serif.
\def\Plain#1{{\sf #1}}
\begin{document}
% TABULAR: two-argument wrapper around tabular.  Argument #1 is discarded
% and #2 is the column specification, so HTX-style calls of the form
% \Beg{TABULAR}{<HTML attributes>}{<columns>} also compile under LaTeX.
\newenvironment{TABULAR}[2]{\begin{tabular}{#2}}{\end{tabular}}
% plain: typesets its contents as a quote environment.
\newenvironment{plain}{\begin{quote}}{\end{quote}}
%\def\plain#1{#1}
\fi
%%%%%%%%%%%%%%%%%%%%%%%%%% To allow same version tex / htx

%%
%%  Header of the document...
%%
% Provide a title for your document
\title{VOTable Format Definition}
% Give date and version number
\date{2004-01-30}

% Choose one document type from below
%\ivoatype{IVOA Note}
\ivoatype{IVOA Working Draft}
%\ivoatype{IVOA Proposed Recommendation}
%\ivoatype{IVOA Recommendation}

\version{1.091}
% Give author list: separate different authors with \\
% You can add email addresses with links \url{mailto:yourname@ivoa.net}
% \name{author}: sets the author name flush left in a box 10em wide,
% so that the text following each name starts at the same position.
\def\name#1{\makebox[10em][l]{#1}}
\author{
\normalsize \name{Fran\c cois {\bf Ochsenbein}}
	\quad{\em Observatoire Astronomique de Strasbourg, France} \\
\normalsize \name{Roy {\bf Williams}}
	\quad{\em California Institute of Technology, USA} \\
\normalsize \hspace*{-0.75em}\name{{\em with contributions from:}}\\
\normalsize \name{Clive {\bf Davenhall}}
	\quad{\em University of Edinburgh, UK} \\
\normalsize \name{Daniel {\bf Durand}}
	\quad{\em Canadian Astronomy Data Centre, Canada} \\
\normalsize \name{Pierre {\bf Fernique}}
	\quad{\em Observatoire Astronomique de Strasbourg, France} \\
\normalsize \name{David {\bf Giaretta}}
	\quad{\em Rutherford Appleton Laboratory, UK} \\
\normalsize \name{Robert {\bf Hanisch}}
	\quad{\em Space Telescope Science Institute, USA} \\
\normalsize \name{Tom {\bf McGlynn}}
	\quad{\em NASA Goddard Space Flight Center, USA} \\
\normalsize \name{Alex {\bf Szalay}}
	\quad{\em Johns Hopkins University, USA} \\
\normalsize \name{Mark B. {\bf Taylor}}
	\quad{\em Physics, Bristol University, UK} \\
\normalsize \name{Andreas {\bf Wicenec}}
	\quad{\em European Southern Observatory, Germany} \\
}
\urlthisversion{\url{http://cdsweb.u-strasbg.fr/doc/VOTable/votable-1-1.htx}}
\urllastversion{\url{http://cdsweb.u-strasbg.fr/doc/VOTable/votable-1-0.htx}}
\previousversion{1.0 (2002-04-15)}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\begin{document}


\maketitle % print header in standard form
  
\section*{Abstract}
This document describes the standards adopted for the version 1.1 of
the VOTable format, and supersedes the previous version 1.0 of 15 April 2002.
The differences between versions 1.0 and 1.1 are summarized in 
\Aref{diff}{the last section}.

\noindent The main part of this document describes the adopted part of the
VOTable standard; it is followed by appendices presenting extensions
which have been proposed and/or discussed, but which are not part of 
the standard.

\section*{Status of this document}
This is an IVOA Working Draft for review by IVOA members and other
interested parties. It is a draft document and may be updated, replaced, or
obsoleted by other documents at any time. It is inappropriate to use IVOA
Working Drafts as reference materials or to cite them as other than 
``work in progress''. 
A list of current IVOA Recommendations and other technical documents
can be found at \url{http://www.ivoa.net/Documents/}.

\section*{Acknowledgments}
This document is based on the W3C documentation standards, but has been adapted
for the IVOA.


\tableofcontents

%\clearpage
\section{Introduction}

The VOTable format is an %proposed 
XML standard for representing a set
of tables. In this context, a table is an unordered set of rows, each of
a uniform format, as specified in the table {\em metadata}. Each row 
in a table is a
sequence of table cells, and each of these contains
either a primitive data type, or an array of such primitives. 
VOTable is derived from the
Astrores format [1], itself modeled on the FITS Table format [2];
VOTable was designed to be closer to the FITS Binary Table format.

\subsection{Why VOTable?}

Astronomers have always been at the forefront of developments in
information technology, and funding agencies across the world have
recognized this by supporting the Virtual Observatory movement, in
the hopes that other sciences and business can follow their lead in
making online data both {\it interoperable} and {\it scalable}.

VOTable is designed as a flexible storage and exchange format for
tabular data, with particular emphasis on astronomical tables.

Interoperability is encouraged through the use of standards (XML).
%because physical quantities are tagged not only with units, but also
%through a Uniform Content Descriptor (UCD) that expresses the nature
%of the quantity (eg. Gunn J magnitude, declination). The XML fabric
The XML fabric
allows applications to easily validate an input document, as well as
facilitating transformations through XSLT (eXtensible Stylesheet Language
Transformations) engines.

\subsubsection*{Grid Computing}

VOTable has built-in features for big-data and Grid computing. It
allows metadata and data to be stored separately, with the remote
data linked. % according to the Xlink model. 
Processes can then use
metadata to `get ready' for their input data, or to organize
third-party or parallel transfers of the data. Remote data allow the
metadata to be sent in email and referenced in documents without
pulling the whole dataset with it: just as we are used to the idea of
sending a pointer to a document (URL) in place of the document, so we
can now send metadata-rich pointers to data tables in place of the
tables themselves. The remote data is referenced with the URL syntax
{{\sf protocol://location}},
meaning that arbitrarily complex protocols are allowed.

When we are working with very large tables in a
distributed-computing environment (``the Grid''), the data
stream between processors, with flows being filtered, joined, and
cached in different geographic locations. It would be very difficult
if the number of rows of the table were required in the header --
we would need to stream in the whole table into a cache, compute the
number of rows, then stream it again for the computation. In the
Grid-data environment, the component in short supply is not the
computers, but rather these very large caches! Furthermore, these
remote data streams may be created dynamically by another process or
cached in temporary storage: for this reason VOTable can express that
remote data may not be available after a certain time (\attr{expires}).
Data on the net may require authentication for access, so VOTable
allows expression of password or other identity information (the
`{\attr{rights}}'
attribute).

\subsubsection*{Data Storage: Flexible and Efficient}

The data part in a  VOTable may be represented using one of three 
different formats: TABLEDATA, FITS and BINARY. TABLEDATA is a
pure XML format so that small tables can be easily handled in their
entirety by XML tools. The FITS binary table format is well-known to
astronomers, and VOTable can be used either to encapsulate such a
file, or to re-encode the metadata; unfortunately it is difficult to
stream FITS, since the dataset size is required in the header 
(NAXIS2 keyword), and FITS requires a specification up front of the maximum
size of its variable-length arrays. The BINARY format
is supported for efficiency and ease of programming: no FITS
library is required, and the streaming paradigm is supported.

We hope that VOTable can be used in different ways, as a data
storage and transport format, and also as a way to store metadata
alone (table structure only).  In the latter case, we can imagine a
VOTable structure being sent to a server, which can then open a
high-bandwidth connection to receive the actual data, using the
previously-digested structure as a way to interpret the stream of
bytes from the data socket. Alternatively, the metadata can be sent
alone as an implicit query to a server, which will respond with the
data part of the table filled in.

VOTable can be used for small numbers of small records (pure XML
tables), or for large numbers of simple records (streaming data), or
it can be used for small numbers of larger objects. In the latter
case, there will be software to spread large data blocks among
multiple processors on the Grid. Currently the most complex structure
that can be in a VOTable Cell is a multidimensional array. 


\iffalse
\subsubsection*{Future}
In future versions of the VOTable format, we expect to benefit
from both experience  and tool-building. Such tools include presentation
and transformations of the metadata (and data too, when it is in
XML), using XML transformation language and software: XSL and XSLT.
We would like to migrate to the more powerful document validation
provided by XSchema [8] rather than DTD -- a draft version of the
VOTable format in XSchema is included as \Aref{xsd}{Appendix}.

We also expect XSchema to allow better modularization of the
document schema, so that, for example, users might put whatever
serialized objects they wish into the table cells. In this way, we
expect to use VOTable for handling the flow of large data objects
through Grid resources, objects such as FITS files or XDF [7]
documents. Also, it would also mean, for example, that the description of a
table could contain arbitrary HTML instead of the current version --
plain text with paragraph markers; or that an XML definition of
non-standard astronomical coordinate systems could be seamlessly
integrated.

VOTable is derived from {\em Astrores}, which is specified not only as a way to
write a data table, but also as a way to specify how to address a
{\it request}  to data tables. We
expect to sharpen and formalize this dichotomy with the benefit of
experience, building into VOTable the ways of making
sophisticated querying mechanisms and protocols.

We expect to add features for efficiency in the future also: to
specify that the data stream has a particular sort order, to specify
that a column in one table is a key in to another table; to specify
that one table is an index into another. The binary format will be 
extended to facilitate large-scale streaming.
\fi

\subsection{XML Conventions}

VOTable is constructed with \A{http://www.w3.org/XML/}{XML} (eXtensible Markup Language), a
powerful standard for structured data throughout the Internet
industries. It derives %through simplification 
from SGML, %which has been 
a standard used in the publishing industry and for 
technical documentation for many years. XML
consists of {\it elements} and payload, where an element consists of
a {\it start tag} (the part in angle brackets), the payload, and an
{\it end tag} (with angle brackets and a slash). Elements can
contain other elements. Elements can also bear
{\attr{attributes}}
(keyword-value combinations).
%, such as the {\elem{PARAM}} elements above. 


The payload may be in two forms: parsed or unparsed character
data. Examples are:

\begin{verbatim}
<text>Fran&#231;ois</text>
<text><![CDATA[ a <= (b & c) ]]></text>
\end{verbatim}

In the first example, the sequence {\tt \&\#231;} is interpreted as
part of the ISO/IEC 10646 character set, and translates to an
accented character, so that the text is ``Fran\c{c}ois''.
The second example uses the special {\tt CDATA} sequence so that the
characters {\tt <}, {\tt >}, and {\tt\&} can be used without interpretation;
in this case, any ASCII characters are allowed except the terminating
sequence {\tt]]>}. For more information, see any book on
XML.


\subsection{Syntax policy}

Following the general XML rule, element and attribute names are
case-sensitive and have to be used with the specified 
capitalisation. For VOTable, we have adopted the convention that
element names are spelled in uppercase
and attribute names in lowercase (with an
exception for the {\attr{ID}}
attribute). 
Element and attribute names are further distinguished in
this paper by being shown in a {{\tt fixed-width}} font.

\section{Data Model}

In this section we define the data model of a VOTable, and in the
next sections its syntax when expressed as XML. The data model of
VOTable can be expressed as:    

\medskip
\begin{tabular}{rrcp{0.7\textwidth}}
\hspace{3em}&{\bf VOTable} &=& hierarchy of {\bf Metadata} + associated
	{\bf TableData}\\
&{\bf Metadata} &=& {\bf Parameters} + {\bf Infos} + {\bf Descriptions}
                + {\bf {\fg{black}Links + Fields + Groups}}\\
&{\bf Table} &=& list of {\bf Fields + TableData}\\
&{\bf TableData} &=& stream of {\bf Rows}\\
&{\bf Row} &=& list of {\bf Cells}\\
&{\bf Cell} &=& 
	$\left\{
        \begin{tabular}{l}
	 {\bf Primitive} \\
        or variable-length list of {\bf Primitives} \\
        or multidimensional array of {\bf Primitives}\\
        \end{tabular}
	\right.$
	\\
&{\bf Primitive} &=& integer, character, float, floatComplex, etc
(see \Tref{primitives}{table of primitives} below).
\end{tabular}

\medskip
\par\noindent
Metadata is divided into that which concerns the table itself 
(parameters), and the definitions of the fields (or column
attributes) of the table. 
Each \elem{FIELD} represents the metadata 
that can be found at the
top of the column in a paper version of the table: 
in the example introduced in \Aref{example1}{the
section} below, the first \elem{FIELD} has its \attr{name} attribute
set to \value{RA}. The Field can be thought of as a class definition,
and the table cells below it are the instances of that class.

A parameter ({\elem{PARAM}})
is similar to a {\elem{FIELD}},
except that it has a \attr{value} attribute.
Parameters can be seen as ``constant columns'', containing for instance
FITS keywords or any other
information pertaining to the table itself or its environment, such as the
{\tt Epoch} parameter in the example of \Aref{example1}{the section} below.

\label{elem:INFO}
An informative parameter ({\elem{INFO}})
is a restricted form of the {\elem{PARAM}} --  it has only the
\attr{name} / \attr{value} pair of attributes.

The ordered list of Fields at the top of the table thus provides a
template for a Row object (also called a {\it record}). The
template allows interpretation of the data in the Row. In VOTable,
there is no advance specification of the number of rows in the table:
this is to allow streaming of large tables, as discussed above. The
record is a set of Cells, with the number of Cells the same for each
Row, and the same as the number of Fields defined in the Metadata.

From Version 1.1, columns may be logically grouped, so that it is
possible to define table substructures made of column associations.
Such an association is declared as a \elem{GROUP}, which typically
contains columns (\elem{FIELD}) and associated parameters (\elem{PARAM}).

\subsection{Primitives}

\ifhtx\label{primitives}
\begin{center}\Beg{tabular}{CELLSPACING=4 CELLPADDING=4}{|rlrl|}
\else
\begin{table}[hbt]
 \begin{center}\begin{tabular}{|r|l|c|r|}
\fi\hline
  {\attr{datatype}} & Meaning & \attr{FITS} &
      { Bytes} \\
 \hline
 \value{boolean}      & Logical 	&\value{L}& 1  \\
 \value{bit}          & Bit  		&\value{X}& *  \\
 \value{unsignedByte} & Byte (0 to 255)	&\value{B}& 1  \\
 \value{short}        & Short Integer 	&\value{I}& 2  \\
 \value{int}          & Integer 	&\value{J}& 4  \\
 \value{long}         & Long integer 	&\value{K}& 8  \\
 \value{char}         & ASCII Character &\value{A}& 1  \\
 \value{unicodeChar}  & Unicode Character&        & 2 \\
 \value{float}        & Floating point 	&\value{E}& 4  \\
 \value{double}       & Double 		&\value{D}& 8  \\
 \value{floatComplex} & Float Complex 	&\value{C}& 8  \\
 \value{doubleComplex}& Double Complex  &\value{M}& 16 \\
 %logical & 1 \\
 %bit & * \\
 %byte & 1\\
 %short & 2 \\
 %int & 4 \\
 %long & 8 \\
 %char & 1 \\
 %unicodeChar & 2 \\
 %float & 4 \\
 %double & 8 \\
 %floatComplex & 8 \\
 %doubleComplex & 16 \\
\hline\end{tabular}\end{center}
\ifhtx\par
\else\caption{\label{primitives}List of the Primitives
{\em(details in \Aref{sec:datatypes}{})}}\end{table}
\fi

Each Cell is composed from Primitives, each of which is a datatype
of fixed-length binary representation, as listed in 
\Tref{primitives}{the accompanying table}. %(detailed in 
%\Aref{sec:datatypes}{the section below}).
Cells may consist of a single Primitive (this is
the default), or of a multidimensional array of Primitives (see
\Aref{array}{the next section}).

Except for the Bit type, each primitive has the fixed length in
bytes given in \Tref{primitives}{the table}. 
Bit scalars and arrays are stored in
the minimum number of bytes feasible (so that $b$ bits take the integer
part of $(b+7)/8$ bytes). %It is this fixed size that allows efficiency
%in storage, so that the memory used is minimized. 
These primitives
are described in more detail in \Aref{sec:datatypes}{section 7}.

VOTables support two kinds of characters: ASCII 1-byte characters
and Unicode 2-byte characters. Unicode is a way to represent
characters that is an alternative to ASCII. It uses two bytes per
character instead of one, it is strongly supported by XML tools, and
it can handle a large variety of international alphabets. Therefore
VOTable supports not only ASCII strings ({\attrval{datatype}{char}}),
but also Unicode ({\attrval{datatype}{unicodeChar}}).

Note that strings are not a primitive type: strings are 
represented in VOTable as an array of characters. %in an characters are.
%In VOTable, characters are Primitives, either one byte for an
%ASCII character or two bytes for a Unicode character. 


\subsection{Multidimensional Arrays}\label{array}
\label{sec:dim}

A table cell can contain an array of a given primitive type. The
array is specified by a sequence of dimensions, with the first
dimension changing fastest; only the last dimension may be variable
in length. For example, the following \elem{FIELD} definition
declares a table cell which may contain a set of up to 10 images,
each 64x64 bytes:

\elemdef{FIELD}{ \attrval{ID}{thumbs} \attrval{datatype}{unsignedByte} 
  \attrval{arraysize}{64x64x10*}\slash}

The string in the \attr{arraysize} attribute expresses these
dimensions, each integer separated by the {\tt x} character,
except the last. The last (slowest-varying) subscript of a
multidimensional array may have variable length, meaning that the
size of the final subscript may be different for different
rows of the table. In this case, there may be just an asterisk, in
which case the array may be arbitrarily large; or a number followed by
an asterisk, meaning that this subscript is guaranteed not to exceed
this value.

\iffalse
Variable-length arrays are more efficient in storage and data
transfer, but less efficient computationally, because extra pointer
dereferencing is required to access the data. 
%The reason that only
%the slowest-varying subscript can be variable is effectively a demand
%that the data provider pack the data into a small number of
%fixed-size containers as much as is possible, rather than a large
%number of small containers.
\fi

Strings can therefore be represented in VOTable as a fixed- or variable-length 
array of characters:

\elemdef{FIELD}{ \attrval{name}{unboundedString} \attrval{datatype}{char}
       \attrval{arraysize}{*}\slash}
%\begin{verbatim}
%<FIELD ID="unboundedString" datatype="char" arraysize="*"/>
%\end{verbatim}

A 1D array of strings can be represented as a 2D array of characters, but
given the logic above, it is possible to define a variable-length array
of fixed-length strings,
but not a fixed-length array of variable-length strings.
A convention to express an array of variable-length strings was
proposed (see \Aref{sec:arraystring}{in the appendix}) but is not
part of this standard.

\subsection{Compatibility with FITS Binary Tables}

VOTable is closely compatible with the FITS Binary Table format.
Henceforth, we shall abbreviate ``FITS Binary Table and its
Conventions'' simply by the word ``FITS''. Given a FITS
file that represents a binary table, the header may be converted to
VOTable, with a pointer to the original file, or with the original
file included directly in VOTable. Since the original file is still
present, it is clear that no data has been lost. A {\elem{PARAM}}
element can be used to hold any FITS keyword with its value
and comment string.

We might ask two more significant questions, about how much of
the FITS header and data can be represented in VOTable. The answer is
that there is considerable overlap. 

For instance, the recommended format for displaying or printing the
data is expressed by the non-mandatory TDISP keyword:
%FITS has semantics for how data is to be
%represented when printed, expressed by the non-mandatory TDISP keyword: 
for example F12.4 means 12 characters are to be used, and 4 decimal
places. This has been converted in VOTable as the attributes {\attr{width}}
and {\attr{precision}}
which, connected with {\bf {\attr{datatype}}},
are semantically identical to the TDISP keyword. 
\iffalse
Note that error estimation and the number
of digits to print are rather different semantically.
\fi

\subsubsection*{What can FITS do but not VOTable?}

FITS has a complex semantics (the ``Substring
Array'' convention) for structuring a single string as a
collection of substrings, and VOTable 1.1 does not support this
{\em(see however the \Aref{sec:arraystring}{appendix} which
proposes  a compatible VOTable definition)}. 
The current version of VOTable allows fixed and variable-length strings, 
as well as variable-length arrays of fixed length strings.

\subsubsection*{What can VOTable do but not FITS?}

VOTable supports the separation of data from metadata and the
streaming of tables, and other ideas from modern distributed
computing. It bridges two ways to express structured data: XML and
FITS. It tries (through the UCD -- see \Aref{sec:ucd}{below}) 
to express formally the semantic
content of a parameter or field. It has the hierarchy and flexibility
of XML: using \elem{GROUP} elements introduced in version 1.1, 
a VOTable can represent 
structures of arbitrary complexity; and the ID attribute can be used in XML 
to enable what are essentially pointers.

FITS does not handle Unicode (extended alphabet) characters.

\bigskip

\noindent{\fg{black}It should be noted that the transformation
of FITS to VOTable is meant to be reversible: 
any FITS table can be converted to a VOTable without loss of
information and the resulting VOTable can be converted back to a
FITS table also without loss of information.
However, it is
possible to create new VOTables which cannot be converted to FITS
tables without loss of information.
\iffalse
the conversion of a FITS file into VOTable does not lose any information, and a
transformation back into FITS is possible. It will however not be
possible to transform any VOTable into a FITS file without losing
some information.
\fi}


\section{The VOTable Document Structure}
\label{elem:VOTABLE}

The overall VOTable document structure is described and controlled
by its \A{http://vizier.u-strasbg.fr/xml/VOTable-1.1.xsd}{XML Schema} 
referenced at its top. % of the document.
%The XML Schema corresponding to this version of VOTable 
%is accessible from
%http://vizier.u-strasbg.fr/cgi-bin/VizieR
%however hardly human-readable, and is replaced in this document
%by its equivalent
This schema actually represents the VOTable definition, which means
that documents claiming to represent VOTables should pass through
W3C XML Schema validators without error.
An illustration of the XML Schema is given in \Aref{dtd}{section 7}.

An example %(presented in \Aref{example1}{below}) 
is used here to illustrate the components of a VOTable document
described in the following sections.
Basically, 
a VOTable document consists of a single all-containing element
called {\elem{VOTABLE}},
which contains descriptive elements ({\elem{DESCRIPTION}},
\elem{DEFINITIONS}, \elem{INFO}),
followed by one or more {\elem{RESOURCE}} elements.
Each Resource element contains one or more \elem{TABLE} elements,
and possibly other \elem{RESOURCE} elements. 

The \elem{TABLE} element, the actual heart of VOTable, contains 
a description of the columns and parameters 
(described in \Aref{sec:field}{the next section})
followed by the data values 
(described in \Aref{sec:data}{the following section}).

\subsection{Example}

This simple example of a VOTable document lists 3 galaxies with their
velocities, the velocity errors, and their estimated distances.

%
\label{example1}
\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\begingroup\small
\fi
\begin{verbatim}
<?xml version="1.0"?>
<VOTABLE version="1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:noNamespaceSchemaLocation="http://vizier.u-strasbg.fr/xml/VOTable.xsd">
  <DEFINITIONS>
  <COOSYS ID="J2000" equinox="2000." epoch="2000." system="eq_FK5"/>
  </DEFINITIONS>
  <RESOURCE name="myFavouriteGalaxies">
    <TABLE name="results">
      <DESCRIPTION>Velocities and Distance estimations</DESCRIPTION>
      <PARAM name="Epoch" datatype="float" ucd="TIME_EPOCH" 
             value="2003.875"/>
      <FIELD name="RA"   ID="col1" ucd="POS_EQ_RA_MAIN" ref="J2000" datatype="float"
             width="6" precision="2" unit="deg"/>
      <FIELD name="Dec"  ID="col2" ucd="POS_EQ_DEC_MAIN" ref="J2000" datatype="float"
             width="6" precision="2" unit="deg"/>
      <FIELD name="Name" ID="col3" ucd="ID_MAIN" datatype="char" arraysize="8*"/>
      <FIELD name="RVel" ID="col4" ucd="VELOC_HC" datatype="int"
             width="5" unit="km/s"/>
      <FIELD name="e_RVel" ID="col5" ucd="ERROR" datatype="int"
             width="3" unit="km/s"/>
      <FIELD name="R" ID="col6" ucd="PHYS_DISTANCE_TRUE" datatype="float"
             width="4" precision="1" unit="Mpc">
        <DESCRIPTION>Distance of Galaxy, assuming H=75km/s/Mpc</DESCRIPTION>
      </FIELD>
      <DATA>
        <TABLEDATA>
        <TR>
          <TD>010.68</TD><TD>+41.27</TD><TD>N  224</TD><TD>-297</TD><TD>5</TD><TD>0.7</TD>
        </TR>
        <TR>
          <TD>287.43</TD><TD>-63.85</TD><TD>N 6744</TD><TD>839</TD><TD>6</TD><TD>10.4</TD>
        </TR>
        <TR>
          <TD>023.48</TD><TD>+30.66</TD><TD>N  598</TD><TD>-182</TD><TD>3</TD><TD>0.7</TD>
        </TR>
        </TABLEDATA>
      </DATA>
    </TABLE>
  </RESOURCE>
</VOTABLE>
\end{verbatim}
\ifhtx\End{TABULAR}
\else
%\caption{\label{example1}A simple VOTable example}
\endgroup
\fi

This simple \elem{VOTable} document shows a single \elem{RESOURCE} made of a single \elem{TABLE};
the table is made of 6 columns, each described by a \elem{FIELD}, and has
one additional \elem{PARAM} parameter (the Epoch). The actual rows are
listed in the \elem{DATA} part of the table, here in  XML format 
(introduced by \elem{TABLEDATA}); each cell is marked by a \elem{TD} element, 
and the cells follow the same order as their \elem{FIELD} descriptions:
{\sl RA, Dec, Name, RVel, e\_RVel, R}.

\subsection{ID and name attributes}
\label{sec:name}

Most of the elements defined by VOTable may or have to bear {\em names},
like a \elem{RESOURCE}, a \elem{TABLE}, a \elem{PARAM} or a \elem{FIELD}.
Naming an element is generally possible by means of either or
both of the \attr{ID} and \attr{name} attributes.

{\attr{ID}} and {\attr{name}} attributes have a different role in 
VOTable: the ID is meant as a {\em unique identifier} of an element
seen as a VOTable component, 
while the name is meant for presentation purposes, and need
not be unique throughout the VOTable document.
%The value of each of \attr{ID} and \attr{name} attributes defaults
%to the other value.

\iffalse
defaults to the same as the \elem{ID} value if not present.
The ID's of the fields and parameters {\em must be unique throughout the
document} -- this is part of the XML specification, and we intend that
eventually ID's can be used to tie together data sources and
applications that read and write them. The name of an object, when
not present, defaults to the ID value, and when it is present is
intended for presentation to humans.
\fi

The {\attr{ID}} attribute %(as defined by Xpointer standard) 
is therefore required in the elements which {\em have to be referenced},
but in principle any element may have an {\attr{ID}} attribute.
According to the XML standard, the attribute {\attr{ID}}
is a string beginning with a letter or underscore ({\tt{\_}}),
followed by a sequence of letters, digits, or any of the
punctuation characters {\tt.} (dot), {\tt-} (dash), {\tt\_} (underscore),
or {\tt:} (colon).
%and each ID {\it must be unique} in the XML document. For example
%{\attrval{ref}{apple}}
%refers to the element that contains {\attrval{ID}{apple}}
%in the current XML document. The {\attr{ID}} attribute is required
%for the elements which have to be referenced, but in principle any element 
%may have an {\attr{ID}} attribute.
%Elements that support the {\attr{ref}}
%attribute (and can point to those with ID) are: {\elem{FIELD}},
%{\elem{PARAM}}, and {\elem{TABLE}}.

In summary, 
the {\attr{ID}} is different from the {\attr{name}}
attribute in that (a) the ID attribute is made from a restricted character
set, and must be unique throughout a VOTable document 
%(or else the document is considered invalid in the XML sense), 
whereas names are standard XML attributes and need not be unique; 
and (b) there should be support in the parsing
software to look up references and extract the relevant element with
matching ID. 


\subsection{DEFINITIONS element}

This element may contain a definition of a coordinate system,
stored in a {\elem{COOSYS}}
element. The \elem{COOSYS} element
provides attributes for equinox and epoch, as well as a
specification of  the celestial coordinate system. 
The \elem{COOSYS} element being the only astronomy specific part 
of VOTable, it may be deprecated in the  future,
as it is expected that a more formal structuring of the coordinate system 
will be designed, which would encompass conventions used in space science
or solar physics. Its current definition is given below.

The \elem{DEFINITIONS} element may also include one or more {\elem{PARAM}}
elements (\Aref{sec:field}{section 4}) 
that may contain user-specific data. Each of these may have
an \attr{ID} attribute, that can be referenced with the \attr{ref} attribute 
of other elements. 

\subsubsection*{The \elem{COOSYS} element}
\label{elem:COOSYS}
This element defines a celestial coordinate system, to which the
components of a position on the celestial sphere refer.
It has an \attr{ID} attribute --- required if the
\elem{COOSYS} element has to be referred to via the \attr{ref} attribute
of the position components, which is generally the case ---
a  \attr{system} attribute which specifies the coordinate system
among \value{ICRS}, \value{eq\_FK5},  \value{eq\_FK4}, \value{ecl\_FK4},
    \value{ecl\_FK5}, \value{galactic}, \value{supergalactic},
    \value{barycentric}, \value{geo\_app} and a user-defined \value{xy}
    value. \attr{equinox} is the parameter required to fix the 
    equatorial or ecliptic systems (as e.g. \value{J2000} as
    the default \value{eq\_FK5} or \value{B1950} as the default 
    \value{eq\_FK4}), and \attr{epoch} specifies the epoch of the positions
    if necessary.

As mentioned above, the \elem{COOSYS} may be deprecated in the  future
in favor of a more generic way of describing the conventions used to define
the positions.

\subsection{RESOURCE element}
\label{sec:resource}
\label{elem:RESOURCE}

A VOTable document contains one or more {\elem{RESOURCE}}
elements, each of these providing a description and the
data values of some logically independent data structure.


Each \elem{RESOURCE} may include the descriptive elements {\elem{DESCRIPTION}}, 
{\elem{INFO}}, {\elem{COOSYS}} and {\elem{PARAM}};
it may also contain {\elem{LINK}}
elements to provide URL-type pointers that give further information.

The main component of a \elem{RESOURCE} is typically one or more \elem{TABLE}
elements -- in other terms a \elem{RESOURCE} is basically a set
of related tables. The \elem{RESOURCE} is recursive (it can contain other
\elem{RESOURCE} elements), which means that the set of tables making up
a \elem{RESOURCE} may become a complex structure.

A \elem{RESOURCE} may have one or both of the \attr{name} or \attr{ID}
attributes (see \Aref{sec:name}{above}); it may also be qualified by
\attrval{type}{meta}, meaning that the resource is {\em descriptive}
only (does not contain any actual data in any of its sub-elements).

%\section{5 Parameters and Hierarchy}


%The VOTable document is a hierarchy of {\elem{RESOURCE}}
%elements, each of which can contain other {\elem{RESOURCE}}s.
%The root of the tree is the single {\elem{VOTABLE}}
%element that constitutes the entire XML document, and the leaves of
%the tree are {\elem{TABLE}}s.
%The resource elements may also contain parameters ({\elem{PARAM}}).

\iffalse
The {\elem{RESOURCE}} may also contain other {\elem{RESOURCE}}s:
the following is a complete VOTable which contains no tables, only
a hierarchy of parameters.

\begin{verbatim}
<?xml version="1.0"?>
<!DOCTYPE VOTABLE SYSTEM "http://ivoa.net/xml/VOTable.dtd">
<VOTABLE version="1.1">
  <RESOURCE ID="Stars">
    <PARAM ID="Mass" datatype="float" unit="solMass" value="1"/>
    <RESOURCE ID="BigStars">
      <PARAM ID="Mass-big" datatype="float" unit="solMass" value="10"/>
    </RESOURCE>
    <RESOURCE ID="SmallStars">
      <PARAM ID="Mass-small" datatype="float" unit="solMass" value="0.2"/>
      <RESOURCE ID="VerySmallStars">
        <PARAM ID="Mass-tiny" datatype="float" unit="solMass" value="0.05"/>
      </RESOURCE>
    </RESOURCE>
  </RESOURCE>
</VOTABLE>
\end{verbatim}

The main ingredient of the {\elem{RESOURCE}}
element is one or more {\elem{TABLE}}s.
These are described in \Aref{sec:field}{section 5} of this document.
\fi

\subsection{LINK element}
\label{sec:link}
\label{elem:LINK}

The {\elem{LINK}} element is to provide pointers to other documents 
or data servers on the Internet through a URL. In VOTable, the {\elem{LINK}}
element may be part of a {\elem{RESOURCE}},
{\elem{TABLE}}, \elem{GROUP} or {\elem{FIELD}} elements. The {\attr{href}}
attribute of the {\elem{LINK}} element can comprise any arbitrary protocol,
for example \value{http://server/file} or \value{bizarre://server/file}.
VOTable parsers are not required to understand arbitrary protocols,
but are required to understand the following three common protocols:
\value{file:}, \value{http:} and \value{ftp:}.
\iffalse	%%% OLD
is meant to provide a URL that is at least valid syntactically, even
though there need be no assurance that the link will actually connect
and deliver data. It may be that a strange protocol is implied that
the parser does not know about, for example {\value{httpg://server/file}}.
However, parsers are expected to understand at least the {\value{file}},
{\value{http}} and {\value{ftp}} protocols.
\fi		%%% OLD

The {\attr{gref}}
attribute is meant for a higher-level protocol of some type, perhaps
a logical name for a data resource, perhaps a GLU reference [5].

In the Astrores format, from which VOTable is derived, 
there is additional semantics for the {\elem{LINK}}
element; the \attr{href} attribute is used as a template for creating
URL's. This behavior is explained in \Arefx{LINK}{Appendix A}, and it represents
%a further proposal, 
a possible extension of VOTable.

In addition to the referencing \attr{href} and \attr{gref} attributes
and to the naming \attr{name} and \attr{ID} attributes 
(see \Aref{sec:name}{name and ID}), the \elem{LINK} element
may announce the mime type of the data it references 
with a \attr{content-type}  attribute (e.g. \attrval{content-type}{image/fits}),
and specify the role of the link by a \attr{content-role} attribute
(e.g. \attrval{content-role}{doc} for an access to a documentation).

\subsection{TABLE element}
\label{elem:TABLE}

The \elem{TABLE} element represents the basic data structure in VOTable;
it is made of a description of the table structure (the {\em metadata})
essentially in the form of \elem{PARAM} and \elem{FIELD} elements
(detailed in \Aref{sec:field}{the next section}),
followed by the {\em values} of the described fields in a \elem{DATA}
element (detailed in \Aref{sec:data}{the section below}).

The \elem{TABLE} element is always contained in a \elem{RESOURCE} element:
in other terms
any \elem{TABLE} element has a single parent, namely the 
\elem{RESOURCE} element
in which the table is embedded. 

The \elem{TABLE} element contains 
a {\elem{DESCRIPTION}} element for descriptive remarks, followed
by a mixed collection of \elem{PARAM}, \elem{FIELD} or \elem{GROUP} elements
which describe a parameter (constant column), a field (column) or a group of
columns respectively. \elem{PARAM} and \elem{FIELD} elements are detailed in 
\Aref{sec:field}{the next section}, and the \elem{GROUP} element
%introduced in Version 1.1
is presented in \Aref{sec:group}{the following section}.

Furthermore the \elem{TABLE} element may contain {\elem{LINK}} elements
that provide URL-type pointers, exactly like the {\elem{LINK}} elements 
existing within a \elem{RESOURCE} element (see \Aref{sec:link}{above}).

The last element included in a \elem{TABLE} is the optional \elem{DATA} 
element (see \Aref{sec:data}{below}): a table without any
actual data is quite valid, and is typically used to supply a complete
description of an existing resource e.g. for query purposes.

The \elem{TABLE} element may have the naming attributes \attr{name} and/or 
\attr{ID} (see \Aref{sec:name}{name and ID conventions}). A \elem{TABLE}
may also have a \attr{ref} attribute referencing the ID of another
table previously described, which is interpreted as
{\em defining a table having a structure identical to the one referenced}:
this facility avoids a repetition of the definition of tables which
may be present many times in a VOTable document.

\iffalse
\section{XML}

VOTable is constructed with XML (extensible Markup Language), a
powerful standard for structured data throughout the Internet
industries. It derives %through simplification 
from SGML, %which has been 
a standard used in the publishing industry and for 
technical documentation for many years. XML
consists of {\it elements} and payload, where an element consists of
a {\it start tag} (the part in angle brackets), the payload, and an
{\it end tag} (with angle brackets and a slash). Elements can
contain other elements. Elements can also bear
{\attr{attributes}}
(keyword-value combinations), such as the {\elem{PARAM}}
elements above. 


The payload may be in two forms: parsed or unparsed character
data. Examples are:

\begin{verbatim}
<text>Fran&#231;ois</text>
<text><![CDATA[ a <= (b & c) ]]></text>
\end{verbatim}

In the first example, the sequence {\tt \&\#231;} is interpreted as
part of the ISO/IEC 10646 character set, and translates to an
accented character, so that the text is ``Fran\c{c}ois".
The second example uses the special {\tt CDATA} sequence so that the
characters {\tt <}, {\tt >}, and {\tt\&} can be used without interpretation;
in this case, any ASCII characters are allowed except the terminating
sequence {\tt]]>}. For more information, see any book on
XML.

\subsection{Lists}

Within a table cell, multiple Primitives can be formatted by
separating them by whitespace, to express
compound primitives (complex numbers) and multidimensional arrays.
Text tokens are separated by contiguous whitespace (ASCII space {\tt{0x20}},
tab {\tt{0x9}}, carriage-return {\tt{0xD}}, newline {\tt{0xA}},
vertical tab {\tt{0xB}}).
There are no null tokens, comments, quote characters, or separators.
However, it is possible to include special characters through an
escape mechanism as in HTML: for example the string {\tt New York}
would be encoded as {\tt New\&nbsp;York}. 

Thus a table cell that contains an array of three complex numbers
could be represented as:

\begin{verbatim}
<TD>1.0 0.0   -0.5 0.866   -0.5 -0.866</TD>
\end{verbatim}

However, it should be noted that in a character array (a string) no
space is needed to separate each element (a character).

\subsection{Syntax policy}

Following the general XML rule, element and attribute names are
case-sensitive and have to be used with the specified 
capitalisation. For VOTable, we have adopted the convention that
element names should be in uppercase
and attribute names in lowercase (with an
exception for the {\attr{ID}}
attribute). Element and attribute names are further distinguished in
this paper by being in {{\tt fixed-width}} font.

\subsection{Xlink and STREAM}

The \elem{STREAM} element is used to point to remote table data, and as
such it closely follows the W3C specification called ``Xlink".
The {\elem{STREAM}}
implements the interface defined by Xlink; in particular it is an
Xlink with {\attrval{type}{locator}}.
However, {\elem{STREAM}}
has more attributes than Xlink allows for: a {\attr{rights}}
attribute for authentication information; and {\attr{expires}}
attribute for when the link may cease to be valid; and an {\attr{encoding}}
attribute if the data is filtered, (for example compression or
binary-to-ascii filtering). Therefore we will wait until a future
release to formalize the relationship between Xlink and {\elem{STREAM}}.


\subsection{Location of the DTD}

A VOTable document, like all XML documents, should be {\it well-formed}, meaning
that it obeys the syntax rules of XML: for example, elements should start and
end properly and be properly nested. The document may be further constrained to
be {\it valid}, meaning that it follows the VOTable syntax rules, as defined in
the DTD of \Aref{dtd}: for example the data type ``float" is valid, but not the
datatype ``real". Access to the DTD is necessary to check validity. Valid XML
documents may employ certain advanced features of XML, features that can
significantly improve the usability of a document, including: linking
mechanisms, entities and attributes. Valid XML documents offer much more to the
document process than those that may be simply well-formed. Document authoring,
processing, storage and display are made easier because documents exist in a
structured environment. Authors create documents against a pre-defined structure
and benefit from a clear document model.

There are three ways to give the document access to its DTD structure: by
embedding the DTD directly into the XML file, or by referencing a local file, or
by referencing a remote file. We should point out that many parsers will simply
stop if the DTD reference cannot be resolved, rather than falling back to a
non-validated document. Any XML book will explain the syntax of these options.

\begin {itemize}

\item Embedding the DTD means that the document is longer, since it includes
all the text of the DTD (\Aref{dtd}{below}). 
It also means that the document is more
portable, since it does not rely on external references that may not be
available. However, the receiver of such a document does not know that it is a
proper VOTable, since the DTD may be different from the official version. 

\item If the DTD is a local file, it means that the document processing still
works even when the machine is disconnected from the internet. However, when the
XML file is moved to a new directory or sent by email, the DTD must move with
it.

\item Referencing a DTD at a standard place on the internet makes the XML
document portable, and files can be moved at will. However, the document
processing will fail when not connected to the internet. We expect the VOTable
DTD to be mirrored at several sites, including:

\begin{tabular}{l}
{\bf\fg{DarkRed}\Plain{http://ivoa.net/xml/VOTable.dtd}}\\
{\bf\fg{DarkRed}\Plain{http://cdsweb.u-strasbg.fr/xml/VOTable.dtd}}\\
\end{tabular}
\end{itemize}
\fi

\section{FIELDs and PARAMeters}
\label{sec:field}

The atoms of the table structure are represented by \elem{FIELD} and
\elem{PARAM} elements, where \elem{FIELD} represents the description
of an actual table column, while \elem{PARAM} supplies a value
which remains constant over the whole table, like the \attr{Epoch}
in \Aref{example1}{the example}. A \elem{PARAM} may therefore be
viewed as a \elem{FIELD} which keeps a {\em constant value} over all
the rows of a table, and the only difference between the two elements
is the existence of a \attr{value} attribute in a \elem{PARAM}
which does not exist in a \elem{FIELD}.

A {\elem{FIELD}} or \elem{PARAM} element may have several sub-elements, 
including the informational {\elem{DESCRIPTION}}
and {\elem{LINK}} elements; it may also include a {\elem{VALUES}} element
that can express limits and ranges of the values that the
corresponding cell can contain, such as minimum (\elem{MIN}), 
maximum (\elem{MAX}), or
enumeration of possible values (\elem{OPTION}). 

\subsection{FIELD attributes}
\label{elem:FIELD}
\label{elem:PARAM}
The valid attributes of a \elem{FIELD} or \elem{PARAM} are:

\begin{itemize}
\item	the \attr{name} and/or \attr{ID}. The \attr{ID} attribute is required
	if the field has to be referenced (see 
	\Aref{sec:name}{the generic ID rule}).
	It may help to include the ordinal number of 
	the column in the table in the value of the \attr{ID} attribute
	as e.g. \attrval{ID}{col3} when a single table is involved: 
	the connection to the
	corresponding column would become
	more obvious, especially in the FITS data serialization
	which uses the ordinal column number in the keywords containing
	the metadata related to that column. 

\item	the \attr{datatype}, which expresses the nature of the data 
	that is described as one of the permitted primitives 
	(see \Tref{primitives}{table above} and their exact meaning 
	in \Aref{sec:datatypes}{section 7}).
	This attribute determines
	how data are read and stored internally;
	it is {\em required}, except when the \attr{ref} attribute exists
	in which case the \elem{FIELD} is just referenced 
	(see \Aref{sec:group}{the GROUP definitions})

\item	the \attr{arraysize} attribute exists when 
	the corresponding table cell contains more than one of the specified
	datatype, as explained \Aref{sec:dim}{above}.
	Note that strings are not a primitive type,
	and have to be described as an array of characters.

\item	{\fg{black}}the \attr{width} and \attr{precision} attributes define the 
	numerical accuracy associated to the data (see \Aref{sec:form}{below})

\item	the \attr{unit} attribute specifies the units in which
	the values of the corresponding column are expressed
	(see \Aref{sec:unit}{below})

\item	the \attr{ucd} attribute supplies a standardized classification
	of the physical quantity expressed in the column
	(see \Aref{sec:ucd}{below}).

\item	the \attr{utype} attribute, introduced in VOTable 1.1, is meant
	to express the role of the column in the context of an external
	data model (see \Aref{sec:utype}{below}).

\item	the \attr{ref} attribute defines the field as being a {\em reference}
	to a column having the referenced \attr{ID} attribute. This attribute
	normally exists alone: if present, it precludes the existence
	of any other attribute except a \attr{utype} attribute,
	and a \attr{value} attribute for \elem{PARAM} elements.

\item	The \attr{type} attribute is {\em not} part of this standard,
	but is reserved for future extensions.
	In Astrores it was used to
	express some peculiarities of the column in the table
	as \attrval{type}{hidden} (see \Arefx{query}{link substitutions})
	and \attrval{type}{no\_query} 
	(see \Arefx{query}{Query Extension});
	an additional \attrval{type}{location} value is proposed
	to express columns containing parts of URIs 
	(see \Arefx{location}{fields as pointers}).
	The \attr{type} is not part of this standard, 
	but is reserved for future extensions.

\end{itemize}

In addition, in the \elem{PARAM} element only:
\begin{itemize}
\item	the \attr{value} attribute which exists only in the \elem{PARAM}
	element; this attribute is moreover {\em required}, even 
	when the \elem{PARAM} contains the \attr{ref} attribute.
\end{itemize}

\subsection{Numerical Accuracy}
\label{sec:form}


The VOTable format is meant for transferring, storing, and
processing tabular data, and is not intended for presentation
purposes: therefore (in contrast to Astrores) we generally avoid
giving rules on presentation, such as formatting. 
Inevitably, however, at least some of the data will have to be presented --
either as actual tables,  or in forms or graphs, etc...
Two attributes were retained for this purpose:

\begin{itemize}
\item	the {\attr{width}} attribute %of the {\elem{FIELD}},
	is meant as a hint to the application
	about the number of characters to be used for input
	or output of the quantity. 

\item	the \attr{precision} attribute is meant to express the
	number of significant digits, either as a number of
	decimal places (e.g. \attrval{precision}{F2} or equivalently
	\attrval{precision}{2} to express 2 significant figures
	after the decimal point), or as a number of significant figures
	(e.g. \attrval{precision}{E5} indicates a relative precision
	of $10^{-5}$).
\end{itemize}
\iffalse
But there is a semantic difference between a number written as
\value{5.12} and one that is written \value{5.1200}, in that
the former implies three significant digits of accuracy, and the
latter five digits. Therefore the number of digits to show is not
purely a presentation matter, but part of the metadata content of the
number.
\fi

The existence and presentation of the special {\em null} value of 
a field (when the actual value of the field is unknown) is
another aspect of the numerical accuracy, which is part of the
\elem{VALUES} sub-element (see \Aref{sec:values}{below}).

\subsection{Units}
\label{sec:unit}

The quantities in a column of the table may be expressed in
some physical unit,
which is specified by the {\attr{unit}}
attribute of the {\elem{FIELD}}.
The  syntax of the {\em unit} string is defined in reference [3];
it is basically written as a string without blanks or spaces,
where the symbols {\bf.} or {\bf*} indicate a multiplication,
{\bf/} stands for the division, and no special symbol is required
for a power.
Examples are \attrval{unit}{m2} for m$^2$,
\attrval{unit}{cm-2.s-1.keV-1} for cm$^{-2}$s$^{-1}$keV$^{-1}$,
or \attrval{unit}{erg/s} for erg\,s$^{-1}$.
Reference [3] also provides the list of the valid symbols,
which is essentially restricted to the {\em Syst\`eme International}
(SI) conventions, plus a few astronomical extensions concerning
units used for time, angular, distance and energy measurements.

%\begin{verbatim}
%      units="cm-2.s-1.keV-1"
%      units="erg.s-1"
%\end{verbatim}
%
%The syntax of this string is defined in reference [3].

\subsection{Unified Content Descriptors}
\label{sec:ucd}

The Unified Content Descriptors (UCD) can be viewed as a 
hierarchical glossary of the scientific meanings of the data 
contained in the astronomical tables.
The initial version was created at CDS, but the UCD definition
is currently evolving [4].

%The CDS in Strasbourg has used the metadata from thousands of
%astronomical tables to make a hierarchical glossary of the scientific
%meanings of the data in those tables [4]. Of 1600 entries in the
%glossary, here are a few typical examples.

\noindent A few typical examples taken from the original UCD design:

\begin{tabular}{ll}
{\value{PHOT\_INT-MAG\_B}}     &  Integrated total blue magnitude \\
{\value{ORBIT\_ECCENTRICITY}}  &  Orbital eccentricity \\
{\value{STAT\_MEDIAN}}         &  Statistics Median Value \\
{\value{INST\_QE}}             &  Detector's Quantum Efficiency \\
\end{tabular}

%\noindent but the contents of the
%{\attr{ucd}} attribute will follow the evolution of the UCD
%standard.
%of the {\elem{FIELD}} element
%is to hold this information.

\subsection{The \attr{utype} attribute}
\label{sec:utype}
In some contexts, it can be important that \elem{FIELD}s or \elem{PARAM}eters
are explicitly designated as being {\em the} parameter performing some
well-defined role in 
some external data model.
For instance, it might be important for an application to know
that a given \elem{FIELD} expresses {\em the} surface brightness 
processed by an explicit method. None of the existing \attr{name}, \attr{ID}
or \attr{ucd} attributes can fill this role, and 
the \attr{utype} (usage-specific or {\em unique} type) attribute has 
been added in 
VOTable 1.1 to fill this gap. 

In order to avoid name collisions, the data model identification
should be introduced following the XML namespace conventions,
as \attrval{utype}{{\em datamodel\_identifier:role\_identifier}}.
The mapping of \value{datamodel\_identifier} to an xml-type attribute
is recommended, but not required.

\iffalse
The respective roles of these attributes are:
\begin{itemize}
\item	\attr{name}: a name freely defined by the data server
\item	\attr{ID}: an XML identifier designating uniquely a field
	within a document
\item	\attr{ucd}: the characterisation of the field contents,
	with a scope essentially devoted to interoperability
\item	\attr{utype}: the relation to an explicit parameter
	described in a data model. The contents of this
	attribute is moreover proposed to include a {\em namespace}
	part, as
	\attrval{utype}{{\em namespace}:datamodel-{\em parameter\_name}}
\end{itemize}

    It was effectively proposed during the discussions on UCDs that the
    \attr{ucd} attribute could be replaced by a pointer to some data model
    in the future; in practice it seems rather impossible to the UCD to
    play simultaneously a role of {\em global meaning} enabling global
    interoperability and to play the role of defining precisely which
    parameter it represents in the context of a data model.
    The \attr{utype} attribute is a simple solution to this dilemma.

\elem{FIELD} and \elem{PARAM} sharing the same set of attributes
(with the exception of the \attr{value} attribute), it is proposed
that the \elem{PARAM} entity can also exhibit a \attr{utype} attribute.
\fi

\subsection{VALUES element}
\label{sec:values}

The {\elem{VALUES}} element of the {\elem{FIELD}}
is designed to hold subsidiary information about the {\em domain} of the
data. For instance, in the example (\Aref{example1}{above})
we could rewrite the RA field definition as:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\begingroup\small\fi
\begin{verbatim}
      <FIELD name="RA" ID="col1" ucd="POS_EQ_RA_MAIN" ref="J2000" datatype="float"
             width="6" precision="2" unit="deg">
        <VALUES ID="RAdomain">
          <MIN value="0"/>
          <MAX value="360" inclusive="no"/>
        </VALUES>
      </FIELD>
\end{verbatim}
\ifhtx\End{TABULAR}\else\endgroup\fi

\label{elem:VALUES}
\label{elem:MIN}
\label{elem:MAX}
\label{elem:OPTION}
The \elem{VALUES} element may contain {\elem{MIN}} and {\elem{MAX}} elements, 
and it may contain {\elem{OPTION}} elements. 
The latter may itself contain more {\elem{OPTION}}
elements, so that a hierarchy of keyword-values pairs can be
associated with each field.

All three \elem{MIN}, \elem{MAX} and \elem{OPTION} sub-elements 
store their value corresponding to the minimum, maximum, or ``special value''
in a \attr{value} attribute. \elem{MIN} and \elem{MAX} elements
can have an \attr{inclusive} attribute to specify whether the \attr{value}
quoted belongs or not to the domain, and the  \elem{OPTION} element
can have a \attr{name} attribute to qualify the ``special'' quoted 
\attr{value}.

The \elem{VALUES} element may also have a \attr{null} attribute 
to define a non-standard value that is used to specify 
{\em``non-existent data''} -- for example \attrval{null}{-32768}.
When this value is found in the corresponding data, it is assumed that no data
exists for that table cell; the parser may choose to use this also
when unparsable data is found, and the null value will be substituted
instead.
\iftrue	% Added FO 2002-03-21

In the \elem{TABLEDATA} data representation,
the default representation of a ``null'' value is an empty column
(i.e. \verb+<TD></TD>+);
for fields containing arrays, individual ``null'' elements of the array
can be specified either by the value specified in the \attr{null}
attribute, or by the \value{NaN} or \value{nan} text in place of the expected
numeric value.

For the \elem{FITS} and \elem{BINARY} data representations, 
the {\em NaN} (not-a-number)
patterns are recommended to represent floating-point ``null'' values.
The ``null'' convention is therefore only necessary for primitive types
that do not have a natural ``null'' value: long, int, short, and byte datatypes.
\fi

\iffalse	%%% THIS IS WRONG -- does not match FITS rules
For fields containing arrays or complex numbers, the values specified
in the \attr{value} or \attr{null} attributes have to be compatible
with their datatype, and contain as many numbers separated by white space
as implied by the \attr{datatype} and \attr{arraysize} of the parent
\elem{FIELD}.
\fi

The scope of the domain described by the \elem{VALUES} element
can be qualified by \attrval{type}{actual}, if it is only applicable to
the data enclosed in the parent \elem{TABLE}. The domain of a valid
RA in the example above has the default 
\attrval{type}{legal} qualification.

Finally the \attr{ref} attribute of a \elem{VALUES} element
can be used to avoid a repetition of the domain definition,
by referring to a previously defined \elem{VALUES} element
having the referenced \attr{ID} attribute. 
When specified, the  \attr{ref} attribute defines completely
the domain without any other element or attribute, as e.g.
\elemdef{VALUES}{ \attrval{ref}{RAdomain}\slash}

\subsection{GROUPing FIELDs and PARAMeters}
\label{sec:group}
\label{elem:GROUP}

The \elem{GROUP} element was added in VOTable 1.1,
to group together a set of \elem{FIELD}s
which are logically correlated, like a value and its error. Each field
participating in a \elem{GROUP} can be defined either {\em physically} 
(the \elem{FIELD} contains a \attr{datatype} attribute), or {\em logically} 
(the \elem{FIELD} contains only a \attr{ref} attribute referencing
a field defined in the same parent \elem{TABLE}).
A physical field (i.e. a single column of the table)
may therefore participate (logically) in several groups.

A straightforward example of a group, based on the example of
\Aref{example1}{above}, can be to replace the definitions of columns
4 and 5 by the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\begingroup\small\fi
\begin{verbatim}
    <GROUP name="Velocity" ucd="VELOC_HC">
      <DESCRIPTION>Velocity and its error</DESCRIPTION>
      <FIELD name="RVel" ID="col4" ucd="VELOC_HC" datatype="float" 
             width="5" unit="km/s"/>
      <FIELD name="e_RVel" ID="col5" ucd="ERROR" datatype="float" 
             width="3" unit="km/s"/>
    </GROUP>
\end{verbatim}\ifhtx\End{TABULAR}\else\endgroup\fi

A {\em logical} definition of this group could alternatively be
achieved by inserting just before the \elem{DATA} element the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\begingroup\small\fi
\begin{verbatim}
    <GROUP name="Velocity">
      <DESCRIPTION>Velocity and its error</DESCRIPTION>
      <FIELD ref="col4"/>
      <FIELD ref="col5"/>
    </GROUP>
\end{verbatim}\ifhtx\End{TABULAR}\else\endgroup\fi


The \elem{GROUP} element can have the \attr{name}, \attr{ID}, \attr{ucd},
\attr{utype} and \attr{ref} attributes.
It can include a \elem{DESCRIPTION}, and any mixture of \elem{FIELD}s, 
\elem{PARAM}eters, and other \elem{GROUP}s -- 
this recursive grouping enabling a definition of
arbitrarily complex structures.

The possibility of adding \elem{PARAM}eters in groups introduces also
a possibility of associating parameter(s) to describe  accurately 
the context of the data stored in the table: 
for instance,
it is possible to associate the actual frequency of a radio survey with
the following declaration:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
    <GROUP name="Flux" ucd="PHOT_FLUX_RADIO_400M">
      <DESCRIPTION>Flux measured at 352MHz</DESCRIPTION>
      <FIELD name="Flux" ucd="PHOT_FLUX_RADIO_400M" datatype="float" 
             width="6" precision="1" unit="mJy"/>
      <PARAM name="Freq" ucd="OBS_FREQUENCY" unit="MHz" datatype="float" value="352"/>
      <FIELD name="e_Flux" ucd="ERROR" datatype="float" width="4" 
             precision="1" unit="mJy"/>
    </GROUP>
\end{verbatim}\ifhtx\End{TABULAR}\fi

\par
Similarly, the \elem{GROUP} can be used to associate several parameters
to one or several \elem{FIELD}s: a filter may for instance be
characterized by the central wavelength and the FWHM of its transmission
curve; or several parameters of an instrument setup may be detailed.


\section{Data Content}
\label{sec:data}

While the bulk of the metadata of a VOTable document is in the
{\elem{FIELD}} elements, the data content of the table is 
in a single {\elem{DATA}} element. 
The data is organized in ``reading'' order, so that
the content of each row appears in the same order as the order of the
{\elem{FIELD}} tags having a \attr{datatype} attribute, with each row 
having the same number of items as 
there are {\elem{FIELD}} tags having a \attr{datatype} attribute. 
Fields without a \attr{datatype} attribute have a \attr{ref}
attribute, and represent references to  ``true'' columns
(see \Aref{sec:field}{FIELD attributes}).

\label{Image1}
\ifhtx\tag{IMG SRC="serial.gif" NAME="Image1" ALIGN=BOTTOM BORDER=0}\else
\begin{figure}[hbt]
\includegraphics[width=\textwidth]{serial.jpg}
\caption{\label{fig:serialization}Data serialization}
\end{figure}
\fi

Each \elem{DATA} part of the VOTable document can be viewed as
a stream coming out of a pipeline.
The abstract table is first serialized by one of several
methods, then it may be encoded for compression or other reasons. The
result may be embedded in the XML file ({\it local} data), or it may
be {\it remote} data.

\Fref{fig:serialization}{The figure}
shows how the abstract table is rendered into the
VOTable document. First the data is {\it serialized}, either
as XML, a FITS binary table, or the VOTable
Binary format. This data stream may then be {\it encoded},
perhaps for compression or to convert binary to text. Finally, the
data stream may be put in a remote file with a URL-type pointer in
the VOTable document; or the table data may be embedded in the
VOTable. 


The serialization elements and their attributes are
described in the next sections.

%\subsection{Data Serialization}
\subsection{TABLEDATA Serialization}
\label{sec:TABLEDATA}
\label{elem:TD}

This element is a way to build the table in pure XML, and is the
only serialization method that does not allow an encoding or a remote
data stream. It contains {\elem{TR}}
elements, which in turn contain {\elem{TD}}
elements --- i.e. the same conventions as the familiar {\em HTML} ones. 
An example is contained in \Aref{example1}{section 3.1},
surrounded by the \elemdef{TABLEDATA}{} and \elemdef{\slash TABLEDATA}{}
delimiters.

\iffalse
\begin{verbatim}
<TABLEDATA>
  <TR>
    <TD>Procyon</TD> <TD>114.827242</TD>
    <TD>5.227506</TD>  
  </TR>
  <TR>
    <TD>Vega</TD>    <TD>279.234106</TD>
    <TD>38.782992</TD> 
  </TR>
</TABLEDATA>
\end{verbatim}
\fi

The number of {\elem{TD}} elements should be equal to the
number of {\elem{FIELD}} elements having \attr{datatype} attributes
declaring the table; when there are
fewer {\elem{TD}}'s than expected, the corresponding values are set
to \value{null}s; superfluous {\elem{TD}}'s are ignored.

While this serialization has a high overhead in the number of
bytes, it has the advantage that XML tools can manipulate and present
the table data directly.

Each item in the {\elem{TD}}
tag is passed to a reader that is implicitly defined by the {\attr{datatype}}
attribute of the corresponding {\elem{FIELD}},
which attempts to read the object from it. If it reads a value that
is the same as the {\attr{null}}
value for that field, then the cell will contain that value, and is
therefore assumed to contain no data.

Valid representations of a number in a cell, depending on their
\attr{datatype}, are detailed in \Aref{sec:datatypes}{the complete
description of datatypes}.

\iffalse
The reader may not succeed, for example if we try to parse the
string \value{36.9H9}
into a float, where the alphabetic character is obviously a problem. 
In this case, the parser may choose to insert the {\attr{null}}
value (no data available), or it may use a NaN (not a number), or it
may throw an exception. It might however be
useful if the data provider would warn that invalid data patterns
could be used to designate non-existing data -- via an \attr{invalid}
attribute.
\fi

If a cell contains an array or complex number, 
it should be encoded as multiple numbers separated by
whitespace. However in the case of character and Unicode strings, no
separators are required. Here is an example of a table with two
rows, that has arrays in the table cells:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\begingroup\small\fi
\label{example2}
\begin{verbatim}
<TABLE>
  <FIELD ID="aString" datatype="char" arraysize="10"/>
  <FIELD ID="Floats" datatype="float" arraysize="3"/>
  <FIELD ID="varComplex" datatype="floatComplex" arraysize="*"/>
  <DATA><TABLEDATA>
  <TR>
   <TD>Apple</TD><TD>1.62 4.56 3.44</TD>
   <TD>67 1.57  4 3.14  77 -1.57</TD>
  </TR><TR>
   <TD>Orange</TD><TD>2.33 4.66 9.53</TD>
   <TD>39 0  46 3.14</TD>
  </TR>
  </TABLEDATA></DATA>
</TABLE>
\end{verbatim}
\ifhtx\End{TABULAR}\else\endgroup\fi

The first entry is a fixed-length array of 10 characters; since
the value being presented ({\tt Apple}) has 5 characters, this
is padded with trailing blanks. The second cell is an
array of three floats.
The last cell contains a variable array of complex numbers, each complex
number being represented by its real part followed by at least a blank
and its imaginary part -- hence 6 numbers for 3 complex numbers,
or 4 numbers for 2 complex numbers.

\subsection{FITS Serialization}
\label{sec:FITS}
\label{elem:FITS}

The FITS format for binary tables [2] is in widespread use in astronomy,
and its structure has a major influence on the VOTable specification.
Metadata is stored in a header section, followed by the data. The
metadata is substantially equivalent to the metadata of the VOTable
format. One important difference is that VOTable does not require
specification of the number of rows in the table, an important
freedom if the table is being created dynamically from a stream.

The VOTable specification does not define the behavior of parsers
with respect to this doubling of the metadata. A parser may ignore
the FITS metadata, or it may compare it with the VOTable metadata for
consistency, or other possibilities.

The following code shows a fragment that might have been created
by a FITS-to-VOTable converter. Each FITS keyword has been converted
to a \elem{PARAM}, and the data itself is remotely stored and gzipped at an
ftp site:

\begin{plain}\small
\elemdef{RESOURCE}{}\\
\hspace*{0.5em}\elemdef{PARAM}{ \attrval{name}{EPOCH} \attrval{datatype}{float} 
        \attrval{value}{1999.987}}\\
        \hspace*{1em}Original Epoch of the coordinates
\hspace*{0.5em}\elemdef{\slash PARAM}{} \\
\hspace*{0.5em}\elemdef{PARAM}{ \attrval{name}{TELESCOP} \attrval{datatype}{char} 
   \attrval{arraysize}{*} \attrval{value}{VTel} \slash}\\
\hspace*{0.5em}\elemdef{INFO}{ \attrval{name}{HISTORY}}\\
  \hspace*{1em}The very first Virtual Telescope observation made in 2002 \\
\hspace*{0.5em}\elemdef{\slash INFO}{} \\
\hspace*{0.5em}\elemdef{TABLE}{}\\
\hspace*{0.5em}\elemdef{FIELD}{\quad(insert field metadata here) }\\
\hspace*{0.5em}\elemdef{DATA}{}\elemdef{FITS}{ \attrval{extnum}{2}}\\
\hspace*{1em}\elemdef{STREAM}{ \attrval{encoding}{gzip} %Not for REMOTE data ?
           \attrval{href}{ftp://archive.cacr.caltech.edu/myfile.fit.gz}\slash}\\
\hspace*{0.5em}\elemdef{\slash FITS}{}\elemdef{\slash DATA}{} \\
\hspace*{0.5em}\elemdef{\slash TABLE}{}\\
\elemdef{\slash RESOURCE}{}
\end{plain}

The FITS file may contain many data objects (known as extensions, 
numbered from 1 up -- the main header being numbered 0), and the
\attr{extnum} attribute allows the VOTable to point to one of
these.


\subsection{BINARY Serialization}
\label{sec:BIN}

The binary format is intended to be easy to read by parsers, so
that additional libraries are not required. It is just a sequence of
bytes, the length of each sequence corresponding to the {\attr{datatype}}
and {\attr{arraysize}} attributes of the {\elem{FIELD}}
elements in the metadata. The binary format consists of a sequence of
records, with no header bytes, no alignment considerations, no block sizes.
The order of the bytes in multi-byte primitives (e.g. integers,
floating-point numbers) is Most Significant Byte first, i.e.
it follows the FITS convention.

Table cells may contain arrays of primitive types, each of which
may be of fixed or variable length. In the former case, the number of
bytes is the same for each instance of the item, as specified by the
{\attr{arraysize}}
attribute of the {\elem{FIELD}}.
If all the fields have a fixed {\attr{arraysize}},
then each record of the binary format has the same length
(the sum of {\attr{arraysize}}
times the length in bytes of the corresponding {\attr{datatype}}).


Variable-length arrays of primitives are preceded by a 4-byte integer
containing the number of items of the array.
The way the stream of bytes is arranged for the data of the 
example in \Aref{example2}{the above section} is illustrated in 
\Fref{fig:bin}{Figure 2}.
The parser can then compute the number of bytes taken
by the variable-length array by multiplying the size and number 
of the primitives.

%Binary format becomes more complex. Fixed-length cells are stored
%stored as is, has first a part for
%the fixed-length fields, (as well as four bytes for each of the
%variable-length fields), followed by a section for the variable
%length fields. The four bytes for the variable-length field is
%interpreted as a four-byte integer with
%the number of items in
%the variable-length array, as shown in the figure. The parser can
%then read the data by computing appropriate offsets. This is done by
%multiplying the size and number of the primitives in each table cell
%to get length in bytes, then
%adding these lengths from previous
%variable-length sections of the record.

\label{Image2}
\ifhtx\begin{TABULAR}{c}
\tag{IMG SRC="binary.gif" NAME="Image2" ALIGN=LEFT BORDER=0}\end{TABULAR}
\else
\begin{figure}[htb]
\centering
\includegraphics[width=0.8\textwidth]{binary}
\caption{\label{fig:bin}Data Storage in BINARY mode}
\end{figure}
\fi

\iffalse
\par The figure shows the byte layout for this binary format for the same data
as the example in \Aref{sec:TABLEDATA}{section 6.1.1}.
Each record has a ten-byte character
array, followed by an array of three four-byte floats, and then a variable
number of complex numbers. The number of elements of these variable arrays 
are stored (3 and 2 in the two records shown), followed by the corresponding 
data (made of 24 and 16 bytes).
\fi


\subsection{Data Encoding}
\label{elem:STREAM}

As a result of the serialization, the table has been converted to
a byte stream, either text or binary. If the {\elem{TABLEDATA}}
serialization is used, then the table is represented as XML tags 
directly embedded in the document,
and conventional tools can be used to encode the entire XML document.
However, VOTable also provides limited encoding of its own. 
A VOTable document may point to a remote data resource that is compressed; 
rather than decompressing before sending on the wire, it can be dynamically
decoded by the VOTable reader. We might also use the encoding facilities to 
convert a binary file to text (through base64 encoding), so that binary 
data can be used in the XML document.

In this version (1.1) of VOTable, it is not possible to encode
individual columns of the table: the whole table must be encoded in
the same way. The possibility of encoding selected table cells
is however being examined for future versions of VOTable
(see \Arefx{sec:b64}{appendix below}).

In order to use an encoding of the data, it must be enclosed in a
{\elem{STREAM}}
element, whose attributes define the nature of the encoding. The
{\attr{encoding}}
attribute is a string that should indicate to the parser how to undo
the encoding that has been applied. Parsers should understand and
interpret at least the following values:
\begin{itemize}
        \item {\attrval{encoding}{gzip}} [RFC1952]
        implies that the data following has been compressed with the {\em gzip}
        filter, so that {\em gunzip} or similar should be applied.
        \item {\attrval{encoding}{base64}} [RFC2045]
        implies that the {\em base64} filter has been applied, to convert binary
        to text.
        \item {\attrval{encoding}{dynamic}}
	implies that the data is in a remote resource (see below), and the
	encoding will be delivered with the header of the data.
	This occurs with the http protocol, where the MIME header indicates 
	the type of encoding that has been used.
\end{itemize}

\noindent The default value of the encoding attribute is the null string, 
meaning that no encoding has been
applied. In future releases, we might allow more complex strings in
the encoding attribute, allowing combinations of encoding filters and
a way for the parser to find the software needed for the decoding.

\subsection{Remote Data}

If the encoding of the data produces text, or if the serialization
is naturally text-based, then it can be directly embedded into the
XML document, as for instance:
\begin{plain}
\hspace*{0.5em}\elemdef{DATA}{}\elemdef{BINARY}{}\\
\hspace*{1em}\elemdef{STREAM}{ \attrval{encoding}{base64}}\\
\hspace*{1.5em}
\verb+AAAAAj/yVZiDGSSUwFZ6ypR4yGkADwAcQV0euAAIAAJBmMzNwZWZmkGle4tBR3jVQT9ocwAA+\\
\hspace*{1.5em} $\cdots\cdots\cdots\cdots\cdots\cdots\cdots\cdots$\\
\hspace*{1em}\elemdef{\slash STREAM}{}\\
\hspace*{0.5em}\elemdef{\slash BINARY}{}\elemdef{\slash DATA}{}
\end{plain}

However, if the data is very large, it may be preferable to keep the data
separate from the metadata. The \attr{href} attribute of
the {\elem{STREAM}} element, if present, provides the location of the data
in a URL-type syntax, for example:

\begin{plain}
\elemdef{STREAM}{ \attrval{href}{ftp://server.com/mydata.dat}\slash}

\par\elemdef{STREAM}{ \attrval{href}{ftp://server.com/mydata.dat}
        \attrval{expires}{2004-02-29T23:59:59}\slash}

\par\elemdef{STREAM}{ \attrval{href}{httpg://server.com/mydata.dat} 
        \attrval{actuate}{onLoad}\slash}

\par\elemdef{STREAM}{ \attrval{href}{file:///usr/home/me/mydata.dat}\slash}
\end{plain}


The examples are the well-known anonymous ftp, and http protocols.
\value{httpg} is an example of a Grid-based access to data through httpg;
\value{file}, finally, is a reference to a local file.
VOTable parsers are not required to understand arbitrary protocols,
but are required to understand the  three common protocols
\value{file:}, \value{http:} and \value{ftp:}.

There are further attributes of the {\elem{STREAM}}
element that may be useful. The {\attr{expires}}
attribute indicates the expiration time of the data:
this is useful when data are dynamically created and stored 
on some staging disk where files only persist for a specified 
lifetime and are then automatically deleted.
The {\attr{expires}}
attribute expresses when a remote resource ceases to be valid,
and is expressed in Universal Time in the same way as the FITS
specification [2], itself conforming to ISO 8601 standard.

%\begin{verbatim}
%<STREAM expires="2002-01-31T12:00:00">
%\end{verbatim}

The {\attr{rights}}
attribute expresses authentication information that may be necessary
to access the remote resource. If  the VOTable document is suitably
encrypted, this attribute could be used to store a password.

The {\attr{actuate}}
attribute is borrowed from the XML Xlink specification, expressing
when the remote link should be actuated. The default is {\value{onRequest}},
meaning that the data is only fetched when explicitly requested (like
a link on an HTML page), and the {\value{onLoad}}
value means that data should be fetched as soon as possible (like an
embedded image on an HTML page).

\section{Definitions of Primitive Datatypes}
\label{sec:datatypes}

This section describes the primitives summarized in 
\Tref{primitives}{the table of primitives}
and their representations in the \elem{BINARY}  
and in the \elem{TABLEDATA} serializations (see \Aref{sec:TABLEDATA}{above}).
In the following, the term ``hexadigit'' designates the ASCII numbers
\value{0} to \value{9}, or the ASCII lower- or upper-case letters
\value{a} to \value{f} (i.e. a digit in an hexadecimal representation
of a number).

\begin{itemize}
\item {\bf Logical}\quad If the value of the {\attr{datatype}}
attribute specifies data type {\value{boolean}},
the contents of the field{ }shall consist in the \elem{BINARY} serialization of
ASCII \value{T}, \value{t},  or \value{1} indicating true,
ASCII \value{F}, \value{f}, or \value{0} indicating false; 
the ``null'' value is indicated by an ASCII NULL (hexadecimal 00),
a space (hexadecimal 20)
or a question mark \value{?} (hexadecimal 3F).
The acceptable representations in the \elem{TABLEDATA} serialization
include in addition any capitalisation variation of the 
strings \value{true}  and \value{false} (e.g. \value{tRUe} or \value{FalsE});
the default representation of a {\em null} value is an empty cell
(see \Aref{sec:values}{VALUES definitions above}).

\item {\bf Bit Array} \quad If the value of the {\attr{datatype}}
attribute specifies data type {\value{bit}},
the contents of the field{ }in the \elem{BINARY} serialization shall consist of
a sequence of bits starting with the most significant bit; the bits
following shall be in order of decreasing significance, ending with
the least significant bit. A bit field shall be composed of the
smallest number of bytes that can accommodate the number of elements
in the field. Padding bits shall be 0.
The representation of a bit array in the \elem{TABLEDATA} serialization
is made by a sequence of ASCII \value{0} and \value{1} characters.

\item {\bf Byte}\quad If the value of the {\attr{datatype}}
attribute specifies data type {\value{unsignedByte}},
the field shall contain in the \elem{BINARY} serialization a byte 
(8-bits) representing a number in the
range 0 to 255. 
%no default {\em null} value exists, if necessary one
%has to be defined via the \attr{null} attribute of the \elem{VALUES} element
%(\Aref{sec:values}{section 4.6}).
In the case of an array of bytes (\attrval{arraysize}{*}),
also known as a ``blob'', the bytes are stored consecutively.
The representation of a Byte in the \elem{TABLEDATA} serialization
can be its {\em decimal} representation (a number between {\tt0} and {\tt255})
or its {\em hexadecimal} representation when starting by {\tt0x} and 
followed by one or two hexadigits,
(e.g. {\tt0xff}), separated by at least one space from the next one
in the case of an array of bytes.
%the default representation of a {\em null} value is an empty cell
%(see \Aref{sec:values}{VALUES definitions above})
No default {\em null} value exists; if necessary one
has to be defined via the \attr{null} attribute of the \elem{VALUES} element
(\Aref{sec:values}{section 4.6}).

\item {\bf Character}\quad if the value of the {\attr{datatype}}
attribute specifies data type {\value{char}},
the field shall contain in the \elem{BINARY} serialization an ASCII 
(7-bit) character. 
The \attr{arraysize} attribute
indicates a character string composed of ASCII text. 
The \elem{BINARY} serialization follows the 
FITS rules for character strings,
and a character string may therefore be terminated by an ASCII 
NULL (hexadecimal code 00)
before the length specified in the \attr{arraysize} attribute:
in this case characters after the first ASCII NULL are not defined; 
and a string having the number of characters identical to
the \attr{arraysize} value is not NULL terminated. 
Characters should be represented in the \elem{TABLEDATA} serialization
using the normal rules for encoding XML text: 
%it is necessary to ensure 
%that special characters are not interpreted by XML: 
the ampersand (\&) can be written \verb+&amp;+ (symbolic representation)
or \verb+&#38;+ (decimal representation) or 
\verb+&#x26;+ (hexadecimal representation); the less-than ($<$) and greater-than ($>$) symbols should be coded \verb+&lt;+ and \verb+&gt;+
or \verb+&#x3C;+ and \verb+&#x3E;+ and a blank which would be interpreted
by XML as whitespace (e.g. several consecutive blanks) 
should be coded \verb+&nbsp;+ or \verb+&#x20;+.

\item {\bf Unicode Character}\quad If the value of the {\attr{datatype}}
attribute specifies data type {\value{unicodeChar}},
the field shall contain a Unicode character.
The \attr{arraysize} attribute
indicates a string composed of Unicode text ---
which enables representation of text in many non-Latin alphabets.
Each Unicode character is represented in the \elem{BINARY} serialization by 
two bytes, using the big-endian UCS-2 encoding (ISO-10646-UCS-2).
%; no byte order mark should appear. 
The representation of a Unicode character in the  \elem{TABLEDATA} serialization
follows the XML specifications, 
%: special ASCII characters are escaped as in
%the case of Character datatype, and 
and e.g. the Cyrillic uppercase ``Ya'' can be written 
\verb+&#x042F;+ in UTF-8.


\item {\bf 16-Bit Integer}\quad If the value of the {\attr{datatype}}
attribute specifies datatype {\value{short}}, 
the data in the \elem{BINARY} serialization shall consist of
big-endian twos-complement signed 16-bit integers 
(the most significant byte first). 
The representation of a Short Integer in the \elem{TABLEDATA} serialization
is either its decimal representation between -32768 and 32767
   made of an optional {\tt-} or {\tt+} sign followed by digits,
   or its hexadecimal representation when starting by {\tt0x}
   and followed by 1 to 4 hexadigits.
No default {\em null} value exists; if necessary one
has to be defined via the \attr{null} attribute of the \elem{VALUES} element
(\Aref{sec:values}{section 4.6}).

\item {\bf 32-Bit Integer }\quad If the value of the {\attr{datatype}}
attribute specifies datatype {\value{int}},
the data in the \elem{BINARY} serialization shall consist of 
big-endian twos-complement signed 32-bit
integers --- contained in four bytes, with the most significant byte first,
and subsequent bytes in order of decreasing significance. 
  The representation of an Integer in the \elem{TABLEDATA} serialization
  is either its decimal representation between -2147483648 and 2147483647
  made of an optional {\tt-} or {\tt+} sign followed by digits,
  or its hexadecimal representation when starting by {\tt0x}
  and followed by 1 to 8 hexadigits.
No default {\em null} value exists; if necessary one
has to be defined via the \attr{null} attribute of the \elem{VALUES} element
(\Aref{sec:values}{section 4.6}).

\item {\bf 64-Bit Integer}\quad If the value of the {\attr{datatype}}
attribute specifies datatype {\value{long}},
the data in the \elem{BINARY} serialization shall consist of 
big-endian twos-complement signed 64-bit integers
--- contained in eight bytes, with the most significant byte first,
and subsequent bytes in order of decreasing significance. 
The representation of a Long Integer in the \elem{TABLEDATA} serialization
  is either its decimal representation between -9223372036854775808
  and 9223372036854775807
  made of an optional {\tt-} or {\tt+} sign followed by digits,
  or its hexadecimal representation when starting by {\tt0x}
  and followed by 1 to 16 hexadigits.
No default {\em null} value exists; if necessary one
has to be defined via the \attr{null} attribute of the \elem{VALUES} element
(\Aref{sec:values}{section 4.6}).
%the default representation of a {\em null} value is an empty cell
%(see \Aref{sec:values}{VALUES definitions above})


\item {\bf Single Precision Floating Point}\quad If
the value of the {\attr{datatype}} attribute specifies datatype {\value{float}},
the data in the \elem{BINARY} serialization shall consist of 
ANSI/IEEE-754 32-bit floating point numbers in big-endian order. 
All IEEE special values are recognized. The IEEE {NaN}
pattern is used to represent ``null'' values.
The representation of a Floating Point number in the 
\elem{TABLEDATA} serialization is made of an optional {\tt-} or {\tt+},
followed by the ASCII representation of a positive decimal number,
and optionally followed by the ASCII letter \value{E} or  \value{e}
introducing the base-10 exponent made of an optional {\tt-} or {\tt+}
followed by 1 or 2 digits. The number must be within the limits of the
IEEE floating-point definition (around $\pm3.4\cdot10^{38}$; numbers with
absolute value less than about $1.4\cdot10^{-45}$ are equated to zero);
the default representation of a {\em null} value is an empty cell
(see \Aref{sec:values}{VALUES definitions above}), and the special
values \value{+Inf}, \value{-Inf}, and \value{NaN} are accepted.


\item {\bf Double Precision Floating Point}\quad If
the value of the {\attr{datatype}}
attribute specifies datatype {\value{double}},
the data in the \elem{BINARY} serialization shall consist of ANSI/IEEE-754
64-bit double precision floating point numbers in big-endian order. 
All IEEE special values are recognized. The IEEE {NaN}
pattern is used to represent ``null'' values.
The representation of a Double number in the 
\elem{TABLEDATA} serialization is made of an optional {\tt-} or {\tt+},
followed by the ASCII representation of a positive decimal number,
and optionally followed by the ASCII letter \value{E} or  \value{e}
introducing the base-10 exponent made of an optional {\tt-} or {\tt+}
followed by 1 to 3 digits. The number must be within the limits of the
IEEE floating-point definition (around $\pm1.7\cdot10^{308}$; numbers with
absolute value less than about $5\cdot10^{-324}$ are equated to zero);
the default representation of a {\em null} value is an empty cell
(see \Aref{sec:values}{VALUES definitions above}), and the special
values \value{+Inf}, \value{-Inf}, and \value{NaN} are accepted.

\item {\bf Single Precision Complex}\quad If the value of the {\attr{datatype}}
attribute specifies datatype {\value{floatComplex}},
the data in the \elem{BINARY} serialization shall consist of a sequence of 
pairs of 32-bit single precision floating point numbers in big-endian order. 
The first member of each
pair shall represent the real part of a complex number and the
second member shall represent the imaginary part of that complex
number. If either member contains a {NaN},
the entire complex value is ``null''.
The representation of a Floating Complex number in the 
\elem{TABLEDATA} serialization is made of two representations
of {\em Single Precision Floating Point} numbers separated by at least
one blank, representing the real and imaginary part respectively;
the default representation of a {\em null} value is an empty cell
(see \Aref{sec:values}{VALUES definitions above}).

\item {\bf Double Precision Complex}\quad If the
value of the {\attr{datatype}}
attribute specifies datatype {\value{doubleComplex}},
the data in the \elem{BINARY} serialization  shall consist of a
sequence of pairs of 64-bit double precision floating point numbers
in big-endian order.
The first member of each pair shall represent the real part of a
complex number and the second member of the pair shall represent the
imaginary part of that complex number. If either member contains a
{NaN}, the entire complex
value is ``null''.
The representation of a Double Complex number in the 
\elem{TABLEDATA} serialization is made of two representations
of {\em Double Precision Floating Point} numbers separated by at least
one blank, representing the real and imaginary part respectively;
the default representation of a {\em null} value is an empty cell
(see \Aref{sec:values}{VALUES definitions above}).
\end{itemize}


%\clearpage
\section{A simplified view of the VOTable 1.1 Schema}
\label{dtd}
\begin{quote} \em 
The XML Schema [8] defining the VOTable document
is available from 

http://vizier.u-strasbg.fr/xml/VOTable-1.1.xsd
\end{quote}

\subsection{Element Hierarchy}
The illustration of the XML schema uses the following conventions:
{\em italicized} text represents {\em optional} elements;
\order{} indicates that the order of the elements is mandatory,
while the open bullet \unorder{} indicates that the elements may 
occur in any order; the \choice{} symbol marks a choice
between alternatives. The dots $\cdots$ indicate that an element
may be repeated. The \underline{underlined elements} are explained
in a dedicated box.


\ifhtx\Beg{TABULAR}{\bg{LavenderBlush} CELLPADDING=4}{||l||}
\else\begin{quote}\small
\fi
%\inputverbatim{VOTable_v1.0.dtd}
\input{VOTable-1.1.elem.tex}
%\verbatiminput{VOTable_v1x.dtd}\normalsize
\ifhtx\End{TABULAR}\else\end{quote}
\fi

\iffalse
%\clearpage
%\section{Schema Diagram for VOTable}
%\ifhtx\tag{IMG SRC="diagram_v1.gif" NAME="VOTable Schema"
%   ALIGN=BOTTOM BORDER=0}\else
%%\begin{figure}[htbp]
%%\hspace*{-1cm}
%\includegraphics[width=\textwidth]{diagram_v1.jpg}
%%height=0.75\textheight
%%\caption{Schema Diagram for VOTable}
%%\end{figure}
\fi

\subsection{Attribute summary}
The list of the attributes is summarized in the table below;
attributes written in bold are \requiredattr{required attributes},
while the attributes written in a {fixed font} are \attr{optional}.
The {\em italicized} attributes are mentioned in the
Appendix, and are not part of VOTable 1.1.

\ifhtx\Beg{TABULAR}{\bg{LavenderBlush} CELLPADDING=4}{||l||}
\else\begin{quote}\small
\fi
\input{VOTable-1.1.attr.tex}
\ifhtx\End{TABULAR}\else\end{quote}
\fi

%\clearpage
\section{Differences between versions 1.0 and 1.1}
\label{diff}
The differences between version 1.1 of VOTable and the preceding
version 1.0 are:

\ifhtx\Beg{TABULAR}{\bg{LavenderBlush} CELLPADDING=4}{||p||}
\fi
\begin{itemize}
\item  the introduction of \elem{GROUP} element (\Aref{sec:group}{description})
\item  the introduction of the \attr{utype} attribute in the \elem{FIELD},  
      \elem{PARAM} and \elem{GROUP} elements (\Aref{sec:utype}{description}) 
\item  generalisation of the description of a table as an unordered
	mixture of \elem{FIELD}, \elem{PARAM} and \elem{GROUP}
	elements
\item  the \elem{INFO} elements may exist in  \elem{TABLE} as well as
	\elem{RESOURCE} 
% the \attr{value} attribute is required in a \elem{PARAM} element
\item  the \elem{VALUES} element can have a \attr{ref} attribute
\item  the usage and scope of the \attr{null} attribute was clarified
\item  in the \elem{BINARY} serialization, there is no requirement
	of having fixed-length columns first.
\end{itemize}
\ifhtx\End{TABULAR}\fi

\section{References}
\noindent [1] Accomazzi {\it  et al., Describing Astronomical Catalogues and
Query Results with XML
}\\ \hspace*{1em}{{http://vizier.u-strasbg.fr/doc/astrores.htx}}

\noindent [2] {\it FITS: Flexible Image Transport
System}, specifically the Binary Tables
Extension
\\ \hspace*{1em}{{http://fits.gsfc.nasa.gov/}}

\noindent [3] {\it Standards for Astronomical
Catalogues: Units, CDS Strasbourg}
\\ \hspace*{1em}{{http://vizier.u-strasbg.fr/doc/catstd-3.2.htx}}
\\ \hspace*{1em} {\it See also Section 4 in} {Greisen and Calabretta} 2002,
	A\&A {\bf 395}, 1061; and the IAU Recommendations concerning Units
	from the {\em IAU Style Manual} by  G.A. Wilkins (1989)
	available at
	http://www.iau.org/IAU/Activities/nomenclature/units.html

\noindent [4] {\it Unified Content
Descriptors}
\\ \hspace*{1em}{{http://vizier.u-strasbg.fr/doc/UCD.htx}} (UCD1)
\\ \hspace*{1em}{{http://www.ivoa.net/twiki/bin/view/IVOA/IvoaUCD}}

\noindent [5] {\it GLU: G\'{e}n\'{e}rateur de Liens Uniformes, CDS
Strasbourg}
\\ \hspace*{1em}{{http://simbad.u-strasbg.fr/glu/glu.htx}}

\noindent [6] {\it ASU: Astronomical Server URL, CDS
Strasbourg}
\\ \hspace*{1em}{{http://vizier.u-strasbg.fr/doc/asu.html}}

\noindent [7] {\it XDF: Extensible Data format, ADC}
\\ \hspace*{1em}{{http://xml.gsfc.nasa.gov/XDF/XDF\_home.html}}

\noindent [8] {\it XML Schema: W3C Document}
\\ \hspace*{1em}{{http://www.w3.org/XML/Schema}}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%	A P P E N D I C E S
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\ifhtx\par\thickrule\par\else\clearpage\fi
\appendix
\noindent {\bf\LARGE Appendices}

\bigskip

%\begin{quote}
%\em\fg{DarkBlue} 
\section{Possible VOTable extensions}
The definitions enclosed in this appendix
are {\bf not} part of VOTable 1.1, but are considered as candidates
for VOTable improvements. 
%This section is a short explanation on how Astrores defines
%the set of parameters and fields which can be qualified for a query --
%what could be defined as the contents of a {\bf form}.
%VOTable currently does not define the parameters available for a query;
%such definitions are delayed to the next version of VOTable, and could
%make use of the Web Services Description Language (WSDL) 
%\end{quote}


\subsection{VOTable LINK substitutions}
\label{LINK}

\begin{quote}\em \fg{DarkBlue}
  The \elem{LINK} element in Astrores [1]
  contains a mechanism for string substitution,
  which is a powerful way of defining a link to external data
  which adapts to each record contained in the table \elem{DATA}.
\end{quote}

When a {\elem{LINK}} element appears within a \elem{RESOURCE} or a
{\elem{TABLE}} element,
extra functionality is implied. The {\attr{href}}
or {\attr{gref}} attribute need not be a simple link; it may instead be
a template for a link. If, in the  example of 
\Aref{example1}{myFavouriteGalaxies}, we add the link

\begin{verbatim}
  <LINK href="http://ivoa.net/lookup?Galaxy=${Name}&amp;RA=${RA}&amp;DE=${DE}"/>
\end{verbatim}

\noindent a substitution filter is applied in the context of a particular row.
For the first row of the table, the substitution would result in the URL

\begin{verbatim}
   http://ivoa.net/lookup?Galaxy=N++224&RA=010.68&DE=%2b41.27
\end{verbatim}

Whenever the pattern {\tt{\$\{...\}}}
is found in the original link, the part in the braces is compared
with the set of {\attr{ID}} (preferably) or \attr{name}
attributes of the fields of the table. If a match is found, then the
value from that field of the selected row is used in place of the
{\tt{\$\{...\}}}. If no match is found, no substitution is made. Thus the
parser makes available to the calling application a value of the {\attr{href}}
and {\attr{gref}}
attributes that depends on which row of the table has been selected.
Another way to think of it is that there is not a single link
associated with the table, but rather an implicitly defined new
column of the table. This mechanism can be used to connect each row
of the table to further information resources.

%The {\attr{action}} attribute is related to the Query mechanism described in
%the \Aref{query}{next section}.


The purpose of the link is defined by the {\attr{content-role}}
attribute. The allowed values are {\value{query}} 
(see \Aref{query}{query mechanism}), 
{\value{hints}} for information for use by the application,
and {\value{doc}} for  human-readable documentation.
%The first implies that string substitution should be used as defined
%above, and the latter two imply first that no substitution is needed,
%and that the link points to either information for use by the
%application ({\value{hints}})
%or human-readable documentation ({\value{doc}}).

The column names invoked in the pattern of the \attr{href} attribute
of the \elem{LINK} element should exist in the document to 
generate meaningful links. 
In the common case where the VOTable was generated from a query
of a database and contains only some of the columns in that
database, it might be necessary to include columns additional to
those requested in order to ensure that the LINKS in the VOTable
are operational.
\iffalse
This necessity may imply that a \elem{TABLE}
contains columns which were not asked, but have been included to
ensure that the quoted \elem{LINK}s are operational.
\fi
Such a \elem{FIELD} included ``by necessity'' is marked
by the attribute \attrval{type}{hidden}. The primary key of
a relational table is a typical example of a \elem{FIELD} 
which would carry the \attrval{type}{hidden} attribute.

\subsection{VOTable Query Extension}
\label{query}

\begin{quote}\em\fg{DarkBlue}
  The metadata part included in  a \elem{RESOURCE} contains
  all the details necessary to create a {\em form} for querying
  the resource. The addition of a link having the \attr{action} 
  attribute can turn VOTable into a powerful query interface.
\end{quote}

\noindent In Astrores [1], the details on the input parameters available in
queries are described by the 
{\elem{PARAM}} and {\elem{FIELD}} elements, and the syntax used
to generate the actual query is described in the ASU [6] protocol:
the {\elem{FIELD}} or \elem{PARAM} elements are
paired in the form {\it name}{{\tt=}}{\it value},
where {\it name} is the contents of the
\attr{name} attribute of a \elem{FIELD} or \elem{PARAM}, 
and  {\it value} represents a constraint
written with the ASU conventions (e.g. \value{$<8$}
 or {\value{12.0..12.5}}
which denotes a range of values). 
Such pairs are  appended to the
{\attr{action}} specified in the {\elem{LINK}}
element contained in the {{\elem{RESOURCE}}},
separated by the ampersand (\&) symbol --
in a way quite similar to the HTML syntax used to 
describe a {\elem{FORM}}.

A special \attrval{type}{no\_query} attribute of the
\elem{PARAM} or \elem{FIELD} elements marks the fields
which are {\em not} part of the form, i.e. are ignored 
in the collection of {\it name}{{\tt=}}{\it value} pairs.

The following is an example of a transformation of the VOTable
in \Aref{example1}{the example} into a form interface:
\label{form1}
\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\begingroup\small
\fi
\begin{verbatim}
<?xml version="1.0"?>
<VOTABLE version="1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:noNamespaceSchemaLocation="http://vizier.u-strasbg.fr/xml/VOTable.xsd">
  <DEFINITIONS>
  <COOSYS ID="J2000" equinox="2000." epoch="2000." system="eq_FK5"/>
  </DEFINITIONS>
  <RESOURCE name="myFavouriteGalaxies" type="meta">
    <PARAM name="-out.max" ucd="NUMBER" datatype="int" value="50">
      <DESCRIPTION>Maximal number of records to retrieve</DESCRIPTION>
    </PARAM>
    <LINK  content-role="query" action="myQuery?-source=myGalaxies&amp;" />
    <TABLE name="results">
      <DESCRIPTION>Velocities and Distance estimations</DESCRIPTION>
      <PARAM name="Epoch" datatype="float" ucd="TIME_EPOCH" 
             value="2003.875"/>
      <FIELD name="RA"   ID="col1" ucd="POS_EQ_RA_MAIN" ref="J2000" datatype="float"
             width="6" precision="2" unit="deg"/>
      <FIELD name="Dec"  ID="col2" ucd="POS_EQ_DEC_MAIN" ref="J2000" datatype="float"
             width="6" precision="2" unit="deg"/>
      <FIELD name="Name" ID="col3" ucd="ID_MAIN" datatype="char" arraysize="8*"/>
      <FIELD name="RVel" ID="col4" ucd="VELOC_HC" datatype="int"
             width="5" unit="km/s"/>
      <FIELD name="e_RVel" ID="col5" ucd="ERROR" datatype="int"
             width="3" unit="km/s"/>
      <FIELD name="R" ID="col6" ucd="PHYS_DISTANCE_TRUE" datatype="float"
             width="4" precision="1" unit="Mpc">
        <DESCRIPTION>Distance of Galaxy, assuming H=75km/s/Mpc</DESCRIPTION>
      </FIELD>
    </TABLE>
  </RESOURCE>
</VOTABLE>
\end{verbatim}
\ifhtx\End{TABULAR}
\else
%\caption{\label{example1}A simple VOTable example}
\endgroup
\fi

\noindent Note that the {\elem{RESOURCE}} displaying the parameters accessible 
for a query has the {\attrval{type}{meta}}
attribute; it is also assumed that only one {\elem{LINK}}
having the {\attrval{content-role}{query}}
attribute together with an {\attr{action}}
attribute exists within the current {\elem{RESOURCE}}.
The \elem{PARAM} with \attrval{name}{-out.max} has been added in this
example to control the size of the result.

A valid query generated by this VOTable could be:

\begin{verbatim}
  myQuery?-source=myGalaxies&-out.max=50&R=10..100
\end{verbatim}

%\subsection{Additional Propositions}

\subsection{Arrays of variable-length strings}
\label{sec:arraystring}
Following the FITS conventions, strings are defined as arrays of
characters. This definition raises problems for the definition
of arrays of strings, which have then to be defined as 2D-arrays
of characters -- but in this case only the slowest-varying dimension
(i.e. the number of strings) can be variable. %According to this
%limitation, the list of references given in the example above
%(\elemdef{FIELD}{\attrval{name}{ references}}) was assigned an arraysize
%of 20 to take into account the blank which separates two references
%made of 19 characters each.
This limitation becomes severe when a table column contains a set
of remarks, each being made of a variable number of characters as it
occurs in practice.

FITS invented the {\em Substring Array} convention (defined in an appendix,
i.e. not officially approved) which defines a {\em separator} character
used to denote the end of a string and the beginning of the next one.
In this convention ($r${\tt A:SSTR}$w$/$ccc$) the total size of the character
array is specified by $r$, $w$ defines the maximum length of one string,
and $ccc$ defines the separator character as its ASCII equivalent value.
The possible values for the separator include the space and any printable
character, but excludes the control characters.

Such arrays of variable-length strings are frequently useful e.g.
to enumerate a list of properties of an observed source, each property being
represented by a variable-length string.
A convention similar to the FITS one could be introduced in 
VOTable in the \attr{arraysize}
attribute, using the letter {\bf s} followed by the separator character;
an example can be \attrval{arraysize}{100s,}
indicating a string made of up to 100 characters, where the comma
is used to separate the elements of the array.

\subsection{FIELDs as data pointers}
\label{location}

Rather than requiring that all data described in the set of \elem{FIELD}s
are contained in a single stream which follows the metadata part, 
it would be possible to let the \elem{FIELD} act as 
a {\em pointer} to the actual data, either in the form of a URI or of
a reference to a component of a multipart document.

Each component of the data described by a \elem{FIELD} may effectively
have different requirements: while text data or small lists of numbers
are quite efficiently represented in pure XML, long lists like spectra
or images yield poor performance when converted to XML.
The method available to gain efficiency is to use a
binary representation of the {\em whole data stream} by means of the
\elem{STREAM} element -- at the price of delivering data in a totally non-human
readable format.

%\subsection{The \attrval{type}{location} attribute}
The following options would allow more flexibility in the way the 
various \elem{FIELD}s can be accessed:

\begin{itemize}
\item	a \elem{FIELD} can be declared as being a {\em pointer}
	with the addition of a \attrval{type}{location} value,
	meaning that the field contains a way to access the data, 
	and not the actual data;
\item	a \elem{FIELD} can contain a \elem{LINK} element marked 
	\attrval{type}{location} which contains in its
	\attr{href} attribute the partial URI to which the contents
	of the column cell is appended in order to generate a
	fully qualified URI.
\end{itemize}
Note that the \elem{LINK} is not required -- a \elem{FIELD} declared
with \attrval{type}{location} and containing no \elem{LINK} element
is assumed to contain URIs.

An example of a table describing a set of spectra could look like the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}
\else\small\fi
\begin{verbatim}
<TABLE name="SpectroLog">
  <FIELD name="Target" ucd="ID_TARGET" datatype="char" arraysize="30*"/>
  <FIELD name="Instr" ucd="INST_SETUP" datatype="char" arraysize="5*"/>
  <FIELD name="Dur" ucd="TIME_EXPTIME" datatype="int" width="5" unit="s"/>
  <FIELD name="Spectrum" ucd="DATA_LINK" datatype="float" arraysize="*"
         unit="mW/m2/nm" type="location">
    <DESCRIPTION>Spectrum absolutely calibrated</DESCRIPTION>
    <LINK type="location" 
        href="http://ivoa.spectr/server?obsno="/>
  </FIELD>
  <DATA><TABLEDATA>
    <TR><TD>NGC6543</TD><TD>SWS06</TD><TD>2028</TD><TD>01301903</TD></TR>
    <TR><TD>NGC6543</TD><TD>SWS07</TD><TD>2544</TD><TD>01302004</TD></TR>
  </TABLEDATA></DATA>
</TABLE>
\end{verbatim}\ifhtx\End{TABULAR}\else\normalsize\fi

\noindent
The reading program has therefore to retrieve the data 
for this first row by resolving the URI
\begin{plain}
{\tt http://ivoa.spectr/server?obsno=01301903}
\end{plain}

\noindent
The same method could also be immediately applicable to  {\em Content-ID}s
which designate elements of a multipart message, using the protocol
prefix {\tt cid:} [RFC2111].

Note that the {\em VOTable LINK substitution} proposed in 
\Aref{LINK}{Appendix A} fulfills a similar function: 
it generates a pointer which can incorporate in its address components
from the \elem{DATA} part of the VOTable.

\subsection{Encoding individual table cells}
\label{sec:b64}
Accessing binary data improves quite significantly the efficiency
both in storage and CPU usage, especially when one compares with the
XML-encoded data stream. But binary data cannot be included in the
same stream as the metadata description, unless a dedicated coding
filter is applied which converts the binary data into an ASCII representation.
Base64 is the most commonly used filter which does this conversion, where 
3 bytes of data are coded as 4 ASCII characters, which implies an overhead of
33\% in storage, and some (small) computing time necessary for the reverse 
transformation.

In order to keep the full VOTable document in a unique stream,
VOTable 1.0 introduced the \attr{encoding} attribute in the
\elem{STREAM} element, meaning that the data, stored as binary records,
are converted into some ASCII representation compatible with the 
XML definitions. One drawback of this method is that the entire data
contents become non human-readable.
%it should also be noted that the
%binary encoding of the full records can result in a waste of storage
%when the data contains arrays which size can vary widely from record
%to record.

The addition of the \attr{encoding} attribute in the \elem{TD} element
allows the data server to decide, at the cell level, whether it is more
efficient to distribute the data as binary-encoded or as edited
values. The result may look like the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
<TABLE name="SpectroLog">
  <FIELD name="Target" ucd="ID_TARGET" datatype="char" arraysize="30*"/>
  <FIELD name="Instr" ucd="INST_SETUP" datatype="char" arraysize="5*"/>
  <FIELD name="Dur" ucd="TIME_EXPTIME" datatype="int" width="5" unit="s"/>
  <FIELD name="Spectrum" ucd="SPECT_FLUX_VALUE" datatype="float" arraysize="*"
         unit="mW/m2/nm" precision="E3"/>
  <DATA><TABLEDATA>
    <TR><TD>NGC6543</TD><TD>SWS06</TD><TD>2028</TD><TD encoding="base64">
    QJKPXECHvndAgMScQHul40CSLQ5ArocrQLxiTkC3XClAq0OWQKQIMUCblYFAh753QGij10BT
    Em9ARKwIQExqf0BqbphAieuFQJS0OUCJWBBAhcrBQJMzM0CmRaJAuRaHQLWZmkCyhytAunbJ
    QLN87kC26XlA1KwIQOu+d0DsWh1A5an8QN0m6UDOVgRAxO2RQM9Lx0Din75A3o9cQMPfO0C/
    dLxAvUeuQKN87kCXQ5ZAjFodQH0vG0B/jVBAgaHLQI7Ag0CiyLRAqBBiQLaXjUDYcrBA8p++
    QPcKPUDg7ZFAwcKPQLafvkDDlYFA1T99QM2BBkCs3S9AjLxqQISDEkCO6XlAmlYEQKibpkC5
    wo9AvKPXQLGBBkCs9cNAuGp/QL0euEC4crBAuR64QL6PXEDOTdNA2987QN9T+EDoMSdA8mZm
    QOZumEDDZFpAmmZmQGlYEEBa4UhAivGqQLel40Dgan9A4WBCQLNcKUCIKPZAk1P4QNWRaEEP
    kWhBKaHLQTkOVkFEan9BUWBCQVyfvg==
    </TD></TR>
  </TABLEDATA></DATA>
</TABLE>
\end{verbatim}
\ifhtx\End{TABULAR}\fi
\par

\noindent 
When decoded, the contents of the last column is the binary representation
of the spectrum, as defined in \Aref{sec:BIN}{the BINARY serialization};
no length prefix is required here, the total length of the array being
implicitly defined by the length of the encoded text.

\subsection{Additional \elem{TABLE} attributes}	% Suggested TomMcGlynn
% Nov.2003, Mails 7978 + 8073
The \elem{GROUP} introduced in version 1.1 can be qualified
by \attr{ucd} and \attr{utype} attributes. These attributes 
could similarly bring useful additional metadata to qualify the
\elem{TABLE} structure.

\subsection{A new \elem{XMLDATA} serialization}
% Following discussions Tony Linde / Roy Williams
% in January 2004 on the VOTable group
In order to facilitate the usage of the standard XML query tools
which are easier to handle when each parameter has its individual
tag, the \elem{XMLDATA} serialization introduces the designation of
each  \elem{FIELD} by a dedicated tag. An example could look like
the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
<TABLE name="Messier">
  <FIELD name="Number" ID="M" ucd="ID_NUMBER" datatype="int" >
    <DESCRIPTION>Messier Number</DESCRIPTION>
  </FIELD>
  <FIELD name="R.A.2000" ID="RA" ucd="POS_EQ_RA_MAIN" ref="J2000" unit="deg" 
     datatype="float" width="5" precision="1" />
  <FIELD name="Dec.2000" ID="DE" ucd="POS_EQ_DEC_MAIN" ref="J2000" unit="deg" 
     datatype="float" width="5" precision="1" />
  <FIELD name="Name" ID="N" ucd="ID_ALTERNATIVE" datatype="char" arraysize="*">
    <DESCRIPTION>Common name used to designate the Messier object</DESCRIPTION>
  </FIELD>
  <FIELD ID="T" name="Classification" datatype="char" arraysize="10*" 
     ucd="CLASS_OBJECT">
     <DESCRIPTION>Classification (galaxy, globular cluster, etc)</DESCRIPTION>
  </FIELD>
  <DATA><XMLDATA>
    <TR>
      <M>3</M>
      <RA>205.5</RA>
      <DE>+28.4</DE>
      <N/>
      <T>Globular Cluster</T>
    </TR>
    <TR>
      <M>31</M>
      <RA>010.7</RA>
      <DE>+41.3</DE>
      <N>Andromeda Galaxy</N>
      <T>Galaxy</T>
    </TR>
  </XMLDATA></DATA>
</TABLE>
\end{verbatim}
\ifhtx\End{TABULAR}\fi
\par

\noindent The full document would need an XML-Schema definition of the tags
\elem{M}, \elem{RA}, \elem{DE}, \elem{N} and \elem{T}; these being
derived directly from the \attr{ID} attribute of the \elem{FIELD}
element, their definition can be generated automatically from the set of
\elem{FIELD} definitions.
\end{document}