%++++++++
%.IDENTIFICATION VOTable.tex
%.PURPOSE        Specifications of VOTABLE.
%.AUTHOR         Roy Williams, Francois Ochsenbein
%.VERSION  0.96  25-Mar-2002
%.VERSION  0.97  29-Mar-2002
%.VERSION  0.99  09-Apr-2002
%.VERSION  1.0   15-Apr-2002
%.VERSION  1.x   27-sep-2003 François Ochsenbein
%--------
% Output-format switch and document identification.
% \ifhtx expands to \iffalse here, so under LaTeX every
% \ifhtx ... section below is skipped and only the LaTeX branch is used.
% NOTE(review): presumably the HTX (HTML) converter supplies its own
% meaning for \ifhtx -- confirm against the converter.
\def\ifhtx{\iffalse}    % Lines used only for the HTML version
\def\Version{1x}	% Document version, shown in the title block
% Document date, shown next to the version in the title block
\def\Vdate{27 Sep 2003}
% HTML header and body opening for the HTX version of the document.
% Under LaTeX this whole branch is skipped because \ifhtx is \iffalse.
\ifhtx			% Declarations for HTX (HTML)
\begin{head}
  \tag{META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=iso-8859-1"}
  \begin{title}VOTable \Version\ Proposal\end{title}
  \tag{META NAME="robots" content="index"}
  \tag{META NAME="description" content="VOTable Proposal"}
  \tag{META REV="made" HREF="mailto:VOTable@us-vo.org"}
  \tag{META NAME="GENERATOR" CONTENT="cgiprint"}
  \tag{META NAME="AUTHOR" CONTENT="Francois Ochsenbein, Roy Williams"}
  \tag{META NAME="CREATED" CONTENT="20030927;23390000"}
% \tag{META NAME="CHANGEDBY" CONTENT="Francois Ochsenbein"}
% \tag{META NAME="CHANGED" CONTENT="20020315"}
\end{head}
\Beg{body}{\bg{white}}
\else			% Declarations for LaTeX
\documentclass{article}
% No explicit driver option: the graphics bundle selects the driver that
% matches the engine in use, so the file is not tied to pdfTeX.
\usepackage{graphicx}		% Accept Images
\usepackage{color}		% Accept Colors
\usepackage{verbatim}		% Verbatim environments
\let\fg=\color			% fg = foreground color
\let\Beg=\begin			% \Beg is the environment opener used by the HTX markup
% Page layout
\topmargin=-1cm
\raggedbottom
\oddsidemargin=0cm
\textwidth=17.5cm		% The default width is too small...
\textheight=23.5cm		% The default height is too small...
% Table appearance: thicker rules and taller rows
\arrayrulewidth=0.75pt
\renewcommand{\arraystretch}{1.2}
% Colors used by the XML markup macros
\definecolor{DarkRed}{rgb}{0.5,0,0}
\definecolor{DarkBlue}{rgb}{0,0,0.5}
\definecolor{DarkGreen}{rgb}{0,0.5,0}
\definecolor{DarkMagenta}{rgb}{0.5,0,0.5}
\definecolor{DarkGoldenrod}{rgb}{0.72,0.5,0.05}
% ---- Markup macros for XML element and attribute names ----
% \slash: a blue "/".
% NOTE(review): \slash is also a LaTeX kernel command; this \def replaces
% it -- confirm nothing relies on the original meaning.
\def\slash {{\fg{blue}/}}
% \attr{name}: attribute name in typewriter, DarkRed.
\def\attr#1{{\tt{\fg{DarkRed}#1}}}
% \elem{name}: element name, same rendering as \attr.
\def\elem#1{{\tt{\fg{DarkRed}#1}}}
% \attrval{name}{value}: name="value" in typewriter; the name is DarkRed,
% the value DarkMagenta.
\def\attrval#1#2{{\tt{\fg{DarkRed}#1}="{\fg{DarkMagenta}#2}"}}
% \elemdef{name}{rest}: <name rest> with blue angle brackets; #2 is
% appended right after the name with no separating space.
\def\elemdef#1#2{{\fg{blue}$<$}{\tt{\fg{DarkRed}#1}#2}{\fg{blue}$>$}}
% \xtag{name}: a bare tag, i.e. \elemdef with an empty second argument.
\def\xtag#1{\elemdef{#1}{}}
% \Aref{label}{text}: prints "section~\ref{label}"; #2 is ignored here.
\def\Aref#1#2{section~\ref{#1}}
% \value{text}: text in DarkMagenta between typewriter double quotes.
% NOTE(review): \value is also the LaTeX kernel counter accessor; this
% \def replaces it -- confirm nothing relies on the original meaning.
\def\value#1{{\tt"}{\fg{DarkMagenta}#1}{\tt"}}
% Disabled helper, kept for reference: inputs a file verbatim.
%\def\inputverbatim#1{%
%\begingroup \catcode``=13 \@noligs \tt \let\do\@makeother \dospecials
%\obeylines \obeyspaces \frenchspacing \input #1 \endgroup }
% \thickrule: unindented 1pt rule spanning the text width.
\def\thickrule{\noindent\rule{\textwidth}{1pt}}
% \Plain{text}: text in sans-serif.
\def\Plain#1{{\sf #1}}
\begin{document}
% TABULAR takes TWO arguments: #1 is discarded under LaTeX (the HTX calls
% in this file pass HTML table attributes there), #2 is the tabular
% column specification.
\newenvironment{TABULAR}[2]{\begin{tabular}{#2}}{\end{tabular}}
% plain: rendered as a quote environment under LaTeX.
\newenvironment{plain}{\begin{quote}}{\end{quote}}
\fi

\begin{center}
{{\Large\bf Proposed Extensions to VOTable 1.0 }}
\bigskip

\bigskip
{\bf\fg{DarkGoldenrod}Document \Version{ }(\Vdate)}
\par
\begin{tabular}{rl}
{\em Document repository:}& http://cdsweb.u-strasbg.fr/doc/VOTable/\\
{\em Comments:} & votable@ivoa.net\\
\end{tabular}
\end{center}

\ifhtx
\par\thickrule\par
\tableofcontents
\fi
\par\thickrule\par


\section{Introduction}

The VOTable format is a proposed XML standard for representing 
tabular data in the context of the Virtual Observatory (VO); its version 1.0,
available from http://www.ivoa.net/twiki/bin/view/IVOA/IvoaVOTable,
defines the basic layout and the relations with the already existing
data formats like FITS tables.

In our context, the distinctive feature of a VOTable lies in the
way the {\em metadata} (data describing the data) are organized,
with the aim of letting the tools of the VO automatically interpret
data coming from a variety of sources.
The example below, a bit expanded from the example of the
version 1.0, shows the metadata (located between
the \xtag{TABLE} and \xtag{DATA} markers), which consist essentially of
a set of \xtag{FIELD}s; the data follow,
expressed in the example as an XML-formatted \xtag{TABLEDATA} structure.
In terms of the semantic web community, the organization of
a VOTable can be viewed as a definition of the {\em properties} of 
the tabular entity (the metadata), followed by the {\em values} of these
properties in the many {\em instances} of the entity (the rows).

\subsection{Example}

This simple example of a VOTable document lists 3 galaxies with their
velocity, distance, and literature references where velocity measurements
have been  published.
%
\label{example1}
\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
%	<TR><TD>003.73</TD><TD>-39.19</TD><TD>N   55</TD><TD>140</TD>
%	    <TD>1.3</TD>
%        </TR>
%	<TR><TD>011.89</TD><TD>-25.29</TD><TD>N  253</TD><TD>249</TD>
%	    <TD>3.0</TD>
%        </TR>
%
\begin{verbatim}
<?xml version="1.0"?>
<VOTABLE version="1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:noNamespaceSchemaLocation="http://vizier.u-strasbg.fr/xml/VOTable.xsd">
  <DEFINITIONS>
  <COOSYS ID="J2000" equinox="2000." epoch="2000." system="eq_FK5"/>
  </DEFINITIONS>
  <RESOURCE name="myFavouriteGalaxies">
    <TABLE name="results">
      <DESCRIPTION>Velocities and Distance estimations</DESCRIPTION>
      <FIELD name="RA" ucd="POS_EQ_RA_MAIN" ref="J2000" datatype="float" 
             width="6" precision="2" unit="deg"/>
      <FIELD name="Dec" ucd="POS_EQ_DEC_MAIN" ref="J2000" datatype="float" 
             width="6" precision="2" unit="deg"/>
      <FIELD name="Name" ucd="ID_MAIN" datatype="char" arraysize="8*"/>
      <FIELD name="RVel" ucd="VELOC_HC" datatype="float" 
             width="5" unit="km/s"/>
      <FIELD name="R" ucd="PHYS_DISTANCE_TRUE" datatype="float" 
             width="4" precision="1" unit="Mpc">
        <DESCRIPTION>Distance of Galaxy, assuming H=75km/s/Mpc</DESCRIPTION>
      </FIELD>
      <FIELD name="references" ucd="REFER_BIBCODE" datatype="char" 
             arraysize="20x*"/>
      <DATA>
        <TABLEDATA>
        <TR><TD>010.68</TD><TD>+41.27</TD><TD>N  224</TD><TD>-192</TD>
	    <TD>0.7</TD><TD>1995ApJS...98..477H 1997ApJS..112..315H</TD>
	</TR>
	<TR><TD>287.43</TD><TD>-63.85</TD><TD>N 6744</TD><TD>842</TD>
	    <TD>10.4</TD><TD>1995ApJS...96..123T</TD>
        </TR>
	<TR><TD>023.48</TD><TD>+30.66</TD><TD>N  598</TD><TD>-182</TD>
	    <TD>0.7</TD><TD>1997ApJS..112..315H 1973UGC...C......0N</TD>
        </TR>
        </TABLEDATA>
      </DATA>
    </TABLE>
  </RESOURCE>
</VOTABLE>
\end{verbatim}
\ifhtx\End{TABULAR}\fi

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%	Futures 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\iffalse
\subsection{Futures}
We also expect XSchema to allow better modularization of the
document schema, so that, for example, users might put whatever
serialized objects they wish into the table cells. In this way, we
expect to use VOTable for handling the flow of large data objects
through Grid resources, objects such as FITS files or XDF [7]
documents. It would also mean, for example, that the description of a
table could contain arbitrary HTML instead of the current version --
plain text with paragraph markers; or that an XML definition of
non-standard astronomical coordinate systems could be seamlessly
integrated.

VOTable is derived from Astrores, which is specified not only as a way to
write a data table, but also as a way to specify how to address a
{\it request}  to data tables. We
expect to sharpen and formalize this dichotomy with the benefit of
experience, building into VOTable the ways of making
sophisticated querying mechanisms and protocols.

We expect to add features for efficiency in the future also: to
specify that the data stream has a particular sort order, to specify
that a column in one table is a key in to another table; to specify
that one table is an index into another. The binary format will be 
extended to facilitate large-scale streaming.

\section{Data Model}

In this section we define the data model of a VOTable, and in the
next sections its syntax when expressed as XML. The data model of
VOTable can be expressed as:    

\begin{tabular}{ll}
\hspace{3em}&{\bf VOTable} = hierarchy of {\bf Metadata} + {\bf Tables}\\
&{\bf Metadata} = {\bf Parameters} + {\bf Infos} + {\bf Descriptions}
                + {\bf {\fg{black}Links + Fields}}\\
&{\bf Table} = list of {\bf Fields + Data}\\
&{\bf Data} = stream of {\bf Rows}\\
&{\bf Row} = list of {\bf Cells}\\
&{\bf Cell} = {\bf Primitive} \\
&       \begin{tabular}{ll}
        \hspace*{2em}&or variable-length list of {\bf Primitives} \\
        \hspace*{2em}&or multidimensional array of {\bf Primitives}\\
        \end{tabular}\\
&{\bf Primitive} = integer, character, float, floatComplex, etc
(see table below).
\end{tabular}
\fi

\section{Proposed Extensions}
VOTable1.0 proved to be very useful, and the discussions
concerning the limitations found in its usage led us to propose the
following modifications or additions:
\begin{itemize}
\item	the semantic meaning of a \xtag{FIELD} or \xtag{PARAM}
	is currently mainly characterized by the \attr{ucd} attribute, 
	which provides a standardized {\em class of data}.
	%to assess meaningful data intercomparisons -- or briefly to
	%ensure some minimal {\em interoperability}.
	In some applications the role of the \attr{ucd} was found to
	lack the required accuracy.

\item	\xtag{FIELD}s are described as independent columns in VOTable1.0,
	whereas important dependencies often exist among the fields of a table,
	as in the typical example of a value and its associated error.

\item	there is no convenient way of expressing arrays of variable-length strings.

\item	the data can be accessed in VOTable1.0 either in the input stream
	(the data are actually embedded between the \xtag{DATA} and
	\xtag{/DATA} tags), or by a remote access to another stream
	specified by the \xtag{STREAM} element. But the description and
	access to complex data can be much more efficient if some
	columns representing e.g.\ large images could contain just
	a URI (link to the actual data) or the designation of an element
	of a multipart message. In other words, instead of a
	single input or remote stream containing all data,
	some of the fields would just contain a {\em pointer} to the
	actual data.
\end{itemize}

The first 3 items are actually additional features to better express
the {\em metadata} part, while the last expresses a wish to use VOTable
as an interface to data coded in various forms.

\section{Metadata improvements}

\subsection{ The \attr{utype} attribute}
In some contexts, it may be important that \xtag{FIELD}s are 
explicitly designated as being a parameter of a given data model.
For instance, it might be important for an application to know
that a given \xtag{FIELD} expresses {\em the} surface brightness 
processed by an explicit method. None of the existing \attr{name}, \attr{ID}
or \attr{ucd} attributes can fill this role, and we therefore propose
the addition of a \attr{utype} attribute. The respective roles of these
attributes are:
\begin{itemize}
\item	\attr{name}: a name freely defined by the data server
\item	\attr{ID}: an XML identifier designating uniquely a field
	within a document
\item	\attr{ucd}: the characterisation of the field contents,
	with a scope essentially devoted to interoperability
\item	\attr{utype}: the relation to an explicit parameter
	described in a data model. It is moreover proposed that
	the contents of this attribute include a {\em namespace}
	part, as
	\attrval{utype}{{\em namespace}:datamodel-{\em parameter\_name}}
\end{itemize}

    It was indeed proposed during the discussions on UCDs that the
    \attr{ucd} attribute could be replaced by a pointer to some data model
    in the future; in practice it seems rather impossible for the UCD to
    play simultaneously the role of a {\em global meaning} enabling global
    interoperability and the role of defining precisely which
    parameter it represents in the context of a data model.
    The \attr{utype} attribute is a simple solution to this dilemma.

Since \xtag{FIELD} and \xtag{PARAM} share the same set of attributes
(with the exception of the \attr{value} attribute), it is proposed
that the \xtag{PARAM} element can also carry a \attr{utype} attribute.

\subsection{ The \xtag{GROUP} proposal}

The \xtag{GROUP} tag is proposed to group together a set of \xtag{FIELD}s
which are logically correlated, like a value and its error. The fields
participating in a \xtag{GROUP} can be defined either {\em physically} 
(the field is defined within a group), or {\em logically} for fields
just referenced in the group via the \attr{ref} attribute (referencing the
\attr{ID} attribute of another field): the same
physical field (i.e.\ a single column of the table) may therefore 
participate in several groups.

A straightforward example of a group is:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
    <GROUP name="Velocity" ucd="VELOC_HC">
      <DESCRIPTION>Velocity and its error</DESCRIPTION>
      <FIELD name="RVel" ucd="VELOC_HC" datatype="float" 
             width="5" unit="km/s"/>
      <FIELD name="e_RVel" ucd="ERROR" datatype="float" 
             width="3" unit="km/s"/>
    </GROUP>
\end{verbatim}\ifhtx\End{TABULAR}\fi
\par

The \xtag{GROUP} entity can have the \attr{name}, \attr{ID}, \attr{ucd},
\attr{utype} and \attr{ref} attributes.
It can include a \xtag{DESCRIPTION}, \xtag{FIELD}s, \xtag{PARAM}s,
and other \xtag{GROUP}s -- this recursive grouping enabling a definition of
arbitrarily complex structures.

The possibility of adding \xtag{PARAM}s in groups also makes it possible
to describe parameters more accurately, and is an alternative
to the proposal of {\em parametrized UCDs}. For instance,
it is possible to describe the actual frequency of a radio survey with:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
    <GROUP name="Flux" ucd="PHOT_FLUX_RADIO_400M">
      <DESCRIPTION>Flux measured at 352MHz</DESCRIPTION>
      <FIELD name="Flux" ucd="PHOT_FLUX_RADIO_400M" datatype="float" 
             width="6" precision="1" unit="mJy"/>
      <PARAM name="Freq" ucd="OBS_FREQUENCY" unit="MHz" datatype="float"
             value="352"/>
      <FIELD name="e_Flux" ucd="ERROR" datatype="float" width="4" 
             precision="1" unit="mJy"/>
    </GROUP>
\end{verbatim}\ifhtx\End{TABULAR}\fi

\par
Similarly, the \xtag{GROUP} can be used to associate several parameters
with one or several \xtag{FIELD}s: a filter may for instance be
characterized by the central wavelength and the FWHM of its transmission
curve; or several parameters of an instrument setup may be detailed.

\subsection{Arrays of variable-length strings}
Following the FITS conventions, strings are defined as arrays of
characters. This definition raises problems for the definition
of arrays of strings, which have then to be defined as 2D-arrays
of characters -- but in this case only the slowest-varying dimension
(i.e.\ the number of strings) can be variable. Because of this
limitation, the list of references given in the example above
(\elemdef{FIELD}{ \attrval{name}{references}}) was assigned an arraysize
of 20 to take into account the blank which separates two references
made of 19 characters each.

FITS invented the {\em Substring Array} convention (defined in an appendix,
i.e. not officially approved) which defines a {\em separator} character
used to denote the end of a string and the beginning of the next one.
In this convention ($r${\tt A:SSTR}$w$/$ccc$) the total size of the character
array is specified by $r$, $w$ defines the maximal length of one string,
and $ccc$ defines the separator character as its ascii equivalent value.
The possible values for the separator include the space and any printable
character, but exclude the control characters.

Such arrays of variable-length strings being frequently used,
a similar convention can be introduced in VOTable in the \attr{arraysize}
attribute, using the letter {\bf s} followed by the separator character;
an example can be \quad \attrval{arraysize}{100s,}\quad
indicating a string made of up to 100 characters, where the comma
is used to separate the elements of the array.

\section{Diversified data streaming}

Rather than requiring that all data described in the set of \xtag{FIELD}s
are contained in a single stream which follows the metadata part, 
it is proposed to let the \xtag{FIELD} act as 
a {\em pointer} to the actual data, either in the form of a URI or of
a reference to a component of a multipart document.

Each component of the data described by a \xtag{FIELD} may indeed
have different requirements: while text data or small lists of numbers
are quite efficiently represented in pure XML, long lists like spectra
or images perform poorly when they are converted to XML.
The method proposed in VOTable1.0 to gain efficiency is to use a
binary representation of the {\em whole data stream} by means of the
\xtag{STREAM} element -- at the price of delivering data that are not
human-readable at all.

\subsection{The \attrval{type}{location} attribute}
In order to enable more flexibility in the way the various \xtag{FIELD}s
can be accessed, the following additions are proposed:

\begin{itemize}
\item	a \xtag{FIELD} can be declared as being a {\em pointer}
	with the addition of a \attrval{type}{location} value,
	meaning that the field contains a way to access the data, 
	and not the actual data.
\item	a \xtag{FIELD} can contain a \xtag{LINK} element marked 
	\attrval{type}{location} which contains in its
	\attr{href} attribute the partial URI to which the contents
	of the column cell is appended in order to generate a
	fully qualified URI.
\end{itemize}
Note that the \xtag{LINK} is not required -- a \xtag{FIELD} declared
with \attrval{type}{location} and containing no \xtag{LINK} element
is assumed to contain URIs.

An example of a table describing a set of spectra looks like the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
<TABLE name="SpectroLog">
  <FIELD name="Target" ucd="ID_TARGET" datatype="char" arraysize="30*"/>
  <FIELD name="Instr" ucd="INST_SETUP" datatype="char" arraysize="5*"/>
  <FIELD name="Dur" ucd="TIME_EXPTIME" datatype="int" width="5" unit="s"/>
  <FIELD name="Spectrum" ucd="DATA_LINK" datatype="float" arraysize="*"
         unit="mW/m2/nm" type="location">
    <DESCRIPTION>Spectrum absolutely calibrated</DESCRIPTION>
    <LINK type="location" 
        href="http://ivoa.spectr/server?obsno="/>
  </FIELD>
  <DATA><TABLEDATA>
    <TR><TD>NGC6543</TD><TD>SWS06</TD><TD>2028</TD><TD>01301903</TD></TR>
    <TR><TD>NGC6543</TD><TD>SWS07</TD><TD>2544</TD><TD>01302004</TD></TR>
  </TABLEDATA></DATA>
</TABLE>
\end{verbatim}\ifhtx\End{TABULAR}\fi
The reading program has therefore to retrieve the data by resolving the URI
\Plain{\tt http://ivoa.spectr/server?obsno=01301903}.

The same method could also be applied immediately to {\em Content-ID}s
which designate elements of a multipart message, using the protocol
prefix {\tt cid:} (RFC 2111).


\subsection{The \attr{encoding} attribute in \xtag{TD}}
Accessing binary data improves quite significantly the efficiency
both in storage and CPU usage, especially when one compares with the
XML-encoded data stream. But binary data cannot be included in the
same stream as the metadata description, unless a dedicated coding
filter is applied which converts the binary data into an ascii representation.
Base64 is the most widely used filter for this conversion:
3 bytes of data are coded as 4 ascii characters, which implies an overhead of
33\% in storage, and some (small) computing time for the reverse
transformation.

In order to keep the full VOTable document in a unique stream,
VOTable1.0 introduced the \attr{encoding} attribute in the
\xtag{STREAM} element, meaning that the data, stored as binary records,
are converted into some ascii representation compatible with the 
XML definitions. One drawback of this method is that the entire data
contents become unreadable for humans.
%it should also be noted that the
%binary encoding of the full records can result in a waste of storage
%when the data contains arrays which size can vary widely from record
%to record.

The addition of the \attr{encoding} attribute in the \xtag{TD} element
allows the data server to decide, at the cell level, whether it is more
efficient to distribute the data as binary-encoded or as fully edited
values. The result may look like the following:

\ifhtx\Beg{TABULAR}{\bg{LightCyan} CELLPADDING=5}{||l||}\fi
\begin{verbatim}
<TABLE name="SpectroLog">
  <FIELD name="Target" ucd="ID_TARGET" datatype="char" arraysize="30*"/>
  <FIELD name="Instr" ucd="INST_SETUP" datatype="char" arraysize="5*"/>
  <FIELD name="Dur" ucd="TIME_EXPTIME" datatype="int" width="5" unit="s"/>
  <FIELD name="Spectrum" ucd="SPECT_FLUX_VALUE" datatype="float" arraysize="*"
         unit="mW/m2/nm"/>
  <DATA><TABLEDATA>
    <TR><TD>NGC6543</TD><TD>SWS06</TD><TD>2028</TD><TD encoding="base64">
    QJKPXECHvndAgMScQHul40CSLQ5ArocrQLxiTkC3XClAq0OWQKQIMUCblYFAh753QGij10BT
    Em9ARKwIQExqf0BqbphAieuFQJS0OUCJWBBAhcrBQJMzM0CmRaJAuRaHQLWZmkCyhytAunbJ
    QLN87kC26XlA1KwIQOu+d0DsWh1A5an8QN0m6UDOVgRAxO2RQM9Lx0Din75A3o9cQMPfO0C/
    dLxAvUeuQKN87kCXQ5ZAjFodQH0vG0B/jVBAgaHLQI7Ag0CiyLRAqBBiQLaXjUDYcrBA8p++
    QPcKPUDg7ZFAwcKPQLafvkDDlYFA1T99QM2BBkCs3S9AjLxqQISDEkCO6XlAmlYEQKibpkC5
    wo9AvKPXQLGBBkCs9cNAuGp/QL0euEC4crBAuR64QL6PXEDOTdNA2987QN9T+EDoMSdA8mZm
    QOZumEDDZFpAmmZmQGlYEEBa4UhAivGqQLel40Dgan9A4WBCQLNcKUCIKPZAk1P4QNWRaEEP
    kWhBKaHLQTkOVkFEan9BUWBCQVyfvg==
    </TD></TR>
  </TABLEDATA></DATA>
</TABLE>
\end{verbatim}\ifhtx\End{TABULAR}\fi
\par

\thickrule\par
\begin{TABULAR}{}{lr}
%Roy {\bf Williams}, California Institute of Technology, USA \\
Fran\c cois {\bf Ochsenbein}& Observatoire Astronomique de Strasbourg, France \\
%Clive {\bf Davenhall}, University of Edinburgh, UK \\
%Daniel {\bf Durand}, Canadian Astronomy Data Centre, Canada \\
%Pierre {\bf Fernique}, Observatoire Astronomique de Strasbourg, France \\
%David {\bf Giaretta}, Rutherford Appleton Laboratory, UK \\
%Robert {\bf Hanisch},  Space Telescope Science Institute, USA \\
%Tom {\bf McGlynn}, NASA Goddard Space Flight Center, USA \\
%Alex {\bf Szalay}, Johns Hopkins University, USA \\
%Andreas {\bf Wicenec}, European Southern Observatory, Germany \\
\end{TABULAR}
\end{document}


